[yum-cvs] yum/repomd repoMDObject.py,1.2,1.3

Seth Vidal skvidal at linux.duke.edu
Wed May 24 21:24:45 UTC 2006


Update of /home/groups/yum/cvs/yum/repomd
In directory login1.linux.duke.edu:/tmp/cvs-serv10580

Modified Files:
	repoMDObject.py 
Log Message:

- make repomd.xml parsing use cElementTree
- vastly simplify adding data types
- need to clean up yumRepo.py and friends for this change


Index: repoMDObject.py
===================================================================
RCS file: /home/groups/yum/cvs/yum/repomd/repoMDObject.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- repoMDObject.py	29 May 2004 21:39:30 -0000	1.2
+++ repoMDObject.py	24 May 2006 21:24:43 -0000	1.3
@@ -12,171 +12,106 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-# Copyright 2003 Duke University
+# Copyright 2006 Duke University
 
-
-import libxml2
+# TODO - exception raising
+from cElementTree import iterparse
 from mdErrors import RepoMDError
 
+import sys
+
+def ns_cleanup(qn):
+    if qn.find('}') == -1: return qn 
+    return qn.split('}')[1]
+
+class RepoData:
+    """represents anything beneath a <data> tag"""
+    def __init__(self, elem):
+        self.type = elem.attrib.get('type')
+        self.location = ""
+        self.checksums = [] # type,value
+        self.openchecksums = [] # type,value
+        self.timestamp = None
+    
+        self.parse(elem)
 
+    def parse(self, elem):
+        
+        for child in elem:
+            child_name = ns_cleanup(child.tag)
+            if child_name == 'location':
+                self.location = child.attrib.get('href')
+            
+            elif child_name == 'checksum':
+                csum_value = child.text
+                csum_type = child.attrib.get('type')
+                self.checksums.append((csum_type,csum_value))
+
+            elif child_name == 'open-checksum':
+                csum_value = child.text
+                csum_type = child.attrib.get('type')
+                self.openchecksums.append((csum_type, csum_value))
+            
+            elif child_name == 'timestamp':
+                self.timestamp = child.text
+    
+        
 class RepoMD:
     """represents the repomd xml file"""
-    def __init__(self, repoid, file):
+    
+    def __init__(self, repoid, srcfile):
         """takes a repoid and a filename for the repomd.xml"""
         
         self.repoid = repoid
         self.repoData = {}
-        try:
-            doc = libxml2.parseFile(file)
-        except libxml2.parserError:
-            raise RepoMDError, 'Error: could not parse file %s' % file
-        root = doc.getRootElement()
-        xmlfiletype = root.name
-        node = root.children
-        if xmlfiletype == 'repomd':
-            self.loadRepoMD(node)
+        
+        if type(srcfile) == type('str'):
+            # srcfile is a filename string
+            infile = open(srcfile, 'rt')
         else:
-            raise RepoMDError, 'Error: other unknown root element %s' % xmlfiletype 
-        doc.freeDoc()
+            # srcfile is a file object
+            infile = srcfile
+        
+        parser = iterparse(infile)
 
-    def _returnData(self, mdtype, request):
-        """ return the data from the repository Data"""
-        if self.repoData.has_key(mdtype):
-            ds = self.repoData[mdtype]
-            if ds.has_key(request):
-                return ds[request]
-            else:
-                raise RepoMDError, "Error: request %s not in %s data" % (request, mdtype)
-        else:
-            raise RepoMDError, "Error: odd MDtype requested: %s" % mdtype
-            
+        for event, elem in parser:
+            elem_name = ns_cleanup(elem.tag)
             
+            if elem_name == "data":
+                thisdata = RepoData(elem=elem)
+                self.repoData[thisdata.type] = thisdata
             
-    
-    def _storeRepoData(self, mdtype, dataname, data):
-        """stores repository data
-           mdtype = primary, filelists, other, group
-           dataname = checksum, timestamp, basepath, relativepath
-        """
-        if self.repoData.has_key(mdtype):
-            ds = self.repoData[mdtype]
-            if not ds.has_key(dataname):
-                ds[dataname] = data
-            else:
-                raise RepoMDError, "Warning: duplicate data of %s description inputted" % dataname
-        else:
-            raise RepoMDError, "Warning: odd mdtype being put in %s" % mdtype
-            
-                
-                
-                
-    def loadRepoDataNode(self, node):
-        """loads a repository data node into the class"""
-        mdtype = node.prop('type') # get the 'type' property for the datanode
-        if not self.repoData.has_key(mdtype):
-            self.repoData[mdtype] = {}
-            
-        datanode = node.children            
-        while datanode is not None:
-            if datanode.type != 'element':
-                datanode = datanode.next
-                continue
-            
-            if datanode.name  == 'location':
-                base = datanode.prop('base')
-                relative = datanode.prop('href')    
-                self._storeRepoData(mdtype, 'basepath', base)
-                self._storeRepoData(mdtype, 'relativepath', relative)
-            elif datanode.name == 'checksum':
-                csumType = datanode.prop('type')
-                csum = datanode.content
-                self._storeRepoData(mdtype, 'checksum', (csumType, csum))
-            elif datanode.name == 'timestamp':
-                timestamp = datanode.content
-                self._storeRepoData(mdtype, 'timestamp', timestamp)
-
-            datanode = datanode.next    
-            continue
-
-    def loadRepoMD(self, node):
-        """iterates through the data nodes and populates some simple data areas"""
-                
-        while node is not None:
-            if node.type != 'element':
-                node = node.next
-                continue
-            
-            if node.name == 'data':
-                self.loadRepoDataNode(node)
-                    
-            node = node.next
-            continue
-                
-    def _checksum(self, mdtype):
-        """returns a tuple of (checksum type, checksum) for the specified Metadata
-           file"""
-        return self._returnData(mdtype, 'checksum')
-        
-        
-    def _location(self, mdtype):
-        """returns location to specified metadata file, (base, relative)"""
-        base = self._returnData(mdtype, 'basepath')
-        relative = self._returnData(mdtype, 'relativepath')
-        
-        return (base, relative)
-        
-    def _timestamp(self, mdtype):
-        """returns timestamp for specified metadata file"""
-        return self._returnData(mdtype, 'timestamp')
-        
-    def otherChecksum(self):
-        """returns a tuple of (checksum type, checksum) for the other Metadata file"""
-        return self._checksum('other')
-        
-    def otherLocation(self):
-        """returns location to other metadata file, (base, relative)"""
-        return self._location('other')
-        
-    def otherTimestamp(self):
-        """returns timestamp for other metadata file"""
-        return self._timestamp('other')
-        
-    def primaryChecksum(self):
-        """returns a tuple of (checksum type, checksum) for the primary Metadata file"""
-        return self._checksum('primary')
-        
-    def primaryLocation(self):
-        """returns location to primary metadata file, (base, relative)"""
-        return self._location('primary')
-        
-    def primaryTimestamp(self):
-        """returns timestamp for primary metadata file"""
-        return self._timestamp('primary')
-
-    def filelistsChecksum(self):
-        """returns a tuple of (checksum type, checksum) for the filelists Metadata file"""
-        return self._checksum('filelists')
-        
-    def filelistsLocation(self):
-        """returns location to filelists metadata file, (base, relative)"""
-        return self._location('filelists')
-        
-    def filelistsTimestamp(self):
-        """returns timestamp for filelists metadata file"""
-        return self._timestamp('filelists')
-
-    def groupChecksum(self):
-        """returns a tuple of (checksum type, checksum) for the group Metadata file"""
-        return self._checksum('group')
-        
-    def groupLocation(self):
-        """returns location to group metadata file, (base, relative)"""
-        return self._location('group')
-        
-    def groupTimestamp(self):
-        """returns timestamp for group metadata file"""
-        return self._timestamp('group')
-
     def fileTypes(self):
         """return list of metadata file types available"""
         return self.repoData.keys()
+    
+    def dump(self):
+        """dump fun output"""
+        
+        for ft in self.fileTypes():
+            thisdata = self.repoData[ft]
+            print 'datatype: %s' % thisdata.type
+            print 'location: %s' % thisdata.location
+            print 'timestamp: %s' % thisdata.timestamp
+            print 'checksums:'
+            for (type, value) in thisdata.checksums:
+                print '  %s - %s' % (type, value)
+            print 'open checksums:'
+            for (type, value) in thisdata.openchecksums:
+                print '  %s - %s' % (type, value)
+            
+
+def main():
+
+    try:
+        print sys.argv[1]
+        p = RepoMD('repoid', sys.argv[1])
+        p.dump()
+        
+    except IOError:
+        print >> sys.stderr, "newcomps.py: No such file:\'%s\'" % sys.argv[1]
+        sys.exit(1)
+        
+if __name__ == '__main__':
+    main()
+




More information about the Yum-cvs-commits mailing list