[Rpm-metadata] createrepo/__init__.py createrepo/readMetadata.py createrepo/utils.py modifyrepo.py

Seth Vidal skvidal at linux.duke.edu
Fri Feb 1 00:15:08 UTC 2008

 createrepo/__init__.py     |   81 ++++++++++++++++++++++++++++++++-------------
 createrepo/readMetadata.py |   18 ++++++----
 createrepo/utils.py        |   15 ++++++++
 modifyrepo.py              |   30 +++++++++-------
 4 files changed, 101 insertions(+), 43 deletions(-)

New commits:
commit 3902e8b6aa2c0fc23da2dc2459f151b2b43414e5
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Thu Jan 31 19:14:34 2008 -0500

    - make sure group files are compressed/sha-named
    - add group_gz section for compressed group file
    - add addArbitraryMetadata() method to MetaDataGenerator class
    - fix up modifyrepo to generate sha-named files
    - make modifyrepo act a bit more like createrepo for its operations

diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 1df6923..2889412 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -37,7 +37,7 @@ except ImportError:
-from utils import _gzipOpen, bzipFile, checkAndMakeDir
+from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, checksum_and_rename
 __version__ = '0.9.4'
@@ -461,8 +461,60 @@ class MetaDataGenerator:
+    def addArbitraryMetadata(self, mdfile, mdtype, xml_node, compress=True):
+        """add random metadata to the repodata dir and repomd.xml
+           mdfile = complete path to file
+           mdtype = the metadata type to use
+           xml_node = the node of the repomd xml object to append this 
+                      data onto
+           compress = compress the file before including it
+        """
+        # copy the file over here
+        sfile = os.path.basename(mdfile)
+        fo = open(mdfile, 'r')
+        outdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        if compress:
+            sfile = '%s.gz' % sfile
+            outfn = os.path.join(outdir, sfile)
+            output = GzipFile(filename = outfn, mode='wb')
+        else:
+            outfn  = os.path.join(outdir, sfile)
+            output = open(outfn, 'w')
+        output.write(fo.read())
+        output.close()
+        fo.seek(0)
+        open_csum = misc.checksum(self.conf.sumtype, fo)
+        fo.close()
+        if self.conf.unique_md_filenames:
+            (csum, outfn) = checksum_and_rename(outfn)
+            sfile = os.path.basename(outfn)
+        else:
+            if compress:
+                csum = misc.checksum(self.conf.sumtype, outfn)            
+            else:
+                csum = open_csum
+        timest = os.stat(outfn)[8]
+        # add all of this information to the xml node as a new 'data' child element:
+        data = xml_node.newChild(None, 'data', None)
+        data.newProp('type', mdtype)
+        location = data.newChild(None, 'location', None)
+        if self.conf.baseurl is not None:
+            location.newProp('xml:base', self.conf.baseurl)
+        location.newProp('href', os.path.join(self.conf.finaldir, sfile))
+        checksum = data.newChild(None, 'checksum', csum)
+        checksum.newProp('type', self.conf.sumtype)
+        if compress:
+            opencsum = data.newChild(None, 'open-checksum', open_csum)
+            opencsum.newProp('type', self.conf.sumtype)
+        timestamp = data.newChild(None, 'timestamp', str(timest))
     def doRepoMetadata(self):
         """wrapper to generate the repomd.xml file that stores the info on the other files"""
         repodoc = libxml2.newDoc("1.0")
@@ -587,28 +639,11 @@ class MetaDataGenerator:
         if not self.conf.quiet and self.conf.database: self.callback.log('Sqlite DBs complete')        
-        # if we've got a group file then checksum it once and be done
-        if self.conf.groupfile is not None:
-            grpfile = self.conf.groupfile
-            timestamp = os.stat(grpfile)[8]
-            sfile = os.path.basename(grpfile)
-            fo = open(grpfile, 'r')
-            output = open(os.path.join(self.conf.outputdir, self.conf.tempdir, sfile), 'w')
-            output.write(fo.read())
-            output.close()
-            fo.seek(0)
-            csum = misc.checksum(sumtype, fo)
-            fo.close()
-            data = reporoot.newChild(None, 'data', None)
-            data.newProp('type', 'group')
-            location = data.newChild(None, 'location', None)
-            if self.conf.baseurl is not None:
-                location.newProp('xml:base', self.conf.baseurl)
-            location.newProp('href', os.path.join(self.conf.finaldir, sfile))
-            checksum = data.newChild(None, 'checksum', csum)
-            checksum.newProp('type', sumtype)
-            timestamp = data.newChild(None, 'timestamp', str(timestamp))
+        if self.conf.groupfile is not None:
+            self.addArbitraryMetadata(self.conf.groupfile, 'group_gz', reporoot)
+            self.addArbitraryMetadata(self.conf.groupfile, 'group', reporoot, compress=False)            
         # save it down
diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
index ea2c400..faffe69 100644
--- a/createrepo/readMetadata.py
+++ b/createrepo/readMetadata.py
@@ -33,13 +33,17 @@ class MetadataIndex(object):
         self.outputdir = outputdir
         repodatadir = self.outputdir + '/repodata'
         myrepomdxml = repodatadir + '/repomd.xml'
-        repomd = repoMDObject.RepoMD('garbageid', myrepomdxml)
-        b = repomd.getData('primary').location[1]
-        f = repomd.getData('filelists').location[1]
-        o = repomd.getData('other').location[1]
-        basefile = os.path.join(self.outputdir, b)
-        filelistfile = os.path.join(self.outputdir, f)
-        otherfile = os.path.join(self.outputdir, o)
+        if os.path.exists(myrepomdxml):
+            repomd = repoMDObject.RepoMD('garbageid', myrepomdxml)
+            b = repomd.getData('primary').location[1]
+            f = repomd.getData('filelists').location[1]
+            o = repomd.getData('other').location[1]
+            basefile = os.path.join(self.outputdir, b)
+            filelistfile = os.path.join(self.outputdir, f)
+            otherfile = os.path.join(self.outputdir, o)
+        else:
+            basefile = filelistfile = otherfile = ""
         self.files = {'base' : basefile,
                       'filelist' : filelistfile,
                       'other' : otherfile}
diff --git a/createrepo/utils.py b/createrepo/utils.py
index 13d1d67..1b8c147 100644
--- a/createrepo/utils.py
+++ b/createrepo/utils.py
@@ -17,10 +17,12 @@
 import os
+import os.path
 import sys
 import bz2
 import gzip
 from gzip import write32u, FNAME
+from yum import misc
 def errorprint(stuff):
     print >> sys.stderr, stuff
@@ -122,3 +124,16 @@ def checkAndMakeDir(dir):
             result = True
     return result
+def checksum_and_rename(fn_path):
+    """checksum the file rename the file to contain the checksum as a prefix
+       return the checksum and the new path as a (csum, csum_path) tuple"""
+    csum = misc.checksum('sha', fn_path)
+    fn = os.path.basename(fn_path)
+    fndir = os.path.dirname(fn_path)
+    csum_fn = csum + '-' + fn
+    csum_path = os.path.join(fndir, csum_fn)
+    os.rename(fn_path, csum_path)
+    return (csum, csum_path)
diff --git a/modifyrepo.py b/modifyrepo.py
index d4b3b00..b86f53e 100755
--- a/modifyrepo.py
+++ b/modifyrepo.py
@@ -19,11 +19,13 @@
 # (C) Copyright 2006  Red Hat, Inc.
 # Luke Macken <lmacken at redhat.com>
+# modified by Seth Vidal 2008
 import os
 import sys
-import sha
-import gzip
+from createrepo.utils import checksum_and_rename, GzipFile
+from yum.misc import checksum
 from xml.dom import minidom
@@ -73,15 +75,17 @@ class RepoMetadata:
         mdname += '.gz'
         mdtype = mdname.split('.')[0]
         destmd = os.path.join(self.repodir, mdname)
-        newmd = gzip.GzipFile(destmd, 'wb')
-        newmd.write(md.encode('utf-8'))
+        newmd = GzipFile(filename=destmd, mode='wb')
+        newmd.write(md)
         print "Wrote:", destmd
-        ## Read the gzipped metadata
-        f = file(destmd, 'r')
-        newmd = f.read()
-        f.close()
+        open_csum = checksum('sha', metadata)
+        csum, destmd = checksum_and_rename(destmd)
+        base_destmd = os.path.basename(destmd)
         ## Remove any stale metadata
         for elem in self.doc.getElementsByTagName('data'):
@@ -95,25 +99,25 @@ class RepoMetadata:
         data.appendChild(self.doc.createTextNode("\n    "))
         self._insert_element(data, 'location',
-                             attrs={ 'href' : 'repodata/' + mdname })
+                             attrs={ 'href' : 'repodata/' + base_destmd })
         data.appendChild(self.doc.createTextNode("\n    "))
         self._insert_element(data, 'checksum', attrs={ 'type' : 'sha' },
-                             text=sha.new(newmd).hexdigest())
+                             text=csum)
         data.appendChild(self.doc.createTextNode("\n    "))
         self._insert_element(data, 'timestamp',
         data.appendChild(self.doc.createTextNode("\n    "))
         self._insert_element(data, 'open-checksum', attrs={ 'type' : 'sha' },
-                             text=sha.new(md).hexdigest())
+                             text=open_csum)
         data.appendChild(self.doc.createTextNode("\n  "))
         print "           type =", mdtype 
         print "       location =", 'repodata/' + mdname
-        print "       checksum =", sha.new(newmd).hexdigest()
+        print "       checksum =", csum
         print "      timestamp =", str(os.stat(destmd).st_mtime)
-        print "  open-checksum =", sha.new(md.encode('utf-8')).hexdigest()
+        print "  open-checksum =", open_csum
         ## Write the updated repomd.xml
         outmd = file(self.repomdxml, 'w')

