[Rpm-metadata] createrepo/__init__.py createrepo/utils.py createrepo/yumbased.py genpkgmetadata.py

Seth Vidal skvidal at linux.duke.edu
Thu Jan 3 17:27:26 UTC 2008


 createrepo/__init__.py |  394 +++++++++++++++++++++++++------------------------
 createrepo/utils.py    |   23 ++
 createrepo/yumbased.py |   51 ++----
 genpkgmetadata.py      |  360 +++++++++++++++-----------------------------
 4 files changed, 372 insertions(+), 456 deletions(-)

New commits:
commit ff26a86251cac6da7fc5a8aa19c567df5ecd3924
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Thu Jan 3 12:26:45 2008 -0500

    - port to optionparser from getopt
    - redo config class to make use outside of cli more do-able
    - handle repomd.xml creation in class, too
    
    - still have a lot of changes to complete

diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 2ab8708..7f30d91 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -7,10 +7,11 @@ import fnmatch
 import hashlib
 import rpm
 import yumbased
+from optparse import OptionContainer
 
 
 from yum import misc
-from utils import _
+from utils import _, errorprint
 import readMetadata
 
 try:
@@ -33,10 +34,44 @@ class MDError(exceptions.Exception):
     def __str__(self):
         return self.value
 
+class MetaDataConfig(object):
+    def __init__(self):
+        self.quiet = False
+        self.verbose = False
+        self.excludes = []
+        self.baseurl = ''
+        self.groupfile = None
+        self.sumtype = 'sha'
+        self.noepoch = False #???
+        self.pretty = False
+        self.cachedir = None
+        self.basedir = os.getcwd()
+        self.use_cache = False
+        self.checkts = False
+        self.split = False        
+        self.update = False
+        self.database = False
+        self.outputdir = None
+        self.file_patterns = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
+        self.dir_patterns = ['.*bin\/.*', '^\/etc\/.*']
+        self.skip_symlinks = False
+        self.pkglist = []
+        self.primaryfile = 'primary.xml.gz'
+        self.filelistsfile = 'filelists.xml.gz'
+        self.otherfile = 'other.xml.gz'
+        self.repomdfile = 'repomd.xml'
+        self.tempdir = '.repodata'
+        self.finaldir = 'repodata'
+        self.olddir = '.olddata'
+        self.mdtimestamp = 0
+
 
 class MetaDataGenerator:
-    def __init__(self, cmds):
-        self.cmds = cmds
+    def __init__(self, config_obj=None):
+        self.conf = config_obj
+        if config_obj == None:
+            self.conf = MetaDataConfig()
+            
         self.ts = rpm.TransactionSet()
         self.pkgcount = 0
         self.files = []
@@ -67,7 +102,7 @@ class MetaDataGenerator:
             for fn in names:
                 if os.path.isdir(fn):
                     continue
-                if self.cmds['skip-symlinks'] and os.path.islink(fn):
+                if self.conf.skip_symlinks and os.path.islink(fn):
                     continue
                 elif fn[-extlen:].lower() == '%s' % (ext):
                     relativepath = dirname.replace(startdir, "", 1)
@@ -78,23 +113,30 @@ class MetaDataGenerator:
         startdir = os.path.join(basepath, directory) + '/'
         self._os_path_walk(startdir, extension_visitor, filelist)
         return filelist
-    #module
-    def checkTimeStamps(self, directory):
-        if self.cmds['checkts']:
-            files = self.getFileList(self.cmds['basedir'], directory, '.rpm')
+
+
+    def checkTimeStamps(self):
+        """check the ctime of the rpms in our target dir against the repodata timestamp.
+           Return True if the rpm checked is not newer than the repodata, else False"""
+        if self.conf.checkts:
+            files = self.getFileList(self.conf.basedir, self.conf.directory, '.rpm')
             files = self.trimRpms(files)
             for f in files:
-                fn = os.path.join(self.cmds['basedir'], directory, f)
+                fn = os.path.join(self.conf.basedir, self.conf.directory, f)
                 if not os.path.exists(fn):
+                    #FIXME - raise don't print here
                     errorprint(_('cannot get to file: %s') % fn)
-                if os.path.getctime(fn) > self.cmds['mdtimestamp']:
+                if os.path.getctime(fn) > self.conf.mdtimestamp:
                     return False
-        return True
-    #module
+                else:
+                    return True
+                
+        return False
+
     def trimRpms(self, files):
         badrpms = []
         for file in files:
-            for glob in self.cmds['excludes']:
+            for glob in self.conf.excludes:
                 if fnmatch.fnmatch(file, glob):
                     # print 'excluded: %s' % file
                     if file not in badrpms:
@@ -104,26 +146,28 @@ class MetaDataGenerator:
                 files.remove(file)
         return files
 
-    def doPkgMetadata(self, directory):
+    def doPkgMetadata(self, directory=None):
         """all the heavy lifting for the package metadata"""
-
+        if not directory:
+            directory = self.conf.directory
+            
         # rpms we're going to be dealing with
-        if self.cmds['update']:
+        if self.conf.update:
             #build the paths
-            primaryfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['primaryfile'])
-            flfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['filelistsfile'])
-            otherfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['otherfile'])
+            primaryfile = os.path.join(self.conf.outputdir, self.conf.finaldir, self.conf.primaryfile)
+            flfile = os.path.join(self.conf.outputdir, self.conf.finaldir, self.conf.filelistsfile)
+            otherfile = os.path.join(self.conf.outputdir, self.conf.finaldir, self.conf.otherfile)
             opts = {
-                'verbose' : self.cmds['verbose'],
-                'pkgdir' : os.path.normpath(os.path.join(self.cmds['basedir'], directory))
+                'verbose' : self.conf.verbose,
+                'pkgdir' : os.path.normpath(os.path.join(self.conf.basedir, directory))
             }
             #and scan the old repo
-            self.oldData = readMetadata.MetadataIndex(self.cmds['outputdir'],
+            self.oldData = readMetadata.MetadataIndex(self.conf.outputdir,
                                                       primaryfile, flfile, otherfile, opts)
-        if self.cmds['pkglist']:
-            packages = self.cmds['pkglist']
+        if self.conf.pkglist:
+            packages = self.conf.pkglist
         else:
-            packages = self.getFileList(self.cmds['basedir'], directory, '.rpm')
+            packages = self.getFileList(self.conf.basedir, directory, '.rpm')
             
         packages = self.trimRpms(packages)
         self.pkgcount = len(packages)
@@ -139,7 +183,7 @@ class MetaDataGenerator:
 
     def _setupPrimary(self):
         # setup the primary metadata file
-        primaryfilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['primaryfile'])
+        primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, self.conf.primaryfile)
         fo = _gzipOpen(primaryfilepath, 'w')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">\n' %
@@ -148,7 +192,7 @@ class MetaDataGenerator:
 
     def _setupFilelists(self):
         # setup the filelist file
-        filelistpath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['filelistsfile'])
+        filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir, self.conf.filelistsfile)
         fo = _gzipOpen(filelistpath, 'w')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%s">\n' %
@@ -157,45 +201,18 @@ class MetaDataGenerator:
         
     def _setupOther(self):
         # setup the other file
-        otherfilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['otherfile'])
+        otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, self.conf.otherfile)
         fo = _gzipOpen(otherfilepath, 'w')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s">\n' %
                        self.pkgcount)
         return fo
         
-    def _getNodes(self, pkg, directory, current):
-        # delete function since it seems to nothing anymore
-        basenode = None
-        filesnode = None
-        othernode = None
-        try:
-            rpmdir= os.path.join(self.cmds['basedir'], directory)
-            mdobj = dumpMetadata.RpmMetaData(self.ts, rpmdir, pkg, self.cmds)
-        except dumpMetadata.MDError, e:
-            errorprint('\n%s - %s' % (e, pkg))
-            return None
-        try:
-            basenode = dumpMetadata.generateXML(self.basedoc, self.baseroot, self.formatns, mdobj, self.cmds['sumtype'])
-        except dumpMetadata.MDError, e:
-            errorprint(_('\nAn error occurred creating primary metadata: %s') % e)
-            return None
-        try:
-            filesnode = dumpMetadata.fileListXML(self.filesdoc, self.filesroot, mdobj)
-        except dumpMetadata.MDError, e:
-            errorprint(_('\nAn error occurred creating filelists: %s') % e)
-            return None
-        try:
-            othernode = dumpMetadata.otherXML(self.otherdoc, self.otherroot, mdobj)
-        except dumpMetadata.MDError, e:
-            errorprint(_('\nAn error occurred: %s') % e)
-            return None
-        return basenode,filesnode,othernode
 
     def read_in_package(self, directory, rpmfile):
         # XXX fixme try/excepts here
         # directory is stupid - just make it part of the class
-        rpmfile = '%s/%s/%s' % (self.cmds['basedir'], directory, rpmfile)
+        rpmfile = '%s/%s/%s' % (self.conf.basedir, directory, rpmfile)
         po = yumbased.CreateRepoPackage(self.ts, rpmfile)
         return po
 
@@ -210,7 +227,7 @@ class MetaDataGenerator:
             
             # look to see if we can get the data from the old repodata
             # if so write this one out that way
-            if self.cmds['update']:
+            if self.conf.update:
                 #see if we can pull the nodes from the old repo
                 nodes = self.oldData.getNodes(pkg)
                 if nodes is not None:
@@ -221,7 +238,8 @@ class MetaDataGenerator:
             if not recycled:
                 #scan rpm files
                 po = self.read_in_package(directory, pkg)
-                self.primaryfile.write(po.do_primary_xml_dump())
+                reldir = os.path.join(self.conf.basedir, directory)
+                self.primaryfile.write(po.do_primary_xml_dump(reldir, baseurl=self.conf.baseurl))
                 self.flfile.write(po.do_filelists_xml_dump())
                 self.otherfile.write(po.do_other_xml_dump())
             else:
@@ -233,14 +251,14 @@ class MetaDataGenerator:
                                       (othernode,self.otherfile)):
                     if node is None:
                         break
-                    output = node.serialize('UTF-8', self.cmds['pretty'])
+                    output = node.serialize('UTF-8', self.conf.pretty)
                     outfile.write(output)
                     outfile.write('\n')
   
                     self.oldData.freeNodes(pkg)
 
-            if not self.cmds['quiet']:
-                if self.cmds['verbose']:
+            if not self.conf.quiet:
+                if self.conf.verbose:
                     print '%d/%d %s %s' % (current, self.pkgcount, sep, pkg)
                 else:
                     sys.stdout.write('\r' + ' ' * 80)
@@ -251,39 +269,146 @@ class MetaDataGenerator:
 
 
     def closeMetadataDocs(self):
-        if not self.cmds['quiet']:
+        if not self.conf.quiet:
             print ''
 
         # save them up to the tmp locations:
-        if not self.cmds['quiet']:
+        if not self.conf.quiet:
             print _('Saving Primary metadata')
         self.primaryfile.write('\n</metadata>')
         self.primaryfile.close()
 
-        if not self.cmds['quiet']:
+        if not self.conf.quiet:
             print _('Saving file lists metadata')
         self.flfile.write('\n</filelists>')
         self.flfile.close()
 
-        if not self.cmds['quiet']:
+        if not self.conf.quiet:
             print _('Saving other metadata')
         self.otherfile.write('\n</otherdata>')
         self.otherfile.close()
 
+
+
     def doRepoMetadata(self):
         """wrapper to generate the repomd.xml file that stores the info on the other files"""
         repodoc = libxml2.newDoc("1.0")
         reporoot = repodoc.newChild(None, "repomd", None)
         repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None)
         reporoot.setNs(repons)
-        repofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['repomdfile'])
+        repopath = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        repofilepath = os.path.join(repopath, self.conf.repomdfile)
+
+        sumtype = self.conf.sumtype
+        workfiles = [(self.conf.otherfile, 'other',),
+                     (self.conf.filelistsfile, 'filelists'),
+                     (self.conf.primaryfile, 'primary')]
+        repoid='garbageid'
+        
+        if self.conf.database:
+            try:
+                dbversion = str(sqlitecachec.DBVERSION)
+            except AttributeError:
+                dbversion = '9'
+            rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)
+
+        for (file, ftype) in workfiles:
+            complete_path = os.path.join(repopath, file)
+            
+            zfo = _gzipOpen(complete_path)
+            uncsum = misc.checksum(sumtype, zfo)
+            zfo.close()
+            csum = misc.checksum(sumtype, complete_path)
+            timestamp = os.stat(complete_path)[8]
+            
+            db_csums = {}
+            db_compressed_sums = {}
+            
+            if self.conf.database:
+                if ftype == 'primary':
+                    rp.getPrimary(complete_path, csum)
+                                
+                elif ftype == 'filelists':
+                    rp.getFilelists(complete_path, csum)
+                    
+                elif ftype == 'other':
+                    rp.getOtherdata(complete_path, csum)
+                
 
-        try:
-            repoXML(reporoot, self.cmds)
-        except MDError, e:
-            errorprint(_('Error generating repo xml file: %s') % e)
-            sys.exit(1)
+                tmp_result_name = '%s.xml.gz.sqlite' % ftype
+                tmp_result_path = os.path.join(repopath, tmp_result_name)
+                good_name = '%s.sqlite' % ftype
+                resultpath = os.path.join(repopath, good_name)
+                
+                # rename from silly name to not silly name
+                os.rename(tmp_result_path, resultpath)
+                compressed_name = '%s.bz2' % good_name
+                result_compressed = os.path.join(repopath, compressed_name)
+                db_csums[ftype] = misc.checksum(sumtype, resultpath)
+                
+                # compress the files
+                bzipFile(resultpath, result_compressed)
+                # csum the compressed file
+                db_compressed_sums[ftype] = misc.checksum(sumtype, result_compressed)
+                # remove the uncompressed file
+                os.unlink(resultpath)
+
+                # timestamp the compressed file
+                db_timestamp = os.stat(result_compressed)[8]
+                
+                # add this data as a section to the repomdxml
+                db_data_type = '%s_db' % ftype
+                data = reporoot.newChild(None, 'data', None)
+                data.newProp('type', db_data_type)
+                location = data.newChild(None, 'location', None)
+                if self.conf.baseurl is not None:
+                    location.newProp('xml:base', self.conf.baseurl)
+                
+                location.newProp('href', os.path.join(self.conf.finaldir, compressed_name))
+                checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
+                checksum.newProp('type', sumtype)
+                db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
+                unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
+                unchecksum.newProp('type', sumtype)
+                database_version = data.newChild(None, 'database_version', dbversion)
+                
+                
+            data = reporoot.newChild(None, 'data', None)
+            data.newProp('type', ftype)
+            location = data.newChild(None, 'location', None)
+            if self.conf.baseurl is not None:
+                location.newProp('xml:base', self.conf.baseurl)
+            location.newProp('href', os.path.join(self.conf.finaldir, file))
+            checksum = data.newChild(None, 'checksum', csum)
+            checksum.newProp('type', sumtype)
+            timestamp = data.newChild(None, 'timestamp', str(timestamp))
+            unchecksum = data.newChild(None, 'open-checksum', uncsum)
+            unchecksum.newProp('type', sumtype)
+        
+        # if we've got a group file then checksum it once and be done
+        if self.conf.groupfile is not None:
+            grpfile = self.conf.groupfile
+            timestamp = os.stat(grpfile)[8]
+            sfile = os.path.basename(grpfile)
+            fo = open(grpfile, 'r')
+            output = open(os.path.join(self.conf.outputdir, self.conf.tempdir, sfile), 'w')
+            output.write(fo.read())
+            output.close()
+            fo.seek(0)
+            csum = misc.checksum(sumtype, fo)
+            fo.close()
+
+            data = reporoot.newChild(None, 'data', None)
+            data.newProp('type', 'group')
+            location = data.newChild(None, 'location', None)
+            if self.conf.baseurl is not None:
+                location.newProp('xml:base', self.conf.baseurl)
+            location.newProp('href', os.path.join(self.conf.finaldir, sfile))
+            checksum = data.newChild(None, 'checksum', csum)
+            checksum.newProp('type', sumtype)
+            timestamp = data.newChild(None, 'timestamp', str(timestamp))
 
+        # save it down
         try:
             repodoc.saveFormatFileEnc(repofilepath, 'UTF-8', 1)
         except:
@@ -294,8 +419,8 @@ class MetaDataGenerator:
 
 class SplitMetaDataGenerator(MetaDataGenerator):
 
-    def __init__(self, cmds):
-        MetaDataGenerator.__init__(self, cmds)
+    def __init__(self, config_obj=None):
+        MetaDataGenerator.__init__(self, config_obj=config_obj)  # pass our own arg; 'conf' is undefined here
 
     def _getFragmentUrl(self, url, fragment):
         import urlparse
@@ -332,135 +457,22 @@ class SplitMetaDataGenerator(MetaDataGenerator):
             return
         filematrix = {}
         for mydir in directories:
-            filematrix[mydir] = self.getFileList(self.cmds['basedir'], mydir, '.rpm')
+            filematrix[mydir] = self.getFileList(self.conf.basedir, mydir, '.rpm')
             self.trimRpms(filematrix[mydir])
             self.pkgcount += len(filematrix[mydir])
 
         mediano = 1
         current = 0
-        self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
+        self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
         self.openMetadataDocs()
-        original_basedir = self.cmds['basedir']
+        original_basedir = self.conf.basedir
         for mydir in directories:
-            self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
+            self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
             current = self.writeMetadataDocs(filematrix[mydir], mydir, current)
             mediano += 1
-        self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], 1)
+        self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, 1)
         self.closeMetadataDocs()
 
 
 
-def repoXML(node, cmds):
-    """generate the repomd.xml file that stores the info on the other files"""
-    sumtype = cmds['sumtype']
-    workfiles = [(cmds['otherfile'], 'other',),
-                 (cmds['filelistsfile'], 'filelists'),
-                 (cmds['primaryfile'], 'primary')]
-    repoid='garbageid'
-    
-    repopath = os.path.join(cmds['outputdir'], cmds['tempdir'])
-    
-    if cmds['database']:
-        try:
-            dbversion = str(sqlitecachec.DBVERSION)
-        except AttributeError:
-            dbversion = '9'
-        rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)
-
-    for (file, ftype) in workfiles:
-        complete_path = os.path.join(repopath, file)
-        
-        zfo = _gzipOpen(complete_path)
-        uncsum = misc.checksum(sumtype, zfo)
-        zfo.close()
-        csum = misc.checksum(sumtype, complete_path)
-        timestamp = os.stat(complete_path)[8]
-        
-        db_csums = {}
-        db_compressed_sums = {}
-        
-        if cmds['database']:
-            if ftype == 'primary':
-                rp.getPrimary(complete_path, csum)
-                            
-            elif ftype == 'filelists':
-                rp.getFilelists(complete_path, csum)
-                
-            elif ftype == 'other':
-                rp.getOtherdata(complete_path, csum)
-            
-
-            tmp_result_name = '%s.xml.gz.sqlite' % ftype
-            tmp_result_path = os.path.join(repopath, tmp_result_name)
-            good_name = '%s.sqlite' % ftype
-            resultpath = os.path.join(repopath, good_name)
-            
-            # rename from silly name to not silly name
-            os.rename(tmp_result_path, resultpath)
-            compressed_name = '%s.bz2' % good_name
-            result_compressed = os.path.join(repopath, compressed_name)
-            db_csums[ftype] = misc.checksum(sumtype, resultpath)
-            
-            # compress the files
-            bzipFile(resultpath, result_compressed)
-            # csum the compressed file
-            db_compressed_sums[ftype] = misc.checksum(sumtype, result_compressed)
-            # remove the uncompressed file
-            os.unlink(resultpath)
-
-            # timestamp the compressed file
-            db_timestamp = os.stat(result_compressed)[8]
-            
-            # add this data as a section to the repomdxml
-            db_data_type = '%s_db' % ftype
-            data = node.newChild(None, 'data', None)
-            data.newProp('type', db_data_type)
-            location = data.newChild(None, 'location', None)
-            if cmds['baseurl'] is not None:
-                location.newProp('xml:base', cmds['baseurl'])
-            
-            location.newProp('href', os.path.join(cmds['finaldir'], compressed_name))
-            checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
-            checksum.newProp('type', sumtype)
-            db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
-            unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
-            unchecksum.newProp('type', sumtype)
-            database_version = data.newChild(None, 'database_version', dbversion)
-            
-            
-        data = node.newChild(None, 'data', None)
-        data.newProp('type', ftype)
-        location = data.newChild(None, 'location', None)
-        if cmds['baseurl'] is not None:
-            location.newProp('xml:base', cmds['baseurl'])
-        location.newProp('href', os.path.join(cmds['finaldir'], file))
-        checksum = data.newChild(None, 'checksum', csum)
-        checksum.newProp('type', sumtype)
-        timestamp = data.newChild(None, 'timestamp', str(timestamp))
-        unchecksum = data.newChild(None, 'open-checksum', uncsum)
-        unchecksum.newProp('type', sumtype)
-    
-    # if we've got a group file then checksum it once and be done
-    if cmds['groupfile'] is not None:
-        grpfile = cmds['groupfile']
-        timestamp = os.stat(grpfile)[8]
-        sfile = os.path.basename(grpfile)
-        fo = open(grpfile, 'r')
-        output = open(os.path.join(cmds['outputdir'], cmds['tempdir'], sfile), 'w')
-        output.write(fo.read())
-        output.close()
-        fo.seek(0)
-        csum = misc.checksum(sumtype, fo)
-        fo.close()
-
-        data = node.newChild(None, 'data', None)
-        data.newProp('type', 'group')
-        location = data.newChild(None, 'location', None)
-        if cmds['baseurl'] is not None:
-            location.newProp('xml:base', cmds['baseurl'])
-        location.newProp('href', os.path.join(cmds['finaldir'], sfile))
-        checksum = data.newChild(None, 'checksum', csum)
-        checksum.newProp('type', sumtype)
-        timestamp = data.newChild(None, 'timestamp', str(timestamp))
-
 
diff --git a/createrepo/utils.py b/createrepo/utils.py
index bb3939c..51e56cd 100644
--- a/createrepo/utils.py
+++ b/createrepo/utils.py
@@ -98,4 +98,27 @@ def utf8String(string):
             newstring = newstring + char
     return newstring
 
+def checkAndMakeDir(dir):
+    """
+     check out the dir and make it, if possible, return True if done, else return False
+    """
+    if os.path.exists(dir):
+        if not os.path.isdir(dir):
+            errorprint(_('%s is not a dir') % dir)
+            result = False
+        else:
+            if not os.access(dir, os.W_OK):
+                errorprint(_('%s is not writable') % dir)
+                result = False
+            else:
+                result = True
+    else:
+        try:
+            os.mkdir(dir)
+        except OSError, e:
+            errorprint(_('Error creating dir %s: %s') % (dir, e))
+            result = False
+        else:
+            result = True
+    return result
 
diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py
index ea2b9aa..ec6db90 100644
--- a/createrepo/yumbased.py
+++ b/createrepo/yumbased.py
@@ -14,6 +14,7 @@ from yum import misc
 from rpmUtils.transaction import initReadOnlyTransaction
 from rpmUtils.miscutils import flagToString, stringToVersion
 
+#FIXME - merge into class with config stuff
 fileglobs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
 file_re = []
 for glob in fileglobs:
@@ -96,7 +97,19 @@ class CreateRepoPackage(YumLocalPackage):
     hdrend = property(fget=lambda self: self._get_header_byte_range()[1])
     hdrstart = property(fget=lambda self: self._get_header_byte_range()[0])
     
-    def _dump_base_items(self):
+    def _dump_base_items(self, basedir, baseurl=None):
+        """Takes an optional baseurl and required basedir.
+           basedir is the relative path to remove from the location
+           baseurl is whether or not this package already has a
+           baseurl rather than just '.'"""
+        
+        # if we start seeing fullpaths in the location tag - this is the culprit
+        if self.localpath.startswith(basedir):
+            relpath = self.localpath.replace(basedir, '')
+            if relpath[0] == '/': relpath = relpath[1:]
+        else:
+            relpath = self.localpath
+                    
         msg = """
   <name>%s</name>
   <arch>%s</arch>
@@ -108,12 +121,16 @@ class CreateRepoPackage(YumLocalPackage):
   <url>%s</url>
   <time file="%s" build="%s"/>
   <size package="%s" installed="%s" archive="%s"/>
-  <location href="%s"/>
+
   """ % (self.name, self.arch, self.epoch, self.ver, self.rel, self.checksum, 
          self._xml(self.summary), self._xml(self.description), 
          self._xml(self.packager), self._xml(self.url), self.filetime,
-         self.buildtime, self.packagesize, self.size, self.archivesize, 
-         self.localpath )
+         self.buildtime, self.packagesize, self.size, self.archivesize)
+        if baseurl:
+            msg += """<location xml:base="%s" href="%s"/>\n""" % (self._xml(baseurl), relpath)
+        else:
+            msg += """<location href="%s"/>\n""" % relpath
+            
         return msg
 
     def _dump_format_items(self):
@@ -262,9 +279,9 @@ class CreateRepoPackage(YumLocalPackage):
                          (self._xml(author), ts, self._xml(content))
         return msg                                                 
 
-    def do_primary_xml_dump(self):
+    def do_primary_xml_dump(self, basedir, baseurl=None):
         msg = """\n<package type="rpm">"""
-        msg += self._dump_base_items()
+        msg += self._dump_base_items(basedir, baseurl)
         msg += self._dump_format_items()
         msg += """\n</package>\n"""
         return msg
@@ -285,28 +302,6 @@ class CreateRepoPackage(YumLocalPackage):
         msg += "\n</package>\n"
         return msg
        
-class CreateRepoConfig(object):
-    def __init__(self):
-        self.quiet = False
-        self.verbose = False
-        self.excludes = []
-        self.baseurl = None
-        self.groupfile = None
-        self.sumtype = 'sha'
-        self.noepoch = False #???
-        self.pretty = False
-        self.cachedir = None
-        self.basedir = os.getcwd()
-        self.use_cache = False
-        self.checkts = False
-        self.split = False        
-        self.update = False
-        self.make_database = False
-        self.outputdir = None
-        self.file_pattern_match = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
-        self.dir_pattern_match = ['.*bin\/.*', '^\/etc\/.*']
-        self.skip_symlinks = False
-        self.pkglist = []
         
            
 class YumCreateRepo(object):
diff --git a/genpkgmetadata.py b/genpkgmetadata.py
index f5e8462..6e43aa6 100755
--- a/genpkgmetadata.py
+++ b/genpkgmetadata.py
@@ -20,7 +20,7 @@
 
 import os
 import sys
-import getopt
+from optparse import OptionParser
 import shutil
 
 
@@ -30,319 +30,207 @@ from createrepo import MDError
 import createrepo.yumbased
 import createrepo.utils
 
-from createrepo.utils import _gzipOpen, errorprint, _
+from createrepo.utils import _gzipOpen, errorprint, _, checkAndMakeDir
 
 __version__ = '0.9'
 
-# cli
-def usage(retval=1):
-    print _("""
-    createrepo [options] directory-of-packages
-
-    Options:
-     -u, --baseurl <url> = optional base url location for all files
-     -o, --outputdir <dir> = optional directory to output to
-     -x, --exclude = files globs to exclude, can be specified multiple times
-     -q, --quiet = run quietly
-     -n, --noepoch = don't add zero epochs for non-existent epochs
-                    (incompatible with yum and smart but required for
-                     systems with rpm < 4.2.1)
-     -g, --groupfile <filename> to point to for group information (precreated)
-                    (<filename> relative to directory-of-packages)
-     -v, --verbose = run verbosely
-     -c, --cachedir <dir> = specify which dir to use for the checksum cache
-     -C, --checkts = don't generate repo metadata, if their ctimes are newer
-                     than the rpm ctimes.
-     -i, --pkglist = use only these files from the directory specified
-     -h, --help = show this help
-     -V, --version = output version
-     -p, --pretty = output xml files in pretty format.
-     --update = update existing metadata (if present)
-     -d, --database = generate the sqlite databases.
-    """)
-
-    sys.exit(retval)
-
-# module
-
-def checkAndMakeDir(dir):
-    """
-     check out the dir and make it, if possible, return 1 if done, else return 0
+def parseArgs(args, conf):
     """
-    if os.path.exists(dir):
-        if not os.path.isdir(dir):
-            errorprint(_('%s is not a dir') % dir)
-            result = False
-        else:
-            if not os.access(dir, os.W_OK):
-                errorprint(_('%s is not writable') % dir)
-                result = False
-            else:
-                result = True
-    else:
-        try:
-            os.mkdir(dir)
-        except OSError, e:
-            errorprint(_('Error creating dir %s: %s') % (dir, e))
-            result = False
-        else:
-            result = True
-    return result
-
-def parseArgs(args):
-    """
-       Parse the command line args return a commands dict and directory.
+       Parse the command line args. return a config object.
        Sanity check all the things being passed in.
     """
-    cmds = {}
-    cmds['quiet'] = 0
-    cmds['verbose'] = 0
-    cmds['excludes'] = []
-    cmds['baseurl'] = None
-    cmds['groupfile'] = None
-    cmds['sumtype'] = 'sha'
-    cmds['noepoch'] = False
-    cmds['pretty'] = 0
-#    cmds['updategroupsonly'] = 0
-    cmds['cachedir'] = None
-    cmds['basedir'] = os.getcwd()
-    cmds['cache'] = False
-    cmds['checkts'] = False
-    cmds['mdtimestamp'] = 0
-    cmds['split'] = False
-    cmds['update'] = False
-    cmds['outputdir'] = ""
-    cmds['database'] = False
-    cmds['file-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
-    cmds['dir-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*']
-    cmds['skip-symlinks'] = False
-    cmds['pkglist'] = []
-
-    try:
-        gopts, argsleft = getopt.getopt(args, 'phqVvndg:s:x:u:c:o:CSi:', ['help', 'exclude=',
-                                                                  'quiet', 'verbose', 'cachedir=', 'basedir=',
-                                                                  'baseurl=', 'groupfile=', 'checksum=',
-                                                                  'version', 'pretty', 'split', 'outputdir=',
-                                                                  'noepoch', 'checkts', 'database', 'update',
-                                                                  'skip-symlinks', 'pkglist='])
-    except getopt.error, e:
-        errorprint(_('Options Error: %s.') % e)
-        usage()
-
-    try:
-        for arg,a in gopts:
-            if arg in ['-h','--help']:
-                usage(retval=0)
-            elif arg in ['-V', '--version']:
-                print '%s' % __version__
-                sys.exit(0)
-            elif arg == '--split':
-                cmds['split'] = True
-    except ValueError, e:
-        errorprint(_('Options Error: %s') % e)
-        usage()
-
-
-    # make sure our dir makes sense before we continue
-    if len(argsleft) > 1 and not cmds['split']:
+    
+    parser = OptionParser(version = "createrepo %s" % __version__)
+    # query options
+    parser.add_option("-q", "--quiet", default=False, action="store_true",
+                      help="output nothing except for serious errors")
+    parser.add_option("-v", "--verbose", default=False, action="store_true",
+                      help="output more debugging info.")
+    parser.add_option("-x", "--excludes", default=[], action="append",
+                      help="files to exclude")
+    parser.add_option("-u", "--baseurl", default=None)
+    parser.add_option("-g", "--groupfile", default=None)
+    parser.add_option("-s", "--checksum", default="sha", dest='sumtype')
+    parser.add_option("-n", "--noepoch", default=False, action="store_true")
+    parser.add_option("-p", "--pretty", default=False, action="store_true")
+    parser.add_option("-c", "--cachedir", default=None)
+    parser.add_option("--basedir", default=os.getcwd())
+    parser.add_option("-C", "--checkts", default=False, action="store_true")
+    parser.add_option("-d", "--database", default=False, action="store_true")
+    parser.add_option("--update", default=False, action="store_true")
+    parser.add_option("--split", default=False, action="store_true")
+    parser.add_option("-i", "--pkglist", default=False, action="store_true")
+    parser.add_option("-o", "--outputdir", default="")
+    parser.add_option("-S", "--skip-symlinks", dest="skip_symlinks",
+                      default=False, action="store_true")
+
+    (opts, argsleft) = parser.parse_args()
+    if len(argsleft) > 1 and not opts.split:
         errorprint(_('Error: Only one directory allowed per run.'))
-        usage()
+        parser.print_usage()
+        sys.exit(1)
+        
     elif len(argsleft) == 0:
         errorprint(_('Error: Must specify a directory to index.'))
-        usage()
+        parser.print_usage()
+        sys.exit(1)
+        
     else:
         directories = argsleft
-
-    try:
-        for arg,a in gopts:
-            if arg in ['-v', '--verbose']:
-                cmds['verbose'] = 1
-            elif arg in ["-q", '--quiet']:
-                cmds['quiet'] = 1
-            elif arg in ['-u', '--baseurl']:
-                if cmds['baseurl'] is not None:
-                    errorprint(_('Error: Only one baseurl allowed.'))
-                    usage()
-                else:
-                    cmds['baseurl'] = a
-            elif arg in ['-g', '--groupfile']:
-                if cmds['groupfile'] is not None:
-                    errorprint(_('Error: Only one groupfile allowed.'))
-                    usage()
-                else:
-                    cmds['groupfile'] = a
-            elif arg in ['-x', '--exclude']:
-                cmds['excludes'].append(a)
-            elif arg in ['-p', '--pretty']:
-                cmds['pretty'] = 1
-#            elif arg in ['--update-groups-only']:
-#                cmds['updategroupsonly'] = 1
-            elif arg in ['-s', '--checksum']:
-                errorprint(_('This option is deprecated'))
-            elif arg in ['-c', '--cachedir']:
-                cmds['cache'] = True
-                cmds['cachedir'] = a
-            elif arg == '--update':
-                cmds['update'] = True
-            elif arg in ['-C', '--checkts']:
-                cmds['checkts'] = True
-            elif arg == '--basedir':
-                cmds['basedir'] = a
-            elif arg in ['-o','--outputdir']:
-                cmds['outputdir'] = a
-            elif arg in ['-n', '--noepoch']:
-                cmds['noepoch'] = True
-            elif arg in ['-d', '--database']:
-                cmds['database'] = True
-            elif arg in ['-S', '--skip-symlinks']:
-                cmds['skip-symlinks'] = True
-            elif arg in ['-i', '--pkglist']:
-                cmds['pkglist'] = a
-                                
-    except ValueError, e:
-        errorprint(_('Options Error: %s') % e)
-        usage()
-
-    if cmds['split'] and cmds['checkts']:
+    
+    if opts.split and opts.checkts:
         errorprint(_('--split and --checkts options are mutually exclusive'))
         sys.exit(1)
 
-    directory = directories[0]
 
+    # let's switch over to using the conf object - put all the opts options into it
+    for opt in parser.option_list:
+        if opt.dest is None: # this is fairly silly
+            continue
+        setattr(conf, opt.dest, getattr(opts, opt.dest))
+    
+    directory = directories[0]
+    print directory
     directory = os.path.normpath(directory)
-    if cmds['split']:
+    print directory
+    if conf.split:
         pass
     elif os.path.isabs(directory):
-        cmds['basedir'] = os.path.dirname(directory)
+        conf.basedir = os.path.dirname(directory)
         directory = os.path.basename(directory)
     else:
-        cmds['basedir'] = os.path.realpath(cmds['basedir'])
-    if not cmds['outputdir']:
-        cmds['outputdir'] = os.path.join(cmds['basedir'], directory)
-    if cmds['groupfile']:
-        a = cmds['groupfile']
-        if cmds['split']:
-            a = os.path.join(cmds['basedir'], directory, cmds['groupfile'])
+        conf.basedir = os.path.realpath(conf.basedir)
+
+    print directory
+    print conf.basedir    
+    
+    if not opts.outputdir:
+        conf.outputdir = os.path.join(conf.basedir, directory)
+    if conf.groupfile:
+        a = conf.groupfile
+        if conf.split:
+            a = os.path.join(conf.basedir, directory, conf.groupfile)
         elif not os.path.isabs(a):
-            a = os.path.join(cmds['basedir'], directory, cmds['groupfile'])
+            a = os.path.join(conf.basedir, directory, conf.groupfile)
         if not os.path.exists(a):
             errorprint(_('Error: groupfile %s cannot be found.' % a))
             usage()
-        cmds['groupfile'] = a
-    if cmds['cachedir']:
-        a = cmds ['cachedir']
+        conf.groupfile = a
+    if conf.cachedir:
+        conf.cache = True
+        a = conf.cachedir
         if not os.path.isabs(a):
-            a = os.path.join(cmds['outputdir'] ,a)
+            a = os.path.join(conf.outputdir ,a)
         if not checkAndMakeDir(a):
             errorprint(_('Error: cannot open/write to cache dir %s' % a))
-            usage()
-        cmds['cachedir'] = a
+            parser.print_usage()
+        conf.cachedir = a
 
-    if cmds['pkglist']:
+    if conf.pkglist:
         lst = []
-        pfo = open(cmds['pkglist'], 'r')
+        pfo = open(conf.pkglist, 'r')
         for line in pfo.readlines():
             line = line.replace('\n', '')
             lst.append(line)
         pfo.close()
             
-        cmds['pkglist'] = lst
+        conf.pkglist = lst
         
     #setup some defaults
-    cmds['primaryfile'] = 'primary.xml.gz'
-    cmds['filelistsfile'] = 'filelists.xml.gz'
-    cmds['otherfile'] = 'other.xml.gz'
-    cmds['repomdfile'] = 'repomd.xml'
-    cmds['tempdir'] = '.repodata'
-    cmds['finaldir'] = 'repodata'
-    cmds['olddir'] = '.olddata'
 
     # Fixup first directory
     directories[0] = directory
-    return cmds, directories
+    conf.directory = directory
+    conf.directories = directories
+
+    return conf
 
 def main(args):
-    cmds, directories = parseArgs(args)
-    directory = directories[0]
-    testdir = os.path.realpath(os.path.join(cmds['basedir'], directory))
+    conf = createrepo.MetaDataConfig()
+    conf = parseArgs(args, conf)
+        
+    testdir = os.path.realpath(os.path.join(conf.basedir, conf.directory))
     # start the sanity/stupidity checks
     if not os.path.exists(testdir):
-        errorprint(_('Directory %s must exist') % (directory,))
+        errorprint(_('Directory %s must exist') % (conf.directory,))
         sys.exit(1)
 
     if not os.path.isdir(testdir):
         errorprint(_('%s - must be a directory') 
-                   % (directory,))
+                   % (conf.directory,))
         sys.exit(1)
 
-    if not os.access(cmds['outputdir'], os.W_OK):
-        errorprint(_('Directory %s must be writable.') % (cmds['outputdir'],))
+    if not os.access(conf.outputdir, os.W_OK):
+        errorprint(_('Directory %s must be writable.') % (conf.outputdir,))
         sys.exit(1)
 
-    if cmds['split']:
-        oldbase = cmds['basedir']
-        cmds['basedir'] = os.path.join(cmds['basedir'], directory)
-    if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['tempdir'])):
+    if conf.split:
+        oldbase = conf.basedir
+        conf.basedir = os.path.join(conf.basedir, conf.directory)
+    if not checkAndMakeDir(os.path.join(conf.outputdir, conf.tempdir)):
         sys.exit(1)
 
-    if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['finaldir'])):
+    if not checkAndMakeDir(os.path.join(conf.outputdir, conf.finaldir)):
         sys.exit(1)
 
-    if os.path.exists(os.path.join(cmds['outputdir'], cmds['olddir'])):
-        errorprint(_('Old data directory exists, please remove: %s') % cmds['olddir'])
+    if os.path.exists(os.path.join(conf.outputdir, conf.olddir)):
+        errorprint(_('Old data directory exists, please remove: %s') % conf.olddir)
         sys.exit(1)
 
     # make sure we can write to where we want to write to:
     for direc in ['tempdir', 'finaldir']:
         for f in ['primaryfile', 'filelistsfile', 'otherfile', 'repomdfile']:
-            filepath = os.path.join(cmds['outputdir'], cmds[direc], cmds[f])
+            filepath = os.path.join(conf.outputdir, direc, f)
             if os.path.exists(filepath):
                 if not os.access(filepath, os.W_OK):
                     errorprint(_('error in must be able to write to metadata files:\n  -> %s') % filepath)
                     usage()
-                if cmds['checkts']:
-                    ts = os.path.getctime(filepath)
-                    if ts > cmds['mdtimestamp']:
-                        cmds['mdtimestamp'] = ts
+                if conf.checkts:
+                    timestamp = os.path.getctime(filepath)
+                    if timestamp > conf.mdtimestamp:
+                        conf.mdtimestamp = timestamp
         
-    if cmds['split']:
-        cmds['basedir'] = oldbase
-        mdgen = createrepo.SplitMetaDataGenerator(cmds)
+    if conf.split:
+        conf.basedir = oldbase
+        mdgen = createrepo.SplitMetaDataGenerator(config_obj=conf)
         mdgen.doPkgMetadata(directories)
     else:
-        mdgen = createrepo.MetaDataGenerator(cmds)
-        if cmds['checkts'] and mdgen.checkTimeStamps(directory):
-            if cmds['verbose']:
+        mdgen = createrepo.MetaDataGenerator(config_obj=conf)
+        if mdgen.checkTimeStamps():
+            if mdgen.conf.verbose:
                 print _('repo is up to date')
             sys.exit(0)
-        mdgen.doPkgMetadata(directory)
+        mdgen.doPkgMetadata()
     mdgen.doRepoMetadata()
 
-    if os.path.exists(os.path.join(cmds['outputdir'], cmds['finaldir'])):
+    output_final_dir = os.path.join(mdgen.conf.outputdir, mdgen.conf.finaldir) 
+    output_old_dir = os.path.join(mdgen.conf.outputdir, mdgen.conf.olddir)
+    
+    if os.path.exists(output_final_dir):
         try:
-            os.rename(os.path.join(cmds['outputdir'], cmds['finaldir']),
-                      os.path.join(cmds['outputdir'], cmds['olddir']))
+            os.rename(output_final_dir, output_old_dir)
         except:
-            errorprint(_('Error moving final %s to old dir %s' % (os.path.join(cmds['outputdir'], cmds['finaldir']),
-                                                                  os.path.join(cmds['outputdir'], cmds['olddir']))))
+            errorprint(_('Error moving final %s to old dir %s' % (output_final_dir,
+                                                                 output_old_dir)))
             sys.exit(1)
 
+    output_temp_dir =os.path.join(mdgen.conf.outputdir, mdgen.conf.tempdir)
+
     try:
-        os.rename(os.path.join(cmds['outputdir'], cmds['tempdir']),
-                  os.path.join(cmds['outputdir'], cmds['finaldir']))
+        os.rename(output_temp_dir, output_final_dir)
     except:
         errorprint(_('Error moving final metadata into place'))
         # put the old stuff back
-        os.rename(os.path.join(cmds['outputdir'], cmds['olddir']),
-                  os.path.join(cmds['outputdir'], cmds['finaldir']))
+        os.rename(output_old_dir, output_final_dir)
         sys.exit(1)
 
     for f in ['primaryfile', 'filelistsfile', 'otherfile', 'repomdfile', 'groupfile']:
-        if cmds[f]:
-            fn = os.path.basename(cmds[f])
+        if getattr(mdgen.conf, f):
+            fn = os.path.basename(getattr(mdgen.conf, f))
         else:
             continue
-        oldfile = os.path.join(cmds['outputdir'], cmds['olddir'], fn)
+        oldfile = os.path.join(output_old_dir, fn)
+
         if os.path.exists(oldfile):
             try:
                 os.remove(oldfile)
@@ -352,11 +240,9 @@ def main(args):
                 sys.exit(1)
 
     # Move everything else back from olddir (eg. repoview files)
-    olddir = os.path.join(cmds['outputdir'], cmds['olddir'])
-    finaldir = os.path.join(cmds['outputdir'], cmds['finaldir'])
-    for f in os.listdir(olddir):
-        oldfile = os.path.join(olddir, f)
-        finalfile = os.path.join(finaldir, f)
+    for f in os.listdir(output_old_dir):
+        oldfile = os.path.join(output_old_dir, f)
+        finalfile = os.path.join(output_final_dir, f)
         if os.path.exists(finalfile):
             # Hmph?  Just leave it alone, then.
             try:
@@ -378,9 +264,9 @@ def main(args):
 
 #XXX: fix to remove tree as we mung basedir
     try:
-        os.rmdir(os.path.join(cmds['outputdir'], cmds['olddir']))
+        os.rmdir(output_old_dir)
     except OSError, e:
-        errorprint(_('Could not remove old metadata dir: %s') % cmds['olddir'])
+        errorprint(_('Could not remove old metadata dir: %s') % mdgen.conf.olddir)
         errorprint(_('Error was %s') % e)
         errorprint(_('Please clean up this directory manually.'))
 



More information about the Rpm-metadata mailing list