[Rpm-metadata] 2 commits - createrepo/__init__.py createrepo/utils.py genpkgmetadata.py mergerepo.py modifyrepo.py

skvidal at osuosl.org skvidal at osuosl.org
Fri Sep 16 05:37:42 UTC 2011


 createrepo/__init__.py |   83 +++++++++++++++++++++++++------------------------
 createrepo/utils.py    |   43 +++++++++++++++++++++++++
 genpkgmetadata.py      |    8 ++++
 mergerepo.py           |    8 ++--
 modifyrepo.py          |   29 +++++++++++++----
 5 files changed, 122 insertions(+), 49 deletions(-)

New commits:
commit 6715fb5a87d2255660daa42748081582ae59a330
Merge: dafea8c 8c35f58
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Fri Sep 16 01:37:33 2011 -0400

    Merge branch 'master' of ssh://createrepo.baseurl.org/srv/projects/createrepo/git/createrepo
    
    * 'master' of ssh://createrepo.baseurl.org/srv/projects/createrepo/git/createrepo:
      Add --xz to createrepo and mergerepo bash completions.

commit dafea8c15d5feaaf50a9df4898fd34eed1b09916
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Thu Sep 15 17:55:50 2011 -0400

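    Add --compress-type everywhere and make it all behave.
    
    Can't default to xz yet for all metadata because of y-m-p
    (yum-metadata-parser) silliness: until it copes with non-gzipped XML,
    the primary, filelists and other XML files are always written as .gz.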

diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 7d147a9..61694f9 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -34,7 +34,7 @@ from yum.packageSack import MetaSack
 from yum.packages import YumAvailablePackage
 
 import rpmUtils.transaction
-from utils import _, errorprint, MDError, lzma
+from utils import _, errorprint, MDError, lzma, _available_compression
 import readMetadata
 try:
     import sqlite3 as sqlite
@@ -46,7 +46,7 @@ try:
 except ImportError:
     pass
 
-from utils import _gzipOpen, bzipFile, xzFile, checkAndMakeDir, GzipFile, \
+from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \
                   checksum_and_rename, split_list_into_equal_chunks
 import deltarpms
 
@@ -74,7 +74,7 @@ class MetaDataConfig(object):
         self.deltadir = None
         self.delta_relative = 'drpms/'
         self.oldpackage_paths = [] # where to look for the old packages -
-        self.deltafile = 'prestodelta.xml.gz'
+        self.deltafile = 'prestodelta.xml'
         self.num_deltas = 1 # number of older versions to delta (max)
         self.max_delta_rpm_size = 100000000
         self.update_md_path = None
@@ -86,9 +86,9 @@ class MetaDataConfig(object):
         self.skip_symlinks = False
         self.pkglist = []
         self.database_only = False
-        self.primaryfile = 'primary.xml.gz'
-        self.filelistsfile = 'filelists.xml.gz'
-        self.otherfile = 'other.xml.gz'
+        self.primaryfile = 'primary.xml'
+        self.filelistsfile = 'filelists.xml'
+        self.otherfile = 'other.xml'
         self.repomdfile = 'repomd.xml'
         self.tempdir = '.repodata'
         self.finaldir = 'repodata'
@@ -110,7 +110,8 @@ class MetaDataConfig(object):
         self.worker_cmd = '/usr/share/createrepo/worker.py'
         #self.worker_cmd = './worker.py' # helpful when testing
         self.retain_old_md = 0
-        self.xz = False # use xz for compression
+        self.compress_type = 'gz'
+
         
 class SimpleMDCallBack(object):
     def errorlog(self, thing):
@@ -146,8 +147,13 @@ class MetaDataGenerator:
         if not self.conf.directory and not self.conf.directories:
             raise MDError, "No directory given on which to run."
 
-        if self.conf.xz and not utils.lzma:
-            raise MDError, "XZ compression requested but lzma/xz module not available."
+        if not self.conf.compress_type:
+            self.conf.compress_type = 'gz'
+        
+        if self.conf.compress_type not in utils._available_compression:
+            raise MDError, "Compression %s not available: Please choose from: %s" \
+                 % (self.conf.compress_type, ', '.join(utils._available_compression))
+            
             
         if not self.conf.directories: # just makes things easier later
             self.conf.directories = [self.conf.directory]
@@ -414,9 +420,11 @@ class MetaDataGenerator:
 
     def _setupPrimary(self):
         # setup the primary metadata file
+        # FIXME - make this be  conf.compress_type once y-m-p is fixed
+        fpz = self.conf.primaryfile + '.' + 'gz'
         primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                       self.conf.primaryfile)
-        fo = _gzipOpen(primaryfilepath, 'w')
+                                       fpz)
+        fo = compressOpen(primaryfilepath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common"' \
             ' xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">' %
@@ -425,9 +433,11 @@ class MetaDataGenerator:
 
     def _setupFilelists(self):
         # setup the filelist file
+        # FIXME - make this be  conf.compress_type once y-m-p is fixed        
+        fpz = self.conf.filelistsfile + '.' + 'gz'
         filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                    self.conf.filelistsfile)
-        fo = _gzipOpen(filelistpath, 'w')
+                                    fpz)
+        fo = compressOpen(filelistpath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists"' \
                  ' packages="%s">' % self.pkgcount)
@@ -435,9 +445,11 @@ class MetaDataGenerator:
 
     def _setupOther(self):
         # setup the other file
+        # FIXME - make this be  conf.compress_type once y-m-p is fixed        
+        fpz = self.conf.otherfile + '.' + 'gz'
         otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.otherfile)
-        fo = _gzipOpen(otherfilepath, 'w')
+                                     fpz)
+        fo = compressOpen(otherfilepath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other"' \
                  ' packages="%s">' %
@@ -446,9 +458,10 @@ class MetaDataGenerator:
 
     def _setupDelta(self):
         # setup the other file
+        fpz = self.conf.deltafile + '.' + self.conf.compress_type        
         deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.deltafile)
-        fo = _gzipOpen(deltafilepath, 'w')
+                                     fpz)
+        fo = compressOpen(deltafilepath, 'w', self.conf.compress_type)
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<prestodelta>\n')
         return fo
@@ -612,7 +625,6 @@ class MetaDataGenerator:
                 
             for worker_num in range(self.conf.workers):
                 pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num
-                print pkl
                 f = open(pkl, 'w') 
                 f.write('\n'.join(worker_chunks[worker_num]))
                 f.close()
@@ -828,7 +840,7 @@ class MetaDataGenerator:
         return ' '.join(results)
 
     def _createRepoDataObject(self, mdfile, mdtype, compress=True, 
-                              compress_type='gzip', attribs={}):
+                              compress_type=None, attribs={}):
         """return random metadata as RepoData object to be  added to RepoMD
            mdfile = complete path to file
            mdtype = the metadata type to use
@@ -838,19 +850,12 @@ class MetaDataGenerator:
         sfile = os.path.basename(mdfile)
         fo = open(mdfile, 'r')
         outdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        if not compress_type:
+            compress_type = self.conf.compress_type
         if compress:
-            if compress_type == 'gzip':
-                sfile = '%s.gz' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = GzipFile(filename = outfn, mode='wb')
-            elif compress_type == 'bzip2':
-                sfile = '%s.bz2' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = BZ2File(filename = outfn, mode='wb')
-            elif compress_type == 'xz':
-                sfile = '%s.xz' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = utils.lzma.LZMAFile(outfn, mode='wb')
+            sfile = '%s.%s' % (sfile, compress_type)
+            outfn = os.path.join(outdir, sfile)
+            output = compressOpen(outfn, mode='wb', compress_type=compress_type)
                 
         else:
             outfn  = os.path.join(outdir, sfile)
@@ -924,9 +929,13 @@ class MetaDataGenerator:
             rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None)
 
         for (rpm_file, ftype) in workfiles:
+            # when we fix y-m-p and non-gzipped xml files - then we can make this just add
+            # self.conf.compress_type
+            if ftype in ('other', 'filelists', 'primary'):
+                rpm_file = rpm_file + '.' + 'gz'
             complete_path = os.path.join(repopath, rpm_file)
 
-            zfo = _gzipOpen(complete_path)
+            zfo = compressOpen(complete_path)
             # This is misc.checksum() done locally so we can get the size too.
             data = misc.Checksums([sumtype])
             while data.read(zfo, 2**16):
@@ -967,17 +976,13 @@ class MetaDataGenerator:
 
                     # rename from silly name to not silly name
                     os.rename(tmp_result_path, resultpath)
-                    ext = 'bz2'
-                    compress_func = bzipFile
-                    if self.conf.xz:
-                        ext = 'xz'
-                        compress_func = xzFile
+                    ext = self.conf.compress_type
                     compressed_name = '%s.%s' % (good_name, ext)
                     result_compressed = os.path.join(repopath, compressed_name)
                     db_csums[ftype] = misc.checksum(sumtype, resultpath)
 
                     # compress the files
-                    compress_func(resultpath, result_compressed)
+                    compressFile(resultpath, result_compressed, self.conf.compress_type)
                     # csum the compressed file
                     db_compressed_sums[ftype] = misc.checksum(sumtype,
                                                              result_compressed)
@@ -1051,7 +1056,7 @@ class MetaDataGenerator:
 
         if self.conf.additional_metadata:
             for md_type, md_file in self.conf.additional_metadata.items():
-                mdcontent = self._createRepoDataObject(md_file, md_type, compress_type='xz')
+                mdcontent = self._createRepoDataObject(md_file, md_type)
                 repomd.repoData[mdcontent.type] = mdcontent
                 
 
diff --git a/createrepo/utils.py b/createrepo/utils.py
index 655083d..c816640 100644
--- a/createrepo/utils.py
+++ b/createrepo/utils.py
@@ -91,6 +91,49 @@ def xzFile(source, dest):
     destination.close()
     s_fn.close()
 
+def gzFile(source, dest):
+        
+    s_fn = open(source, 'rb')
+    destination = GzipFile(dest, 'w')
+
+    while True:
+        data = s_fn.read(1024000)
+
+        if not data: break
+        destination.write(data)
+
+    destination.close()
+    s_fn.close()
+
+
+
+def compressFile(source, dest, compress_type):
+    """Compress an existing file using any compression type from source to dest"""
+    
+    if compress_type == 'xz':
+        xzFile(source, dest)
+    elif compress_type == 'bz2':
+        bzipFile(source, dest)
+    elif compress_type == 'gz':
+        gzFile(source, dest)
+    else:
+        raise MDError, "Unknown compression type %s" % compress_type
+    
+def compressOpen(fn, mode='rb', compress_type=None):
+    
+    if not compress_type:
+        # we are readonly and we don't give a compress_type - then guess based on the file extension
+        compress_type = fn.split('.')[-1]
+            
+    if compress_type == 'xz':
+        return lzma.LZMAFile(fn, mode)
+    elif compress_type == 'bz2':
+        return bz2.BZ2File(fn, mode)
+    elif compress_type == 'gz':
+        return _gzipOpen(fn, mode)
+    else:
+        raise MDError, "Unknown compression type %s" % compress_type
+    
 def returnFD(filename):
     try:
         fdno = os.open(filename, os.O_RDONLY)
diff --git a/genpkgmetadata.py b/genpkgmetadata.py
index 4ba445e..af0ecb4 100755
--- a/genpkgmetadata.py
+++ b/genpkgmetadata.py
@@ -127,6 +127,9 @@ def parse_args(args, conf):
     parser.add_option("--xz", default=False,
         action="store_true",
         help="use xz for repodata compression")
+    parser.add_option("--compress-type", default=None, dest="compress_type",
+        help="which compression type to use")
+        
     
     (opts, argsleft) = parser.parse_args(args)
     if len(argsleft) > 1 and not opts.split:
@@ -159,6 +162,11 @@ def parse_args(args, conf):
     
     if opts.nodatabase:
         opts.database = False
+    
+    # xz is just a shorthand for compress_type
+    if opts.xz and not opts.compress_type:
+        opts.compress_type='xz'
+        
         
     # let's switch over to using the conf object - put all the opts into it
     for opt in parser.option_list:
diff --git a/mergerepo.py b/mergerepo.py
index 882395a..80cb1a8 100755
--- a/mergerepo.py
+++ b/mergerepo.py
@@ -48,8 +48,8 @@ def parse_args(args):
                       help="Do not merge group(comps) metadata")
     parser.add_option("", "--noupdateinfo", default=False, action="store_true",
                       help="Do not merge updateinfo metadata")
-    parser.add_option("", "--xz", default=False, action="store_true",
-                      help="Use xz for repodata compression")
+    parser.add_option("--compress-type", default=None, dest="compress_type",
+                      help="which compression type to use")
                       
     (opts, argsleft) = parser.parse_args(args)
 
@@ -81,8 +81,8 @@ def main(args):
         rmbase.groups = False
     if opts.noupdateinfo:
         rmbase.updateinfo = False
-    if opts.xz:
-        rmbase.mdconf.xz = True
+    if opts.compress_type:
+        rmbase.mdconf.compress_type = opts.compress_type
     try:
         rmbase.merge_repos()
         rmbase.write_metadata()
diff --git a/modifyrepo.py b/modifyrepo.py
index c3370e8..153ad4d 100755
--- a/modifyrepo.py
+++ b/modifyrepo.py
@@ -29,7 +29,7 @@
 import os
 import sys
 from createrepo import __version__
-from createrepo.utils import checksum_and_rename, GzipFile, MDError
+from createrepo.utils import checksum_and_rename, compressOpen, MDError
 from yum.misc import checksum
 
 from yum.repoMDObject import RepoMD, RepoMDError, RepoData
@@ -44,6 +44,8 @@ class RepoMetadata:
         self.repodir = os.path.abspath(repo)
         self.repomdxml = os.path.join(self.repodir, 'repomd.xml')
         self.checksum_type = 'sha256'
+        self.compress = False
+        self.compress_type='xz'
 
         if not os.path.exists(self.repomdxml):
             raise MDError, '%s not found' % self.repomdxml
@@ -97,8 +99,8 @@ class RepoMetadata:
             mdname = 'updateinfo.xml'
         elif isinstance(metadata, str):
             if os.path.exists(metadata):
-                if metadata.endswith('.gz'):
-                    oldmd = GzipFile(filename=metadata, mode='rb')
+                if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'):
+                    oldmd = compressOpen(metadata, mode='rb')
                 else:
                     oldmd = file(metadata, 'r')
                 md = oldmd.read()
@@ -109,13 +111,19 @@ class RepoMetadata:
         else:
             raise MDError, 'invalid metadata type'
 
+        do_compress = False
         ## Compress the metadata and move it into the repodata
-        if not mdname.endswith('.gz'):
-            mdname += '.gz'
+        if self.compress or not mdname.split('.')[-1] in ('gz', 'bz2', 'xz'):
+            do_compress = True
+            mdname += '.' + self.compress_type
         mdtype = self._get_mdtype(mdname, mdtype)
 
         destmd = os.path.join(self.repodir, mdname)
-        newmd = GzipFile(filename=destmd, mode='wb')
+        if do_compress:
+            newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type)
+        else:
+            newmd = open(destmd, 'wb')
+            
         newmd.write(md)
         newmd.close()
         print "Wrote:", destmd
@@ -166,6 +174,10 @@ def main(args):
                       help="specific datatype of the metadata, will be derived from the filename if not specified")
     parser.add_option("--remove", action="store_true",
                       help="remove specified file from repodata")
+    parser.add_option("--compress", action="store_true", default=False,
+                      help="compress the new repodata before adding it to the repo")
+    parser.add_option("--compress-type", dest='compress_type', default='xz',
+                      help="compression format to use")
     parser.usage = "modifyrepo [options] [--remove] <input_metadata> <output repodata>"
     
     (opts, argsleft) = parser.parse_args(args)
@@ -180,6 +192,10 @@ def main(args):
         print "Could not access repository: %s" % str(e)
         return 1
 
+
+    repomd.compress = opts.compress
+    repomd.compress_type = opts.compress_type
+
     # remove
     if opts.remove:
         try:
@@ -195,6 +211,7 @@ def main(args):
     except MDError, e:
         print "Could not add metadata from file %s: %s" % (metadata, str(e))
         return 1
+    
 
 if __name__ == '__main__':
     ret = main(sys.argv[1:])


More information about the Rpm-metadata mailing list