[Rpm-metadata] 2 commits - createrepo/deltarpms.py createrepo/__init__.py genpkgmetadata.py

skvidal at osuosl.org skvidal at osuosl.org
Thu Jan 29 05:58:26 UTC 2009


 createrepo/__init__.py  |  130 ++++++++++++++++++++++++++++++++++++++++++--
 createrepo/deltarpms.py |  140 ++++++++++++++++++++++++++++++++++++++++++++++++
 genpkgmetadata.py       |    5 +
 3 files changed, 269 insertions(+), 6 deletions(-)

New commits:
commit 36f5e7664cc12e39fad050f7d5fba513ec27a7a1
Merge: 5a54792... e1a9ec9...
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Thu Jan 29 00:58:00 2009 -0500

    Merge branch 'master' of ssh://createrepo.baseurl.org/srv/projects/createrepo/git/createrepo
    
    * 'master' of ssh://createrepo.baseurl.org/srv/projects/createrepo/git/createrepo:
      Add missing documentation on --checksum and --profile

commit 5a5479258a496e6a60cde1bafdd1fa8d3c709a4a
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Thu Jan 29 00:55:57 2009 -0500

    --deltas, enable the creation and metadata-creation for presto/deltarpms

diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index b3a91cc..2b3acac 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -25,7 +25,7 @@ from  bz2 import BZ2File
 from urlgrabber import grabber
 import tempfile
 
-from yum import misc, Errors
+from yum import misc, Errors, to_unicode
 from yum.sqlutils import executeSQL
 from yum.packageSack import MetaSack
 from yum.packages import YumAvailablePackage
@@ -44,6 +44,7 @@ except ImportError:
     pass
 
 from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, checksum_and_rename
+import deltarpms
 
 __version__ = '0.9.6'
 
@@ -64,7 +65,13 @@ class MetaDataConfig(object):
         self.checkts = False
         self.split = False        
         self.update = False
-        self.update_md_path = None
+        self.deltas = False # do the deltarpm thing
+        self.deltadir = None # where to put the .drpms - defaults to 'drpms' inside 'repodata'
+        self.delta_relative = 'drpms/'
+        self.oldpackage_paths = [] # where to look for the old packages - 
+        self.deltafile = 'prestodelta.xml.gz'
+        self.num_deltas = 1 # number of older versions to delta (max)
+        self.update_md_path = None 
         self.skip_stat = False
         self.database = False
         self.outputdir = None
@@ -151,7 +158,6 @@ class MetaDataGenerator:
         if not self.conf.outputdir:
             self.conf.outputdir = os.path.join(self.conf.basedir, self.conf.relative_dir)
 
-
     def _test_setup_dirs(self):
         # start the sanity/stupidity checks
         for mydir in self.conf.directories:
@@ -180,13 +186,23 @@ class MetaDataGenerator:
         if not checkAndMakeDir(temp_final):
             raise MDError, _('Cannot create/verify %s') % temp_final
 
+        if self.conf.deltas:
+            temp_delta = os.path.join(self.conf.outputdir, self.conf.delta_relative)
+        if not checkAndMakeDir(temp_delta):
+            raise MDError, _('Cannot create/verify %s') % temp_delta
+        self.conf.deltadir = temp_delta
+
         if os.path.exists(os.path.join(self.conf.outputdir, self.conf.olddir)):
             raise MDError, _('Old data directory exists, please remove: %s') % self.conf.olddir
 
         # make sure we can write to where we want to write to:
         # and pickup the mdtimestamps while we're at it
-        for direc in ['tempdir', 'finaldir']:
-            filepath = os.path.join(self.conf.outputdir, direc)
+        direcs = ['tempdir' , 'finaldir']
+        if self.conf.deltas:
+            direcs.append('deltadir')
+
+        for direc in direcs:
+            filepath = os.path.join(self.conf.outputdir, getattr(self.conf, direc))
             if os.path.exists(filepath):
                 if not os.access(filepath, os.W_OK):
                     raise MDError, _('error in must be able to write to metadata dir:\n  -> %s') % filepath
@@ -348,6 +364,8 @@ class MetaDataGenerator:
             self.primaryfile = self._setupPrimary()
             self.flfile = self._setupFilelists()
             self.otherfile = self._setupOther()
+        if self.conf.deltas:
+            self.deltafile = self._setupDelta()
 
     def _setupPrimary(self):
         # setup the primary metadata file
@@ -375,6 +393,14 @@ class MetaDataGenerator:
         fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s">' %
                        self.pkgcount)
         return fo
+
+    def _setupDelta(self):
+        # setup the presto delta metadata file
+        deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, self.conf.deltafile)
+        fo = _gzipOpen(deltafilepath, 'w')
+        fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        fo.write('<prestodelta>\n')
+        return fo
         
 
     def read_in_package(self, rpmfile, pkgpath=None, reldir=None):
@@ -452,7 +478,8 @@ class MetaDataGenerator:
                 nodes = self.oldData.getNodes(old_pkg)
                 if nodes is not None:
                     recycled = True
-
+                
+                # FIXME also open up the delta file
             
             # otherwise do it individually
             if not recycled:
@@ -470,6 +497,11 @@ class MetaDataGenerator:
                         # need to say something here
                         self.callback.errorlog("\nError %s: %s\n" % (pkg, e))
                         continue
+                    # we can use deltas:
+                    presto_md = self._do_delta_rpm_package(po)
+                    if presto_md:
+                        self.deltafile.write(presto_md)
+
                 else:
                     po = pkg
 
@@ -500,6 +532,9 @@ class MetaDataGenerator:
                     outfile.write('\n')
 
                 self.oldData.freeNodes(pkg)
+                #FIXME - if we're in update and we have deltas enabled
+                #        check the presto data for this pkg and write its info back out
+                #       to our deltafile
 
             if not self.conf.quiet:
                 if self.conf.verbose:
@@ -540,6 +575,87 @@ class MetaDataGenerator:
             self.otherfile.write('\n</otherdata>')
             self.otherfile.close()
 
+        if not self.conf.quiet:
+            self.callback.log(_('Saving delta metadata'))
+        self.deltafile.write('\n</prestodelta>')
+        self.deltafile.close()
+
+    def _do_delta_rpm_package(self, pkg):
+        """makes the drpms, if possible, for this package object.
+           returns the presto/delta xml metadata as a string
+        """
+
+        results = u""
+        thisdeltastart = u"""  <newpackage name="%s" epoch="%s" version="%s" release="%s" arch="%s">\n""" % (pkg.name,
+                                     pkg.epoch, pkg.ver, pkg.release, pkg.arch)
+        thisdeltaend = u"""  </newpackage>\n"""
+
+        # generate a list of all the potential 'old rpms'
+        opl = self._get_old_package_list()
+        # get list of potential candidates which are likely to match
+        pot_cand = []
+        for fn in opl:
+            if os.path.basename(fn).startswith(pkg.name):
+                pot_cand.append(fn)
+        
+        candidates = []
+        for fn in pot_cand:
+            try:
+                thispo = yumbased.CreateRepoPackage(self.ts, fn)
+            except Errors.MiscError, e:
+                continue
+            if (thispo.name, thispo.arch) != (pkg.name, pkg.arch):
+                # not the same, doesn't matter
+                continue
+            if thispo == pkg: #exactly the same, doesn't matter
+                continue
+            if thispo.EVR >= pkg.EVR: # greater or equal, doesn't matter
+                continue
+            candidates.append(thispo)
+            candidates.sort()
+            candidates.reverse()
+
+        drpm_results = u""
+        for delta_p in candidates[0:self.conf.num_deltas]:
+            #make drpm of pkg and delta_p
+            drpmfn = deltarpms.create_drpm(delta_p, pkg, self.conf.deltadir)
+
+            if drpmfn:
+                # TODO more sanity check the drpm for size, etc
+                # make xml of drpm
+                try:
+                    drpm_po = yumbased.CreateRepoPackage(self.ts, drpmfn)
+                except Errors.MiscError, e:
+                    os.unlink(drpmfn)
+                    continue
+                rel_drpmfn = drpmfn.replace(self.conf.outputdir, '')
+                if rel_drpmfn[0] == '/':
+                    rel_drpmfn = rel_drpmfn[1:]
+                if not self.conf.quiet:
+                    if self.conf.verbose:
+                        self.callback.log('created drpm from %s to %s: %s' % (
+                            delta_p, pkg, drpmfn))
+
+                drpm = deltarpms.DeltaRPMPackage(drpm_po, self.conf.outputdir, rel_drpmfn)
+                drpm_results += to_unicode(drpm.xml_dump_metadata())
+        
+        if drpm_results:
+            results = thisdeltastart + drpm_results + thisdeltaend
+        
+        return results
+
+    def _get_old_package_list(self):
+        if hasattr(self, '_old_package_list'):
+            return self._old_package_list
+        
+        opl = []
+        for d in self.conf.oldpackage_paths:
+            for f in self.getFileList(d, 'rpm'):
+                opl.append(d + '/' + f)
+                    
+        self._old_package_list = opl
+        return self._old_package_list
+
     def addArbitraryMetadata(self, mdfile, mdtype, xml_node, compress=True, 
                                              compress_type='gzip', attribs={}):
         """add random metadata to the repodata dir and repomd.xml
@@ -638,6 +754,8 @@ class MetaDataGenerator:
             db_workfiles = []
             repoid='garbageid'
         
+        if self.conf.deltas:
+            workfiles.append((self.conf.deltafile, 'deltainfo'))
         if self.conf.database:
             if not self.conf.quiet: self.callback.log('Generating sqlite DBs')
             try:
diff --git a/createrepo/deltarpms.py b/createrepo/deltarpms.py
new file mode 100644
index 0000000..4b4acaf
--- /dev/null
+++ b/createrepo/deltarpms.py
@@ -0,0 +1,140 @@
+#!/usr/bin/python -tt
+# util functions for deltarpms
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# copyright 2009 - Red Hat
+
+import os.path
+import commands
+from yum import misc
+import gzip
+
+class DeltaRPMPackage:
+    """each drpm is one object, you pass it a drpm file
+       it opens the file, and pulls the information out in bite-sized chunks :)
+    """
+
+    mode_cache = {}
+
+    def __init__(self, pkgobj, basedir, filename):
+        try:
+            stats = os.stat(os.path.join(basedir, filename))
+            self.size = stats[6]
+            self.mtime = stats[8]
+            del stats
+        except OSError, e:
+            raise MDError, "Error Stat'ing file %s %s" % (basedir, filename)
+        self.csum_type = 'sha256'
+        self.relativepath = filename
+        self.po  = pkgobj
+
+        fd = os.open(self.po.localpath, os.O_RDONLY)
+        os.lseek(fd, 0, 0)
+        fo = os.fdopen(fd, 'rb')
+        self.csum = misc.checksum(self.csum_type, fo)
+        fo.seek(int(self.po.hdrend))
+        self._getOldInfo(fo)
+        del fo
+        del fd
+                    
+    def _stringToNEVR(self, string):
+        i = string.rfind("-", 0, string.rfind("-")-1)
+        name = string[:i]
+        (epoch, ver, rel) = self._stringToVersion(string[i+1:])
+        return (name, epoch, ver, rel)
+        
+    def _getLength(self, in_data):
+        length = 0
+        for val in in_data:
+            length = length * 256
+            length += ord(val)
+        return length
+        
+    def _getOldInfo(self, fo):
+        try:
+            compobj = gzip.GzipFile("", "rb", 9, fo)
+        except:
+            raise zlibError("Data not stored in gzip format")
+            
+        if compobj.read(4)[:3] != "DLT":
+            raise Exception("Not a deltarpm")
+        
+        nevr_length = self._getLength(compobj.read(4))
+        nevr = compobj.read(nevr_length).strip("\x00")
+        seq_length = self._getLength(compobj.read(4))
+        seq = compobj.read(seq_length)
+        hex_seq = ""
+        for char in seq:
+            hex_seq += str("%02x" % ord(char))
+        self.oldnevrstring = nevr
+        self.oldnevr = self._stringToNEVR(nevr)
+        self.sequence = hex_seq
+        compobj.close()
+            
+    def _stringToVersion(self, strng):
+        i = strng.find(':')
+        if i != -1:
+            epoch = strng[:i]
+        else:
+            epoch = '0'
+        j = strng.find('-')
+        if j != -1:
+            if strng[i + 1:j] == '':
+                version = None
+            else:
+                version = strng[i + 1:j]
+            release = strng[j + 1:]
+        else:
+            if strng[i + 1:] == '':
+                version = None
+            else:
+                version = strng[i + 1:]
+            release = None
+        return (epoch, version, release)
+
+    def xml_dump_metadata(self):
+        """returns the <delta> xml element for this drpm as a string: old
+           epoch/version/release, filename, sequence, size and checksum"""
+
+        (oldname, oldepoch, oldver, oldrel) = self.oldnevr
+        sequence = "%s-%s" % (self.oldnevrstring, self.sequence)
+
+        delta_tag = """
+    <delta oldepoch="%s" oldversion="%s" oldrelease="%s">
+      <filename>%s</filename>
+      <sequence>%s</sequence>
+      <size>%s</size>
+      <checksum type="%s">%s</checksum>
+    </delta>\n""" % (oldepoch, oldver, oldrel, self.relativepath, sequence,
+                    self.size, self.csum_type, self.csum)
+        return delta_tag
+
+def create_drpm(old_pkg, new_pkg, destdir):
+    """make a drpm file, if possible. returns None if nothing could
+       be created"""
+    drpmfn = '%s-%s-%s_%s-%s.%s.drpm' % (old_pkg.name, old_pkg.ver,
+                            old_pkg.release, new_pkg.ver, new_pkg.release,
+                            old_pkg.arch)
+    delta_rpm_path  = os.path.join(destdir, drpmfn)
+    delta_command = '/usr/bin/makedeltarpm %s %s %s' % (old_pkg.localpath,
+                                                        new_pkg.localpath,
+                                                        delta_rpm_path)
+    if not os.path.exists(delta_rpm_path):
+        #TODO - check/verify the existing one a bit?
+        (code, out) = commands.getstatusoutput(delta_command)
+        if code:
+            print "Error genDeltaRPM for %s: exitcode was %s - Reported Error: %s" % (old_pkg.name, code, out)
+            return None
+    
+    return delta_rpm_path
diff --git a/genpkgmetadata.py b/genpkgmetadata.py
index ec69cb8..033cce6 100755
--- a/genpkgmetadata.py
+++ b/genpkgmetadata.py
@@ -93,6 +93,11 @@ def parseArgs(args, conf):
                       help="tags for the content in the repository")
     parser.add_option("--revision", default=None,
                       help="user-specified revision for this repository")
+    parser.add_option("--deltas", default=False, action="store_true",
+                      help="create delta rpms and metadata")
+    parser.add_option("--oldpackagedirs", default=[], dest="oldpackage_paths", 
+                      action="append", help="paths to look for older pkgs to delta against")
+
 
     (opts, argsleft) = parser.parse_args(args)
     if len(argsleft) > 1 and not opts.split:


More information about the Rpm-metadata mailing list