[Yum-devel] [PATCH 0/5] Partial delta metadata parsing support

James Bowes jbowes at redhat.com
Tue Jun 19 23:04:27 UTC 2007


On Mon, Jun 18, 2007 at 02:18:08PM +0200, Florian Festi wrote:
> Nice work!

Thanks

> But I missed the code producing the metadata deltas. Can you send a pointer?

I hadn't written anything yet ;)

Attached is dmd-diff; you run it like this:
dmd-diff.py primary oldprimary.xml newprimary.xml > deltaprimary.xml

It requires the lxml Python XML parsing library.

-James
-------------- next part --------------
#!/usr/bin/python

# dmd-diff - Generate deltas between repository metadata
#
# Copyright (C) 2007 James Bowes <jbowes at redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

import sys
from lxml.etree import parse, Element, ElementTree


class MdType(object):
    """Delta generator for one kind of repository metadata document.

    This base implementation identifies a package by the text of its
    ``checksum`` child and reads ``name``/``arch`` from child elements.
    Subclasses override get_pkg_id() and make_pkg_elem() for documents
    that store those values differently.
    """

    def __init__(self, namespace, rootelem):
        """Remember the XML namespace and the root tag of the delta.

        namespace -- last path component of the metadata namespace URI
        rootelem  -- tag name used for the root element of the output
        """
        self.ns = "http://linux.duke.edu/metadata/%s" % namespace
        # Clark-notation prefix ("{uri}") used to build qualified tag names.
        self.sns = "{%s}" % self.ns
        self.root = rootelem

    def get_pkg_id(self, pkg):
        """Return the id of a package element (its checksum text)."""
        return pkg.findtext(self.sns + "checksum")

    def make_hash(self, tree):
        """Return a dict mapping package id to package element.

        tree is any iterable of package elements.  When two elements
        share an id, the later one wins.
        """
        pkgshash = {}
        for pkg in tree:
            pkgshash[self.get_pkg_id(pkg)] = pkg

        return pkgshash

    def make_pkg_elem(self, pkgid, pkg):
        """Build the short ``package`` element that records a removal.

        The element carries name, arch and pkgid attributes plus the
        package's ``version`` child when it has one.
        """
        pkgelem = Element("package")
        pkgelem.set('name', pkg.findtext(self.sns + 'name'))
        pkgelem.set('arch', pkg.findtext(self.sns + 'arch'))
        pkgelem.set('pkgid', pkgid)

        # find() returns None for a package without a version child, and
        # append(None) raises TypeError, so only attach a real element.
        # Compare against None explicitly: an element with no children
        # is falsy.
        version = pkg.find(self.sns + 'version')
        if version is not None:
            # NOTE: with lxml, append() re-parents the node, so it is
            # taken out of the old tree rather than copied.
            pkgelem.append(version)

        return pkgelem

    def diff_trees(self, oldtree, newtree):
        """Write the delta between two parsed metadata trees to stdout.

        Packages whose id appears only in newtree are emitted in full
        under ``additions``; packages whose id appears only in oldtree
        are emitted in short form under ``removals``.  The root element
        gets a ``packages`` attribute holding the total number of both.
        The input trees are modified: emitted nodes are moved out of
        them.
        """
        # list(elem) replaces the deprecated getchildren().
        oldpkgs = list(oldtree.getroot())
        newpkgs = list(newtree.getroot())

        oldpkgshash = self.make_hash(oldpkgs)
        newpkgshash = self.make_hash(newpkgs)

        diff = Element(self.root,
                       nsmap={None: self.ns,
                              "rpm": "http://linux.duke.edu/metadata/rpm"})
        additions = Element("additions")
        diff.append(additions)
        removals = Element("removals")
        diff.append(removals)

        for pkgid, pkg in newpkgshash.items():
            if pkgid not in oldpkgshash:
                additions.append(pkg)

        for pkgid, pkg in oldpkgshash.items():
            if pkgid not in newpkgshash:
                removals.append(self.make_pkg_elem(pkgid, pkg))

        diff.set("packages", str(len(removals) + len(additions)))

        # Strip the metadata namespace from the tags of nodes taken from
        # the input trees, leaving only the local tag name.
        for elem in diff.iter():
            if elem.tag.startswith(self.sns):
                elem.tag = elem.tag.split('}')[-1]

        # The serializer emits bytes.  Where stdout is a text stream,
        # write to its underlying binary buffer instead, flushing first
        # so earlier text output keeps its position.
        sys.stdout.flush()
        out = getattr(sys.stdout, "buffer", sys.stdout)
        ElementTree(diff).write(out)
        out.write(b"\n")


class OtherMdType(MdType):
    """Variant of MdType for documents that keep package identity in
    attributes: the id is the ``pkgid`` attribute, and name and arch are
    read from the ``name`` and ``arch`` attributes of the package element.
    """

    def get_pkg_id(self, pkg):
        """Return the id of a package element (its pkgid attribute)."""
        return pkg.get('pkgid')

    def make_pkg_elem(self, pkgid, pkg):
        """Build the short ``package`` element that records a removal.

        The element carries name, arch and pkgid attributes plus the
        package's ``version`` child when it has one.
        """
        pkgelem = Element("package")
        pkgelem.set('name', pkg.get('name'))
        pkgelem.set('arch', pkg.get('arch'))
        pkgelem.set('pkgid', pkgid)

        # find() returns None for a package without a version child, and
        # append(None) raises TypeError, so only attach a real element.
        # Compare against None explicitly: an element with no children
        # is falsy.
        version = pkg.find(self.sns + 'version')
        if version is not None:
            pkgelem.append(version)

        return pkgelem


# Maps the MDTYPE command-line argument to the object that knows how to
# diff that kind of metadata document.
mdtypeinfo = dict(
    primary=MdType('common', 'metadata'),
    filelists=OtherMdType('filelists', 'filelists'),
    other=OtherMdType('other', 'other'),
)


def usage(progname):
    """Print the command-line synopsis to stderr and exit with status 2.

    The message goes to stderr because stdout carries the generated
    delta and is normally redirected to a file.  The non-zero status
    lets calling scripts detect the failed invocation.

    progname -- name to show as the program in the synopsis
    Raises SystemExit; never returns.
    """
    sys.stderr.write("usage: %s MDTYPE OLDFILE NEWFILE\n" % progname)
    sys.exit(2)

def main(args):
    """Parse the two metadata files and emit their delta.

    args -- full argument vector: program name, metadata type, path of
            the old file, path of the new file.

    Exits through usage() when the argument count is wrong or the
    metadata type is not a key of mdtypeinfo.
    """
    # Fall back to a fixed name so an empty argument list still produces
    # the usage message rather than an IndexError.
    progname = args[0] if args else "dmd-diff"

    # Validate against the registry itself so the accepted types cannot
    # drift from the ones that are actually implemented.
    if len(args) != 4 or args[1] not in mdtypeinfo:
        usage(progname)

    oldtree = parse(args[2])
    newtree = parse(args[3])

    mdtypeinfo[args[1]].diff_trees(oldtree, newtree)

# Run only when executed as a script; the full sys.argv is passed so that
# main() sees the program name at index 0.
if __name__ == "__main__":
    main(sys.argv)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
Url : http://lists.baseurl.org/pipermail/yum-devel/attachments/20070619/fac23835/attachment.pgp 


More information about the Yum-devel mailing list