[Rpm-metadata] bin/Makefile createrepo/__init__.py createrepo/Makefile createrepo/readMetadata.py createrepo.spec createrepo/utils.py createrepo/yumbased.py docs/Makefile dumpMetadata.py genpkgmetadata.py Makefile readMetadata.py

Seth Vidal skvidal at linux.duke.edu
Thu Dec 20 07:21:34 UTC 2007


 Makefile                   |   55 +-
 bin/Makefile               |   20 -
 createrepo.spec            |   10 
 createrepo/Makefile        |   64 +++
 createrepo/__init__.py     |  141 +++++++
 createrepo/readMetadata.py |  198 +++++++++
 createrepo/utils.py        |  101 +++++
 createrepo/yumbased.py     |  383 +++++++++++++++++++
 docs/Makefile              |   12 
 dumpMetadata.py            |  896 ---------------------------------------------
 genpkgmetadata.py          |  189 ++++-----
 readMetadata.py            |  198 ---------
 12 files changed, 1034 insertions(+), 1233 deletions(-)

New commits:
commit 7bf690b4bc2b79a8b12154ee774f80e93f6265ff
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Thu Dec 20 02:18:23 2007 -0500

    Whew: this is the beginning of a big conversion of createrepo to use the yum modules,
    behave more like a modular program and have a proper class structure. It's not done,
    but it's a start.

diff --git a/Makefile b/Makefile
index a5f2ffb..b57acfc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,12 @@
-PACKAGE = createrepo
-VERSION = 0.4.10
+PKGNAME = createrepo
+VERSION=$(shell awk '/Version:/ { print $$2 }' ${PKGNAME}.spec)
+RELEASE=$(shell awk '/Release:/ { print $$2 }' ${PKGNAME}.spec)
+CVSTAG=createrepo-$(subst .,_,$(VERSION)-$(RELEASE))
+PYTHON=python
+SUBDIRS = $(PKGNAME) bin docs
+PYFILES = $(wildcard *.py)
+
+
 SHELL = /bin/sh
 top_srcdir = .
 srcdir = .
@@ -20,9 +27,9 @@ includedir = ${prefix}/include
 oldincludedir = /usr/include
 mandir = ${prefix}/share/man
 
-pkgdatadir = $(datadir)/$(PACKAGE)
-pkglibdir = $(libdir)/$(PACKAGE)
-pkgincludedir = $(includedir)/$(PACKAGE)
+pkgdatadir = $(datadir)/$(PKGNAME)
+pkglibdir = $(libdir)/$(PKGNAME)
+pkgincludedir = $(includedir)/$(PKGNAME)
 top_builddir = 
 
 # all dirs
@@ -37,12 +44,8 @@ INSTALL_DATA    = $(INSTALL) -m 644
 INSTALL_MODULES = $(INSTALL) -m 755 -D 
 RM              = rm -f
 
-SUBDIRS = bin docs
-
 MODULES = $(srcdir)/genpkgmetadata.py \
-		  $(srcdir)/dumpMetadata.py \
-		  $(srcdir)/readMetadata.py \
-		  $(srcdir)/modifyrepo.py
+	$(srcdir)/modifyrepo.py
 
 .SUFFIXES: .py .pyc
 .py.pyc: 
@@ -51,7 +54,7 @@ MODULES = $(srcdir)/genpkgmetadata.py \
 
 all: $(MODULES)
 	for subdir in $(SUBDIRS) ; do \
-	  $(MAKE) -C $$subdir VERSION=$(VERSION) PACKAGE=$(PACKAGE) DESTDIR=$(DESTDIR); \
+	  $(MAKE) -C $$subdir VERSION=$(VERSION) PKGNAME=$(PKGNAME) DESTDIR=$(DESTDIR); \
 	done
 
 check: 
@@ -60,7 +63,7 @@ check:
 install: all installdirs
 	$(INSTALL_MODULES) $(srcdir)/$(MODULES) $(DESTDIR)$(pkgdatadir)
 	for subdir in $(SUBDIRS) ; do \
-	  $(MAKE) -C $$subdir install VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+	  $(MAKE) -C $$subdir install VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
 	done
 
 installdirs:
@@ -74,13 +77,13 @@ uninstall:
 	  $(RM) $(pkgdatadir)/$$module ; \
 	done
 	for subdir in $(SUBDIRS) ; do \
-	  $(MAKE) -C $$subdir uninstall VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+	  $(MAKE) -C $$subdir uninstall VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
 	done
 
 clean:
 	$(RM)  *.pyc *.pyo
 	for subdir in $(SUBDIRS) ; do \
-	  $(MAKE) -C $$subdir clean VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+	  $(MAKE) -C $$subdir clean VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
 	done
 
 distclean: clean
@@ -88,7 +91,7 @@ distclean: clean
 	$(RM) core
 	$(RM) *~
 	for subdir in $(SUBDIRS) ; do \
-	  $(MAKE) -C $$subdir distclean VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+	  $(MAKE) -C $$subdir distclean VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
 	done
 
 mostlyclean:
@@ -102,12 +105,12 @@ maintainer-clean:
 
 dist:
 	olddir=`pwd`; \
-	distdir=$(PACKAGE)-$(VERSION); \
+	distdir=$(PKGNAME)-$(VERSION); \
 	$(RM) -r .disttmp; \
 	$(INSTALL_DIR) .disttmp; \
 	$(INSTALL_DIR) .disttmp/$$distdir; \
 	$(MAKE) distfiles
-	distdir=$(PACKAGE)-$(VERSION); \
+	distdir=$(PKGNAME)-$(VERSION); \
 	cd .disttmp; \
 	tar -cvz > ../$$distdir.tar.gz $$distdir; \
 	cd $$olddir
@@ -115,23 +118,23 @@ dist:
 
 daily:
 	olddir=`pwd`; \
-	distdir=$(PACKAGE); \
+	distdir=$(PKGNAME); \
 	$(RM) -r .disttmp; \
 	$(INSTALL_DIR) .disttmp; \
 	$(INSTALL_DIR) .disttmp/$$distdir; \
 	$(MAKE) dailyfiles
 	day=`/bin/date +%Y%m%d`; \
-	distdir=$(PACKAGE); \
+	distdir=$(PKGNAME); \
 	tarname=$$distdir-$$day ;\
 	cd .disttmp; \
-	perl -pi -e "s/\#DATE\#/$$day/g" $$distdir/$(PACKAGE)-daily.spec; \
+	perl -pi -e "s/\#DATE\#/$$day/g" $$distdir/$(PKGNAME)-daily.spec; \
 	echo $$day; \
 	tar -cvz > ../$$tarname.tar.gz $$distdir; \
 	cd $$olddir
 	$(RM) -rf .disttmp
 
 dailyfiles:
-	distdir=$(PACKAGE); \
+	distdir=$(PKGNAME); \
 	cp \
 	$(srcdir)/*.py \
 	$(srcdir)/Makefile \
@@ -139,14 +142,14 @@ dailyfiles:
 	$(srcdir)/COPYING \	
 	$(srcdir)/COPYING.lib \		
 	$(srcdir)/README \
-	$(srcdir)/$(PACKAGE).spec \
+	$(srcdir)/$(PKGNAME).spec \
 	$(top_srcdir)/.disttmp/$$distdir
 	for subdir in $(SUBDIRS) ; do \
-	  $(MAKE) -C $$subdir dailyfiles VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+	  $(MAKE) -C $$subdir dailyfiles VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
 	done
 
 distfiles:
-	distdir=$(PACKAGE)-$(VERSION); \
+	distdir=$(PKGNAME)-$(VERSION); \
 	cp \
 	$(srcdir)/*.py \
 	$(srcdir)/Makefile \
@@ -154,10 +157,10 @@ distfiles:
 	$(srcdir)/COPYING \
 	$(srcdir)/COPYING.lib \
 	$(srcdir)/README \
-	$(srcdir)/$(PACKAGE).spec \
+	$(srcdir)/$(PKGNAME).spec \
 	$(top_srcdir)/.disttmp/$$distdir
 	for subdir in $(SUBDIRS) ; do \
-	  $(MAKE) -C $$subdir distfiles VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+	  $(MAKE) -C $$subdir distfiles VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
 	done
 
 archive: dist
diff --git a/bin/Makefile b/bin/Makefile
index 52c1f50..4497230 100644
--- a/bin/Makefile
+++ b/bin/Makefile
@@ -18,9 +18,9 @@ includedir = ${prefix}/include
 oldincludedir = /usr/include
 mandir = ${prefix}/man
 
-pkgdatadir = $(datadir)/$(PACKAGE)
-pkglibdir = $(libdir)/$(PACKAGE)
-pkgincludedir = $(includedir)/$(PACKAGE)
+pkgdatadir = $(datadir)/$(PKGNAME)
+pkglibdir = $(libdir)/$(PKGNAME)
+pkgincludedir = $(includedir)/$(PKGNAME)
 top_builddir = ../
 
 # all dirs
@@ -36,16 +36,16 @@ INSTALL_MODULES = $(INSTALL) -m 755 -D
 RM 		= rm -f
 
 
-all: $(srcdir)/$(PACKAGE)
+all: $(srcdir)/$(PKGNAME)
 
 
 install: all installdirs
-	$(INSTALL_BIN) $(srcdir)/$(PACKAGE) $(DESTDIR)$(bindir)/$(PACKAGE)
+	$(INSTALL_BIN) $(srcdir)/$(PKGNAME) $(DESTDIR)$(bindir)/$(PKGNAME)
 	$(INSTALL_BIN) $(srcdir)/modifyrepo $(DESTDIR)$(bindir)/modifyrepo
 
 
 uninstall:
-	$(RM) $(bindir)/$(PACKAGE)
+	$(RM) $(bindir)/$(PKGNAME)
 
 
 
@@ -67,19 +67,19 @@ maintainer-clean:
 
 
 distfiles:
-	distdir=$(PACKAGE)-$(VERSION); \
+	distdir=$(PKGNAME)-$(VERSION); \
 	mkdir $(top_srcdir)/.disttmp/$$distdir/bin;\
 	cp \
-	$(srcdir)/$(PACKAGE) \
+	$(srcdir)/$(PKGNAME) \
 	$(srcdir)/Makefile \
 	$(srcdir)/modifyrepo \
 	$(top_srcdir)/.disttmp/$$distdir/bin
 
 dailyfiles:
-	distdir=$(PACKAGE); \
+	distdir=$(PKGNAME); \
 	mkdir $(top_srcdir)/.disttmp/$$distdir/bin;\
 	cp \
-	$(srcdir)/$(PACKAGE) \
+	$(srcdir)/$(PKGNAME) \
 	$(srcdir)/Makefile \
 	$(srcdir)/modifyrepo \
 	$(top_srcdir)/.disttmp/$$distdir/bin
diff --git a/createrepo.spec b/createrepo.spec
index 3c5cc75..969ad95 100644
--- a/createrepo.spec
+++ b/createrepo.spec
@@ -1,6 +1,8 @@
+%{!?python_sitelib: %define python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; print get_python_lib()")}
+
 Summary: Creates a common metadata repository
 Name: createrepo
-Version: 0.4.10
+Version: 0.9
 Release: 1
 License: GPL
 Group: System Environment/Base
@@ -9,7 +11,7 @@ URL: http://linux.duke.edu/metadata/
 BuildRoot: %{_tmppath}/%{name}-%{version}root
 BuildArchitectures: noarch
 Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python
-Requires: yum-metadata-parser
+Requires: yum-metadata-parser, yum >= 3.2.7
 
 %description
 This utility will generate a common metadata repository from a directory of
@@ -35,8 +37,12 @@ rpm packages
 %{_bindir}/modifyrepo
 %{_mandir}/man8/createrepo.8*
 %{_mandir}/man1/modifyrepo.1*
+%{python_sitelib}/createrepo
 
 %changelog
+* Thu Dec 20 2007 Seth Vidal <skvidal at fedoraproject.org>
+- beginning of the new version
+
 * Mon Dec  3 2007 Luke Macken <lmacken at redhat.com>
 - Add man page for modifyrepo
 
diff --git a/createrepo/Makefile b/createrepo/Makefile
new file mode 100644
index 0000000..d3d3a34
--- /dev/null
+++ b/createrepo/Makefile
@@ -0,0 +1,64 @@
+PYTHON=python
+PACKAGE = $(shell basename `pwd`)
+PYFILES = $(wildcard *.py)
+PYVER := $(shell $(PYTHON) -c 'import sys; print "%.3s" %(sys.version)')
+PYSYSDIR := $(shell $(PYTHON) -c 'import sys; print sys.prefix')
+PYLIBDIR = $(PYSYSDIR)/lib/python$(PYVER)
+PKGDIR = $(PYLIBDIR)/site-packages/$(PKGNAME)
+
+SHELL = /bin/sh
+top_srcdir = ..
+srcdir = ../$(PKGNAME)
+prefix = /usr
+exec_prefix = ${prefix}
+
+bindir = ${exec_prefix}/bin
+sbindir = ${exec_prefix}/sbin
+libexecdir = ${exec_prefix}/libexec
+datadir = ${prefix}/share
+sysconfdir = ${prefix}/etc
+sharedstatedir = ${prefix}/com
+localstatedir = ${prefix}/var
+libdir = ${exec_prefix}/lib
+infodir = ${prefix}/info
+docdir = 
+includedir = ${prefix}/include
+oldincludedir = /usr/include
+mandir = ${datadir}/man
+
+pkgdatadir = $(datadir)/$(PKGNAME)
+pkglibdir = $(libdir)/$(PKGNAME)
+pkgincludedir = $(includedir)/$(PKGNAME)
+top_builddir = ../
+
+
+all: 
+	echo "Nothing to do"
+
+clean:
+	rm -f *.pyc *.pyo *~
+
+install:
+	mkdir -p $(DESTDIR)/$(PKGDIR)
+	for p in $(PYFILES) ; do \
+		install -m 644 $$p $(DESTDIR)/$(PKGDIR)/$$p; \
+	done
+	$(PYTHON) -c "import compileall; compileall.compile_dir('$(DESTDIR)/$(PKGDIR)', 1, '$(PKGDIR)', 1)"
+
+distfiles:
+	distdir=$(PKGNAME)-$(VERSION); \
+	mkdir $(top_srcdir)/.disttmp/$$distdir/$(PKGNAME);\
+	cp \
+	$(srcdir)/$(PYFILES) \
+	$(srcdir)/Makefile \
+	$(top_srcdir)/.disttmp/$$distdir/$(PKGNAME)
+
+dailyfiles:
+	distdir=$(PKGNAME); \
+	mkdir $(top_srcdir)/.disttmp/$$distdir/$(PKGNAME);\
+	cp \
+	$(srcdir)/$(PYFILES) \	
+	$(srcdir)/__init__.py \
+	$(srcdir)/Makefile \
+	$(top_srcdir)/.disttmp/$$distdir/$(PKGNAME)
+
diff --git a/createrepo/__init__.py b/createrepo/__init__.py
new file mode 100644
index 0000000..ac4451d
--- /dev/null
+++ b/createrepo/__init__.py
@@ -0,0 +1,141 @@
+import exceptions
+import os
+import sys
+import libxml2
+import hashlib
+from yum import misc
+
+try:
+    import sqlitecachec
+except ImportError:
+    pass
+
+
+from utils import _gzipOpen, bzipFile
+
+
+__version__ = '0.9'
+
+
+class MDError(exceptions.Exception):
+    def __init__(self, value=None):
+        exceptions.Exception.__init__(self)
+        self.value = value
+    
+    def __str__(self):
+        return self.value
+
+def repoXML(node, cmds):
+    """generate the repomd.xml file that stores the info on the other files"""
+    sumtype = cmds['sumtype']
+    workfiles = [(cmds['otherfile'], 'other',),
+                 (cmds['filelistsfile'], 'filelists'),
+                 (cmds['primaryfile'], 'primary')]
+    repoid='garbageid'
+    
+    repopath = os.path.join(cmds['outputdir'], cmds['tempdir'])
+    
+    if cmds['database']:
+        try:
+            dbversion = str(sqlitecachec.DBVERSION)
+        except AttributeError:
+            dbversion = '9'
+        rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)
+
+    for (file, ftype) in workfiles:
+        complete_path = os.path.join(repopath, file)
+        
+        zfo = _gzipOpen(complete_path)
+        uncsum = misc.checksum(sumtype, zfo)
+        zfo.close()
+        csum = misc.checksum(sumtype, complete_path)
+        timestamp = os.stat(complete_path)[8]
+        
+        db_csums = {}
+        db_compressed_sums = {}
+        
+        if cmds['database']:
+            if ftype == 'primary':
+                rp.getPrimary(complete_path, csum)
+                            
+            elif ftype == 'filelists':
+                rp.getFilelists(complete_path, csum)
+                
+            elif ftype == 'other':
+                rp.getOtherdata(complete_path, csum)
+            
+
+            tmp_result_name = '%s.xml.gz.sqlite' % ftype
+            tmp_result_path = os.path.join(repopath, tmp_result_name)
+            good_name = '%s.sqlite' % ftype
+            resultpath = os.path.join(repopath, good_name)
+            
+            # rename from silly name to not silly name
+            os.rename(tmp_result_path, resultpath)
+            compressed_name = '%s.bz2' % good_name
+            result_compressed = os.path.join(repopath, compressed_name)
+            db_csums[ftype] = misc.checksum(sumtype, resultpath)
+            
+            # compress the files
+            bzipFile(resultpath, result_compressed)
+            # csum the compressed file
+            db_compressed_sums[ftype] = misc.checksum(sumtype, result_compressed)
+            # remove the uncompressed file
+            os.unlink(resultpath)
+
+            # timestamp the compressed file
+            db_timestamp = os.stat(result_compressed)[8]
+            
+            # add this data as a section to the repomdxml
+            db_data_type = '%s_db' % ftype
+            data = node.newChild(None, 'data', None)
+            data.newProp('type', db_data_type)
+            location = data.newChild(None, 'location', None)
+            if cmds['baseurl'] is not None:
+                location.newProp('xml:base', cmds['baseurl'])
+            
+            location.newProp('href', os.path.join(cmds['finaldir'], compressed_name))
+            checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
+            checksum.newProp('type', sumtype)
+            db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
+            unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
+            unchecksum.newProp('type', sumtype)
+            database_version = data.newChild(None, 'database_version', dbversion)
+            
+            
+        data = node.newChild(None, 'data', None)
+        data.newProp('type', ftype)
+        location = data.newChild(None, 'location', None)
+        if cmds['baseurl'] is not None:
+            location.newProp('xml:base', cmds['baseurl'])
+        location.newProp('href', os.path.join(cmds['finaldir'], file))
+        checksum = data.newChild(None, 'checksum', csum)
+        checksum.newProp('type', sumtype)
+        timestamp = data.newChild(None, 'timestamp', str(timestamp))
+        unchecksum = data.newChild(None, 'open-checksum', uncsum)
+        unchecksum.newProp('type', sumtype)
+    
+    # if we've got a group file then checksum it once and be done
+    if cmds['groupfile'] is not None:
+        grpfile = cmds['groupfile']
+        timestamp = os.stat(grpfile)[8]
+        sfile = os.path.basename(grpfile)
+        fo = open(grpfile, 'r')
+        output = open(os.path.join(cmds['outputdir'], cmds['tempdir'], sfile), 'w')
+        output.write(fo.read())
+        output.close()
+        fo.seek(0)
+        csum = misc.checksum(sumtype, fo)
+        fo.close()
+
+        data = node.newChild(None, 'data', None)
+        data.newProp('type', 'group')
+        location = data.newChild(None, 'location', None)
+        if cmds['baseurl'] is not None:
+            location.newProp('xml:base', cmds['baseurl'])
+        location.newProp('href', os.path.join(cmds['finaldir'], sfile))
+        checksum = data.newChild(None, 'checksum', csum)
+        checksum.newProp('type', sumtype)
+        timestamp = data.newChild(None, 'timestamp', str(timestamp))
+
+
diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
new file mode 100644
index 0000000..0d9dacf
--- /dev/null
+++ b/createrepo/readMetadata.py
@@ -0,0 +1,198 @@
+#!/usr/bin/python -t
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2006 Red Hat
+
+import os
+import sys
+import libxml2
+import stat
+
+def errorprint(stuff):
+    print >> sys.stderr, stuff
+
+def _(args):
+    """Stub function for translation"""
+    return args
+
+class MetadataIndex(object):
+
+    def __init__(self, outputdir, basefile, filelistfile, otherfile, opts=None):
+        if opts is None:
+            opts = {}
+        self.opts = opts
+        self.outputdir = outputdir
+        self.files = {'base' : basefile,
+                      'filelist' : filelistfile,
+                      'other' : otherfile}
+        self.scan()
+
+    def scan(self):
+        """Read in and index old repo data"""
+        self.basenodes = {}
+        self.filesnodes = {}
+        self.othernodes = {}
+        self.pkg_ids = {}
+        if self.opts.get('verbose'):
+            print _("Scanning old repo data")
+        for file in self.files.values():
+            if not os.path.exists(file):
+                #cannot scan
+                errorprint(_("Previous repo file missing: %s") % file)
+                return
+        root = libxml2.parseFile(self.files['base']).getRootElement()
+        self._scanPackageNodes(root, self._handleBase)
+        if self.opts.get('verbose'):
+            print _("Indexed %i base nodes" % len(self.basenodes))
+        root = libxml2.parseFile(self.files['filelist']).getRootElement()
+        self._scanPackageNodes(root, self._handleFiles)
+        if self.opts.get('verbose'):
+            print _("Indexed %i filelist nodes" % len(self.filesnodes))
+        root = libxml2.parseFile(self.files['other']).getRootElement()
+        self._scanPackageNodes(root, self._handleOther)
+        if self.opts.get('verbose'):
+            print _("Indexed %i other nodes" % len(self.othernodes))
+        #reverse index pkg ids to track references
+        self.pkgrefs = {}
+        for relpath, pkgid in self.pkg_ids.iteritems():
+            self.pkgrefs.setdefault(pkgid,[]).append(relpath)
+
+    def _scanPackageNodes(self, root, handler):
+        node = root.children
+        while node is not None:
+            if node.type != "element":
+                node = node.next
+                continue
+            if node.name == "package":
+                handler(node)
+            node = node.next
+
+    def _handleBase(self, node):
+        top = node
+        node = node.children
+        pkgid = None
+        mtime = None
+        size = None
+        relpath = None
+        while node is not None:
+            if node.type != "element":
+                node = node.next
+                continue
+            if node.name == "checksum":
+                pkgid = node.content
+            elif node.name == "time":
+                mtime = int(node.prop('file'))
+            elif node.name == "size":
+                size = int(node.prop('package'))
+            elif node.name == "location":
+                relpath = node.prop('href')
+            node = node.next
+        if relpath is None:
+            print _("Incomplete data for node")
+            return
+        if pkgid is None:
+            print _("pkgid missing for %s") % relpath
+            return
+        if mtime is None:
+            print _("mtime missing for %s") % relpath
+            return
+        if size is None:
+            print _("size missing for %s") % relpath
+            return
+        filepath = os.path.join(self.opts['pkgdir'], relpath)
+        try:
+            st = os.stat(filepath)
+        except OSError:
+            #file missing -- ignore
+            return
+        if not stat.S_ISREG(st.st_mode):
+            #ignore non files
+            return
+        #check size and mtime
+        if st.st_size != size:
+            if self.opts.get('verbose'):
+                print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
+            return
+        if st.st_mtime != mtime:
+            if self.opts.get('verbose'):
+                print _("Modification time changed for %s") % filepath
+            return
+        #otherwise we index
+        self.basenodes[relpath] = top
+        self.pkg_ids[relpath] = pkgid
+
+    def _handleFiles(self, node):
+        pkgid = node.prop('pkgid')
+        if pkgid:
+            self.filesnodes[pkgid] = node
+
+    def _handleOther(self, node):
+        pkgid = node.prop('pkgid')
+        if pkgid:
+            self.othernodes[pkgid] = node
+
+    def getNodes(self, relpath):
+        """Return base, filelist, and other nodes for file, if they exist
+
+        Returns a tuple of nodes, or None if not found
+        """
+        bnode = self.basenodes.get(relpath,None)
+        if bnode is None:
+            return None
+        pkgid = self.pkg_ids.get(relpath,None)
+        if pkgid is None:
+            print _("No pkgid found for: %s") % relpath
+            return None
+        fnode = self.filesnodes.get(pkgid,None)
+        if fnode is None:
+            return None
+        onode = self.othernodes.get(pkgid,None)
+        if onode is None:
+            return None
+        return bnode, fnode, onode
+
+    def freeNodes(self,relpath):
+        #causing problems
+        """Free up nodes corresponding to file, if possible"""
+        bnode = self.basenodes.get(relpath,None)
+        if bnode is None:
+            print "Missing node for %s" % relpath
+            return
+        bnode.unlinkNode()
+        bnode.freeNode()
+        del self.basenodes[relpath]
+        pkgid = self.pkg_ids.get(relpath,None)
+        if pkgid is None:
+            print _("No pkgid found for: %s") % relpath
+            return None
+        del self.pkg_ids[relpath]
+        dups = self.pkgrefs.get(pkgid)
+        dups.remove(relpath)
+        if len(dups):
+            #still referenced
+            return
+        del self.pkgrefs[pkgid]
+        for nodes in self.filesnodes, self.othernodes:
+            node = nodes.get(pkgid)
+            if node is not None:
+                node.unlinkNode()
+                node.freeNode()
+                del nodes[pkgid]
+
+
+if __name__ == "__main__":
+    #test code - attempts to read a repo in working directory
+    idx = MetadataIndex(".", "repodata/primary.xml.gz", "repodata/filelists.xml.gz",
+                        "repodata/other.xml.gz", {'verbose':1})
diff --git a/createrepo/utils.py b/createrepo/utils.py
new file mode 100644
index 0000000..bb3939c
--- /dev/null
+++ b/createrepo/utils.py
@@ -0,0 +1,101 @@
+#!/usr/bin/python
+# util functions for createrepo
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+
+import os
+import sys
+import bz2
+import gzip
+from gzip import write32u, FNAME
+
+def errorprint(stuff):
+    print >> sys.stderr, stuff
+
+def _(args):
+    """Stub function for translation"""
+    return args
+
+
+class GzipFile(gzip.GzipFile):
+    def _write_gzip_header(self):
+        self.fileobj.write('\037\213')             # magic header
+        self.fileobj.write('\010')                 # compression method
+        fname = self.filename[:-3]
+        flags = 0
+        if fname:
+            flags = FNAME
+        self.fileobj.write(chr(flags))
+        write32u(self.fileobj, long(0))
+        self.fileobj.write('\002')
+        self.fileobj.write('\377')
+        if fname:
+            self.fileobj.write(fname + '\000')
+
+
+def _gzipOpen(filename, mode="rb", compresslevel=9):
+    return GzipFile(filename, mode, compresslevel)
+    
+def bzipFile(source, dest):
+    
+    s_fn = open(source, 'rb')
+    destination = bz2.BZ2File(dest, 'w', compresslevel=9)
+
+    while True:
+        data = s_fn.read(1024000)
+        
+        if not data: break
+        destination.write(data)
+
+    destination.close()
+    s_fn.close()
+    
+
+def returnFD(filename):
+    try:
+        fdno = os.open(filename, os.O_RDONLY)
+    except OSError:
+        raise MDError, "Error opening file"
+    return fdno
+
+def utf8String(string):
+    """hands back a unicoded string"""
+    if string is None:
+        return ''
+    elif isinstance(string, unicode):    
+        return string
+    try:
+        x = unicode(string, 'ascii')
+        return string
+    except UnicodeError:
+        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
+        for enc in encodings:
+            try:
+                x = unicode(string, enc)
+            except UnicodeError:
+                pass
+            else:
+                if x.encode(enc) == string:
+                    return x.encode('utf-8')
+    newstring = ''
+    for char in string:
+        if ord(char) > 127:
+            newstring = newstring + '?'
+        else:
+            newstring = newstring + char
+    return newstring
+
+
diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py
new file mode 100644
index 0000000..ea2b9aa
--- /dev/null
+++ b/createrepo/yumbased.py
@@ -0,0 +1,383 @@
+#!/usr/bin/python -tt
+
+import os
+import sys
+import struct
+import rpm
+import types
+import re
+import xml.sax.saxutils
+
+from yum.packages import YumLocalPackage
+from yum.Errors import *
+from yum import misc
+from rpmUtils.transaction import initReadOnlyTransaction
+from rpmUtils.miscutils import flagToString, stringToVersion
+
+fileglobs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
+file_re = []
+for glob in fileglobs:
+    file_re.append(re.compile(glob))        
+
+dirglobs = ['.*bin\/.*', '^\/etc\/.*']
+dir_re = []
+for glob in dirglobs:
+    dir_re.append(re.compile(glob))        
+
+
class CreateRepoPackage(YumLocalPackage):
    """A local rpm wrapped for repodata generation.

    Extends YumLocalPackage with lazily computed attributes (SHA-1
    checksum, header byte range) and methods that serialize the package
    into the XML fragments used by primary.xml, filelists.xml and
    other.xml.
    """

    def __init__(self, ts, package):
        # ts: rpm transaction set; package: path to the .rpm file.
        YumLocalPackage.__init__(self, ts, package)
        self._checksum = None        
        self._stat = os.stat(package)
        # NOTE(review): stat index -1 is st_ctime on CPython's 10-tuple,
        # but the old dumpMetadata.py used index 8 (st_mtime) for the
        # file timestamp -- confirm which is intended.
        self.filetime = str(self._stat[-1])
        self.packagesize = str(self._stat[6])  # st_size in bytes
        self._hdrstart = None
        self._hdrend = None
        
    def _xml(self, item):
        # Escape &, < and > so the value is safe as XML text content.
        return xml.sax.saxutils.escape(item)
        
    def _do_checksum(self):
        """Return the package's SHA-1 digest, computing it only once."""
        if not self._checksum:
            self._checksum = misc.checksum('sha', self.localpath)
            
        return self._checksum
    checksum = property(fget=lambda self: self._do_checksum())
    
    def _get_header_byte_range(self):
        """takes an rpm file or fileobject and returns byteranges for location of the header"""
        # Return the cached range after the first computation.
        if self._hdrstart and self._hdrend:
            return (self._hdrstart, self._hdrend)
      
           
        fo = open(self.localpath, 'r')
        #read in past lead and first 8 bytes of sig header
        fo.seek(104)
        # 104 bytes in
        binindex = fo.read(4)
        # 108 bytes in
        (sigindex, ) = struct.unpack('>I', binindex)
        bindata = fo.read(4)
        # 112 bytes in
        (sigdata, ) = struct.unpack('>I', bindata)
        # each index is 4 32bit segments - so each is 16 bytes
        sigindexsize = sigindex * 16
        sigsize = sigdata + sigindexsize
        # we have to round off to the next 8 byte boundary
        disttoboundary = (sigsize % 8)
        if disttoboundary != 0:
            disttoboundary = 8 - disttoboundary
        # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data
        hdrstart = 112 + sigsize  + disttoboundary
        
        fo.seek(hdrstart) # go to the start of the header
        fo.seek(8,1) # read past the magic number and reserved bytes

        binindex = fo.read(4) 
        (hdrindex, ) = struct.unpack('>I', binindex)
        bindata = fo.read(4)
        (hdrdata, ) = struct.unpack('>I', bindata)
        
        # each index is 4 32bit segments - so each is 16 bytes
        hdrindexsize = hdrindex * 16 
        # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the
        # end of the sig and the header.
        hdrsize = hdrdata + hdrindexsize + 16
        
        # header end is hdrstart + hdrsize 
        hdrend = hdrstart + hdrsize 
        fo.close()
        # Cache so later property accesses skip the file I/O above.
        self._hdrstart = hdrstart
        self._hdrend = hdrend
       
        return (hdrstart, hdrend)
        
    # Byte offsets of the rpm header, used for <rpm:header-range>.
    hdrend = property(fget=lambda self: self._get_header_byte_range()[1])
    hdrstart = property(fget=lambda self: self._get_header_byte_range()[0])
    
    def _dump_base_items(self):
        """Return the name/version/size/etc. XML block for primary.xml."""
        # NOTE(review): <location href=""> is written with the absolute
        # local path; repodata normally wants a repo-relative path --
        # verify before shipping.
        msg = """
  <name>%s</name>
  <arch>%s</arch>
  <version epoch="%s" ver="%s" rel="%s"/>
  <checksum type="sha" pkgid="YES">%s</checksum>
  <summary>%s</summary>
  <description>%s</description>
  <packager>%s</packager>
  <url>%s</url>
  <time file="%s" build="%s"/>
  <size package="%s" installed="%s" archive="%s"/>
  <location href="%s"/>
  """ % (self.name, self.arch, self.epoch, self.ver, self.rel, self.checksum, 
         self._xml(self.summary), self._xml(self.description), 
         self._xml(self.packager), self._xml(self.url), self.filetime,
         self.buildtime, self.packagesize, self.size, self.archivesize, 
         self.localpath )
        return msg

    def _dump_format_items(self):
        """Return the <format> section: rpm tags, deps and primary files."""
        msg = "  <format>\n"
        if self.license:
            msg += """    <rpm:license>%s</rpm:license>\n""" % self._xml(self.license)
        if self.vendor:
            msg += """    <rpm:vendor>%s</rpm:vendor>\n""" % self._xml(self.vendor)
        if self.group:
            msg += """    <rpm:group>%s</rpm:group>\n""" % self._xml(self.group)
        if self.buildhost:
            msg += """    <rpm:buildhost>%s</rpm:buildhost>\n""" % self._xml(self.buildhost)
        if self.sourcerpm:
            msg += """    <rpm:sourcerpm>%s</rpm:sourcerpm>\n""" % self._xml(self.sourcerpm)
        msg +="""    <rpm:header-range start="%s" end="%s"/>""" % (self.hdrstart,
                                                               self.hdrend)
        msg += self._dump_pco('provides')
        msg += self._dump_requires()
        msg += self._dump_pco('conflicts')         
        msg += self._dump_pco('obsoletes')         
        msg += self._dump_files(True)   # primary=True: filtered file list only
        msg += """\n  </format>\n"""
        return msg

    def _dump_pco(self, pcotype):
        """Serialize one provides/conflicts/obsoletes list as rpm:entry XML.

        pcotype is the attribute name ('provides', 'conflicts' or
        'obsoletes'); each element is (name, flags, (epoch, ver, rel)).
        Returns '' when the list is empty.
        """
           
        msg = ""
        mylist = getattr(self, pcotype)
        if mylist: msg = "\n    <rpm:%s>\n" % pcotype
        for (name, flags, (e,v,r)) in mylist:
            pcostring = '''      <rpm:entry name="%s"''' % name
            # epoch/ver/rel are only emitted when a comparison flag is
            # present -- an unversioned entry carries just its name.
            if flags:
                pcostring += ''' flags="%s"''' % flags
                if e:
                    pcostring += ''' epoch="%s"''' % e
                if v:
                    pcostring += ''' ver="%s"''' % v
                if r:
                    pcostring += ''' rel="%s"''' % r
                    
            pcostring += "/>\n"
            msg += pcostring
            
        if mylist: msg += "    </rpm:%s>" % pcotype
        return msg
    
    def _return_primary_files(self, list_of_files=None):
        """Return the subset of files matching the module-level file_re
        patterns (the 'interesting' files listed in primary.xml)."""

        returns = {}
        if list_of_files is None:
            list_of_files = self.returnFileEntries('file')
        for item in list_of_files:
            if item is None:
                continue
            for glob in file_re:
                if glob.match(item):
                    returns[item] = 1   # dict used as a set to dedupe
        return returns.keys()

    def _return_primary_dirs(self):
        """Return the package's directories matching the dir_re patterns."""

        returns = {}
        for item in self.returnFileEntries('dir'):
            if item is None:
                continue
            for glob in dir_re:
                if glob.match(item):
                    returns[item] = 1   # dict used as a set to dedupe
        return returns.keys()
        
        
    def _dump_files(self, primary=False):
        """Return <file> elements for this package.

        With primary=True only the filtered 'interesting' files/dirs are
        listed (for primary.xml); otherwise the complete file list is
        emitted (for filelists.xml).
        """
        msg ="\n"
        if not primary:
            files = self.returnFileEntries('file')
            dirs = self.returnFileEntries('dir')
            ghosts = self.returnFileEntries('ghost')
        else:
            files = self._return_primary_files()
            ghosts = self._return_primary_files(list_of_files = self.returnFileEntries('ghost'))
            dirs = self._return_primary_dirs()
                
        for fn in files:
            msg += """    <file>%s</file>\n""" % fn
        for fn in dirs:
            msg += """    <file type="dir">%s</file>\n""" % fn
        for fn in ghosts:
            msg += """    <file type="ghost">%s</file>\n""" % fn
        
        return msg

    def _is_pre_req(self, flag):
        """check the flags for a requirement, return 1 or 0 whether or not requires
           is a pre-requires or a not"""
        # FIXME this should probably be put in rpmUtils.miscutils since 
        # - that's what it is
        # Bit 64 is RPMSENSE_PREREQ-style "pre" marker in the flags word.
        newflag = flag
        if flag is not None:
            newflag = flag & 64
            if newflag == 64:
                return 1
            else:
                return 0
        return 0
                
    def _dump_requires(self):
        """returns deps in format"""
        # NOTE(review): if RPMTAG_REQUIRENAME is None, `lst` is left as
        # the raw REQUIREVERSION list and the unpacking loop below will
        # fail -- confirm packages with no requires are handled upstream.
        name = self.hdr[rpm.RPMTAG_REQUIRENAME]
        lst = self.hdr[rpm.RPMTAG_REQUIREFLAGS]
        flag = map(flagToString, lst)
        pre = map(self._is_pre_req, lst)
        lst = self.hdr[rpm.RPMTAG_REQUIREVERSION]
        vers = map(stringToVersion, lst)
        if name is not None:
            lst = zip(name, flag, vers, pre)
        mylist = misc.unique(lst)

        msg = ""

        if mylist: msg = "\n    <rpm:requires>\n"
        for (name, flags, (e,v,r),pre) in mylist:
            prcostring = '''      <rpm:entry name="%s"''' % name
            if flags:
                prcostring += ''' flags="%s"''' % flags
                if e:
                    prcostring += ''' epoch="%s"''' % e
                if v:
                    prcostring += ''' ver="%s"''' % v
                if r:
                    prcostring += ''' rel="%s"''' % r
            if pre:
                prcostring += ''' pre="%s"''' % pre
                    
            prcostring += "/>\n"
            msg += prcostring
            
        if mylist: msg += "    </rpm:requires>"
        return msg

    def _dump_changelog(self):
        """Return <changelog> elements for other.xml ('' when empty)."""
        if not self.changelog:
            return ""
        msg = "\n"
        for (ts, author, content) in self.changelog:
            msg += """<changelog author="%s" date="%s">%s</changelog>\n""" % \
                         (self._xml(author), ts, self._xml(content))
        return msg                                                 

    def do_primary_xml_dump(self):
        """Return this package's complete <package> entry for primary.xml."""
        msg = """\n<package type="rpm">"""
        msg += self._dump_base_items()
        msg += self._dump_format_items()
        msg += """\n</package>\n"""
        return msg

    def do_filelists_xml_dump(self):
        """Return this package's complete entry for filelists.xml."""
        msg = """\n<package pkgid="%s" name="%s" arch="%s">
    <version epoch="%s" ver="%s" rel="%s"/>\n""" % (self.checksum, self.name, 
                                     self.arch, self.epoch, self.ver, self.rel)
        msg += self._dump_files()
        msg += "\n</package>\n"
        return msg

    def do_other_xml_dump(self):
        """Return this package's complete entry for other.xml (changelogs)."""
        msg = """\n<package pkgid="%s" name="%s" arch="%s">
    <version epoch="%s" ver="%s" rel="%s"/>\n""" % (self.checksum, self.name, 
                                     self.arch, self.epoch, self.ver, self.rel)
        msg += self._dump_changelog()
        msg += "\n</package>\n"
        return msg
+       
class CreateRepoConfig(object):
    """Bag of default settings for a createrepo run.

    Construct one and mutate attributes to configure; no validation is
    performed here.
    """

    def __init__(self):
        # Output behaviour
        self.quiet = False
        self.verbose = False
        self.pretty = False
        self.outputdir = None
        # Package selection
        self.excludes = []
        self.pkglist = []
        self.skip_symlinks = False
        self.split = False        
        # Checksums / metadata content
        self.sumtype = 'sha'
        self.noepoch = False #???
        self.groupfile = None
        self.make_database = False
        # Caching / incremental runs
        self.cachedir = None
        self.use_cache = False
        self.checkts = False
        self.update = False
        # Paths
        self.baseurl = None
        self.basedir = os.getcwd()
        # Patterns picking the files/dirs listed in primary.xml
        self.file_pattern_match = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
        self.dir_pattern_match = ['.*bin\/.*', '^\/etc\/.*']
+        
+           
class YumCreateRepo(object):
    """Driver object: collects rpm paths and writes the repodata XML files."""

    def __init__(self):
        self.ts = initReadOnlyTransaction()
        self.pkglist = []
        self.conf = CreateRepoConfig()

    def add_package(self, rpmfile):
        """Queue *rpmfile* for metadata generation.

        Returns True if the path exists and was queued, False otherwise.
        Only existence is checked here; "is it really an rpm / can it be
        opened" is deferred to read_in_package().
        """
        if not os.path.exists(rpmfile):
            return False

        self.pkglist.append(rpmfile)
        return True

    def read_in_package(self, rpmfile):
        """Construct and return a CreateRepoPackage for *rpmfile*."""
        # XXX fixme try/excepts here
        po = CreateRepoPackage(self.ts, rpmfile)
        return po

    def dump_metadata(self, pkglist=None):
        """Write primary/filelists/other XML for *pkglist* (defaults to
        self.pkglist) into hard-coded *-test.xml files in the cwd.

        All three output files are now closed even if dumping a package
        raises, instead of leaking open handles with missing closing tags.
        """
        if pkglist is None:
            pkglist = self.pkglist

        primary = open('primary-test.xml', 'w')
        filelists = open('filelists-test.xml', 'w')
        other = open('other-test.xml', 'w')
        try:
            primary.write("""<?xml version="1.0" encoding="UTF-8"?>
<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%d">""" % len(pkglist))

            filelists.write("""<?xml version="1.0" encoding="UTF-8"?>
<filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%s">""" % len(pkglist))

            other.write("""<?xml version="1.0" encoding="UTF-8"?>
<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s">""" % len(pkglist))

            for pkg in pkglist:
                po = self.read_in_package(pkg)
                primary.write(po.do_primary_xml_dump())
                filelists.write(po.do_filelists_xml_dump())
                other.write(po.do_other_xml_dump())

            primary.write("\n</metadata>\n")
            filelists.write("\n</filelists>\n")
            other.write("\n</otherdata>\n")
        finally:
            primary.close()
            filelists.close()
            other.close()
+
+
def main(args):
    """Command-line entry point: each argument is a path to an rpm file."""
    # parseargs get the path for the output and opts
    #
    # determine package lists

    # make metadata
    repo = YumCreateRepo()
    for rpmfile in args:
        repo.add_package(rpmfile)
    repo.dump_metadata()

if __name__ == "__main__":
    main(sys.argv[1:])
diff --git a/docs/Makefile b/docs/Makefile
index 4b32071..5accea6 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -18,9 +18,9 @@ includedir = ${prefix}/include
 oldincludedir = /usr/include
 mandir = ${datadir}/man
 
-pkgdatadir = $(datadir)/$(PACKAGE)
-pkglibdir = $(libdir)/$(PACKAGE)
-pkgincludedir = $(includedir)/$(PACKAGE)
+pkgdatadir = $(datadir)/$(PKGNAME)
+pkglibdir = $(libdir)/$(PKGNAME)
+pkgincludedir = $(includedir)/$(PKGNAME)
 top_builddir = ../
 
 # all dirs
@@ -47,7 +47,7 @@ install: all installdirs
 
 
 uninstall:
-	$(RM) $(bindir)/$(PACKAGE)
+	$(RM) $(bindir)/$(PKGNAME)
 
 
 
@@ -69,7 +69,7 @@ maintainer-clean:
 
 
 distfiles:
-	distdir=$(PACKAGE)-$(VERSION); \
+	distdir=$(PKGNAME)-$(VERSION); \
 	mkdir $(top_srcdir)/.disttmp/$$distdir/docs;\
 	cp \
 	$(srcdir)/createrepo.8 \
@@ -78,7 +78,7 @@ distfiles:
 	$(top_srcdir)/.disttmp/$$distdir/docs
 
 dailyfiles:
-	distdir=$(PACKAGE); \
+	distdir=$(PKGNAME); \
 	mkdir $(top_srcdir)/.disttmp/$$distdir/docs;\
 	cp \
 	$(srcdir)/createrepo.8 \
diff --git a/dumpMetadata.py b/dumpMetadata.py
deleted file mode 100644
index 2836e25..0000000
--- a/dumpMetadata.py
+++ /dev/null
@@ -1,896 +0,0 @@
-#!/usr/bin/python -t
-# base classes and functions for dumping out package Metadata
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-# Copyright 2004 Duke University
-
-# $Id$
-
-import os
-import rpm
-import exceptions
-import md5
-import sha
-import types
-import struct
-import re
-import stat
-import bz2
-try:
-    import sqlitecachec
-except ImportError:
-    pass
-
-# done to fix gzip randomly changing the checksum
-import gzip
-from gzip import write32u, FNAME
-
-__all__ = ["GzipFile","open"]
-
-class GzipFile(gzip.GzipFile):
-    def _write_gzip_header(self):
-        self.fileobj.write('\037\213')             # magic header
-        self.fileobj.write('\010')                 # compression method
-        fname = self.filename[:-3]
-        flags = 0
-        if fname:
-            flags = FNAME
-        self.fileobj.write(chr(flags))
-        write32u(self.fileobj, long(0))
-        self.fileobj.write('\002')
-        self.fileobj.write('\377')
-        if fname:
-            self.fileobj.write(fname + '\000')
-
-
-def _gzipOpen(filename, mode="rb", compresslevel=9):
-    return GzipFile(filename, mode, compresslevel)
-    
-def bzipFile(source, dest):
-    
-    s_fn = open(source, 'rb')
-    destination = bz2.BZ2File(dest, 'w', compresslevel=9)
-
-    while True:
-        data = s_fn.read(1024000)
-        
-        if not data: break
-        destination.write(data)
-
-    destination.close()
-    s_fn.close()
-    
-
-def returnFD(filename):
-    try:
-        fdno = os.open(filename, os.O_RDONLY)
-    except OSError:
-        raise MDError, "Error opening file"
-    return fdno
-    
-def returnHdr(ts, package):
-    """hand back the rpm header or raise an Error if the pkg is fubar"""
-    opened_here = 0
-    try:
-        if type(package) is types.StringType:
-            opened_here = 1
-            fdno = os.open(package, os.O_RDONLY)
-        else: 
-            fdno = package # let's assume this is an fdno and go with it :)
-    except OSError:
-        raise MDError, "Error opening file"
-    ts.setVSFlags((rpm._RPMVSF_NOSIGNATURES|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
-    try:
-        hdr = ts.hdrFromFdno(fdno)
-    except rpm.error:
-        raise MDError, "Error opening package"
-    if type(hdr) != rpm.hdr:
-        raise MDError, "Error opening package"
-    ts.setVSFlags(0)
-    
-    if opened_here:
-        os.close(fdno)
-        del fdno
-
-    return hdr
-    
-def getChecksum(sumtype, file, CHUNK=2**16):
-    """takes filename, hand back Checksum of it
-       sumtype = md5 or sha
-       filename = /path/to/file
-       CHUNK=65536 by default"""
-       
-    # chunking brazenly lifted from Ryan Tomayko
-    opened_here = 0
-    try:
-        if type(file) is not types.StringType:
-            fo = file # assume it's a file-like-object
-        else:
-            opened_here = 1
-            fo = open(file, 'rb', CHUNK)
-            
-        if sumtype == 'md5':
-            sum = md5.new()
-        elif sumtype == 'sha':
-            sum = sha.new()
-        else:
-            raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype
-        chunk = fo.read
-        while chunk: 
-            chunk = fo.read(CHUNK)
-            sum.update(chunk)
-
-        if opened_here:
-            fo.close()
-            del fo
-            
-        return sum.hexdigest()
-    except:
-        raise MDError, 'Error opening file for checksum: %s' % file
-
-
-def utf8String(string):
-    """hands back a unicoded string"""
-    if string is None:
-        return ''
-    elif isinstance(string, unicode):    
-        return string
-    try:
-        x = unicode(string, 'ascii')
-        return string
-    except UnicodeError:
-        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
-        for enc in encodings:
-            try:
-                x = unicode(string, enc)
-            except UnicodeError:
-                pass
-            else:
-                if x.encode(enc) == string:
-                    return x.encode('utf-8')
-    newstring = ''
-    for char in string:
-        if ord(char) > 127:
-            newstring = newstring + '?'
-        else:
-            newstring = newstring + char
-    return newstring
-
-        
-def byteranges(file):
-    """takes an rpm file or fileobject and returns byteranges for location of the header"""
-    opened_here = 0
-    if type(file) is not types.StringType:
-        fo = file
-    else:
-        opened_here = 1
-        fo = open(file, 'r')
-    #read in past lead and first 8 bytes of sig header
-    fo.seek(104)
-    # 104 bytes in
-    binindex = fo.read(4)
-    # 108 bytes in
-    (sigindex, ) = struct.unpack('>I', binindex)
-    bindata = fo.read(4)
-    # 112 bytes in
-    (sigdata, ) = struct.unpack('>I', bindata)
-    # each index is 4 32bit segments - so each is 16 bytes
-    sigindexsize = sigindex * 16
-    sigsize = sigdata + sigindexsize
-    # we have to round off to the next 8 byte boundary
-    disttoboundary = (sigsize % 8)
-    if disttoboundary != 0:
-        disttoboundary = 8 - disttoboundary
-    # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data
-    hdrstart = 112 + sigsize  + disttoboundary
-    
-    fo.seek(hdrstart) # go to the start of the header
-    fo.seek(8,1) # read past the magic number and reserved bytes
-
-    binindex = fo.read(4) 
-    (hdrindex, ) = struct.unpack('>I', binindex)
-    bindata = fo.read(4)
-    (hdrdata, ) = struct.unpack('>I', bindata)
-    
-    # each index is 4 32bit segments - so each is 16 bytes
-    hdrindexsize = hdrindex * 16 
-    # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the
-    # end of the sig and the header.
-    hdrsize = hdrdata + hdrindexsize + 16
-    
-    # header end is hdrstart + hdrsize 
-    hdrend = hdrstart + hdrsize 
-    if opened_here:
-        fo.close()
-        del fo
-    return (hdrstart, hdrend)
-    
-
-class MDError(exceptions.Exception):
-    def __init__(self, value=None):
-        exceptions.Exception.__init__(self)
-        self.value = value
-    
-    def __str__(self):
-        return self.value
-
-
-
-class RpmMetaData:
-    """each rpm is one object, you pass it an rpm file
-       it opens the file, and pulls the information out in bite-sized chunks :)
-    """
-
-    mode_cache = {}
-
-    def __init__(self, ts, basedir, filename, options):
-        try:
-            stats = os.stat(os.path.join(basedir, filename))
-            self.size = stats[6]
-            self.mtime = stats[8]
-            del stats
-        except OSError, e:
-            raise MDError, "Error Stat'ing file %s %s" % (basedir, filename)
-        self.options = options
-        self.localurl = options['baseurl']
-        if options['noepoch']:
-           self.noepoch = ""
-        else:
-           self.noepoch = 0
-        self.relativepath = filename
-        fd = returnFD(os.path.join(basedir, filename))
-        self.hdr = returnHdr(ts, fd)
-        os.lseek(fd, 0, 0)
-        fo = os.fdopen(fd, 'rb')
-        self.pkgid = self.doChecksumCache(fo)
-        fo.seek(0)
-        (self.rangestart, self.rangeend) = byteranges(fo)
-        fo.close()
-        del fo
-        del fd
-        
-        # setup our regex objects
-        fileglobs = options['file-pattern-match']
-        #['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
-        dirglobs = options['dir-pattern-match']
-        #['.*bin\/.*', '^\/etc\/.*']
-        self.dirrc = []
-        self.filerc = []
-        for glob in fileglobs:
-            self.filerc.append(re.compile(glob))
-        
-        for glob in dirglobs:
-            self.dirrc.append(re.compile(glob))
-            
-        self.filenames = []
-        self.dirnames = []
-        self.ghostnames = []
-        self.genFileLists()
-
-    def arch(self):
-        if self.tagByName('sourcepackage') == 1 or not self.tagByName('sourcerpm'):
-            return 'src'
-        else:
-            return self.tagByName('arch')
-
-    def _correctFlags(self, flags):
-        returnflags=[]
-        if flags is None:
-            return returnflags
-
-        if type(flags) is not types.ListType:
-            newflag = flags & 0xf
-            returnflags.append(newflag)
-        else:
-            for flag in flags:
-                newflag = flag
-                if flag is not None:
-                    newflag = flag & 0xf
-                returnflags.append(newflag)
-        return returnflags
-
-    def _checkPreReq(self, flags):
-        reqs=[]
-        if flags is None:
-            return reqs
-
-        if type(flags) is not types.ListType:
-            flags = [flags]
-        for flag in flags:
-            newflag = flag
-            if flag is not None:
-                newflag = flag & 64
-                if newflag == 64:
-                    reqs.append(1)
-                else:
-                    reqs.append(0)
-        return reqs
-
-
-    def _correctVersion(self, vers):
-        returnvers = []
-        vertuple = (None, None, None)
-        if vers is None:
-            returnvers.append(vertuple)
-            return returnvers
-            
-        if type(vers) is not types.ListType:
-            if vers is not None:
-                vertuple = self._stringToVersion(vers)
-            else:
-                vertuple = (None, None, None)
-            returnvers.append(vertuple)
-        else:
-            for ver in vers:
-                if ver is not None:
-                    vertuple = self._stringToVersion(ver)
-                else:
-                    vertuple = (None, None, None)
-                returnvers.append(vertuple)
-        return returnvers
-            
-    
-    def _stringToVersion(self, strng):
-        i = strng.find(':')
-        if i != -1 and strng[:i].isdigit():
-            epoch = strng[:i]
-        else:
-            i = -1
-            epoch = self.noepoch
-        j = strng.rfind('-')
-        if j != -1:
-            if strng[i + 1:j] == '':
-                version = None
-            else:
-                version = strng[i + 1:j]
-            release = strng[j + 1:]
-        else:
-            if strng[i + 1:] == '':
-                version = None
-            else:
-                version = strng[i + 1:]
-            release = None
-        return (epoch, version, release)
-
-    ###########
-    # Title: Remove duplicates from a sequence
-    # Submitter: Tim Peters 
-    # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560                      
-        
-    def _uniq(self,s):
-        """Return a list of the elements in s, but without duplicates.
-    
-        For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
-        unique("abcabc") some permutation of ["a", "b", "c"], and
-        unique(([1, 2], [2, 3], [1, 2])) some permutation of
-        [[2, 3], [1, 2]].
-    
-        For best speed, all sequence elements should be hashable.  Then
-        unique() will usually work in linear time.
-    
-        If not possible, the sequence elements should enjoy a total
-        ordering, and if list(s).sort() doesn't raise TypeError it's
-        assumed that they do enjoy a total ordering.  Then unique() will
-        usually work in O(N*log2(N)) time.
-    
-        If that's not possible either, the sequence elements must support
-        equality-testing.  Then unique() will usually work in quadratic
-        time.
-        """
-    
-        n = len(s)
-        if n == 0:
-            return []
-    
-        # Try using a dict first, as that's the fastest and will usually
-        # work.  If it doesn't work, it will usually fail quickly, so it
-        # usually doesn't cost much to *try* it.  It requires that all the
-        # sequence elements be hashable, and support equality comparison.
-        u = {}
-        try:
-            for x in s:
-                u[x] = 1
-        except TypeError:
-            del u  # move on to the next method
-        else:
-            ret = u.keys()
-            ret.sort()
-            return ret
-    
-        # We can't hash all the elements.  Second fastest is to sort,
-        # which brings the equal elements together; then duplicates are
-        # easy to weed out in a single pass.
-        # NOTE:  Python's list.sort() was designed to be efficient in the
-        # presence of many duplicate elements.  This isn't true of all
-        # sort functions in all languages or libraries, so this approach
-        # is more effective in Python than it may be elsewhere.
-        try:
-            t = list(s)
-            t.sort()
-        except TypeError:
-            del t  # move on to the next method
-        else:
-            assert n > 0
-            last = t[0]
-            lasti = i = 1
-            while i < n:
-                if t[i] != last:
-                    t[lasti] = last = t[i]
-                    lasti += 1
-                i += 1
-            return t[:lasti]
-    
-        # Brute force is all that's left.
-        u = []
-        for x in s:
-            if x not in u:
-                u.append(x)
-        return u
-
-    def tagByName(self, tag):
-        data = self.hdr[tag]
-        if type(data) is types.ListType:
-            if len(data) > 0:
-                return data[0]
-            else:
-                return ''
-        else:
-            return data
-    
-    def listTagByName(self, tag):
-        """take a tag that should be a list and make sure it is one"""
-        lst = []
-        data = self.hdr[tag]
-        if data is None:
-            return lst
-            
-        if type(data) is types.ListType:
-            lst.extend(data)
-        else:
-            lst.append(data)
-        return lst
-
-        
-    def epoch(self):
-        if self.hdr['epoch'] is None:
-            return self.noepoch
-        else:
-            return self.tagByName('epoch')
-            
-    def genFileLists(self):
-        """produces lists of dirs and files for this header in two lists"""
-        
-        files = self.listTagByName('filenames')
-        fileflags = self.listTagByName('fileflags')
-        filemodes = self.listTagByName('filemodes')
-        filetuple = zip(files, filemodes, fileflags)
-        for (file, mode, flag) in filetuple:
-            #garbage checks
-            if mode is None or mode == '':
-                self.filenames.append(file)
-                continue
-            if not RpmMetaData.mode_cache.has_key(mode):
-                RpmMetaData.mode_cache[mode] = stat.S_ISDIR(mode)
-            if RpmMetaData.mode_cache[mode]:
-                self.dirnames.append(file)
-            else:
-                if flag is None:
-                    self.filenames.append(file)
-                else:
-                    if (flag & 64): 
-                        self.ghostnames.append(file)
-                        continue
-                    self.filenames.append(file)
-
-        
-    def usefulFiles(self):
-        """search for good files"""
-        returns = {}     
-        for item in self.filenames:
-            if item is None:
-                continue
-            for glob in self.filerc:
-                if glob.match(item):
-                    returns[item] = 1
-        return returns.keys()
-                    
-    def usefulGhosts(self):
-        """search for useful ghost file names"""
-        returns = {}
-        for item in self.ghostnames:
-            if item is None:
-                continue
-            for glob in self.filerc:
-                if glob.match(item):
-                    returns[item] = 1
-        return returns.keys()
-
-
-    def usefulDirs(self):
-        """search for good dirs"""
-        returns = {}
-        for item in self.dirnames:
-            if item is None:
-                continue
-            for glob in self.dirrc:
-                if glob.match(item):
-                    returns[item] = 1
-        return returns.keys()
-
-    
-    def depsList(self):
-        """returns a list of tuples of dependencies"""
-        # these should probably compress down duplicates too
-        lst = []
-        names = self.hdr[rpm.RPMTAG_REQUIRENAME]
-        tmpflags = self.hdr[rpm.RPMTAG_REQUIREFLAGS]
-        flags = self._correctFlags(tmpflags)
-        prereq = self._checkPreReq(tmpflags)
-        ver = self._correctVersion(self.hdr[rpm.RPMTAG_REQUIREVERSION])
-        if names is not None:
-            lst = zip(names, flags, ver, prereq)
-        return self._uniq(lst)
-        
-    def obsoletesList(self):
-        lst = []
-        names = self.hdr[rpm.RPMTAG_OBSOLETENAME]
-        tmpflags = self.hdr[rpm.RPMTAG_OBSOLETEFLAGS]
-        flags = self._correctFlags(tmpflags)
-        ver = self._correctVersion(self.hdr[rpm.RPMTAG_OBSOLETEVERSION])
-        if names is not None:
-            lst = zip(names, flags, ver)
-        return self._uniq(lst)
-
-    def conflictsList(self):
-        lst = []
-        names = self.hdr[rpm.RPMTAG_CONFLICTNAME]
-        tmpflags = self.hdr[rpm.RPMTAG_CONFLICTFLAGS]
-        flags = self._correctFlags(tmpflags)
-        ver = self._correctVersion(self.hdr[rpm.RPMTAG_CONFLICTVERSION])
-        if names is not None:
-            lst = zip(names, flags, ver)
-        return self._uniq(lst)
-
-    def providesList(self):
-        lst = []
-        names = self.hdr[rpm.RPMTAG_PROVIDENAME]
-        tmpflags = self.hdr[rpm.RPMTAG_PROVIDEFLAGS]
-        flags = self._correctFlags(tmpflags)
-        ver = self._correctVersion(self.hdr[rpm.RPMTAG_PROVIDEVERSION])
-        if names is not None:
-            lst = zip(names, flags, ver)
-        return self._uniq(lst)
-        
-    def changelogLists(self):
-        lst = []
-        names = self.listTagByName('changelogname')
-        times = self.listTagByName('changelogtime')
-        texts = self.listTagByName('changelogtext')
-        if len(names) > 0:
-            lst = zip(names, times, texts)
-        return lst
-    
-    def doChecksumCache(self, fo):
-        """return a checksum for a package:
-           - check if the checksum cache is enabled
-              if not - return the checksum
-              if so - check to see if it has a cache file
-                if so, open it and return the first line's contents
-                if not, grab the checksum and write it to a file for this pkg
-            """
-        if not self.options['cache']:
-            return getChecksum(self.options['sumtype'], fo)
-
-        t = []
-        if type(self.hdr[rpm.RPMTAG_SIGGPG]) is not types.NoneType:
-            t.append("".join(self.hdr[rpm.RPMTAG_SIGGPG]))   
-        if type(self.hdr[rpm.RPMTAG_SIGPGP]) is not types.NoneType:
-            t.append("".join(self.hdr[rpm.RPMTAG_SIGPGP]))
-        if type(self.hdr[rpm.RPMTAG_HDRID]) is not types.NoneType:
-            t.append("".join(self.hdr[rpm.RPMTAG_HDRID]))
-
-        key = md5.new("".join(t)).hexdigest()
-                                        
-        csumtag = '%s-%s-%s-%s' % (os.path.basename(self.relativepath), 
-                                   self.hdr[rpm.RPMTAG_SHA1HEADER], 
-                                   self.size, self.mtime)
-        csumfile = '%s/%s' % (self.options['cachedir'], csumtag)
-        if os.path.exists(csumfile) and self.mtime <= os.stat(csumfile)[8]:
-            csumo = open(csumfile, 'r')
-            checksum = csumo.readline()
-            csumo.close()
-            
-        else:
-            checksum = getChecksum(self.options['sumtype'], fo)
-            csumo = open(csumfile, 'w')
-            csumo.write(checksum)
-            csumo.close()
-            
-        return checksum
-
-
-    
-def generateXML(doc, node, formatns, rpmObj, sumtype):
-    """takes an xml doc object and a package metadata entry node, populates a 
-       package node with the md information"""
-    ns = node.ns()
-    pkgNode = node.newChild(None, "package", None)
-    pkgNode.newProp('type', 'rpm')
-    pkgNode.newChild(None, 'name', rpmObj.tagByName('name'))
-    pkgNode.newChild(None, 'arch', rpmObj.arch())
-    version = pkgNode.newChild(None, 'version', None)
-    if str(rpmObj.epoch()):
-        version.newProp('epoch', str(rpmObj.epoch()))
-    version.newProp('ver', str(rpmObj.tagByName('version')))
-    version.newProp('rel', str(rpmObj.tagByName('release')))
-    csum = pkgNode.newChild(None, 'checksum', rpmObj.pkgid)
-    csum.newProp('type', sumtype)
-    csum.newProp('pkgid', 'YES')
-    for tag in ['summary', 'description', 'packager', 'url']:
-        value = rpmObj.tagByName(tag)
-        value = utf8String(value)
-        value = re.sub("\n$", '', value)
-        entry = pkgNode.newChild(None, tag, None)
-        entry.addContent(value)
-        
-    time = pkgNode.newChild(None, 'time', None)
-    time.newProp('file', str(rpmObj.mtime))
-    time.newProp('build', str(rpmObj.tagByName('buildtime')))
-    size = pkgNode.newChild(None, 'size', None)
-    size.newProp('package', str(rpmObj.size))
-    size.newProp('installed', str(rpmObj.tagByName('size')))
-    size.newProp('archive', str(rpmObj.tagByName('archivesize')))
-    location = pkgNode.newChild(None, 'location', None)
-    if rpmObj.localurl is not None:
-        location.newProp('xml:base', rpmObj.localurl)
-    location.newProp('href', rpmObj.relativepath)
-    format = pkgNode.newChild(ns, 'format', None)
-    for tag in ['license', 'vendor', 'group', 'buildhost', 'sourcerpm']:
-        value = rpmObj.tagByName(tag)
-        value = utf8String(value)
-        value = re.sub("\n$", '', value)
-        entry = format.newChild(formatns, tag, None)
-        entry.addContent(value)
-        
-    hr = format.newChild(formatns, 'header-range', None)
-    hr.newProp('start', str(rpmObj.rangestart))
-    hr.newProp('end', str(rpmObj.rangeend))
-    for (lst, nodename) in [(rpmObj.providesList(), 'provides'),
-                            (rpmObj.conflictsList(), 'conflicts'),
-                            (rpmObj.obsoletesList(), 'obsoletes')]:
-        if len(lst) > 0:               
-            rpconode = format.newChild(formatns, nodename, None)
-            for (name, flags, (e,v,r)) in lst:
-                entry = rpconode.newChild(formatns, 'entry', None)
-                entry.newProp('name', name)
-                if flags != 0:
-                    if flags == 2: arg = 'LT'
-                    if flags == 4: arg = 'GT'
-                    if flags == 8: arg = 'EQ'
-                    if flags == 10: arg = 'LE'
-                    if flags == 12: arg = 'GE'
-                    entry.newProp('flags', arg)
-                    # if we've got a flag we've got a version, I hope :)
-                    if str(e):
-                        entry.newProp('epoch', str(e))
-                    if v:
-                        entry.newProp('ver', str(v))
-                    if r:
-                        entry.newProp('rel', str(r))
-
-    depsList = rpmObj.depsList()
-    if len(depsList) > 0:
-        rpconode = format.newChild(formatns, 'requires', None)    
-        for (name, flags, (e,v,r), prereq) in depsList:
-            entry = rpconode.newChild(formatns, 'entry', None)
-            entry.newProp('name', name)
-            if flags != 0:
-                if flags == 2: arg = 'LT'
-                if flags == 4: arg = 'GT'
-                if flags == 8: arg = 'EQ'
-                if flags == 10: arg = 'LE'
-                if flags == 12: arg = 'GE'
-                entry.newProp('flags', arg)
-                # if we've got a flag we've got a version, I hope :)
-                if str(e):
-                    entry.newProp('epoch', str(e))
-                if v:
-                    entry.newProp('ver', str(v))
-                if r:
-                    entry.newProp('rel', str(r))
-            if prereq == 1:
-                entry.newProp('pre', str(prereq))
-        
-    ff = rpmObj.usefulFiles()
-    ff.sort()
-    for file in ff:
-        files = format.newChild(None, 'file', None)
-        file = utf8String(file)
-        files.addContent(file)
-    ff = rpmObj.usefulDirs()
-    ff.sort()
-    for directory in ff:
-        files = format.newChild(None, 'file', None)
-        directory = utf8String(directory)
-        files.addContent(directory)
-        files.newProp('type', 'dir')
-    ff = rpmObj.usefulGhosts()
-    ff.sort()
-    for directory in ff:
-        files = format.newChild(None, 'file', None)
-        directory = utf8String(directory)
-        files.addContent(directory)
-        files.newProp('type', 'ghost')
-
-    return pkgNode
-    
-def fileListXML(doc, node, rpmObj):
-    pkg = node.newChild(None, 'package', None)
-    pkg.newProp('pkgid', rpmObj.pkgid)
-    pkg.newProp('name', rpmObj.tagByName('name'))
-    pkg.newProp('arch', rpmObj.arch())
-    version = pkg.newChild(None, 'version', None)
-    if str(rpmObj.epoch()):
-        version.newProp('epoch', str(rpmObj.epoch()))
-    version.newProp('ver', str(rpmObj.tagByName('version')))
-    version.newProp('rel', str(rpmObj.tagByName('release')))
-    for file in rpmObj.filenames:
-        files = pkg.newChild(None, 'file', None)
-        file = utf8String(file)
-        files.addContent(file)
-    for directory in rpmObj.dirnames:
-        files = pkg.newChild(None, 'file', None)
-        directory = utf8String(directory)
-        files.addContent(directory)
-        files.newProp('type', 'dir')
-    for ghost in rpmObj.ghostnames:
-        files = pkg.newChild(None, 'file', None)
-        ghost = utf8String(ghost)
-        files.addContent(ghost)
-        files.newProp('type', 'ghost')
-    return pkg
-       
-def otherXML(doc, node, rpmObj):
-    pkg = node.newChild(None, 'package', None)
-    pkg.newProp('pkgid', rpmObj.pkgid)
-    pkg.newProp('name', rpmObj.tagByName('name'))
-    pkg.newProp('arch', rpmObj.arch())
-    version = pkg.newChild(None, 'version', None)
-    if str(rpmObj.epoch()):
-        version.newProp('epoch', str(rpmObj.epoch()))
-    version.newProp('ver', str(rpmObj.tagByName('version')))
-    version.newProp('rel', str(rpmObj.tagByName('release')))
-    clogs = rpmObj.changelogLists()
-    for (name, time, text) in clogs:
-        clog = pkg.newChild(None, 'changelog', None)
-        clog.addContent(utf8String(text))
-        clog.newProp('author', utf8String(name))
-        clog.newProp('date', str(time))
-    return pkg
-    
-def repoXML(node, cmds):
-    """generate the repomd.xml file that stores the info on the other files"""
-    sumtype = cmds['sumtype']
-    workfiles = [(cmds['otherfile'], 'other',),
-                 (cmds['filelistsfile'], 'filelists'),
-                 (cmds['primaryfile'], 'primary')]
-    repoid='garbageid'
-    
-    repopath = os.path.join(cmds['outputdir'], cmds['tempdir'])
-    
-    if cmds['database']:
-        try:
-            dbversion = str(sqlitecachec.DBVERSION)
-        except AttributeError:
-            dbversion = '9'
-        rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)
-
-    for (file, ftype) in workfiles:
-        complete_path = os.path.join(repopath, file)
-        
-        zfo = _gzipOpen(complete_path)
-        uncsum = getChecksum(sumtype, zfo)
-        zfo.close()
-        csum = getChecksum(sumtype, complete_path)
-        timestamp = os.stat(complete_path)[8]
-        
-        db_csums = {}
-        db_compressed_sums = {}
-        
-        if cmds['database']:
-            if ftype == 'primary':
-                rp.getPrimary(complete_path, csum)
-                            
-            elif ftype == 'filelists':
-                rp.getFilelists(complete_path, csum)
-                
-            elif ftype == 'other':
-                rp.getOtherdata(complete_path, csum)
-            
-
-            tmp_result_name = '%s.xml.gz.sqlite' % ftype
-            tmp_result_path = os.path.join(repopath, tmp_result_name)
-            good_name = '%s.sqlite' % ftype
-            resultpath = os.path.join(repopath, good_name)
-            
-            # rename from silly name to not silly name
-            os.rename(tmp_result_path, resultpath)
-            compressed_name = '%s.bz2' % good_name
-            result_compressed = os.path.join(repopath, compressed_name)
-            db_csums[ftype] = getChecksum(sumtype, resultpath)
-            
-            # compress the files
-            bzipFile(resultpath, result_compressed)
-            # csum the compressed file
-            db_compressed_sums[ftype] = getChecksum(sumtype, result_compressed)
-            # remove the uncompressed file
-            os.unlink(resultpath)
-
-            # timestamp the compressed file
-            db_timestamp = os.stat(result_compressed)[8]
-            
-            # add this data as a section to the repomdxml
-            db_data_type = '%s_db' % ftype
-            data = node.newChild(None, 'data', None)
-            data.newProp('type', db_data_type)
-            location = data.newChild(None, 'location', None)
-            if cmds['baseurl'] is not None:
-                location.newProp('xml:base', cmds['baseurl'])
-            
-            location.newProp('href', os.path.join(cmds['finaldir'], compressed_name))
-            checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
-            checksum.newProp('type', sumtype)
-            db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
-            unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
-            unchecksum.newProp('type', sumtype)
-            database_version = data.newChild(None, 'database_version', dbversion)
-            
-            
-        data = node.newChild(None, 'data', None)
-        data.newProp('type', ftype)
-        location = data.newChild(None, 'location', None)
-        if cmds['baseurl'] is not None:
-            location.newProp('xml:base', cmds['baseurl'])
-        location.newProp('href', os.path.join(cmds['finaldir'], file))
-        checksum = data.newChild(None, 'checksum', csum)
-        checksum.newProp('type', sumtype)
-        timestamp = data.newChild(None, 'timestamp', str(timestamp))
-        unchecksum = data.newChild(None, 'open-checksum', uncsum)
-        unchecksum.newProp('type', sumtype)
-    
-    # if we've got a group file then checksum it once and be done
-    if cmds['groupfile'] is not None:
-        grpfile = cmds['groupfile']
-        timestamp = os.stat(grpfile)[8]
-        sfile = os.path.basename(grpfile)
-        fo = open(grpfile, 'r')
-        output = open(os.path.join(cmds['outputdir'], cmds['tempdir'], sfile), 'w')
-        output.write(fo.read())
-        output.close()
-        fo.seek(0)
-        csum = getChecksum(sumtype, fo)
-        fo.close()
-
-        data = node.newChild(None, 'data', None)
-        data.newProp('type', 'group')
-        location = data.newChild(None, 'location', None)
-        if cmds['baseurl'] is not None:
-            location.newProp('xml:base', cmds['baseurl'])
-        location.newProp('href', os.path.join(cmds['finaldir'], sfile))
-        checksum = data.newChild(None, 'checksum', csum)
-        checksum.newProp('type', sumtype)
-        timestamp = data.newChild(None, 'timestamp', str(timestamp))
-    
-        
diff --git a/genpkgmetadata.py b/genpkgmetadata.py
index bef0225..1650a38 100755
--- a/genpkgmetadata.py
+++ b/genpkgmetadata.py
@@ -17,30 +17,27 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 # Copyright 2004 Duke University
 
-# $Id$
-
 
 import os
 import sys
 import getopt
-import rpm
 import libxml2
 import string
 import fnmatch
 import shutil
+import rpm
 
-import dumpMetadata
-import readMetadata
-from dumpMetadata import _gzipOpen
-__version__ = '0.4.9'
+# for now, for later, we move all this around
+import createrepo
+from createrepo import MDError
+import createrepo.yumbased
+import createrepo.utils
 
-def errorprint(stuff):
-    print >> sys.stderr, stuff
+from createrepo.utils import _gzipOpen, errorprint, _
 
-def _(args):
-    """Stub function for translation"""
-    return args
+__version__ = '0.9'
 
+# cli
 def usage(retval=1):
     print _("""
     createrepo [options] directory-of-packages
@@ -69,6 +66,7 @@ def usage(retval=1):
 
     sys.exit(retval)
 
+# module
 class MetaDataGenerator:
     def __init__(self, cmds):
         self.cmds = cmds
@@ -76,6 +74,7 @@ class MetaDataGenerator:
         self.pkgcount = 0
         self.files = []
 
+    # module
     def _os_path_walk(self, top, func, arg):
         """Directory tree walk with callback function.
          copy of os.path.walk, fixes the link/stating problem
@@ -90,7 +89,7 @@ class MetaDataGenerator:
             name = os.path.join(top, name)
             if os.path.isdir(name):
                 self._os_path_walk(name, func, arg)
-
+    # module
     def getFileList(self, basepath, directory, ext):
         """Return all files in path matching ext, store them in filelist,
         recurse dirs. Returns a list object"""
@@ -112,7 +111,7 @@ class MetaDataGenerator:
         startdir = os.path.join(basepath, directory) + '/'
         self._os_path_walk(startdir, extension_visitor, filelist)
         return filelist
-
+    #module
     def checkTimeStamps(self, directory):
         if self.cmds['checkts']:
             files = self.getFileList(self.cmds['basedir'], directory, '.rpm')
@@ -124,7 +123,7 @@ class MetaDataGenerator:
                 if os.path.getctime(fn) > self.cmds['mdtimestamp']:
                     return False
         return True
-
+    #module
     def trimRpms(self, files):
         badrpms = []
         for file in files:
@@ -144,7 +143,7 @@ class MetaDataGenerator:
         # rpms we're going to be dealing with
         if self.cmds['update']:
             #build the paths
-            basefile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['primaryfile'])
+            primaryfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['primaryfile'])
             flfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['filelistsfile'])
             otherfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['otherfile'])
             opts = {
@@ -152,71 +151,62 @@ class MetaDataGenerator:
                 'pkgdir' : os.path.normpath(os.path.join(self.cmds['basedir'], directory))
             }
             #and scan the old repo
-            self.oldData = readMetadata.MetadataIndex(self.cmds['outputdir'],
-                                                      basefile, flfile, otherfile, opts)
+            self.oldData = createrepo.readMetadata.MetadataIndex(self.cmds['outputdir'],
+                                                      primaryfile, flfile, otherfile, opts)
         if self.cmds['pkglist']:
-            files = self.cmds['pkglist']
+            packages = self.cmds['pkglist']
         else:
-            files = self.getFileList(self.cmds['basedir'], directory, '.rpm')
+            packages = self.getFileList(self.cmds['basedir'], directory, '.rpm')
             
-        files = self.trimRpms(files)
-        self.pkgcount = len(files)
+        packages = self.trimRpms(packages)
+        self.pkgcount = len(packages)
         self.openMetadataDocs()
-        self.writeMetadataDocs(files, directory)
+        self.writeMetadataDocs(packages, directory)
         self.closeMetadataDocs()
 
-
+    # module
     def openMetadataDocs(self):
-        self._setupBase()
-        self._setupFilelists()
-        self._setupOther()
-
-    def _setupBase(self):
-        # setup the base metadata doc
-        self.basedoc = libxml2.newDoc("1.0")
-        self.baseroot =  self.basedoc.newChild(None, "metadata", None)
-        basens = self.baseroot.newNs('http://linux.duke.edu/metadata/common', None)
-        self.formatns = self.baseroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm')
-        self.baseroot.setNs(basens)
-        basefilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['primaryfile'])
-        self.basefile = _gzipOpen(basefilepath, 'w')
-        self.basefile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
-        self.basefile.write('<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">\n' %
+        self.primaryfile = self._setupPrimary()
+        self.flfile = self._setupFilelists()
+        self.otherfile = self._setupOther()
+
+    def _setupPrimary(self):
+        # setup the primary metadata file
+        primaryfilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['primaryfile'])
+        fo = _gzipOpen(primaryfilepath, 'w')
+        fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">\n' %
                        self.pkgcount)
+        return fo
 
     def _setupFilelists(self):
-        # setup the file list doc
-        self.filesdoc = libxml2.newDoc("1.0")
-        self.filesroot = self.filesdoc.newChild(None, "filelists", None)
-        filesns = self.filesroot.newNs('http://linux.duke.edu/metadata/filelists', None)
-        self.filesroot.setNs(filesns)
+        # setup the filelist file
         filelistpath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['filelistsfile'])
-        self.flfile = _gzipOpen(filelistpath, 'w')
-        self.flfile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
-        self.flfile.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%s">\n' %
+        fo = _gzipOpen(filelistpath, 'w')
+        fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%s">\n' %
                        self.pkgcount)
-
+        return fo
+        
     def _setupOther(self):
-        # setup the other doc
-        self.otherdoc = libxml2.newDoc("1.0")
-        self.otherroot = self.otherdoc.newChild(None, "otherdata", None)
-        otherns = self.otherroot.newNs('http://linux.duke.edu/metadata/other', None)
-        self.otherroot.setNs(otherns)
+        # setup the other file
         otherfilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['otherfile'])
-        self.otherfile = _gzipOpen(otherfilepath, 'w')
-        self.otherfile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
-        self.otherfile.write('<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s">\n' %
+        fo = _gzipOpen(otherfilepath, 'w')
+        fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s">\n' %
                        self.pkgcount)
-
-    def _getNodes(self, file, directory, current):
+        return fo
+        
+    def _getNodes(self, pkg, directory, current):
+        # delete function since it seems to do nothing anymore
         basenode = None
         filesnode = None
         othernode = None
         try:
             rpmdir= os.path.join(self.cmds['basedir'], directory)
-            mdobj = dumpMetadata.RpmMetaData(self.ts, rpmdir, file, self.cmds)
+            mdobj = dumpMetadata.RpmMetaData(self.ts, rpmdir, pkg, self.cmds)
         except dumpMetadata.MDError, e:
-            errorprint('\n%s - %s' % (e, file))
+            errorprint('\n%s - %s' % (e, pkg))
             return None
         try:
             basenode = dumpMetadata.generateXML(self.basedoc, self.baseroot, self.formatns, mdobj, self.cmds['sumtype'])
@@ -235,48 +225,60 @@ class MetaDataGenerator:
             return None
         return basenode,filesnode,othernode
 
-    def writeMetadataDocs(self, files, directory, current=0):
-        for file in files:
+    def read_in_package(self, directory, rpmfile):
+        # XXX fixme try/excepts here
+        # directory is stupid - just make it part of the class
+        rpmfile = '%s/%s/%s' % (self.cmds['basedir'], directory, rpmfile)
+        po = createrepo.yumbased.CreateRepoPackage(self.ts, rpmfile)
+        return po
+
+    def writeMetadataDocs(self, pkglist, directory, current=0):
+        # FIXME
+        # directory is unused, kill it, pkglist should come from self
+        # I don't see why current needs to be this way at all
+        for pkg in pkglist:
             current+=1
             recycled = False
             sep = '-'
+            
+            # look to see if we can get the data from the old repodata
+            # if so write this one out that way
             if self.cmds['update']:
                 #see if we can pull the nodes from the old repo
-                nodes = self.oldData.getNodes(file)
+                nodes = self.oldData.getNodes(pkg)
                 if nodes is not None:
                     recycled = True
-                    sep = '*'
+
+            
+            # otherwise do it individually
             if not recycled:
                 #scan rpm files
-                nodes = self._getNodes(file, directory, current)
-            if nodes is None:
-                continue
-            basenode, filenode, othernode = nodes
-            del nodes
+                po = self.read_in_package(directory, pkg)
+                self.primaryfile.write(po.do_primary_xml_dump())
+                self.flfile.write(po.do_filelists_xml_dump())
+                self.otherfile.write(po.do_other_xml_dump())
+            else:
+                sep = '*'
+                primarynode, filenode, othernode = nodes    
+
+                for node, outfile in ((primarynode,self.primaryfile),
+                                      (filenode,self.flfile),
+                                      (othernode,self.otherfile)):
+                    if node is None:
+                        break
+                    output = node.serialize('UTF-8', self.cmds['pretty'])
+                    outfile.write(output)
+                    outfile.write('\n')
+  
+                    self.oldData.freeNodes(pkg)
+
             if not self.cmds['quiet']:
                 if self.cmds['verbose']:
-                    print '%d/%d %s %s' % (current, self.pkgcount, sep, file)
+                    print '%d/%d %s %s' % (current, self.pkgcount, sep, pkg)
                 else:
                     sys.stdout.write('\r' + ' ' * 80)
-                    sys.stdout.write("\r%d/%d %s %s" % (current, self.pkgcount, sep, file))
+                    sys.stdout.write("\r%d/%d %s %s" % (current, self.pkgcount, sep, pkg))
                     sys.stdout.flush()
-            if basenode is None:
-                continue
-
-            for node, outfile in ((basenode,self.basefile),
-                                  (filenode,self.flfile),
-                                  (othernode,self.otherfile)):
-                if node is None:
-                    break
-                output = node.serialize('UTF-8', self.cmds['pretty'])
-                outfile.write(output)
-                outfile.write('\n')
-                if not recycled:
-                    #recycled nodes can be multiply referenced
-                    node.unlinkNode()
-                    node.freeNode()
-            if recycled:
-                self.oldData.freeNodes(file)
 
         return current
 
@@ -288,21 +290,18 @@ class MetaDataGenerator:
         # save them up to the tmp locations:
         if not self.cmds['quiet']:
             print _('Saving Primary metadata')
-        self.basefile.write('\n</metadata>')
-        self.basefile.close()
-        self.basedoc.freeDoc()
+        self.primaryfile.write('\n</metadata>')
+        self.primaryfile.close()
 
         if not self.cmds['quiet']:
             print _('Saving file lists metadata')
         self.flfile.write('\n</filelists>')
         self.flfile.close()
-        self.filesdoc.freeDoc()
 
         if not self.cmds['quiet']:
             print _('Saving other metadata')
         self.otherfile.write('\n</otherdata>')
         self.otherfile.close()
-        self.otherdoc.freeDoc()
 
     def doRepoMetadata(self):
         """wrapper to generate the repomd.xml file that stores the info on the other files"""
@@ -313,8 +312,8 @@ class MetaDataGenerator:
         repofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['repomdfile'])
 
         try:
-            dumpMetadata.repoXML(reporoot, self.cmds)
-        except dumpMetadata.MDError, e:
+            createrepo.repoXML(reporoot, self.cmds)
+        except MDError, e:
             errorprint(_('Error generating repo xml file: %s') % e)
             sys.exit(1)
 
diff --git a/readMetadata.py b/readMetadata.py
deleted file mode 100644
index 0d9dacf..0000000
--- a/readMetadata.py
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/python -t
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-# Copyright 2006 Red Hat
-
-import os
-import sys
-import libxml2
-import stat
-
-def errorprint(stuff):
-    print >> sys.stderr, stuff
-
-def _(args):
-    """Stub function for translation"""
-    return args
-
-class MetadataIndex(object):
-
-    def __init__(self, outputdir, basefile, filelistfile, otherfile, opts=None):
-        if opts is None:
-            opts = {}
-        self.opts = opts
-        self.outputdir = outputdir
-        self.files = {'base' : basefile,
-                      'filelist' : filelistfile,
-                      'other' : otherfile}
-        self.scan()
-
-    def scan(self):
-        """Read in and index old repo data"""
-        self.basenodes = {}
-        self.filesnodes = {}
-        self.othernodes = {}
-        self.pkg_ids = {}
-        if self.opts.get('verbose'):
-            print _("Scanning old repo data")
-        for file in self.files.values():
-            if not os.path.exists(file):
-                #cannot scan
-                errorprint(_("Previous repo file missing: %s") % file)
-                return
-        root = libxml2.parseFile(self.files['base']).getRootElement()
-        self._scanPackageNodes(root, self._handleBase)
-        if self.opts.get('verbose'):
-            print _("Indexed %i base nodes" % len(self.basenodes))
-        root = libxml2.parseFile(self.files['filelist']).getRootElement()
-        self._scanPackageNodes(root, self._handleFiles)
-        if self.opts.get('verbose'):
-            print _("Indexed %i filelist nodes" % len(self.filesnodes))
-        root = libxml2.parseFile(self.files['other']).getRootElement()
-        self._scanPackageNodes(root, self._handleOther)
-        if self.opts.get('verbose'):
-            print _("Indexed %i other nodes" % len(self.othernodes))
-        #reverse index pkg ids to track references
-        self.pkgrefs = {}
-        for relpath, pkgid in self.pkg_ids.iteritems():
-            self.pkgrefs.setdefault(pkgid,[]).append(relpath)
-
-    def _scanPackageNodes(self, root, handler):
-        node = root.children
-        while node is not None:
-            if node.type != "element":
-                node = node.next
-                continue
-            if node.name == "package":
-                handler(node)
-            node = node.next
-
-    def _handleBase(self, node):
-        top = node
-        node = node.children
-        pkgid = None
-        mtime = None
-        size = None
-        relpath = None
-        while node is not None:
-            if node.type != "element":
-                node = node.next
-                continue
-            if node.name == "checksum":
-                pkgid = node.content
-            elif node.name == "time":
-                mtime = int(node.prop('file'))
-            elif node.name == "size":
-                size = int(node.prop('package'))
-            elif node.name == "location":
-                relpath = node.prop('href')
-            node = node.next
-        if relpath is None:
-            print _("Incomplete data for node")
-            return
-        if pkgid is None:
-            print _("pkgid missing for %s") % relpath
-            return
-        if mtime is None:
-            print _("mtime missing for %s") % relpath
-            return
-        if size is None:
-            print _("size missing for %s") % relpath
-            return
-        filepath = os.path.join(self.opts['pkgdir'], relpath)
-        try:
-            st = os.stat(filepath)
-        except OSError:
-            #file missing -- ignore
-            return
-        if not stat.S_ISREG(st.st_mode):
-            #ignore non files
-            return
-        #check size and mtime
-        if st.st_size != size:
-            if self.opts.get('verbose'):
-                print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
-            return
-        if st.st_mtime != mtime:
-            if self.opts.get('verbose'):
-                print _("Modification time changed for %s") % filepath
-            return
-        #otherwise we index
-        self.basenodes[relpath] = top
-        self.pkg_ids[relpath] = pkgid
-
-    def _handleFiles(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.filesnodes[pkgid] = node
-
-    def _handleOther(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.othernodes[pkgid] = node
-
-    def getNodes(self, relpath):
-        """Return base, filelist, and other nodes for file, if they exist
-
-        Returns a tuple of nodes, or None if not found
-        """
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            return None
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        fnode = self.filesnodes.get(pkgid,None)
-        if fnode is None:
-            return None
-        onode = self.othernodes.get(pkgid,None)
-        if onode is None:
-            return None
-        return bnode, fnode, onode
-
-    def freeNodes(self,relpath):
-        #causing problems
-        """Free up nodes corresponding to file, if possible"""
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            print "Missing node for %s" % relpath
-            return
-        bnode.unlinkNode()
-        bnode.freeNode()
-        del self.basenodes[relpath]
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        del self.pkg_ids[relpath]
-        dups = self.pkgrefs.get(pkgid)
-        dups.remove(relpath)
-        if len(dups):
-            #still referenced
-            return
-        del self.pkgrefs[pkgid]
-        for nodes in self.filesnodes, self.othernodes:
-            node = nodes.get(pkgid)
-            if node is not None:
-                node.unlinkNode()
-                node.freeNode()
-                del nodes[pkgid]
-
-
-if __name__ == "__main__":
-    #test code - attempts to read a repo in working directory
-    idx = MetadataIndex(".", "repodata/primary.xml.gz", "repodata/filelists.xml.gz",
-                        "repodata/other.xml.gz", {'verbose':1})



More information about the Rpm-metadata mailing list