[Rpm-metadata] bin/Makefile createrepo/__init__.py createrepo/Makefile createrepo/readMetadata.py createrepo.spec createrepo/utils.py createrepo/yumbased.py docs/Makefile dumpMetadata.py genpkgmetadata.py Makefile readMetadata.py
Seth Vidal
skvidal at linux.duke.edu
Thu Dec 20 07:21:34 UTC 2007
 Makefile                   |   55 +-
 bin/Makefile               |   20 -
 createrepo.spec            |   10
 createrepo/Makefile        |   64 +++
 createrepo/__init__.py     |  141 +++++++
 createrepo/readMetadata.py |  198 +++++++++
 createrepo/utils.py        |  101 +++++
 createrepo/yumbased.py     |  383 +++++++++++++++++++
 docs/Makefile              |   12
 dumpMetadata.py            |  896 ---------------------------------------------
 genpkgmetadata.py          |  189 ++++-----
 readMetadata.py            |  198 ---------
 12 files changed, 1034 insertions(+), 1233 deletions(-)
New commits:
commit 7bf690b4bc2b79a8b12154ee774f80e93f6265ff
Author: Seth Vidal <skvidal at fedoraproject.org>
Date: Thu Dec 20 02:18:23 2007 -0500
Whew: this is the beginning of a big conversion of createrepo to use the yum modules,
behave more like a modular program and have a proper class structure. It's not done,
but it's a start.
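As a quick orientation, the new modular entry point in createrepo/yumbased.py can be driven roughly like this (a sketch based only on the code in this commit; the package path is hypothetical and the *-test.xml names are the hard-coded placeholders in dump_metadata):

    from createrepo.yumbased import YumCreateRepo

    ycr = YumCreateRepo()
    ycr.add_package('foo-1.0-1.noarch.rpm')  # hypothetical path; only checked with os.path.exists() so far
    ycr.dump_metadata()  # writes primary-test.xml, filelists-test.xml, other-test.xml in the cwd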
diff --git a/Makefile b/Makefile
index a5f2ffb..b57acfc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,12 @@
-PACKAGE = createrepo
-VERSION = 0.4.10
+PKGNAME = createrepo
+VERSION=$(shell awk '/Version:/ { print $$2 }' ${PKGNAME}.spec)
+RELEASE=$(shell awk '/Release:/ { print $$2 }' ${PKGNAME}.spec)
+CVSTAG=createrepo-$(subst .,_,$(VERSION)-$(RELEASE))
+PYTHON=python
+SUBDIRS = $(PKGNAME) bin docs
+PYFILES = $(wildcard *.py)
+
+
SHELL = /bin/sh
top_srcdir = .
srcdir = .
@@ -20,9 +27,9 @@ includedir = ${prefix}/include
oldincludedir = /usr/include
mandir = ${prefix}/share/man
-pkgdatadir = $(datadir)/$(PACKAGE)
-pkglibdir = $(libdir)/$(PACKAGE)
-pkgincludedir = $(includedir)/$(PACKAGE)
+pkgdatadir = $(datadir)/$(PKGNAME)
+pkglibdir = $(libdir)/$(PKGNAME)
+pkgincludedir = $(includedir)/$(PKGNAME)
top_builddir =
# all dirs
@@ -37,12 +44,8 @@ INSTALL_DATA = $(INSTALL) -m 644
INSTALL_MODULES = $(INSTALL) -m 755 -D
RM = rm -f
-SUBDIRS = bin docs
-
MODULES = $(srcdir)/genpkgmetadata.py \
- $(srcdir)/dumpMetadata.py \
- $(srcdir)/readMetadata.py \
- $(srcdir)/modifyrepo.py
+ $(srcdir)/modifyrepo.py
.SUFFIXES: .py .pyc
.py.pyc:
@@ -51,7 +54,7 @@ MODULES = $(srcdir)/genpkgmetadata.py \
all: $(MODULES)
for subdir in $(SUBDIRS) ; do \
- $(MAKE) -C $$subdir VERSION=$(VERSION) PACKAGE=$(PACKAGE) DESTDIR=$(DESTDIR); \
+ $(MAKE) -C $$subdir VERSION=$(VERSION) PKGNAME=$(PKGNAME) DESTDIR=$(DESTDIR); \
done
check:
@@ -60,7 +63,7 @@ check:
install: all installdirs
$(INSTALL_MODULES) $(srcdir)/$(MODULES) $(DESTDIR)$(pkgdatadir)
for subdir in $(SUBDIRS) ; do \
- $(MAKE) -C $$subdir install VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+ $(MAKE) -C $$subdir install VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
done
installdirs:
@@ -74,13 +77,13 @@ uninstall:
$(RM) $(pkgdatadir)/$$module ; \
done
for subdir in $(SUBDIRS) ; do \
- $(MAKE) -C $$subdir uninstall VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+ $(MAKE) -C $$subdir uninstall VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
done
clean:
$(RM) *.pyc *.pyo
for subdir in $(SUBDIRS) ; do \
- $(MAKE) -C $$subdir clean VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+ $(MAKE) -C $$subdir clean VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
done
distclean: clean
@@ -88,7 +91,7 @@ distclean: clean
$(RM) core
$(RM) *~
for subdir in $(SUBDIRS) ; do \
- $(MAKE) -C $$subdir distclean VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+ $(MAKE) -C $$subdir distclean VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
done
mostlyclean:
@@ -102,12 +105,12 @@ maintainer-clean:
dist:
olddir=`pwd`; \
- distdir=$(PACKAGE)-$(VERSION); \
+ distdir=$(PKGNAME)-$(VERSION); \
$(RM) -r .disttmp; \
$(INSTALL_DIR) .disttmp; \
$(INSTALL_DIR) .disttmp/$$distdir; \
$(MAKE) distfiles
- distdir=$(PACKAGE)-$(VERSION); \
+ distdir=$(PKGNAME)-$(VERSION); \
cd .disttmp; \
tar -cvz > ../$$distdir.tar.gz $$distdir; \
cd $$olddir
@@ -115,23 +118,23 @@ dist:
daily:
olddir=`pwd`; \
- distdir=$(PACKAGE); \
+ distdir=$(PKGNAME); \
$(RM) -r .disttmp; \
$(INSTALL_DIR) .disttmp; \
$(INSTALL_DIR) .disttmp/$$distdir; \
$(MAKE) dailyfiles
day=`/bin/date +%Y%m%d`; \
- distdir=$(PACKAGE); \
+ distdir=$(PKGNAME); \
tarname=$$distdir-$$day ;\
cd .disttmp; \
- perl -pi -e "s/\#DATE\#/$$day/g" $$distdir/$(PACKAGE)-daily.spec; \
+ perl -pi -e "s/\#DATE\#/$$day/g" $$distdir/$(PKGNAME)-daily.spec; \
echo $$day; \
tar -cvz > ../$$tarname.tar.gz $$distdir; \
cd $$olddir
$(RM) -rf .disttmp
dailyfiles:
- distdir=$(PACKAGE); \
+ distdir=$(PKGNAME); \
cp \
$(srcdir)/*.py \
$(srcdir)/Makefile \
@@ -139,14 +142,14 @@ dailyfiles:
$(srcdir)/COPYING \
$(srcdir)/COPYING.lib \
$(srcdir)/README \
- $(srcdir)/$(PACKAGE).spec \
+ $(srcdir)/$(PKGNAME).spec \
$(top_srcdir)/.disttmp/$$distdir
for subdir in $(SUBDIRS) ; do \
- $(MAKE) -C $$subdir dailyfiles VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+ $(MAKE) -C $$subdir dailyfiles VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
done
distfiles:
- distdir=$(PACKAGE)-$(VERSION); \
+ distdir=$(PKGNAME)-$(VERSION); \
cp \
$(srcdir)/*.py \
$(srcdir)/Makefile \
@@ -154,10 +157,10 @@ distfiles:
$(srcdir)/COPYING \
$(srcdir)/COPYING.lib \
$(srcdir)/README \
- $(srcdir)/$(PACKAGE).spec \
+ $(srcdir)/$(PKGNAME).spec \
$(top_srcdir)/.disttmp/$$distdir
for subdir in $(SUBDIRS) ; do \
- $(MAKE) -C $$subdir distfiles VERSION=$(VERSION) PACKAGE=$(PACKAGE); \
+ $(MAKE) -C $$subdir distfiles VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
done
archive: dist
diff --git a/bin/Makefile b/bin/Makefile
index 52c1f50..4497230 100644
--- a/bin/Makefile
+++ b/bin/Makefile
@@ -18,9 +18,9 @@ includedir = ${prefix}/include
oldincludedir = /usr/include
mandir = ${prefix}/man
-pkgdatadir = $(datadir)/$(PACKAGE)
-pkglibdir = $(libdir)/$(PACKAGE)
-pkgincludedir = $(includedir)/$(PACKAGE)
+pkgdatadir = $(datadir)/$(PKGNAME)
+pkglibdir = $(libdir)/$(PKGNAME)
+pkgincludedir = $(includedir)/$(PKGNAME)
top_builddir = ../
# all dirs
@@ -36,16 +36,16 @@ INSTALL_MODULES = $(INSTALL) -m 755 -D
RM = rm -f
-all: $(srcdir)/$(PACKAGE)
+all: $(srcdir)/$(PKGNAME)
install: all installdirs
- $(INSTALL_BIN) $(srcdir)/$(PACKAGE) $(DESTDIR)$(bindir)/$(PACKAGE)
+ $(INSTALL_BIN) $(srcdir)/$(PKGNAME) $(DESTDIR)$(bindir)/$(PKGNAME)
$(INSTALL_BIN) $(srcdir)/modifyrepo $(DESTDIR)$(bindir)/modifyrepo
uninstall:
- $(RM) $(bindir)/$(PACKAGE)
+ $(RM) $(bindir)/$(PKGNAME)
@@ -67,19 +67,19 @@ maintainer-clean:
distfiles:
- distdir=$(PACKAGE)-$(VERSION); \
+ distdir=$(PKGNAME)-$(VERSION); \
mkdir $(top_srcdir)/.disttmp/$$distdir/bin;\
cp \
- $(srcdir)/$(PACKAGE) \
+ $(srcdir)/$(PKGNAME) \
$(srcdir)/Makefile \
$(srcdir)/modifyrepo \
$(top_srcdir)/.disttmp/$$distdir/bin
dailyfiles:
- distdir=$(PACKAGE); \
+ distdir=$(PKGNAME); \
mkdir $(top_srcdir)/.disttmp/$$distdir/bin;\
cp \
- $(srcdir)/$(PACKAGE) \
+ $(srcdir)/$(PKGNAME) \
$(srcdir)/Makefile \
$(srcdir)/modifyrepo \
$(top_srcdir)/.disttmp/$$distdir/bin
diff --git a/createrepo.spec b/createrepo.spec
index 3c5cc75..969ad95 100644
--- a/createrepo.spec
+++ b/createrepo.spec
@@ -1,6 +1,8 @@
+%{!?python_sitelib: %define python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; print get_python_lib()")}
+
Summary: Creates a common metadata repository
Name: createrepo
-Version: 0.4.10
+Version: 0.9
Release: 1
License: GPL
Group: System Environment/Base
@@ -9,7 +11,7 @@ URL: http://linux.duke.edu/metadata/
BuildRoot: %{_tmppath}/%{name}-%{version}root
BuildArchitectures: noarch
Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python
-Requires: yum-metadata-parser
+Requires: yum-metadata-parser, yum >= 3.2.7
%description
This utility will generate a common metadata repository from a directory of
@@ -35,8 +37,12 @@ rpm packages
%{_bindir}/modifyrepo
%{_mandir}/man8/createrepo.8*
%{_mandir}/man1/modifyrepo.1*
+%{python_sitelib}/createrepo
%changelog
+* Thu Dec 20 2007 Seth Vidal <skvidal at fedoraproject.org>
+- beginning of the new version
+
* Mon Dec 3 2007 Luke Macken <lmacken at redhat.com>
- Add man page for modifyrepo
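The new %python_sitelib fallback simply evaluates distutils at build time; this is what the macro runs, verbatim (the resulting path depends on the build host's Python):

    # what %python_sitelib expands via %(python -c "...")
    from distutils.sysconfig import get_python_lib
    print get_python_lib()   # e.g. /usr/lib/python2.4/site-packages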
diff --git a/createrepo/Makefile b/createrepo/Makefile
new file mode 100644
index 0000000..d3d3a34
--- /dev/null
+++ b/createrepo/Makefile
@@ -0,0 +1,64 @@
+PYTHON=python
+PACKAGE = $(shell basename `pwd`)
+PYFILES = $(wildcard *.py)
+PYVER := $(shell $(PYTHON) -c 'import sys; print "%.3s" %(sys.version)')
+PYSYSDIR := $(shell $(PYTHON) -c 'import sys; print sys.prefix')
+PYLIBDIR = $(PYSYSDIR)/lib/python$(PYVER)
+PKGDIR = $(PYLIBDIR)/site-packages/$(PKGNAME)
+
+SHELL = /bin/sh
+top_srcdir = ..
+srcdir = ../$(PKGNAME)
+prefix = /usr
+exec_prefix = ${prefix}
+
+bindir = ${exec_prefix}/bin
+sbindir = ${exec_prefix}/sbin
+libexecdir = ${exec_prefix}/libexec
+datadir = ${prefix}/share
+sysconfdir = ${prefix}/etc
+sharedstatedir = ${prefix}/com
+localstatedir = ${prefix}/var
+libdir = ${exec_prefix}/lib
+infodir = ${prefix}/info
+docdir =
+includedir = ${prefix}/include
+oldincludedir = /usr/include
+mandir = ${datadir}/man
+
+pkgdatadir = $(datadir)/$(PKGNAME)
+pkglibdir = $(libdir)/$(PKGNAME)
+pkgincludedir = $(includedir)/$(PKGNAME)
+top_builddir = ../
+
+
+all:
+ echo "Nothing to do"
+
+clean:
+ rm -f *.pyc *.pyo *~
+
+install:
+ mkdir -p $(DESTDIR)/$(PKGDIR)
+ for p in $(PYFILES) ; do \
+ install -m 644 $$p $(DESTDIR)/$(PKGDIR)/$$p; \
+ done
+ $(PYTHON) -c "import compileall; compileall.compile_dir('$(DESTDIR)/$(PKGDIR)', 1, '$(PKGDIR)', 1)"
+
+distfiles:
+ distdir=$(PKGNAME)-$(VERSION); \
+ mkdir $(top_srcdir)/.disttmp/$$distdir/$(PKGNAME);\
+ cp \
+ $(srcdir)/$(PYFILES) \
+ $(srcdir)/Makefile \
+ $(top_srcdir)/.disttmp/$$distdir/$(PKGNAME)
+
+dailyfiles:
+ distdir=$(PKGNAME); \
+ mkdir $(top_srcdir)/.disttmp/$$distdir/$(PKGNAME);\
+ cp \
+ $(srcdir)/$(PYFILES) \
+ $(srcdir)/__init__.py \
+ $(srcdir)/Makefile \
+ $(top_srcdir)/.disttmp/$$distdir/$(PKGNAME)
+
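One caveat in this new sub-Makefile: it defines PACKAGE via basename but then consumes $(PKGNAME) in PKGDIR and srcdir, and PKGNAME is only set when the parent Makefile passes it down (make -C createrepo ... PKGNAME=createrepo). Run standalone, PKGDIR would expand with an empty package name. A guard along these lines (hypothetical, not part of this commit) would cover both invocations, since command-line variables still override ?=:

    # hypothetical guard, not in this commit
    PKGNAME ?= $(shell basename `pwd`)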
diff --git a/createrepo/__init__.py b/createrepo/__init__.py
new file mode 100644
index 0000000..ac4451d
--- /dev/null
+++ b/createrepo/__init__.py
@@ -0,0 +1,141 @@
+import exceptions
+import os
+import sys
+import libxml2
+import hashlib
+from yum import misc
+
+try:
+ import sqlitecachec
+except ImportError:
+ pass
+
+
+from utils import _gzipOpen, bzipFile
+
+
+__version__ = '0.9'
+
+
+class MDError(exceptions.Exception):
+ def __init__(self, value=None):
+ exceptions.Exception.__init__(self)
+ self.value = value
+
+ def __str__(self):
+ return self.value
+
+def repoXML(node, cmds):
+ """generate the repomd.xml file that stores the info on the other files"""
+ sumtype = cmds['sumtype']
+ workfiles = [(cmds['otherfile'], 'other',),
+ (cmds['filelistsfile'], 'filelists'),
+ (cmds['primaryfile'], 'primary')]
+ repoid='garbageid'
+
+ repopath = os.path.join(cmds['outputdir'], cmds['tempdir'])
+
+ if cmds['database']:
+ try:
+ dbversion = str(sqlitecachec.DBVERSION)
+ except AttributeError:
+ dbversion = '9'
+ rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)
+
+ for (file, ftype) in workfiles:
+ complete_path = os.path.join(repopath, file)
+
+ zfo = _gzipOpen(complete_path)
+ uncsum = misc.checksum(sumtype, zfo)
+ zfo.close()
+ csum = misc.checksum(sumtype, complete_path)
+ timestamp = os.stat(complete_path)[8]
+
+ db_csums = {}
+ db_compressed_sums = {}
+
+ if cmds['database']:
+ if ftype == 'primary':
+ rp.getPrimary(complete_path, csum)
+
+ elif ftype == 'filelists':
+ rp.getFilelists(complete_path, csum)
+
+ elif ftype == 'other':
+ rp.getOtherdata(complete_path, csum)
+
+
+ tmp_result_name = '%s.xml.gz.sqlite' % ftype
+ tmp_result_path = os.path.join(repopath, tmp_result_name)
+ good_name = '%s.sqlite' % ftype
+ resultpath = os.path.join(repopath, good_name)
+
+ # rename from silly name to not silly name
+ os.rename(tmp_result_path, resultpath)
+ compressed_name = '%s.bz2' % good_name
+ result_compressed = os.path.join(repopath, compressed_name)
+ db_csums[ftype] = misc.checksum(sumtype, resultpath)
+
+ # compress the files
+ bzipFile(resultpath, result_compressed)
+ # csum the compressed file
+ db_compressed_sums[ftype] = misc.checksum(sumtype, result_compressed)
+ # remove the uncompressed file
+ os.unlink(resultpath)
+
+ # timestamp the compressed file
+ db_timestamp = os.stat(result_compressed)[8]
+
+ # add this data as a section to the repomdxml
+ db_data_type = '%s_db' % ftype
+ data = node.newChild(None, 'data', None)
+ data.newProp('type', db_data_type)
+ location = data.newChild(None, 'location', None)
+ if cmds['baseurl'] is not None:
+ location.newProp('xml:base', cmds['baseurl'])
+
+ location.newProp('href', os.path.join(cmds['finaldir'], compressed_name))
+ checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
+ checksum.newProp('type', sumtype)
+ db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
+ unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
+ unchecksum.newProp('type', sumtype)
+ database_version = data.newChild(None, 'database_version', dbversion)
+
+
+ data = node.newChild(None, 'data', None)
+ data.newProp('type', ftype)
+ location = data.newChild(None, 'location', None)
+ if cmds['baseurl'] is not None:
+ location.newProp('xml:base', cmds['baseurl'])
+ location.newProp('href', os.path.join(cmds['finaldir'], file))
+ checksum = data.newChild(None, 'checksum', csum)
+ checksum.newProp('type', sumtype)
+ timestamp = data.newChild(None, 'timestamp', str(timestamp))
+ unchecksum = data.newChild(None, 'open-checksum', uncsum)
+ unchecksum.newProp('type', sumtype)
+
+ # if we've got a group file then checksum it once and be done
+ if cmds['groupfile'] is not None:
+ grpfile = cmds['groupfile']
+ timestamp = os.stat(grpfile)[8]
+ sfile = os.path.basename(grpfile)
+ fo = open(grpfile, 'r')
+ output = open(os.path.join(cmds['outputdir'], cmds['tempdir'], sfile), 'w')
+ output.write(fo.read())
+ output.close()
+ fo.seek(0)
+ csum = misc.checksum(sumtype, fo)
+ fo.close()
+
+ data = node.newChild(None, 'data', None)
+ data.newProp('type', 'group')
+ location = data.newChild(None, 'location', None)
+ if cmds['baseurl'] is not None:
+ location.newProp('xml:base', cmds['baseurl'])
+ location.newProp('href', os.path.join(cmds['finaldir'], sfile))
+ checksum = data.newChild(None, 'checksum', csum)
+ checksum.newProp('type', sumtype)
+ timestamp = data.newChild(None, 'timestamp', str(timestamp))
+
+
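The checksum/open-checksum pair recorded above is just the digest of the compressed workfile versus the digest of its uncompressed contents, using the same helpers this commit introduces (a minimal sketch; the repodata path is assumed):

    from yum import misc
    from createrepo.utils import _gzipOpen

    path = 'repodata/primary.xml.gz'        # assumed location
    csum = misc.checksum('sha', path)       # <checksum>: digest of the .gz file itself
    zfo = _gzipOpen(path)
    open_csum = misc.checksum('sha', zfo)   # <open-checksum>: digest of the XML inside
    zfo.close()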
diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
new file mode 100644
index 0000000..0d9dacf
--- /dev/null
+++ b/createrepo/readMetadata.py
@@ -0,0 +1,198 @@
+#!/usr/bin/python -t
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2006 Red Hat
+
+import os
+import sys
+import libxml2
+import stat
+
+def errorprint(stuff):
+ print >> sys.stderr, stuff
+
+def _(args):
+ """Stub function for translation"""
+ return args
+
+class MetadataIndex(object):
+
+ def __init__(self, outputdir, basefile, filelistfile, otherfile, opts=None):
+ if opts is None:
+ opts = {}
+ self.opts = opts
+ self.outputdir = outputdir
+ self.files = {'base' : basefile,
+ 'filelist' : filelistfile,
+ 'other' : otherfile}
+ self.scan()
+
+ def scan(self):
+ """Read in and index old repo data"""
+ self.basenodes = {}
+ self.filesnodes = {}
+ self.othernodes = {}
+ self.pkg_ids = {}
+ if self.opts.get('verbose'):
+ print _("Scanning old repo data")
+ for file in self.files.values():
+ if not os.path.exists(file):
+ #cannot scan
+ errorprint(_("Previous repo file missing: %s") % file)
+ return
+ root = libxml2.parseFile(self.files['base']).getRootElement()
+ self._scanPackageNodes(root, self._handleBase)
+ if self.opts.get('verbose'):
+ print _("Indexed %i base nodes" % len(self.basenodes))
+ root = libxml2.parseFile(self.files['filelist']).getRootElement()
+ self._scanPackageNodes(root, self._handleFiles)
+ if self.opts.get('verbose'):
+ print _("Indexed %i filelist nodes" % len(self.filesnodes))
+ root = libxml2.parseFile(self.files['other']).getRootElement()
+ self._scanPackageNodes(root, self._handleOther)
+ if self.opts.get('verbose'):
+ print _("Indexed %i other nodes" % len(self.othernodes))
+ #reverse index pkg ids to track references
+ self.pkgrefs = {}
+ for relpath, pkgid in self.pkg_ids.iteritems():
+ self.pkgrefs.setdefault(pkgid,[]).append(relpath)
+
+ def _scanPackageNodes(self, root, handler):
+ node = root.children
+ while node is not None:
+ if node.type != "element":
+ node = node.next
+ continue
+ if node.name == "package":
+ handler(node)
+ node = node.next
+
+ def _handleBase(self, node):
+ top = node
+ node = node.children
+ pkgid = None
+ mtime = None
+ size = None
+ relpath = None
+ while node is not None:
+ if node.type != "element":
+ node = node.next
+ continue
+ if node.name == "checksum":
+ pkgid = node.content
+ elif node.name == "time":
+ mtime = int(node.prop('file'))
+ elif node.name == "size":
+ size = int(node.prop('package'))
+ elif node.name == "location":
+ relpath = node.prop('href')
+ node = node.next
+ if relpath is None:
+ print _("Incomplete data for node")
+ return
+ if pkgid is None:
+ print _("pkgid missing for %s") % relpath
+ return
+ if mtime is None:
+ print _("mtime missing for %s") % relpath
+ return
+ if size is None:
+ print _("size missing for %s") % relpath
+ return
+ filepath = os.path.join(self.opts['pkgdir'], relpath)
+ try:
+ st = os.stat(filepath)
+ except OSError:
+ #file missing -- ignore
+ return
+ if not stat.S_ISREG(st.st_mode):
+ #ignore non files
+ return
+ #check size and mtime
+ if st.st_size != size:
+ if self.opts.get('verbose'):
+ print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
+ return
+ if st.st_mtime != mtime:
+ if self.opts.get('verbose'):
+ print _("Modification time changed for %s") % filepath
+ return
+ #otherwise we index
+ self.basenodes[relpath] = top
+ self.pkg_ids[relpath] = pkgid
+
+ def _handleFiles(self, node):
+ pkgid = node.prop('pkgid')
+ if pkgid:
+ self.filesnodes[pkgid] = node
+
+ def _handleOther(self, node):
+ pkgid = node.prop('pkgid')
+ if pkgid:
+ self.othernodes[pkgid] = node
+
+ def getNodes(self, relpath):
+ """Return base, filelist, and other nodes for file, if they exist
+
+ Returns a tuple of nodes, or None if not found
+ """
+ bnode = self.basenodes.get(relpath,None)
+ if bnode is None:
+ return None
+ pkgid = self.pkg_ids.get(relpath,None)
+ if pkgid is None:
+ print _("No pkgid found for: %s") % relpath
+ return None
+ fnode = self.filesnodes.get(pkgid,None)
+ if fnode is None:
+ return None
+ onode = self.othernodes.get(pkgid,None)
+ if onode is None:
+ return None
+ return bnode, fnode, onode
+
+ def freeNodes(self,relpath):
+ #causing problems
+ """Free up nodes corresponding to file, if possible"""
+ bnode = self.basenodes.get(relpath,None)
+ if bnode is None:
+ print "Missing node for %s" % relpath
+ return
+ bnode.unlinkNode()
+ bnode.freeNode()
+ del self.basenodes[relpath]
+ pkgid = self.pkg_ids.get(relpath,None)
+ if pkgid is None:
+ print _("No pkgid found for: %s") % relpath
+ return None
+ del self.pkg_ids[relpath]
+ dups = self.pkgrefs.get(pkgid)
+ dups.remove(relpath)
+ if len(dups):
+ #still referenced
+ return
+ del self.pkgrefs[pkgid]
+ for nodes in self.filesnodes, self.othernodes:
+ node = nodes.get(pkgid)
+ if node is not None:
+ node.unlinkNode()
+ node.freeNode()
+ del nodes[pkgid]
+
+
+if __name__ == "__main__":
+ #test code - attempts to read a repo in working directory
+ idx = MetadataIndex(".", "repodata/primary.xml.gz", "repodata/filelists.xml.gz",
+ "repodata/other.xml.gz", {'verbose':1})
diff --git a/createrepo/utils.py b/createrepo/utils.py
new file mode 100644
index 0000000..bb3939c
--- /dev/null
+++ b/createrepo/utils.py
@@ -0,0 +1,101 @@
+#!/usr/bin/python
+# util functions for createrepo
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+
+import os
+import sys
+import bz2
+import gzip
+from gzip import write32u, FNAME
+
+def errorprint(stuff):
+ print >> sys.stderr, stuff
+
+def _(args):
+ """Stub function for translation"""
+ return args
+
+
+class GzipFile(gzip.GzipFile):
+ def _write_gzip_header(self):
+ self.fileobj.write('\037\213') # magic header
+ self.fileobj.write('\010') # compression method
+ fname = self.filename[:-3]
+ flags = 0
+ if fname:
+ flags = FNAME
+ self.fileobj.write(chr(flags))
+ write32u(self.fileobj, long(0))
+ self.fileobj.write('\002')
+ self.fileobj.write('\377')
+ if fname:
+ self.fileobj.write(fname + '\000')
+
+
+def _gzipOpen(filename, mode="rb", compresslevel=9):
+ return GzipFile(filename, mode, compresslevel)
+
+def bzipFile(source, dest):
+
+ s_fn = open(source, 'rb')
+ destination = bz2.BZ2File(dest, 'w', compresslevel=9)
+
+ while True:
+ data = s_fn.read(1024000)
+
+ if not data: break
+ destination.write(data)
+
+ destination.close()
+ s_fn.close()
+
+
+def returnFD(filename):
+ try:
+ fdno = os.open(filename, os.O_RDONLY)
+ except OSError:
+ raise MDError, "Error opening file"
+ return fdno
+
+def utf8String(string):
+ """hands back a unicoded string"""
+ if string is None:
+ return ''
+ elif isinstance(string, unicode):
+ return string
+ try:
+ x = unicode(string, 'ascii')
+ return string
+ except UnicodeError:
+ encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
+ for enc in encodings:
+ try:
+ x = unicode(string, enc)
+ except UnicodeError:
+ pass
+ else:
+ if x.encode(enc) == string:
+ return x.encode('utf-8')
+ newstring = ''
+ for char in string:
+ if ord(char) > 127:
+ newstring = newstring + '?'
+ else:
+ newstring = newstring + char
+ return newstring
+
+
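Two things worth flagging in this module: returnFD raises MDError, which lives in createrepo/__init__.py and is never imported here, so that error path would currently raise a NameError instead; and utf8String's fallback ladder behaves like this (a sketch):

    from createrepo.utils import utf8String

    utf8String(None)         # -> ''
    utf8String('plain')      # pure ascii str is returned unchanged
    utf8String('caf\xe9')    # latin-1 bytes are re-encoded -> 'caf\xc3\xa9' (utf-8)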
diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py
new file mode 100644
index 0000000..ea2b9aa
--- /dev/null
+++ b/createrepo/yumbased.py
@@ -0,0 +1,383 @@
+#!/usr/bin/python -tt
+
+import os
+import sys
+import struct
+import rpm
+import types
+import re
+import xml.sax.saxutils
+
+from yum.packages import YumLocalPackage
+from yum.Errors import *
+from yum import misc
+from rpmUtils.transaction import initReadOnlyTransaction
+from rpmUtils.miscutils import flagToString, stringToVersion
+
+fileglobs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
+file_re = []
+for glob in fileglobs:
+ file_re.append(re.compile(glob))
+
+dirglobs = ['.*bin\/.*', '^\/etc\/.*']
+dir_re = []
+for glob in dirglobs:
+ dir_re.append(re.compile(glob))
+
+
+class CreateRepoPackage(YumLocalPackage):
+ def __init__(self, ts, package):
+ YumLocalPackage.__init__(self, ts, package)
+ self._checksum = None
+ self._stat = os.stat(package)
+ self.filetime = str(self._stat[-1])
+ self.packagesize = str(self._stat[6])
+ self._hdrstart = None
+ self._hdrend = None
+
+ def _xml(self, item):
+ return xml.sax.saxutils.escape(item)
+
+ def _do_checksum(self):
+ if not self._checksum:
+ self._checksum = misc.checksum('sha', self.localpath)
+
+ return self._checksum
+ checksum = property(fget=lambda self: self._do_checksum())
+
+ def _get_header_byte_range(self):
+ """takes an rpm file or fileobject and returns byteranges for location of the header"""
+ if self._hdrstart and self._hdrend:
+ return (self._hdrstart, self._hdrend)
+
+
+ fo = open(self.localpath, 'r')
+ #read in past lead and first 8 bytes of sig header
+ fo.seek(104)
+ # 104 bytes in
+ binindex = fo.read(4)
+ # 108 bytes in
+ (sigindex, ) = struct.unpack('>I', binindex)
+ bindata = fo.read(4)
+ # 112 bytes in
+ (sigdata, ) = struct.unpack('>I', bindata)
+ # each index is 4 32bit segments - so each is 16 bytes
+ sigindexsize = sigindex * 16
+ sigsize = sigdata + sigindexsize
+ # we have to round off to the next 8 byte boundary
+ disttoboundary = (sigsize % 8)
+ if disttoboundary != 0:
+ disttoboundary = 8 - disttoboundary
+ # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data
+ hdrstart = 112 + sigsize + disttoboundary
+
+ fo.seek(hdrstart) # go to the start of the header
+ fo.seek(8,1) # read past the magic number and reserved bytes
+
+ binindex = fo.read(4)
+ (hdrindex, ) = struct.unpack('>I', binindex)
+ bindata = fo.read(4)
+ (hdrdata, ) = struct.unpack('>I', bindata)
+
+ # each index is 4 32bit segments - so each is 16 bytes
+ hdrindexsize = hdrindex * 16
+ # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the
+ # end of the sig and the header.
+ hdrsize = hdrdata + hdrindexsize + 16
+
+ # header end is hdrstart + hdrsize
+ hdrend = hdrstart + hdrsize
+ fo.close()
+ self._hdrstart = hdrstart
+ self._hdrend = hdrend
+
+ return (hdrstart, hdrend)
+
+ hdrend = property(fget=lambda self: self._get_header_byte_range()[1])
+ hdrstart = property(fget=lambda self: self._get_header_byte_range()[0])
+
+ def _dump_base_items(self):
+ msg = """
+ <name>%s</name>
+ <arch>%s</arch>
+ <version epoch="%s" ver="%s" rel="%s"/>
+ <checksum type="sha" pkgid="YES">%s</checksum>
+ <summary>%s</summary>
+ <description>%s</description>
+ <packager>%s</packager>
+ <url>%s</url>
+ <time file="%s" build="%s"/>
+ <size package="%s" installed="%s" archive="%s"/>
+ <location href="%s"/>
+ """ % (self.name, self.arch, self.epoch, self.ver, self.rel, self.checksum,
+ self._xml(self.summary), self._xml(self.description),
+ self._xml(self.packager), self._xml(self.url), self.filetime,
+ self.buildtime, self.packagesize, self.size, self.archivesize,
+ self.localpath )
+ return msg
+
+ def _dump_format_items(self):
+ msg = " <format>\n"
+ if self.license:
+ msg += """ <rpm:license>%s</rpm:license>\n""" % self._xml(self.license)
+ if self.vendor:
+ msg += """ <rpm:vendor>%s</rpm:vendor>\n""" % self._xml(self.vendor)
+ if self.group:
+ msg += """ <rpm:group>%s</rpm:group>\n""" % self._xml(self.group)
+ if self.buildhost:
+ msg += """ <rpm:buildhost>%s</rpm:buildhost>\n""" % self._xml(self.buildhost)
+ if self.sourcerpm:
+ msg += """ <rpm:sourcerpm>%s</rpm:sourcerpm>\n""" % self._xml(self.sourcerpm)
+ msg +=""" <rpm:header-range start="%s" end="%s"/>""" % (self.hdrstart,
+ self.hdrend)
+ msg += self._dump_pco('provides')
+ msg += self._dump_requires()
+ msg += self._dump_pco('conflicts')
+ msg += self._dump_pco('obsoletes')
+ msg += self._dump_files(True)
+ msg += """\n </format>\n"""
+ return msg
+
+ def _dump_pco(self, pcotype):
+
+ msg = ""
+ mylist = getattr(self, pcotype)
+ if mylist: msg = "\n <rpm:%s>\n" % pcotype
+ for (name, flags, (e,v,r)) in mylist:
+ pcostring = ''' <rpm:entry name="%s"''' % name
+ if flags:
+ pcostring += ''' flags="%s"''' % flags
+ if e:
+ pcostring += ''' epoch="%s"''' % e
+ if v:
+ pcostring += ''' ver="%s"''' % v
+ if r:
+ pcostring += ''' rel="%s"''' % r
+
+ pcostring += "/>\n"
+ msg += pcostring
+
+ if mylist: msg += " </rpm:%s>" % pcotype
+ return msg
+
+ def _return_primary_files(self, list_of_files=None):
+
+ returns = {}
+ if list_of_files is None:
+ list_of_files = self.returnFileEntries('file')
+ for item in list_of_files:
+ if item is None:
+ continue
+ for glob in file_re:
+ if glob.match(item):
+ returns[item] = 1
+ return returns.keys()
+
+ def _return_primary_dirs(self):
+
+ returns = {}
+ for item in self.returnFileEntries('dir'):
+ if item is None:
+ continue
+ for glob in dir_re:
+ if glob.match(item):
+ returns[item] = 1
+ return returns.keys()
+
+
+ def _dump_files(self, primary=False):
+ msg ="\n"
+ if not primary:
+ files = self.returnFileEntries('file')
+ dirs = self.returnFileEntries('dir')
+ ghosts = self.returnFileEntries('ghost')
+ else:
+ files = self._return_primary_files()
+ ghosts = self._return_primary_files(list_of_files = self.returnFileEntries('ghost'))
+ dirs = self._return_primary_dirs()
+
+ for fn in files:
+ msg += """ <file>%s</file>\n""" % fn
+ for fn in dirs:
+ msg += """ <file type="dir">%s</file>\n""" % fn
+ for fn in ghosts:
+ msg += """ <file type="ghost">%s</file>\n""" % fn
+
+ return msg
+
+ def _is_pre_req(self, flag):
+ """check the flags for a requirement, return 1 or 0 whether or not requires
+ is a pre-requires or a not"""
+ # FIXME this should probably be put in rpmUtils.miscutils since
+ # - that's what it is
+ newflag = flag
+ if flag is not None:
+ newflag = flag & 64
+ if newflag == 64:
+ return 1
+ else:
+ return 0
+ return 0
+
+ def _dump_requires(self):
+ """returns deps in format"""
+ name = self.hdr[rpm.RPMTAG_REQUIRENAME]
+ lst = self.hdr[rpm.RPMTAG_REQUIREFLAGS]
+ flag = map(flagToString, lst)
+ pre = map(self._is_pre_req, lst)
+ lst = self.hdr[rpm.RPMTAG_REQUIREVERSION]
+ vers = map(stringToVersion, lst)
+ if name is not None:
+ lst = zip(name, flag, vers, pre)
+ mylist = misc.unique(lst)
+
+ msg = ""
+
+ if mylist: msg = "\n <rpm:requires>\n"
+ for (name, flags, (e,v,r),pre) in mylist:
+ prcostring = ''' <rpm:entry name="%s"''' % name
+ if flags:
+ prcostring += ''' flags="%s"''' % flags
+ if e:
+ prcostring += ''' epoch="%s"''' % e
+ if v:
+ prcostring += ''' ver="%s"''' % v
+ if r:
+ prcostring += ''' rel="%s"''' % r
+ if pre:
+ prcostring += ''' pre="%s"''' % pre
+
+ prcostring += "/>\n"
+ msg += prcostring
+
+ if mylist: msg += " </rpm:requires>"
+ return msg
+
+ def _dump_changelog(self):
+ if not self.changelog:
+ return ""
+ msg = "\n"
+ for (ts, author, content) in self.changelog:
+ msg += """<changelog author="%s" date="%s">%s</changelog>\n""" % \
+ (self._xml(author), ts, self._xml(content))
+ return msg
+
+ def do_primary_xml_dump(self):
+ msg = """\n<package type="rpm">"""
+ msg += self._dump_base_items()
+ msg += self._dump_format_items()
+ msg += """\n</package>\n"""
+ return msg
+
+ def do_filelists_xml_dump(self):
+ msg = """\n<package pkgid="%s" name="%s" arch="%s">
+ <version epoch="%s" ver="%s" rel="%s"/>\n""" % (self.checksum, self.name,
+ self.arch, self.epoch, self.ver, self.rel)
+ msg += self._dump_files()
+ msg += "\n</package>\n"
+ return msg
+
+ def do_other_xml_dump(self):
+ msg = """\n<package pkgid="%s" name="%s" arch="%s">
+ <version epoch="%s" ver="%s" rel="%s"/>\n""" % (self.checksum, self.name,
+ self.arch, self.epoch, self.ver, self.rel)
+ msg += self._dump_changelog()
+ msg += "\n</package>\n"
+ return msg
+
+class CreateRepoConfig(object):
+ def __init__(self):
+ self.quiet = False
+ self.verbose = False
+ self.excludes = []
+ self.baseurl = None
+ self.groupfile = None
+ self.sumtype = 'sha'
+ self.noepoch = False #???
+ self.pretty = False
+ self.cachedir = None
+ self.basedir = os.getcwd()
+ self.use_cache = False
+ self.checkts = False
+ self.split = False
+ self.update = False
+ self.make_database = False
+ self.outputdir = None
+ self.file_pattern_match = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
+ self.dir_pattern_match = ['.*bin\/.*', '^\/etc\/.*']
+ self.skip_symlinks = False
+ self.pkglist = []
+
+
+class YumCreateRepo(object):
+ def __init__(self):
+ self.ts = initReadOnlyTransaction()
+ self.pkglist = []
+ self.conf = CreateRepoConfig()
+
+ def add_package(self, rpmfile):
+ # take a file
+ # check it to make sure it:
+ # exists and is an rpm
+ # can be opened
+ # whatever else
+ if not os.path.exists(rpmfile):
+ return False
+
+ self.pkglist.append(rpmfile)
+ return True
+
+
+ def read_in_package(self, rpmfile):
+ # XXX fixme try/excepts here
+ po = CreateRepoPackage(self.ts, rpmfile)
+ return po
+
+ def dump_metadata(self, pkglist=None):
+ if pkglist is None:
+ pkglist = self.pkglist
+
+ primary = open('primary-test.xml', 'w')
+ primary.write("""<?xml version="1.0" encoding="UTF-8"?>
+<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%d">""" % len(pkglist))
+
+ filelists = open('filelists-test.xml', 'w')
+ filelists.write("""<?xml version="1.0" encoding="UTF-8"?>
+<filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%s">""" % len(pkglist))
+
+ other = open('other-test.xml', 'w')
+ other.write("""<?xml version="1.0" encoding="UTF-8"?>
+<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s">""" % len(pkglist))
+
+
+ for pkg in pkglist:
+ po = self.read_in_package(pkg)
+ primary.write(po.do_primary_xml_dump())
+ filelists.write(po.do_filelists_xml_dump())
+ other.write(po.do_other_xml_dump())
+
+
+ primary.write("\n</metadata>\n")
+ primary.close()
+ filelists.write("\n</filelists>\n")
+ filelists.close()
+ other.write("\n</otherdata>\n")
+ other.close()
+
+
+def main(args):
+ ycr = YumCreateRepo()
+ # parseargs get the path for the output and opts
+ #
+ # determine package lists
+
+ # make metadata
+
+ for pkg in args:
+ ycr.add_package(pkg)
+ ycr.dump_metadata()
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
+
+
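The hdrstart/hdrend properties computed above are what end up in the <rpm:header-range> element of primary.xml; they can be exercised directly (a sketch, package path hypothetical):

    from rpmUtils.transaction import initReadOnlyTransaction
    from createrepo.yumbased import CreateRepoPackage

    ts = initReadOnlyTransaction()
    po = CreateRepoPackage(ts, 'foo-1.0-1.noarch.rpm')
    print po.hdrstart, po.hdrend   # header byte offsets, cached after the first read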
diff --git a/docs/Makefile b/docs/Makefile
index 4b32071..5accea6 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -18,9 +18,9 @@ includedir = ${prefix}/include
oldincludedir = /usr/include
mandir = ${datadir}/man
-pkgdatadir = $(datadir)/$(PACKAGE)
-pkglibdir = $(libdir)/$(PACKAGE)
-pkgincludedir = $(includedir)/$(PACKAGE)
+pkgdatadir = $(datadir)/$(PKGNAME)
+pkglibdir = $(libdir)/$(PKGNAME)
+pkgincludedir = $(includedir)/$(PKGNAME)
top_builddir = ../
# all dirs
@@ -47,7 +47,7 @@ install: all installdirs
uninstall:
- $(RM) $(bindir)/$(PACKAGE)
+ $(RM) $(bindir)/$(PKGNAME)
@@ -69,7 +69,7 @@ maintainer-clean:
distfiles:
- distdir=$(PACKAGE)-$(VERSION); \
+ distdir=$(PKGNAME)-$(VERSION); \
mkdir $(top_srcdir)/.disttmp/$$distdir/docs;\
cp \
$(srcdir)/createrepo.8 \
@@ -78,7 +78,7 @@ distfiles:
$(top_srcdir)/.disttmp/$$distdir/docs
dailyfiles:
- distdir=$(PACKAGE); \
+ distdir=$(PKGNAME); \
mkdir $(top_srcdir)/.disttmp/$$distdir/docs;\
cp \
$(srcdir)/createrepo.8 \
diff --git a/dumpMetadata.py b/dumpMetadata.py
deleted file mode 100644
index 2836e25..0000000
--- a/dumpMetadata.py
+++ /dev/null
@@ -1,896 +0,0 @@
-#!/usr/bin/python -t
-# base classes and functions for dumping out package Metadata
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-# Copyright 2004 Duke University
-
-# $Id$
-
-import os
-import rpm
-import exceptions
-import md5
-import sha
-import types
-import struct
-import re
-import stat
-import bz2
-try:
- import sqlitecachec
-except ImportError:
- pass
-
-# done to fix gzip randomly changing the checksum
-import gzip
-from gzip import write32u, FNAME
-
-__all__ = ["GzipFile","open"]
-
-class GzipFile(gzip.GzipFile):
- def _write_gzip_header(self):
- self.fileobj.write('\037\213') # magic header
- self.fileobj.write('\010') # compression method
- fname = self.filename[:-3]
- flags = 0
- if fname:
- flags = FNAME
- self.fileobj.write(chr(flags))
- write32u(self.fileobj, long(0))
- self.fileobj.write('\002')
- self.fileobj.write('\377')
- if fname:
- self.fileobj.write(fname + '\000')
-
-
-def _gzipOpen(filename, mode="rb", compresslevel=9):
- return GzipFile(filename, mode, compresslevel)
-
-def bzipFile(source, dest):
-
- s_fn = open(source, 'rb')
- destination = bz2.BZ2File(dest, 'w', compresslevel=9)
-
- while True:
- data = s_fn.read(1024000)
-
- if not data: break
- destination.write(data)
-
- destination.close()
- s_fn.close()
-
-
-def returnFD(filename):
- try:
- fdno = os.open(filename, os.O_RDONLY)
- except OSError:
- raise MDError, "Error opening file"
- return fdno
-
-def returnHdr(ts, package):
- """hand back the rpm header or raise an Error if the pkg is fubar"""
- opened_here = 0
- try:
- if type(package) is types.StringType:
- opened_here = 1
- fdno = os.open(package, os.O_RDONLY)
- else:
- fdno = package # let's assume this is an fdno and go with it :)
- except OSError:
- raise MDError, "Error opening file"
- ts.setVSFlags((rpm._RPMVSF_NOSIGNATURES|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
- try:
- hdr = ts.hdrFromFdno(fdno)
- except rpm.error:
- raise MDError, "Error opening package"
- if type(hdr) != rpm.hdr:
- raise MDError, "Error opening package"
- ts.setVSFlags(0)
-
- if opened_here:
- os.close(fdno)
- del fdno
-
- return hdr
-
-def getChecksum(sumtype, file, CHUNK=2**16):
- """takes filename, hand back Checksum of it
- sumtype = md5 or sha
- filename = /path/to/file
- CHUNK=65536 by default"""
-
- # chunking brazenly lifted from Ryan Tomayko
- opened_here = 0
- try:
- if type(file) is not types.StringType:
- fo = file # assume it's a file-like-object
- else:
- opened_here = 1
- fo = open(file, 'rb', CHUNK)
-
- if sumtype == 'md5':
- sum = md5.new()
- elif sumtype == 'sha':
- sum = sha.new()
- else:
- raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype
- chunk = fo.read
- while chunk:
- chunk = fo.read(CHUNK)
- sum.update(chunk)
-
- if opened_here:
- fo.close()
- del fo
-
- return sum.hexdigest()
- except:
- raise MDError, 'Error opening file for checksum: %s' % file
-
-
-def utf8String(string):
- """hands back a unicoded string"""
- if string is None:
- return ''
- elif isinstance(string, unicode):
- return string
- try:
- x = unicode(string, 'ascii')
- return string
- except UnicodeError:
- encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
- for enc in encodings:
- try:
- x = unicode(string, enc)
- except UnicodeError:
- pass
- else:
- if x.encode(enc) == string:
- return x.encode('utf-8')
- newstring = ''
- for char in string:
- if ord(char) > 127:
- newstring = newstring + '?'
- else:
- newstring = newstring + char
- return newstring
-
-
-def byteranges(file):
- """takes an rpm file or fileobject and returns byteranges for location of the header"""
- opened_here = 0
- if type(file) is not types.StringType:
- fo = file
- else:
- opened_here = 1
- fo = open(file, 'r')
- #read in past lead and first 8 bytes of sig header
- fo.seek(104)
- # 104 bytes in
- binindex = fo.read(4)
- # 108 bytes in
- (sigindex, ) = struct.unpack('>I', binindex)
- bindata = fo.read(4)
- # 112 bytes in
- (sigdata, ) = struct.unpack('>I', bindata)
- # each index is 4 32bit segments - so each is 16 bytes
- sigindexsize = sigindex * 16
- sigsize = sigdata + sigindexsize
- # we have to round off to the next 8 byte boundary
- disttoboundary = (sigsize % 8)
- if disttoboundary != 0:
- disttoboundary = 8 - disttoboundary
- # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data
- hdrstart = 112 + sigsize + disttoboundary
-
- fo.seek(hdrstart) # go to the start of the header
- fo.seek(8,1) # read past the magic number and reserved bytes
-
- binindex = fo.read(4)
- (hdrindex, ) = struct.unpack('>I', binindex)
- bindata = fo.read(4)
- (hdrdata, ) = struct.unpack('>I', bindata)
-
- # each index is 4 32bit segments - so each is 16 bytes
- hdrindexsize = hdrindex * 16
- # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the
- # end of the sig and the header.
- hdrsize = hdrdata + hdrindexsize + 16
-
- # header end is hdrstart + hdrsize
- hdrend = hdrstart + hdrsize
- if opened_here:
- fo.close()
- del fo
- return (hdrstart, hdrend)
-
-
-class MDError(exceptions.Exception):
- def __init__(self, value=None):
- exceptions.Exception.__init__(self)
- self.value = value
-
- def __str__(self):
- return self.value
-
-
-
-class RpmMetaData:
- """each rpm is one object, you pass it an rpm file
- it opens the file, and pulls the information out in bite-sized chunks :)
- """
-
- mode_cache = {}
-
- def __init__(self, ts, basedir, filename, options):
- try:
- stats = os.stat(os.path.join(basedir, filename))
- self.size = stats[6]
- self.mtime = stats[8]
- del stats
- except OSError, e:
- raise MDError, "Error Stat'ing file %s %s" % (basedir, filename)
- self.options = options
- self.localurl = options['baseurl']
- if options['noepoch']:
- self.noepoch = ""
- else:
- self.noepoch = 0
- self.relativepath = filename
- fd = returnFD(os.path.join(basedir, filename))
- self.hdr = returnHdr(ts, fd)
- os.lseek(fd, 0, 0)
- fo = os.fdopen(fd, 'rb')
- self.pkgid = self.doChecksumCache(fo)
- fo.seek(0)
- (self.rangestart, self.rangeend) = byteranges(fo)
- fo.close()
- del fo
- del fd
-
- # setup our regex objects
- fileglobs = options['file-pattern-match']
- #['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
- dirglobs = options['dir-pattern-match']
- #['.*bin\/.*', '^\/etc\/.*']
- self.dirrc = []
- self.filerc = []
- for glob in fileglobs:
- self.filerc.append(re.compile(glob))
-
- for glob in dirglobs:
- self.dirrc.append(re.compile(glob))
-
- self.filenames = []
- self.dirnames = []
- self.ghostnames = []
- self.genFileLists()
-
- def arch(self):
- if self.tagByName('sourcepackage') == 1 or not self.tagByName('sourcerpm'):
- return 'src'
- else:
- return self.tagByName('arch')
-
- def _correctFlags(self, flags):
- returnflags=[]
- if flags is None:
- return returnflags
-
- if type(flags) is not types.ListType:
- newflag = flags & 0xf
- returnflags.append(newflag)
- else:
- for flag in flags:
- newflag = flag
- if flag is not None:
- newflag = flag & 0xf
- returnflags.append(newflag)
- return returnflags
-
- def _checkPreReq(self, flags):
- reqs=[]
- if flags is None:
- return reqs
-
- if type(flags) is not types.ListType:
- flags = [flags]
- for flag in flags:
- newflag = flag
- if flag is not None:
- newflag = flag & 64
- if newflag == 64:
- reqs.append(1)
- else:
- reqs.append(0)
- return reqs
-
-
- def _correctVersion(self, vers):
- returnvers = []
- vertuple = (None, None, None)
- if vers is None:
- returnvers.append(vertuple)
- return returnvers
-
- if type(vers) is not types.ListType:
- if vers is not None:
- vertuple = self._stringToVersion(vers)
- else:
- vertuple = (None, None, None)
- returnvers.append(vertuple)
- else:
- for ver in vers:
- if ver is not None:
- vertuple = self._stringToVersion(ver)
- else:
- vertuple = (None, None, None)
- returnvers.append(vertuple)
- return returnvers
-
-
- def _stringToVersion(self, strng):
- i = strng.find(':')
- if i != -1 and strng[:i].isdigit():
- epoch = strng[:i]
- else:
- i = -1
- epoch = self.noepoch
- j = strng.rfind('-')
- if j != -1:
- if strng[i + 1:j] == '':
- version = None
- else:
- version = strng[i + 1:j]
- release = strng[j + 1:]
- else:
- if strng[i + 1:] == '':
- version = None
- else:
- version = strng[i + 1:]
- release = None
- return (epoch, version, release)
-
- ###########
- # Title: Remove duplicates from a sequence
- # Submitter: Tim Peters
- # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560
-
- def _uniq(self,s):
- """Return a list of the elements in s, but without duplicates.
-
- For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
- unique("abcabc") some permutation of ["a", "b", "c"], and
- unique(([1, 2], [2, 3], [1, 2])) some permutation of
- [[2, 3], [1, 2]].
-
- For best speed, all sequence elements should be hashable. Then
- unique() will usually work in linear time.
-
- If not possible, the sequence elements should enjoy a total
- ordering, and if list(s).sort() doesn't raise TypeError it's
- assumed that they do enjoy a total ordering. Then unique() will
- usually work in O(N*log2(N)) time.
-
- If that's not possible either, the sequence elements must support
- equality-testing. Then unique() will usually work in quadratic
- time.
- """
-
- n = len(s)
- if n == 0:
- return []
-
- # Try using a dict first, as that's the fastest and will usually
- # work. If it doesn't work, it will usually fail quickly, so it
- # usually doesn't cost much to *try* it. It requires that all the
- # sequence elements be hashable, and support equality comparison.
- u = {}
- try:
- for x in s:
- u[x] = 1
- except TypeError:
- del u # move on to the next method
- else:
- ret = u.keys()
- ret.sort()
- return ret
-
- # We can't hash all the elements. Second fastest is to sort,
- # which brings the equal elements together; then duplicates are
- # easy to weed out in a single pass.
- # NOTE: Python's list.sort() was designed to be efficient in the
- # presence of many duplicate elements. This isn't true of all
- # sort functions in all languages or libraries, so this approach
- # is more effective in Python than it may be elsewhere.
- try:
- t = list(s)
- t.sort()
- except TypeError:
- del t # move on to the next method
- else:
- assert n > 0
- last = t[0]
- lasti = i = 1
- while i < n:
- if t[i] != last:
- t[lasti] = last = t[i]
- lasti += 1
- i += 1
- return t[:lasti]
-
- # Brute force is all that's left.
- u = []
- for x in s:
- if x not in u:
- u.append(x)
- return u
-
- def tagByName(self, tag):
- data = self.hdr[tag]
- if type(data) is types.ListType:
- if len(data) > 0:
- return data[0]
- else:
- return ''
- else:
- return data
-
- def listTagByName(self, tag):
- """take a tag that should be a list and make sure it is one"""
- lst = []
- data = self.hdr[tag]
- if data is None:
- return lst
-
- if type(data) is types.ListType:
- lst.extend(data)
- else:
- lst.append(data)
- return lst
-
-
- def epoch(self):
- if self.hdr['epoch'] is None:
- return self.noepoch
- else:
- return self.tagByName('epoch')
-
- def genFileLists(self):
- """produces lists of dirs and files for this header in two lists"""
-
- files = self.listTagByName('filenames')
- fileflags = self.listTagByName('fileflags')
- filemodes = self.listTagByName('filemodes')
- filetuple = zip(files, filemodes, fileflags)
- for (file, mode, flag) in filetuple:
- #garbage checks
- if mode is None or mode == '':
- self.filenames.append(file)
- continue
- if not RpmMetaData.mode_cache.has_key(mode):
- RpmMetaData.mode_cache[mode] = stat.S_ISDIR(mode)
- if RpmMetaData.mode_cache[mode]:
- self.dirnames.append(file)
- else:
- if flag is None:
- self.filenames.append(file)
- else:
- if (flag & 64):
- self.ghostnames.append(file)
- continue
- self.filenames.append(file)
-
-
- def usefulFiles(self):
- """search for good files"""
- returns = {}
- for item in self.filenames:
- if item is None:
- continue
- for glob in self.filerc:
- if glob.match(item):
- returns[item] = 1
- return returns.keys()
-
- def usefulGhosts(self):
- """search for useful ghost file names"""
- returns = {}
- for item in self.ghostnames:
- if item is None:
- continue
- for glob in self.filerc:
- if glob.match(item):
- returns[item] = 1
- return returns.keys()
-
-
- def usefulDirs(self):
- """search for good dirs"""
- returns = {}
- for item in self.dirnames:
- if item is None:
- continue
- for glob in self.dirrc:
- if glob.match(item):
- returns[item] = 1
- return returns.keys()
-
-
- def depsList(self):
- """returns a list of tuples of dependencies"""
- # these should probably compress down duplicates too
- lst = []
- names = self.hdr[rpm.RPMTAG_REQUIRENAME]
- tmpflags = self.hdr[rpm.RPMTAG_REQUIREFLAGS]
- flags = self._correctFlags(tmpflags)
- prereq = self._checkPreReq(tmpflags)
- ver = self._correctVersion(self.hdr[rpm.RPMTAG_REQUIREVERSION])
- if names is not None:
- lst = zip(names, flags, ver, prereq)
- return self._uniq(lst)
-
- def obsoletesList(self):
- lst = []
- names = self.hdr[rpm.RPMTAG_OBSOLETENAME]
- tmpflags = self.hdr[rpm.RPMTAG_OBSOLETEFLAGS]
- flags = self._correctFlags(tmpflags)
- ver = self._correctVersion(self.hdr[rpm.RPMTAG_OBSOLETEVERSION])
- if names is not None:
- lst = zip(names, flags, ver)
- return self._uniq(lst)
-
- def conflictsList(self):
- lst = []
- names = self.hdr[rpm.RPMTAG_CONFLICTNAME]
- tmpflags = self.hdr[rpm.RPMTAG_CONFLICTFLAGS]
- flags = self._correctFlags(tmpflags)
- ver = self._correctVersion(self.hdr[rpm.RPMTAG_CONFLICTVERSION])
- if names is not None:
- lst = zip(names, flags, ver)
- return self._uniq(lst)
-
- def providesList(self):
- lst = []
- names = self.hdr[rpm.RPMTAG_PROVIDENAME]
- tmpflags = self.hdr[rpm.RPMTAG_PROVIDEFLAGS]
- flags = self._correctFlags(tmpflags)
- ver = self._correctVersion(self.hdr[rpm.RPMTAG_PROVIDEVERSION])
- if names is not None:
- lst = zip(names, flags, ver)
- return self._uniq(lst)
-
- def changelogLists(self):
- lst = []
- names = self.listTagByName('changelogname')
- times = self.listTagByName('changelogtime')
- texts = self.listTagByName('changelogtext')
- if len(names) > 0:
- lst = zip(names, times, texts)
- return lst
-
- def doChecksumCache(self, fo):
- """return a checksum for a package:
- - check if the checksum cache is enabled
- if not - return the checksum
- if so - check to see if it has a cache file
- if so, open it and return the first line's contents
- if not, grab the checksum and write it to a file for this pkg
- """
- if not self.options['cache']:
- return getChecksum(self.options['sumtype'], fo)
-
- t = []
- if type(self.hdr[rpm.RPMTAG_SIGGPG]) is not types.NoneType:
- t.append("".join(self.hdr[rpm.RPMTAG_SIGGPG]))
- if type(self.hdr[rpm.RPMTAG_SIGPGP]) is not types.NoneType:
- t.append("".join(self.hdr[rpm.RPMTAG_SIGPGP]))
- if type(self.hdr[rpm.RPMTAG_HDRID]) is not types.NoneType:
- t.append("".join(self.hdr[rpm.RPMTAG_HDRID]))
-
- key = md5.new("".join(t)).hexdigest()
-
- csumtag = '%s-%s-%s-%s' % (os.path.basename(self.relativepath),
- self.hdr[rpm.RPMTAG_SHA1HEADER],
- self.size, self.mtime)
- csumfile = '%s/%s' % (self.options['cachedir'], csumtag)
- if os.path.exists(csumfile) and self.mtime <= os.stat(csumfile)[8]:
- csumo = open(csumfile, 'r')
- checksum = csumo.readline()
- csumo.close()
-
- else:
- checksum = getChecksum(self.options['sumtype'], fo)
- csumo = open(csumfile, 'w')
- csumo.write(checksum)
- csumo.close()
-
- return checksum
-
-
-
-def generateXML(doc, node, formatns, rpmObj, sumtype):
- """takes an xml doc object and a package metadata entry node, populates a
- package node with the md information"""
- ns = node.ns()
- pkgNode = node.newChild(None, "package", None)
- pkgNode.newProp('type', 'rpm')
- pkgNode.newChild(None, 'name', rpmObj.tagByName('name'))
- pkgNode.newChild(None, 'arch', rpmObj.arch())
- version = pkgNode.newChild(None, 'version', None)
- if str(rpmObj.epoch()):
- version.newProp('epoch', str(rpmObj.epoch()))
- version.newProp('ver', str(rpmObj.tagByName('version')))
- version.newProp('rel', str(rpmObj.tagByName('release')))
- csum = pkgNode.newChild(None, 'checksum', rpmObj.pkgid)
- csum.newProp('type', sumtype)
- csum.newProp('pkgid', 'YES')
- for tag in ['summary', 'description', 'packager', 'url']:
- value = rpmObj.tagByName(tag)
- value = utf8String(value)
- value = re.sub("\n$", '', value)
- entry = pkgNode.newChild(None, tag, None)
- entry.addContent(value)
-
- time = pkgNode.newChild(None, 'time', None)
- time.newProp('file', str(rpmObj.mtime))
- time.newProp('build', str(rpmObj.tagByName('buildtime')))
- size = pkgNode.newChild(None, 'size', None)
- size.newProp('package', str(rpmObj.size))
- size.newProp('installed', str(rpmObj.tagByName('size')))
- size.newProp('archive', str(rpmObj.tagByName('archivesize')))
- location = pkgNode.newChild(None, 'location', None)
- if rpmObj.localurl is not None:
- location.newProp('xml:base', rpmObj.localurl)
- location.newProp('href', rpmObj.relativepath)
- format = pkgNode.newChild(ns, 'format', None)
- for tag in ['license', 'vendor', 'group', 'buildhost', 'sourcerpm']:
- value = rpmObj.tagByName(tag)
- value = utf8String(value)
- value = re.sub("\n$", '', value)
- entry = format.newChild(formatns, tag, None)
- entry.addContent(value)
-
- hr = format.newChild(formatns, 'header-range', None)
- hr.newProp('start', str(rpmObj.rangestart))
- hr.newProp('end', str(rpmObj.rangeend))
- for (lst, nodename) in [(rpmObj.providesList(), 'provides'),
- (rpmObj.conflictsList(), 'conflicts'),
- (rpmObj.obsoletesList(), 'obsoletes')]:
- if len(lst) > 0:
- rpconode = format.newChild(formatns, nodename, None)
- for (name, flags, (e,v,r)) in lst:
- entry = rpconode.newChild(formatns, 'entry', None)
- entry.newProp('name', name)
- if flags != 0:
- if flags == 2: arg = 'LT'
- if flags == 4: arg = 'GT'
- if flags == 8: arg = 'EQ'
- if flags == 10: arg = 'LE'
- if flags == 12: arg = 'GE'
- entry.newProp('flags', arg)
- # if we've got a flag we've got a version, I hope :)
- if str(e):
- entry.newProp('epoch', str(e))
- if v:
- entry.newProp('ver', str(v))
- if r:
- entry.newProp('rel', str(r))
-
- depsList = rpmObj.depsList()
- if len(depsList) > 0:
- rpconode = format.newChild(formatns, 'requires', None)
- for (name, flags, (e,v,r), prereq) in depsList:
- entry = rpconode.newChild(formatns, 'entry', None)
- entry.newProp('name', name)
- if flags != 0:
- if flags == 2: arg = 'LT'
- if flags == 4: arg = 'GT'
- if flags == 8: arg = 'EQ'
- if flags == 10: arg = 'LE'
- if flags == 12: arg = 'GE'
- entry.newProp('flags', arg)
- # if we've got a flag we've got a version, I hope :)
- if str(e):
- entry.newProp('epoch', str(e))
- if v:
- entry.newProp('ver', str(v))
- if r:
- entry.newProp('rel', str(r))
- if prereq == 1:
- entry.newProp('pre', str(prereq))
-
- ff = rpmObj.usefulFiles()
- ff.sort()
- for file in ff:
- files = format.newChild(None, 'file', None)
- file = utf8String(file)
- files.addContent(file)
- ff = rpmObj.usefulDirs()
- ff.sort()
- for directory in ff:
- files = format.newChild(None, 'file', None)
- directory = utf8String(directory)
- files.addContent(directory)
- files.newProp('type', 'dir')
- ff = rpmObj.usefulGhosts()
- ff.sort()
- for directory in ff:
- files = format.newChild(None, 'file', None)
- directory = utf8String(directory)
- files.addContent(directory)
- files.newProp('type', 'ghost')
-
- return pkgNode
-
-def fileListXML(doc, node, rpmObj):
- pkg = node.newChild(None, 'package', None)
- pkg.newProp('pkgid', rpmObj.pkgid)
- pkg.newProp('name', rpmObj.tagByName('name'))
- pkg.newProp('arch', rpmObj.arch())
- version = pkg.newChild(None, 'version', None)
- if str(rpmObj.epoch()):
- version.newProp('epoch', str(rpmObj.epoch()))
- version.newProp('ver', str(rpmObj.tagByName('version')))
- version.newProp('rel', str(rpmObj.tagByName('release')))
- for file in rpmObj.filenames:
- files = pkg.newChild(None, 'file', None)
- file = utf8String(file)
- files.addContent(file)
- for directory in rpmObj.dirnames:
- files = pkg.newChild(None, 'file', None)
- directory = utf8String(directory)
- files.addContent(directory)
- files.newProp('type', 'dir')
- for ghost in rpmObj.ghostnames:
- files = pkg.newChild(None, 'file', None)
- ghost = utf8String(ghost)
- files.addContent(ghost)
- files.newProp('type', 'ghost')
- return pkg
-
-def otherXML(doc, node, rpmObj):
- pkg = node.newChild(None, 'package', None)
- pkg.newProp('pkgid', rpmObj.pkgid)
- pkg.newProp('name', rpmObj.tagByName('name'))
- pkg.newProp('arch', rpmObj.arch())
- version = pkg.newChild(None, 'version', None)
- if str(rpmObj.epoch()):
- version.newProp('epoch', str(rpmObj.epoch()))
- version.newProp('ver', str(rpmObj.tagByName('version')))
- version.newProp('rel', str(rpmObj.tagByName('release')))
- clogs = rpmObj.changelogLists()
- for (name, time, text) in clogs:
- clog = pkg.newChild(None, 'changelog', None)
- clog.addContent(utf8String(text))
- clog.newProp('author', utf8String(name))
- clog.newProp('date', str(time))
- return pkg
-
-def repoXML(node, cmds):
- """generate the repomd.xml file that stores the info on the other files"""
- sumtype = cmds['sumtype']
- workfiles = [(cmds['otherfile'], 'other',),
- (cmds['filelistsfile'], 'filelists'),
- (cmds['primaryfile'], 'primary')]
- repoid='garbageid'
-
- repopath = os.path.join(cmds['outputdir'], cmds['tempdir'])
-
- if cmds['database']:
- try:
- dbversion = str(sqlitecachec.DBVERSION)
- except AttributeError:
- dbversion = '9'
- rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)
-
- for (file, ftype) in workfiles:
- complete_path = os.path.join(repopath, file)
-
- zfo = _gzipOpen(complete_path)
- uncsum = getChecksum(sumtype, zfo)
- zfo.close()
- csum = getChecksum(sumtype, complete_path)
- timestamp = os.stat(complete_path)[8]
-
- db_csums = {}
- db_compressed_sums = {}
-
- if cmds['database']:
- if ftype == 'primary':
- rp.getPrimary(complete_path, csum)
-
- elif ftype == 'filelists':
- rp.getFilelists(complete_path, csum)
-
- elif ftype == 'other':
- rp.getOtherdata(complete_path, csum)
-
-
- tmp_result_name = '%s.xml.gz.sqlite' % ftype
- tmp_result_path = os.path.join(repopath, tmp_result_name)
- good_name = '%s.sqlite' % ftype
- resultpath = os.path.join(repopath, good_name)
-
- # rename from silly name to not silly name
- os.rename(tmp_result_path, resultpath)
- compressed_name = '%s.bz2' % good_name
- result_compressed = os.path.join(repopath, compressed_name)
- db_csums[ftype] = getChecksum(sumtype, resultpath)
-
- # compress the files
- bzipFile(resultpath, result_compressed)
- # csum the compressed file
- db_compressed_sums[ftype] = getChecksum(sumtype, result_compressed)
- # remove the uncompressed file
- os.unlink(resultpath)
-
- # timestamp the compressed file
- db_timestamp = os.stat(result_compressed)[8]
-
- # add this data as a section to the repomdxml
- db_data_type = '%s_db' % ftype
- data = node.newChild(None, 'data', None)
- data.newProp('type', db_data_type)
- location = data.newChild(None, 'location', None)
- if cmds['baseurl'] is not None:
- location.newProp('xml:base', cmds['baseurl'])
-
- location.newProp('href', os.path.join(cmds['finaldir'], compressed_name))
- checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
- checksum.newProp('type', sumtype)
- db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
- unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
- unchecksum.newProp('type', sumtype)
- database_version = data.newChild(None, 'database_version', dbversion)
-
-
- data = node.newChild(None, 'data', None)
- data.newProp('type', ftype)
- location = data.newChild(None, 'location', None)
- if cmds['baseurl'] is not None:
- location.newProp('xml:base', cmds['baseurl'])
- location.newProp('href', os.path.join(cmds['finaldir'], file))
- checksum = data.newChild(None, 'checksum', csum)
- checksum.newProp('type', sumtype)
- timestamp = data.newChild(None, 'timestamp', str(timestamp))
- unchecksum = data.newChild(None, 'open-checksum', uncsum)
- unchecksum.newProp('type', sumtype)
-
- # if we've got a group file then checksum it once and be done
- if cmds['groupfile'] is not None:
- grpfile = cmds['groupfile']
- timestamp = os.stat(grpfile)[8]
- sfile = os.path.basename(grpfile)
- fo = open(grpfile, 'r')
- output = open(os.path.join(cmds['outputdir'], cmds['tempdir'], sfile), 'w')
- output.write(fo.read())
- output.close()
- fo.seek(0)
- csum = getChecksum(sumtype, fo)
- fo.close()
-
- data = node.newChild(None, 'data', None)
- data.newProp('type', 'group')
- location = data.newChild(None, 'location', None)
- if cmds['baseurl'] is not None:
- location.newProp('xml:base', cmds['baseurl'])
- location.newProp('href', os.path.join(cmds['finaldir'], sfile))
- checksum = data.newChild(None, 'checksum', csum)
- checksum.newProp('type', sumtype)
- timestamp = data.newChild(None, 'timestamp', str(timestamp))
-
-
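Note what the removed repoXML recorded for each metadata file: a 'checksum' of the .xml.gz as it sits on disk and an 'open-checksum' of the XML it decompresses to, so clients can verify either form. Isolated from the cmds plumbing, the double digest looks roughly like this (a sketch using hashlib and gzip directly in place of the module's getChecksum and _gzipOpen helpers; the default sumtype 'sha' is SHA-1):

    import gzip
    import hashlib

    def doubleChecksum(path, algo='sha1'):
        """Return (csum, open_csum) for a compressed metadata file:
        digest of the file as stored, digest of its uncompressed content."""
        csum = hashlib.new(algo)
        csum.update(open(path, 'rb').read())
        open_csum = hashlib.new(algo)
        open_csum.update(gzip.open(path, 'rb').read())
        return csum.hexdigest(), open_csum.hexdigest()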
diff --git a/genpkgmetadata.py b/genpkgmetadata.py
index bef0225..1650a38 100755
--- a/genpkgmetadata.py
+++ b/genpkgmetadata.py
@@ -17,30 +17,27 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2004 Duke University
-# $Id$
-
import os
import sys
import getopt
-import rpm
import libxml2
import string
import fnmatch
import shutil
+import rpm
-import dumpMetadata
-import readMetadata
-from dumpMetadata import _gzipOpen
-__version__ = '0.4.9'
+# for now, for later, we move all this around
+import createrepo
+from createrepo import MDError
+import createrepo.yumbased
+import createrepo.utils
-def errorprint(stuff):
- print >> sys.stderr, stuff
+from createrepo.utils import _gzipOpen, errorprint, _
-def _(args):
- """Stub function for translation"""
- return args
+__version__ = '0.9'
+# cli
def usage(retval=1):
print _("""
createrepo [options] directory-of-packages
@@ -69,6 +66,7 @@ def usage(retval=1):
sys.exit(retval)
+# module
class MetaDataGenerator:
def __init__(self, cmds):
self.cmds = cmds
@@ -76,6 +74,7 @@ class MetaDataGenerator:
self.pkgcount = 0
self.files = []
+ # module
def _os_path_walk(self, top, func, arg):
"""Directory tree walk with callback function.
copy of os.path.walk, fixes the link/stating problem
@@ -90,7 +89,7 @@ class MetaDataGenerator:
name = os.path.join(top, name)
if os.path.isdir(name):
self._os_path_walk(name, func, arg)
-
+ # module
def getFileList(self, basepath, directory, ext):
"""Return all files in path matching ext, store them in filelist,
recurse dirs. Returns a list object"""
@@ -112,7 +111,7 @@ class MetaDataGenerator:
startdir = os.path.join(basepath, directory) + '/'
self._os_path_walk(startdir, extension_visitor, filelist)
return filelist
-
+ #module
def checkTimeStamps(self, directory):
if self.cmds['checkts']:
files = self.getFileList(self.cmds['basedir'], directory, '.rpm')
@@ -124,7 +123,7 @@ class MetaDataGenerator:
if os.path.getctime(fn) > self.cmds['mdtimestamp']:
return False
return True
-
+ #module
def trimRpms(self, files):
badrpms = []
for file in files:
@@ -144,7 +143,7 @@ class MetaDataGenerator:
# rpms we're going to be dealing with
if self.cmds['update']:
#build the paths
- basefile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['primaryfile'])
+ primaryfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['primaryfile'])
flfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['filelistsfile'])
otherfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['otherfile'])
opts = {
@@ -152,71 +151,62 @@ class MetaDataGenerator:
'pkgdir' : os.path.normpath(os.path.join(self.cmds['basedir'], directory))
}
#and scan the old repo
- self.oldData = readMetadata.MetadataIndex(self.cmds['outputdir'],
- basefile, flfile, otherfile, opts)
+ self.oldData = createrepo.readMetadata.MetadataIndex(self.cmds['outputdir'],
+ primaryfile, flfile, otherfile, opts)
if self.cmds['pkglist']:
- files = self.cmds['pkglist']
+ packages = self.cmds['pkglist']
else:
- files = self.getFileList(self.cmds['basedir'], directory, '.rpm')
+ packages = self.getFileList(self.cmds['basedir'], directory, '.rpm')
- files = self.trimRpms(files)
- self.pkgcount = len(files)
+ packages = self.trimRpms(packages)
+ self.pkgcount = len(packages)
self.openMetadataDocs()
- self.writeMetadataDocs(files, directory)
+ self.writeMetadataDocs(packages, directory)
self.closeMetadataDocs()
-
+ # module
def openMetadataDocs(self):
- self._setupBase()
- self._setupFilelists()
- self._setupOther()
-
- def _setupBase(self):
- # setup the base metadata doc
- self.basedoc = libxml2.newDoc("1.0")
- self.baseroot = self.basedoc.newChild(None, "metadata", None)
- basens = self.baseroot.newNs('http://linux.duke.edu/metadata/common', None)
- self.formatns = self.baseroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm')
- self.baseroot.setNs(basens)
- basefilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['primaryfile'])
- self.basefile = _gzipOpen(basefilepath, 'w')
- self.basefile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
- self.basefile.write('<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">\n' %
+ self.primaryfile = self._setupPrimary()
+ self.flfile = self._setupFilelists()
+ self.otherfile = self._setupOther()
+
+ def _setupPrimary(self):
+ # setup the primary metadata file
+ primaryfilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['primaryfile'])
+ fo = _gzipOpen(primaryfilepath, 'w')
+ fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+ fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">\n' %
self.pkgcount)
+ return fo
def _setupFilelists(self):
- # setup the file list doc
- self.filesdoc = libxml2.newDoc("1.0")
- self.filesroot = self.filesdoc.newChild(None, "filelists", None)
- filesns = self.filesroot.newNs('http://linux.duke.edu/metadata/filelists', None)
- self.filesroot.setNs(filesns)
+ # setup the filelist file
filelistpath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['filelistsfile'])
- self.flfile = _gzipOpen(filelistpath, 'w')
- self.flfile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
- self.flfile.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%s">\n' %
+ fo = _gzipOpen(filelistpath, 'w')
+ fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+ fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%s">\n' %
self.pkgcount)
-
+ return fo
+
def _setupOther(self):
- # setup the other doc
- self.otherdoc = libxml2.newDoc("1.0")
- self.otherroot = self.otherdoc.newChild(None, "otherdata", None)
- otherns = self.otherroot.newNs('http://linux.duke.edu/metadata/other', None)
- self.otherroot.setNs(otherns)
+ # setup the other file
otherfilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['otherfile'])
- self.otherfile = _gzipOpen(otherfilepath, 'w')
- self.otherfile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
- self.otherfile.write('<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s">\n' %
+ fo = _gzipOpen(otherfilepath, 'w')
+ fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+ fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s">\n' %
self.pkgcount)
-
- def _getNodes(self, file, directory, current):
+ return fo
+
+ def _getNodes(self, pkg, directory, current):
+ # delete this function since it seems to do nothing anymore
basenode = None
filesnode = None
othernode = None
try:
rpmdir= os.path.join(self.cmds['basedir'], directory)
- mdobj = dumpMetadata.RpmMetaData(self.ts, rpmdir, file, self.cmds)
+ mdobj = dumpMetadata.RpmMetaData(self.ts, rpmdir, pkg, self.cmds)
except dumpMetadata.MDError, e:
- errorprint('\n%s - %s' % (e, file))
+ errorprint('\n%s - %s' % (e, pkg))
return None
try:
basenode = dumpMetadata.generateXML(self.basedoc, self.baseroot, self.formatns, mdobj, self.cmds['sumtype'])
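The _setup* rewrite in the hunk above is the core of the conversion: instead of building a libxml2 document tree in memory and serializing it once at the end, each metadata file is now an open gzip stream that receives its header when opened, one serialized fragment per package, and a closing tag in closeMetadataDocs. The shape of that streaming pattern, as a minimal sketch (stock gzip standing in for _gzipOpen; packages is assumed to be a list of objects with the do_primary_xml_dump() method CreateRepoPackage grows in this commit):

    import gzip

    def writePrimary(path, packages):
        """Stream primary.xml.gz: header, one fragment per package, footer."""
        fo = gzip.open(path, 'w')
        fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common" '
                 'xmlns:rpm="http://linux.duke.edu/metadata/rpm" '
                 'packages="%s">\n' % len(packages))
        for po in packages:
            fo.write(po.do_primary_xml_dump())
        fo.write('\n</metadata>')
        fo.close()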
@@ -235,48 +225,60 @@ class MetaDataGenerator:
return None
return basenode,filesnode,othernode
- def writeMetadataDocs(self, files, directory, current=0):
- for file in files:
+ def read_in_package(self, directory, rpmfile):
+ # XXX fixme try/excepts here
+ # directory is stupid - just make it part of the class
+ rpmfile = '%s/%s/%s' % (self.cmds['basedir'], directory, rpmfile)
+ po = createrepo.yumbased.CreateRepoPackage(self.ts, rpmfile)
+ return po
+
+ def writeMetadataDocs(self, pkglist, directory, current=0):
+ # FIXME
+ # directory is unused, kill it, pkglist should come from self
+ # I don't see why current needs to be this way at all
+ for pkg in pkglist:
current+=1
recycled = False
sep = '-'
+
+ # look to see if we can get the data from the old repodata
+ # if so write this one out that way
if self.cmds['update']:
#see if we can pull the nodes from the old repo
- nodes = self.oldData.getNodes(file)
+ nodes = self.oldData.getNodes(pkg)
if nodes is not None:
recycled = True
- sep = '*'
+
+
+ # otherwise do it individually
if not recycled:
#scan rpm files
- nodes = self._getNodes(file, directory, current)
- if nodes is None:
- continue
- basenode, filenode, othernode = nodes
- del nodes
+ po = self.read_in_package(directory, pkg)
+ self.primaryfile.write(po.do_primary_xml_dump())
+ self.flfile.write(po.do_filelists_xml_dump())
+ self.otherfile.write(po.do_other_xml_dump())
+ else:
+ sep = '*'
+ primarynode, filenode, othernode = nodes
+
+ for node, outfile in ((primarynode,self.primaryfile),
+ (filenode,self.flfile),
+ (othernode,self.otherfile)):
+ if node is None:
+ break
+ output = node.serialize('UTF-8', self.cmds['pretty'])
+ outfile.write(output)
+ outfile.write('\n')
+
+ self.oldData.freeNodes(pkg)
+
if not self.cmds['quiet']:
if self.cmds['verbose']:
- print '%d/%d %s %s' % (current, self.pkgcount, sep, file)
+ print '%d/%d %s %s' % (current, self.pkgcount, sep, pkg)
else:
sys.stdout.write('\r' + ' ' * 80)
- sys.stdout.write("\r%d/%d %s %s" % (current, self.pkgcount, sep, file))
+ sys.stdout.write("\r%d/%d %s %s" % (current, self.pkgcount, sep, pkg))
sys.stdout.flush()
- if basenode is None:
- continue
-
- for node, outfile in ((basenode,self.basefile),
- (filenode,self.flfile),
- (othernode,self.otherfile)):
- if node is None:
- break
- output = node.serialize('UTF-8', self.cmds['pretty'])
- outfile.write(output)
- outfile.write('\n')
- if not recycled:
- #recycled nodes can be multiply referenced
- node.unlinkNode()
- node.freeNode()
- if recycled:
- self.oldData.freeNodes(file)
return current
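So writeMetadataDocs now has two paths per package: in --update mode it first asks the old-repodata index for cached nodes and serializes those (the '*' progress marker), and only on a miss does it open the rpm itself (the '-' marker). Condensed into a standalone function (a sketch; getNodes, freeNodes and the do_*_xml_dump names are the ones used in this commit, the rest is hypothetical glue):

    def writeOnePackage(pkg, old_index, outfiles, reader):
        """Recycle cached nodes for pkg when possible, else read it fresh.
        outfiles is (primary, filelists, other); reader opens the rpm."""
        nodes = old_index.getNodes(pkg) if old_index is not None else None
        if nodes is None:
            po = reader(pkg)                     # cache miss: scan the rpm
            outfiles[0].write(po.do_primary_xml_dump())
            outfiles[1].write(po.do_filelists_xml_dump())
            outfiles[2].write(po.do_other_xml_dump())
            return '-'
        for node, fo in zip(nodes, outfiles):    # cache hit: reuse old XML
            fo.write(node.serialize('UTF-8', 1))
            fo.write('\n')
        old_index.freeNodes(pkg)
        return '*'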
@@ -288,21 +290,18 @@ class MetaDataGenerator:
# save them up to the tmp locations:
if not self.cmds['quiet']:
print _('Saving Primary metadata')
- self.basefile.write('\n</metadata>')
- self.basefile.close()
- self.basedoc.freeDoc()
+ self.primaryfile.write('\n</metadata>')
+ self.primaryfile.close()
if not self.cmds['quiet']:
print _('Saving file lists metadata')
self.flfile.write('\n</filelists>')
self.flfile.close()
- self.filesdoc.freeDoc()
if not self.cmds['quiet']:
print _('Saving other metadata')
self.otherfile.write('\n</otherdata>')
self.otherfile.close()
- self.otherdoc.freeDoc()
def doRepoMetadata(self):
"""wrapper to generate the repomd.xml file that stores the info on the other files"""
@@ -313,8 +312,8 @@ class MetaDataGenerator:
repofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['repomdfile'])
try:
- dumpMetadata.repoXML(reporoot, self.cmds)
- except dumpMetadata.MDError, e:
+ createrepo.repoXML(reporoot, self.cmds)
+ except MDError, e:
errorprint(_('Error generating repo xml file: %s') % e)
sys.exit(1)
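After this change genpkgmetadata.py is meant to be a thin CLI over the createrepo package. From the caller's side the new per-package API looks roughly like this (a sketch built from the imports and the read_in_package signature above; the rpm path is a placeholder, and whether CreateRepoPackage raises MDError on a bad rpm is an assumption here):

    import rpm
    import createrepo.yumbased
    from createrepo import MDError

    ts = rpm.TransactionSet()
    try:
        po = createrepo.yumbased.CreateRepoPackage(ts, '/tmp/foo-1.0-1.noarch.rpm')
        print po.do_primary_xml_dump()
    except MDError, e:
        print 'cannot read package: %s' % e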
diff --git a/readMetadata.py b/readMetadata.py
deleted file mode 100644
index 0d9dacf..0000000
--- a/readMetadata.py
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/python -t
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-# Copyright 2006 Red Hat
-
-import os
-import sys
-import libxml2
-import stat
-
-def errorprint(stuff):
- print >> sys.stderr, stuff
-
-def _(args):
- """Stub function for translation"""
- return args
-
-class MetadataIndex(object):
-
- def __init__(self, outputdir, basefile, filelistfile, otherfile, opts=None):
- if opts is None:
- opts = {}
- self.opts = opts
- self.outputdir = outputdir
- self.files = {'base' : basefile,
- 'filelist' : filelistfile,
- 'other' : otherfile}
- self.scan()
-
- def scan(self):
- """Read in and index old repo data"""
- self.basenodes = {}
- self.filesnodes = {}
- self.othernodes = {}
- self.pkg_ids = {}
- if self.opts.get('verbose'):
- print _("Scanning old repo data")
- for file in self.files.values():
- if not os.path.exists(file):
- #cannot scan
- errorprint(_("Previous repo file missing: %s") % file)
- return
- root = libxml2.parseFile(self.files['base']).getRootElement()
- self._scanPackageNodes(root, self._handleBase)
- if self.opts.get('verbose'):
- print _("Indexed %i base nodes" % len(self.basenodes))
- root = libxml2.parseFile(self.files['filelist']).getRootElement()
- self._scanPackageNodes(root, self._handleFiles)
- if self.opts.get('verbose'):
- print _("Indexed %i filelist nodes" % len(self.filesnodes))
- root = libxml2.parseFile(self.files['other']).getRootElement()
- self._scanPackageNodes(root, self._handleOther)
- if self.opts.get('verbose'):
- print _("Indexed %i other nodes" % len(self.othernodes))
- #reverse index pkg ids to track references
- self.pkgrefs = {}
- for relpath, pkgid in self.pkg_ids.iteritems():
- self.pkgrefs.setdefault(pkgid,[]).append(relpath)
-
- def _scanPackageNodes(self, root, handler):
- node = root.children
- while node is not None:
- if node.type != "element":
- node = node.next
- continue
- if node.name == "package":
- handler(node)
- node = node.next
-
- def _handleBase(self, node):
- top = node
- node = node.children
- pkgid = None
- mtime = None
- size = None
- relpath = None
- while node is not None:
- if node.type != "element":
- node = node.next
- continue
- if node.name == "checksum":
- pkgid = node.content
- elif node.name == "time":
- mtime = int(node.prop('file'))
- elif node.name == "size":
- size = int(node.prop('package'))
- elif node.name == "location":
- relpath = node.prop('href')
- node = node.next
- if relpath is None:
- print _("Incomplete data for node")
- return
- if pkgid is None:
- print _("pkgid missing for %s") % relpath
- return
- if mtime is None:
- print _("mtime missing for %s") % relpath
- return
- if size is None:
- print _("size missing for %s") % relpath
- return
- filepath = os.path.join(self.opts['pkgdir'], relpath)
- try:
- st = os.stat(filepath)
- except OSError:
- #file missing -- ignore
- return
- if not stat.S_ISREG(st.st_mode):
- #ignore non files
- return
- #check size and mtime
- if st.st_size != size:
- if self.opts.get('verbose'):
- print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
- return
- if st.st_mtime != mtime:
- if self.opts.get('verbose'):
- print _("Modification time changed for %s") % filepath
- return
- #otherwise we index
- self.basenodes[relpath] = top
- self.pkg_ids[relpath] = pkgid
-
- def _handleFiles(self, node):
- pkgid = node.prop('pkgid')
- if pkgid:
- self.filesnodes[pkgid] = node
-
- def _handleOther(self, node):
- pkgid = node.prop('pkgid')
- if pkgid:
- self.othernodes[pkgid] = node
-
- def getNodes(self, relpath):
- """Return base, filelist, and other nodes for file, if they exist
-
- Returns a tuple of nodes, or None if not found
- """
- bnode = self.basenodes.get(relpath,None)
- if bnode is None:
- return None
- pkgid = self.pkg_ids.get(relpath,None)
- if pkgid is None:
- print _("No pkgid found for: %s") % relpath
- return None
- fnode = self.filesnodes.get(pkgid,None)
- if fnode is None:
- return None
- onode = self.othernodes.get(pkgid,None)
- if onode is None:
- return None
- return bnode, fnode, onode
-
- def freeNodes(self,relpath):
- #causing problems
- """Free up nodes corresponding to file, if possible"""
- bnode = self.basenodes.get(relpath,None)
- if bnode is None:
- print "Missing node for %s" % relpath
- return
- bnode.unlinkNode()
- bnode.freeNode()
- del self.basenodes[relpath]
- pkgid = self.pkg_ids.get(relpath,None)
- if pkgid is None:
- print _("No pkgid found for: %s") % relpath
- return None
- del self.pkg_ids[relpath]
- dups = self.pkgrefs.get(pkgid)
- dups.remove(relpath)
- if len(dups):
- #still referenced
- return
- del self.pkgrefs[pkgid]
- for nodes in self.filesnodes, self.othernodes:
- node = nodes.get(pkgid)
- if node is not None:
- node.unlinkNode()
- node.freeNode()
- del nodes[pkgid]
-
-
-if __name__ == "__main__":
- #test code - attempts to read a repo in working directory
- idx = MetadataIndex(".", "repodata/primary.xml.gz", "repodata/filelists.xml.gz",
- "repodata/other.xml.gz", {'verbose':1})