[Yum] [PATCH 4 of 4] hard link packages between local caches
Daniel P. Berrange
berrange at redhat.com
Wed May 30 18:15:40 UTC 2007
When syncing multiple repositories across many architectures there will be
a fairly large number of duplicate RPMs present in all repositories. This
is particularly true of x86_64 multilib distros which share most of the
i386 packages. Downloading many GB of packages for an x86_64 repo which are
already present in an i386 synced repo is wasteful of time, disk space and
bandwidth. So this patch adds an extra argument --package-cache which takes
a directory path. This argument can be repeated multiple times. Before
downloading a package, reposync will check each of these directories to see
if they already contain a copy of the package. If so, the local package will
be hard-linked into the destdir.
Typical usage would be to list the i386 repo cache when running a sync of
the x86_64 repository, and vice versa.
To give an idea of the saving, Fedora rawhide currently has ~7400 rpms in
i386 repos, and 13,000 in x86_64 trees. ~6900 of these RPMs were present
in both trees. Hardlinking saved 8 GB of disk space & avoided 8 GB of file
downloads :-)
Dan
--
|=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=|
|=- Perl modules: http://search.cpan.org/~danberr/ -=|
|=- Projects: http://freshmeat.net/~danielpb/ -=|
|=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=|
-------------- next part --------------
diff -r 09e3be327b9e reposync.py
--- a/reposync.py Wed May 30 13:26:50 2007 -0400
+++ b/reposync.py Wed May 30 13:26:53 2007 -0400
@@ -103,6 +103,8 @@ def parseArgs():
help="Use a temp dir for storing/accessing yum-cache")
parser.add_option("-d", "--delete", default=False, action="store_true",
help="delete local packages no longer present in repository")
+ parser.add_option("-k", "--package-cache", default=[], dest='pkgcache', action='append',
+ help="additional directory to search for pre-existing packages")
parser.add_option("-p", "--download_path", dest='destdir',
default=os.getcwd(), help="Path to download packages to: defaults to current dir")
parser.add_option("-g", "--gpgcheck", default=False, action="store_true",
@@ -135,6 +137,14 @@ def main():
my = RepoSync(opts=opts)
my.doConfigSetup(fn=opts.config, init_plugins=False)
+ # Populate cache of existing download RPMs from other
+ # repositories we can link to
+ pkgcache = {}
+ for dir in opts.pkgcache:
+ cache = localpkgs(dir)
+ for k in cache.keys():
+ pkgcache[k] = cache[k]
+
# Force unprivileged users to have a private temporary cachedir
# if they've not given an explicit cachedir
if os.getuid() != 0 and not opts.cachedir:
@@ -183,7 +193,18 @@ def main():
download_list = list(reposack)
local_repo_path = opts.destdir + '/' + repo.id
- if opts.delete and os.path.exists(local_repo_path):
+ # make sure the repo subdir is here before we go on.
+ if not os.path.exists(local_repo_path):
+ try:
+ os.makedirs(local_repo_path)
+ except IOError, e:
+ my.logger.error("Could not make repo subdir: %s" % e)
+ my.closeRpmDB()
+ sys.exit(1)
+
+ # Check if there's any local files no longer on the remote
+ # repo which need purging
+ if opts.delete:
current_pkgs = localpkgs(local_repo_path)
download_set = {}
@@ -199,6 +220,7 @@ def main():
if not opts.quiet:
my.logger.info("Removing obsolete %s", pkg)
os.unlink(current_pkgs[pkg]['path'])
+
download_list.sort(sortPkgObj)
n = 0
@@ -206,47 +228,63 @@ def main():
n = n + 1
repo = my.repos.getRepo(pkg.repoid)
remote = pkg.returnSimple('relativepath')
+ rpmname = os.path.basename(remote)
local = local_repo_path + '/' + remote
localdir = os.path.dirname(local)
+ pkg.localpath = local # Hack: to set the localpath we want.
if not os.path.exists(localdir):
os.makedirs(localdir)
- if (os.path.exists(local) and
+ # If we have a local RPM with same name, and it is
+ # on the same storage device, and it has same size
+ # then we can hardlink it into local dir.
+ if (not os.path.exists(local) and
+ pkgcache.has_key(rpmname) and
+ os.stat(local_repo_path).st_dev == pkgcache[rpmname]['device'] and
+ pkgcache[rpmname]['size'] == int(pkg.returnSimple('packagesize'))):
+
+ if not opts.quiet:
+ my.logger.info("[%s: %-5d of %-5d ] Linking existing %s" % (repo.id, n, len(download_list), remote))
+ os.link(pkgcache[rpmname]['path'], local)
+
+ # Optionally check gpg signature of local package
+ if os.path.exists(local) and opts.gpgcheck:
+ result, error = my.sigCheckPkg(pkg)
+ if result != 0:
+ if not opts.quiet:
+ my.logger.error("[%s: %-5d of %-5d ] Removing non-matching %s" % (repo.id, n, len(download_list), remote))
+ os.unlink(local)
+
+ # If we have a local pkg with same name, check its size
+ if (os.path.exists(local) and
str(os.path.getsize(local)) == pkg.returnSimple('packagesize')):
if not opts.quiet:
my.logger.error("[%s: %-5d of %-5d ] Skipping existing %s" % (repo.id, n, len(download_list), remote))
continue
-
+
+ # If we're just printing URLs, skip to next repo
if opts.urls:
url = urljoin(repo.urls[0],remote)
print '%s' % url
continue
- # make sure the repo subdir is here before we go on.
- if not os.path.exists(local_repo_path):
- try:
- os.makedirs(local_repo_path)
- except IOError, e:
- my.logger.error("Could not make repo subdir: %s" % e)
- my.closeRpmDB()
- sys.exit(1)
# Disable cache otherwise things won't download
repo.cache = 0
if not opts.quiet:
my.logger.info( '[%s: %-5d of %-5d ] Downloading %s' % (repo.id, n, len(download_list), remote))
- pkg.localpath = local # Hack: to set the localpath we want.
+
path = repo.getPackage(pkg)
if opts.gpgcheck:
result, error = my.sigCheckPkg(pkg)
if result != 0:
if result == 1:
- my.logger.warning('Removing %s, due to missing GPG key.' % os.path.basename(remote))
+ my.logger.warning('Removing %s, due to missing GPG key.' % rpmname)
elif result == 2:
- my.logger.warning('Removing %s due to failed signature check.' % os.path.basename(remote))
+ my.logger.warning('Removing %s due to failed signature check.' % rpmname)
else:
- my.logger.warning('Removing %s due to failed signature check: %s' % (os.path.basename(remote), error))
+ my.logger.warning('Removing %s due to failed signature check: %s' % (rpmname, error))
os.unlink(path)
continue
More information about the Yum
mailing list