[Yum-devel] [PATCH] Use multiple applydeltarpm workers

Zdenek Pavlas zpavlas at redhat.com
Fri Feb 22 15:36:18 UTC 2013


I did some real-world testing.  I haven't updated my F17 machine
for some time, and 88 updates are now available.  I ran with
--downloadonly to skip applying the updates, so only the setup,
download, and rebuild times are reported:

1) download RPMs only
Total 235 MB
real    0m35.433s
user    0m9.543s
sys     0m2.865s

2) use yum-presto-0.7.3-1.fc17.noarch
Presto reduced the update size by 88% (from 197 M to 25 M).
real    2m50.171s
user    2m1.482s
sys     0m4.633s

3) use the new drpm code
Delta RPMs reduced 197 M of updates to 25 M (87% saved)

1 worker:
real	2m17.530s
user	2m6.819s
sys	0m3.539s

2 workers:
real	1m21.588s
user	2m20.474s
sys	0m2.965s

4 workers:
real	1m10.760s
user	2m40.738s
sys	0m3.699s

6 workers:
real	1m23.832s
user	4m33.390s
sys	0m6.313s

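Parallelizing the downloads gains little here, since I use a
single fast local mirror and only one concurrent connection.
Spawning more applydeltarpm workers helps a lot, though: wall
time drops from 2m17s with 1 worker to 1m10s with 4 workers,
and gets worse again with 6.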
---
 docs/yum.conf.5 |  5 ++---
 yum/__init__.py |  1 +
 yum/config.py   |  2 +-
 yum/presto.py   | 35 ++++++++++++++++++++++++++---------
 4 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/docs/yum.conf.5 b/docs/yum.conf.5
index 93cb297..4897dd7 100644
--- a/docs/yum.conf.5
+++ b/docs/yum.conf.5
@@ -374,9 +374,8 @@ and the downloader honors these too.
 .IP
 \fBpresto\fR
 
-Either `0' or `1'. Set this to `1' to use delta-RPM files, if available.
-This reduces the download size of updates significantly, but local rebuild
-is CPU intensive.  Default is `1' (on).
+When non-zero, delta-RPM files are used if available.  The value specifies
+the maximum number of "applydeltarpm" processes Yum will spawn (4 by default).
 
 .IP
 \fBsslcacert \fR
diff --git a/yum/__init__.py b/yum/__init__.py
index 01f038c..f03bcb8 100644
--- a/yum/__init__.py
+++ b/yum/__init__.py
@@ -2320,6 +2320,7 @@ much more problems).
                     adderror(po, exception2msg(e))
             if async:
                 urlgrabber.grabber.parallel_wait()
+            presto.wait()
 
             if hasattr(urlgrabber.progress, 'text_meter_total_size'):
                 urlgrabber.progress.text_meter_total_size(0)
diff --git a/yum/config.py b/yum/config.py
index d279ab3..4aae831 100644
--- a/yum/config.py
+++ b/yum/config.py
@@ -791,7 +791,7 @@ class YumConf(StartupConf):
             allowed = ('ipv4', 'ipv6', 'whatever'),
             mapper  = {'4': 'ipv4', '6': 'ipv6'})
     max_connections = IntOption(0)
-    presto = BoolOption(True)
+    presto = IntOption(4)
 
     http_caching = SelectionOption('all', ('none', 'packages', 'all'))
     metadata_expire = SecondsOption(60 * 60 * 6) # Time in seconds (6h).
diff --git a/yum/presto.py b/yum/presto.py
index 1805974..cb33918 100644
--- a/yum/presto.py
+++ b/yum/presto.py
@@ -23,7 +23,7 @@ from yum.i18n import exception2msg, _
 from urlgrabber import grabber
 async = hasattr(grabber, 'parallel_wait')
 from xml.etree.cElementTree import iterparse
-import os, gzip, subprocess
+import os, gzip
 
 class Presto:
     def __init__(self, ayum, pkgs):
@@ -32,6 +32,8 @@ class Presto:
         self._rpmsave = {}
         self.rpmsize = 0
         self.deltasize = 0
+        self.jobs = {}
+        self.limit = ayum.conf.presto
 
         # calculate update sizes
         pinfo = {}
@@ -41,6 +43,7 @@ class Presto:
                 continue
             if po.state != TS_UPDATE and po.name not in ayum.conf.installonlypkgs:
                 continue
+            self.limit = max(self.limit, po.repo.presto)
             pinfo.setdefault(po.repo, {})[po.pkgtup] = po
             reposize[po.repo] = reposize.get(po.repo, 0) + po.size
 
@@ -118,17 +121,31 @@ class Presto:
         po.packagesize, po.relativepath, po.localpath = self._rpmsave.pop(po)
         del po.returnIdSum
 
+    def wait(self, limit = 1):
+        # wait for some jobs, run callbacks
+        while len(self.jobs) >= limit:
+            pid, code = os.wait()
+            # urlgrabber spawns child jobs, too.  But they exit synchronously,
+            # so we should never see an unknown pid here.
+            assert pid in self.jobs
+            callback = self.jobs.pop(pid)
+            callback(code)
+
     def rebuild(self, po, adderror):
         # restore rpm values
         deltapath = po.localpath
         po.packagesize, po.relativepath, po.localpath = self._rpmsave.pop(po)
         del po.returnIdSum
 
-        # rebuild it from drpm
-        if subprocess.call(['/usr/bin/applydeltarpm', deltapath, po.localpath]) != 0:
-            return adderror(po, _('Delta RPM rebuild failed'))
-        # source drpm was already checksummed.. is this necessary?
-        if not po.verifyLocalPkg():
-            return adderror(po, _('Checksum of the delta-rebuilt RPM failed'))
-        # no need to keep this
-        os.unlink(deltapath)
+        # this runs when worker finishes
+        def callback(code):
+            if code != 0:
+                return adderror(po, _('Delta RPM rebuild failed'))
+            if not po.verifyLocalPkg():
+                return adderror(po, _('Checksum of the delta-rebuilt RPM failed'))
+            os.unlink(deltapath)
+
+        # spawn a worker process
+        self.wait(self.limit)
+        pid = os.spawnl(os.P_NOWAIT, '/usr/bin/applydeltarpm', 'applydeltarpm', deltapath, po.localpath)
+        self.jobs[pid] = callback
-- 
1.7.11.7



More information about the Yum-devel mailing list