[Yum-devel] [PATCH] DMD: use pkgId to join filelists_db & primary_db.

Zdeněk Pavlas zpavlas at redhat.com
Thu Nov 8 11:31:54 UTC 2012


Yum relies too much on createrepo's inner workings: it assumes that the
pkgKeys in filelists_db and primary_db are equal.  This only holds if the
databases are always created from scratch and if the <package> tags in
filelists.xml follow the primary.xml order.  This has never been
guaranteed.  With delta-metadata and local updates, consider the following:

1) Yum downloads primary_db.sqlite with packages A, B.
   pkgKeys: A=1, B=2
2) repository changes: pkg A is removed, pkg C is added.
3) Yum downloads primary_delta.xml, updates primary_db.sqlite.
   pkgKeys: B=2, C=3 after the update
4) Yum needs filelists, downloads filelists_db.sqlite.
   pkgKeys: B=1, C=2

To reproduce:

$ sudo sqlite3 /var/cache/yum/fedora/gen/filelists_db.sqlite '
UPDATE packages SET pkgKey = pkgKey + 1000000;
UPDATE filelist SET pkgKey = pkgKey + 1000000;
'
$ sudo touch -r /var/cache/yum/fedora/*filelists* /var/cache/yum/fedora/gen/filelists_db.sqlite
$ yum provides /usr/share/mc
Error: pkgKey 1006963 doesn't exist in repo fedora

It seems there were only 3 queries that leak a pkgKey from a non-primary
DB and use it to query the primary DB.  _loadFiles() and _loadChangelog()
are fine as they are.

Performance:

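An extra JOIN is needed to get the pkgId, which should be fairly
cheap.  Translating a pkgId to a pkgKey is free: the result is cached in
_id2key, and on a cache miss the lookup is piggybacked on the query that
fetches the package data.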
---
 yum/sqlitesack.py |   33 ++++++++++++++++++++++++++-------
 1 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index a955895..5e53bb8 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -438,6 +438,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             'requires' : { },
             }
         self._key2pkg = {}
+        self._id2key = {}
         self._pkgname2pkgkeys = {}
         self._pkgtup2pkgs = {}
         self._pkgnames_loaded = set()
@@ -504,6 +505,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             del self.pkgobjlist
         self._pkgobjlist_dirty = False
         self._key2pkg = {}
+        self._id2key = {}
         self._pkgname2pkgkeys = {}
         self._pkgnames_loaded = set()
         self._pkgmatch_fails = set()
@@ -825,13 +827,30 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         return self.searchPrco(name, 'provides')
 
     def _sql_pkgKey2po(self, repo, cur, pkgs=None, have_data=False):
-        """ Takes a cursor and maps the pkgKey rows into a list of packages. """
+        """ Takes a cursor and maps the pkgKey or pkgId rows into a list of packages. """
         if pkgs is None: pkgs = []
         for ob in cur:
-            if have_data:
-                pkg = self._packageByKeyData(repo, ob['pkgKey'], ob)
+            ob_has_data = have_data
+            try:
+                key = ob['pkgKey']
+            except IndexError: # yes, IndexError
+                pkgId = ob['pkgId']
+                try:
+                    key = self._id2key[repo][pkgId]
+                except KeyError:
+                    ob_has_data = True # piggybacked on pkgKey query
+                    ob = self._sql_MD('primary', repo, '''
+                        SELECT pkgKey, pkgId, name, epoch, version, release, arch
+                        FROM packages WHERE pkgId = ?''', (pkgId,)).fetchone()
+                    if ob is None:
+                        msg = "pkgId %s doesn't exist in repo %s" % (pkgId, repo)
+                        raise Errors.RepoError, msg
+                    key = ob['pkgKey']
+                    self._id2key.setdefault(repo, {})[pkgId] = key
+            if ob_has_data:
+                pkg = self._packageByKeyData(repo, key, ob)
             else:
-                pkg = self._packageByKey(repo, ob['pkgKey'])
+                pkg = self._packageByKey(repo, key)
             if pkg is None:
                 continue
             pkgs.append(pkg)
@@ -964,7 +983,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
 
                 cur = cache.cursor()
                 sql_params.append(dirname)
-                executeSQL(cur, """SELECT pkgKey FROM filelist
+                executeSQL(cur, """SELECT pkgId FROM filelist JOIN packages USING(pkgKey)
                                    WHERE dirname %s ?""" % (querytype,),
                            sql_params)
                 self._sql_pkgKey2po(rep, cur, pkgs)
@@ -979,7 +998,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
 
             # grab the entries that are a single file in the 
             # filenames section, use sqlites globbing if it is a glob
-            executeSQL(cur, "select pkgKey from filelist where \
+            executeSQL(cur, "SELECT pkgId FROM filelist JOIN packages USING(pkgKey) WHERE \
                     %s length(filetypes) = 1 and \
                     dirname || ? || filenames \
                     %s ?" % (dirname_check, querytype), sql_params + ['/',name])
@@ -1005,7 +1024,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             cache.create_function("filelist_globber", 2, filelist_globber)
             # for all the ones where filenames is multiple files, 
             # make the files up whole and use python's globbing method
-            executeSQL(cur, "select pkgKey from filelist where \
+            executeSQL(cur, "SELECT pkgId FROM filelist JOIN packages USING(pkgKey) WHERE \
                              %s length(filetypes) > 1 \
                              and filelist_globber(dirname,filenames)" % dirname_check,
                        sql_params)
-- 
1.7.4.4



More information about the Yum-devel mailing list