[Yum-devel] [PATCH] DMD: use pkgId to join filelists_db & primary_db.
Zdeněk Pavlas
zpavlas at redhat.com
Thu Nov 8 11:31:54 UTC 2012
Yum relies too much on createrepo's inner workings: it assumes that pkgKeys
in filelists_db and primary_db are equal. This only holds if the databases
are always created from scratch and if the <package> tags in filelists.xml
follow the order of primary.xml. This has never been guaranteed. Moreover,
with delta-metadata and local updates, consider the following:
1) Yum downloads primary_db.sqlite with packages A, B.
pkgKeys: A=1, B=2
2) repository changes: pkg A is removed, pkg C is added.
3) Yum downloads primary_delta.xml, updates primary_db.sqlite.
pkgKeys: B=2, C=3 after the update
4) Yum needs filelists, downloads filelists_db.sqlite.
pkgKeys: B=1, C=2
To reproduce:
$ sudo sqlite3 /var/cache/yum/fedora/gen/filelists_db.sqlite '
UPDATE packages SET pkgKey = pkgKey + 1000000;
UPDATE filelist SET pkgKey = pkgKey + 1000000;
'
$ sudo touch -r /var/cache/yum/fedora/*filelists* /var/cache/yum/fedora/gen/filelists_db.sqlite
$ yum provides /usr/share/mc
Error: pkgKey 1006963 doesn't exist in repo fedora
It seems there were only 3 queries that leaked a pkgKey from a non-primary
DB and used it to query the primary DB. _loadFiles() and _loadChangelog()
are fine, too.
Performance:
An extra JOIN is needed to get pkgId, which should be fairly
cheap. Translating pkgId to pkgKey comes for free.
---
yum/sqlitesack.py | 33 ++++++++++++++++++++++++++-------
1 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index a955895..5e53bb8 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -438,6 +438,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
'requires' : { },
}
self._key2pkg = {}
+ self._id2key = {}
self._pkgname2pkgkeys = {}
self._pkgtup2pkgs = {}
self._pkgnames_loaded = set()
@@ -504,6 +505,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
del self.pkgobjlist
self._pkgobjlist_dirty = False
self._key2pkg = {}
+ self._id2key = {}
self._pkgname2pkgkeys = {}
self._pkgnames_loaded = set()
self._pkgmatch_fails = set()
@@ -825,13 +827,30 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
return self.searchPrco(name, 'provides')
def _sql_pkgKey2po(self, repo, cur, pkgs=None, have_data=False):
- """ Takes a cursor and maps the pkgKey rows into a list of packages. """
+ """ Takes a cursor and maps the pkgKey or pkgId rows into a list of packages. """
if pkgs is None: pkgs = []
for ob in cur:
- if have_data:
- pkg = self._packageByKeyData(repo, ob['pkgKey'], ob)
+ ob_has_data = have_data
+ try:
+ key = ob['pkgKey']
+ except IndexError: # yes, IndexError
+ pkgId = ob['pkgId']
+ try:
+ key = self._id2key[repo][pkgId]
+ except KeyError:
+ ob_has_data = True # piggybacked on pkgKey query
+ ob = self._sql_MD('primary', repo, '''
+ SELECT pkgKey, pkgId, name, epoch, version, release, arch
+ FROM packages WHERE pkgId = ?''', (pkgId,)).fetchone()
+ if ob is None:
+ msg = "pkgId %s doesn't exist in repo %s" % (pkgId, repo)
+ raise Errors.RepoError, msg
+ key = ob['pkgKey']
+ self._id2key.setdefault(repo, {})[pkgId] = key
+ if ob_has_data:
+ pkg = self._packageByKeyData(repo, key, ob)
else:
- pkg = self._packageByKey(repo, ob['pkgKey'])
+ pkg = self._packageByKey(repo, key)
if pkg is None:
continue
pkgs.append(pkg)
@@ -964,7 +983,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
cur = cache.cursor()
sql_params.append(dirname)
- executeSQL(cur, """SELECT pkgKey FROM filelist
+ executeSQL(cur, """SELECT pkgId FROM filelist JOIN packages USING(pkgKey)
WHERE dirname %s ?""" % (querytype,),
sql_params)
self._sql_pkgKey2po(rep, cur, pkgs)
@@ -979,7 +998,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
# grab the entries that are a single file in the
# filenames section, use sqlites globbing if it is a glob
- executeSQL(cur, "select pkgKey from filelist where \
+ executeSQL(cur, "SELECT pkgId FROM filelist JOIN packages USING(pkgKey) WHERE \
%s length(filetypes) = 1 and \
dirname || ? || filenames \
%s ?" % (dirname_check, querytype), sql_params + ['/',name])
@@ -1005,7 +1024,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
cache.create_function("filelist_globber", 2, filelist_globber)
# for all the ones where filenames is multiple files,
# make the files up whole and use python's globbing method
- executeSQL(cur, "select pkgKey from filelist where \
+ executeSQL(cur, "SELECT pkgId FROM filelist JOIN packages USING(pkgKey) WHERE \
%s length(filetypes) > 1 \
and filelist_globber(dirname,filenames)" % dirname_check,
sql_params)
--
1.7.4.4
More information about the Yum-devel
mailing list