[Yum] yum performance
Dimitrios Apostolou
jimis at gmx.net
Fri Aug 14 13:57:55 UTC 2009
On Thu, 13 Aug 2009, Dimitrios Apostolou wrote:
> What I'm thinking to do next is store the excluded PkgIds in a sqlite
> temporary table and use JOINs for the queries that need excludes. That way
> even more logic will be moved to SQL and we'll avoid even more the overhead
> of converting from SQL to lists/dicts/tuples and back to SQL...
Please ignore the previous patches and see the one attached (it applies
again to the original yum-3_2_X branch), it includes these changes. That
is, for every repo a TEMP table "excludedIds" is created which has all the
excluded packages. Then it is used in subsequent queries that need to take
exclusions into account.
I haven't tested this one much because I currently don't have any updates
pending. Could you try various glob's and tell me if exlcusions happen
correctly?
Thanks,
Dimitris
-------------- next part --------------
diff --git a/yum/packageSack.py b/yum/packageSack.py
index b71356a..1f574b8 100644
--- a/yum/packageSack.py
+++ b/yum/packageSack.py
@@ -921,9 +921,8 @@ class PackageSack(PackageSackBase):
"""returns a list of pkg tuples (n, a, e, v, r) optionally from a single repoid"""
# Don't cache due to excludes
- return [pkg.pkgtup for pkg in self.returnPackages(patterns=patterns,
- ignore_case=False)]
-
+ return self.returnPackageTuples(patterns=patterns, ignore_case=False)
+
def printPackages(self):
for pkg in self.returnPackages():
print pkg
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index 643f1f6..b06187d 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -411,6 +411,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
self._excludes = set() # of (repo, pkgKey)
self._exclude_whitelist = set() # of (repo, pkgKey)
self._all_excludes = {}
+ self._excludedIds = None
self._search_cache = {
'provides' : { },
'requires' : { },
@@ -424,6 +425,83 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
self._pkgExcludeIds = {}
self._pkgobjlist_dirty = False
+ def _excludedIdsQuery(self):
+ """Return a list of pkgId that are excluded"""
+
+ # TODO: sqlite GLOB is case sensitive so even though it's handy because of
+ # its wildcars, perhaps we should use LIKE and transform wildcards
+ def buildQuery():
+ """Build a query in the following form:
+
+SELECT pkgId FROM packages
+WHERE
+ NOT
+ (pkgName GLOB self._pkgExcluder[i][2].lower()
+ (only if self._pkgExcluder[i][1]=="include.match")
+ )
+ AND
+ (
+ (repo = self._excludes[i][0] AND
+ pkgKey = self._excludes[i][1])
+ OR
+ repo IN (self._all_excludes[i])
+ OR
+ arch NOT IN (self._arch_allowed[i])
+ OR
+ (pkgName GLOB self._pkgExcluder[i][2].lower()
+ (only if self._pkgExcluder[i][1]=="exclude.match")
+ )
+ )
+"""
+
+ import itertools
+
+ incl_vars= [ i[2].lower() for i in self._pkgExcluder if i[1]=="include.match" ]
+ incl_q1= " OR ".join( [" (name GLOB '?') "] * len(incl_vars) )
+
+ excl_L=[]
+ # itertools.chain seems the most elegant way to flatten a nested list
+ excl_vars1= list(itertools.chain(*self._excludes))
+ excl_q1= " OR ".join( [" (repo = ? AND pkgKey = ?) "] * (len(excl_vars1)/2) )
+ if len(excl_vars1)>0:
+ excl_L+= [excl_q1]
+ excl_vars2= list(self._all_excludes)
+ excl_q2= "repo IN (" + ",".join( ["?"] * len(excl_vars2) ) + ")"
+ if len(excl_vars2)>0:
+ excl_L+= [excl_q2]
+ excl_vars3= list(self._arch_allowed)
+ excl_q3= "arch NOT IN (" + ",".join( ["?"] * len(self._arch_allowed) ) + ")"
+ if len(excl_vars3)>0:
+ excl_L+= [excl_q3]
+ excl_vars4= [ i[2].lower() for i in self._pkgExcluder if i[1]=="exclude.match" ]
+ excl_q4= " OR ".join( [" (name GLOB ?) "] * len(excl_vars4) )
+ if len(excl_vars4)>0:
+ excl_L+= [excl_q4]
+ excl_q= " OR ".join(excl_L)
+ excl_vars= excl_vars1 + excl_vars2 + excl_vars3 + excl_vars4
+
+ q= "INSERT INTO excludedIds SELECT pkgId FROM packages WHERE "
+ if len(incl_vars)>0 or len(excl_vars)>0:
+ if len(incl_vars)>0:
+ q+= " NOT (" + incl_q1 + ")"
+ if len(excl_vars)>0:
+ q+= " AND "
+ if len(excl_vars)>0:
+ q+= "(" + excl_q + ")"
+ else:
+ q+= "0"
+
+ return q, incl_vars+excl_vars
+
+ returnList=[]
+ (q,v)= buildQuery()
+ for (repo,cache) in self.primarydb.items():
+ print repo, q
+ cur = cache.execute("CREATE TEMP TABLE excludedIds (pkgId TEXT)")
+ cur = cache.execute(q, v)
+ return returnList
+
+
@catchSqliteException
def _sql_MD(self, MD, repo, sql, *args):
""" Exec SQL against an MD of the repo, return a cursor. """
@@ -994,8 +1072,44 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
result.append((pkg, ob['total']))
return result
- @catchSqliteException
+# @catchSqliteException
def returnObsoletes(self, newest=False):
+ """Returns a dict { (n,a,e,v,r): [(n,f,(e,v,r))] } of new:obsoleted
+ packages"""
+
+ def buildQuery():
+ """Build a query in the following form:
+
+SELECT
+ packages.name,
+ packages.arch,
+ packages.epoch,
+ packages.version,
+ packages.release,
+ obsoletes.name,
+ obsoletes.flags,
+ obsoletes.epoch,
+ obsoletes.version,
+ obsoletes.release
+FROM packages, obsoletes
+WHERE
+ (packages.pkgId NOT IN (SELECT pkgId FROM excludedIds))
+ AND
+ (obsoletes.pkgKey = packages.pkgKey)
+"""
+
+ excl_vars= self._excludedIds
+ excl_q= " packages.pkgId NOT IN " + "(" + ",".join( ["?"] * len(excl_vars) ) + ")"
+
+ q="SELECT packages.name, packages.arch, packages.epoch, "\
+ "packages.version, packages.release, obsoletes.name, "\
+ "obsoletes.flags, obsoletes.epoch, obsoletes.version, "\
+ "obsoletes.release FROM packages, obsoletes WHERE "\
+ "(packages.pkgId NOT IN (SELECT pkgId FROM excludedIds)) "\
+ " AND (obsoletes.pkgKey = packages.pkgKey)"
+
+ return q
+
if self._skip_all():
return {}
@@ -1003,32 +1117,14 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
raise NotImplementedError()
obsoletes = {}
- for (rep,cache) in self.primarydb.items():
- cur = cache.cursor()
- executeSQL(cur, "select packages.name as name,\
- packages.pkgKey as pkgKey,\
- packages.arch as arch, packages.epoch as epoch,\
- packages.release as release, packages.version as version,\
- obsoletes.name as oname, obsoletes.epoch as oepoch,\
- obsoletes.release as orelease, obsoletes.version as oversion,\
- obsoletes.flags as oflags\
- from obsoletes,packages where obsoletes.pkgKey = packages.pkgKey")
- for ob in cur:
- key = ( _share_data(ob['name']), _share_data(ob['arch']),
- _share_data(ob['epoch']), _share_data(ob['version']),
- _share_data(ob['release']))
- if self._pkgExcludedRKT(rep, ob['pkgKey'], key):
- continue
-
- (n,f,e,v,r) = ( _share_data(ob['oname']),
- _share_data(ob['oflags']),
- _share_data(ob['oepoch']),
- _share_data(ob['oversion']),
- _share_data(ob['orelease']))
-
- key = _share_data(key)
- val = _share_data((n,f,(e,v,r)))
- obsoletes.setdefault(key,[]).append(val)
+ q= buildQuery()
+ for (repo,cache) in self.primarydb.items():
+ print repo, q
+ cur= cache.execute(q)
+ results= cur.fetchall()
+ for l in results:
+ l= list(l)
+ obsoletes.setdefault(tuple(l[:5]),[]).append(tuple( [ l[5],l[6],tuple(l[7:10]) ] ))
return obsoletes
@@ -1512,7 +1608,37 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
self._pkgnames_loaded.update([po.name for po in returnList])
return returnList
-
+
+ def returnPackageTuples(self, ignore_case=False, patterns=None):
+ """Returns a list of n,a,e,v,r tuples with all packages minus excludes
+ """
+
+ # Where should I initialise the _excludedIds list??? Obviously here is not best...
+ if self._excludedIds is None:
+ self._excludedIds= self._excludedIdsQuery()
+
+ def buildQuery():
+ """Build a query in the following form:
+
+SELECT name, arch, epoch, version, release FROM packages
+WHERE pkgId NOT IN (SELECT pkgId FROM excludedIds)
+ """
+
+ q= "SELECT name, arch, epoch, version, release FROM packages "\
+ "WHERE pkgId NOT IN (SELECT pkgId FROM excludedIds)"
+
+ return q
+
+ returnList=[]
+ q= buildQuery()
+ for (repo,cache) in self.primarydb.items():
+ print repo, q
+ cur = cache.execute(q)
+ returnList.extend(cur.fetchall())
+ return [tuple(i) for i in returnList]
+
+
+
def returnPackages(self, repoid=None, patterns=None, ignore_case=False):
"""Returns a list of packages, only containing nevra information. The
packages are processed for excludes. Note that patterns is just
More information about the Yum
mailing list