[Yum] yum performance

Dimitrios Apostolou jimis at gmx.net
Fri Aug 14 13:57:55 UTC 2009


On Thu, 13 Aug 2009, Dimitrios Apostolou wrote:

> What I'm thinking to do next is store the excluded PkgIds in a sqlite 
> temporary table and use JOINs for the queries that need excludes. That way 
> even more logic will be moved to SQL and we'll avoid even more the overhead 
> of converting from SQL to lists/dicts/tuples and back to SQL...

Please ignore the previous patches and see the one attached (it applies 
again to the original yum-3_2_X branch), it includes these changes. That 
is, for every repo a TEMP table "excludedIds" is created which has all the 
excluded packages. Then it is used in subsequent queries that need to take 
exclusions into account.

I haven't tested this one much because I currently don't have any updates 
pending. Could you try various glob's and tell me if exlcusions happen 
correctly?


Thanks,
Dimitris
-------------- next part --------------
diff --git a/yum/packageSack.py b/yum/packageSack.py
index b71356a..1f574b8 100644
--- a/yum/packageSack.py
+++ b/yum/packageSack.py
@@ -921,9 +921,8 @@ class PackageSack(PackageSackBase):
         """returns a list of pkg tuples (n, a, e, v, r) optionally from a single repoid"""
         
         # Don't cache due to excludes
-        return [pkg.pkgtup for pkg in self.returnPackages(patterns=patterns,
-                                                          ignore_case=False)]
-                       
+        return self.returnPackageTuples(patterns=patterns, ignore_case=False)
+
     def printPackages(self):
         for pkg in self.returnPackages():
             print pkg
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index 643f1f6..b06187d 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -411,6 +411,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         self._excludes = set() # of (repo, pkgKey)
         self._exclude_whitelist = set() # of (repo, pkgKey)
         self._all_excludes = {}
+        self._excludedIds = None
         self._search_cache = {
             'provides' : { },
             'requires' : { },
@@ -424,6 +425,83 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         self._pkgExcludeIds = {}
         self._pkgobjlist_dirty = False
 
+    def _excludedIdsQuery(self):
+        """Return a list of pkgId that are excluded"""
+        
+        # TODO: sqlite GLOB is case sensitive so even though it's handy because of 
+        #	its wildcars, perhaps we should use LIKE and transform wildcards
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT pkgId FROM packages
+WHERE
+	NOT
+	(pkgName GLOB self._pkgExcluder[i][2].lower() 
+		(only if self._pkgExcluder[i][1]=="include.match")
+	)
+	AND
+	(
+		(repo = self._excludes[i][0] AND
+		pkgKey = self._excludes[i][1])
+		OR
+		repo IN (self._all_excludes[i])
+		OR
+		arch NOT IN (self._arch_allowed[i])
+		OR
+		(pkgName GLOB self._pkgExcluder[i][2].lower() 
+			(only if self._pkgExcluder[i][1]=="exclude.match")
+		)
+	)
+"""
+
+            import itertools
+
+            incl_vars= [ i[2].lower() for i in self._pkgExcluder if i[1]=="include.match" ]
+            incl_q1= " OR ".join( [" (name GLOB '?') "] * len(incl_vars) )
+            
+            excl_L=[]
+            # itertools.chain seems the most elegant way to flatten a nested list
+            excl_vars1= list(itertools.chain(*self._excludes))
+            excl_q1= " OR ".join( [" (repo = ? AND pkgKey = ?) "] * (len(excl_vars1)/2) )
+            if len(excl_vars1)>0:
+                excl_L+= [excl_q1]
+            excl_vars2= list(self._all_excludes)
+            excl_q2= "repo IN (" + ",".join( ["?"] * len(excl_vars2)  ) + ")"
+            if len(excl_vars2)>0:
+                excl_L+= [excl_q2]
+            excl_vars3= list(self._arch_allowed)
+            excl_q3= "arch NOT IN (" + ",".join( ["?"] * len(self._arch_allowed) ) + ")"
+            if len(excl_vars3)>0:
+                excl_L+= [excl_q3]
+            excl_vars4= [ i[2].lower() for i in self._pkgExcluder if i[1]=="exclude.match" ]
+            excl_q4= " OR ".join( [" (name GLOB ?) "] * len(excl_vars4) )
+            if len(excl_vars4)>0:
+                excl_L+= [excl_q4]
+            excl_q= " OR ".join(excl_L)
+            excl_vars= excl_vars1 + excl_vars2 + excl_vars3 + excl_vars4
+
+            q= "INSERT INTO excludedIds SELECT pkgId FROM packages WHERE "
+            if len(incl_vars)>0 or len(excl_vars)>0:
+                if len(incl_vars)>0:
+                    q+= " NOT (" + incl_q1 + ")"
+                    if len(excl_vars)>0:
+                        q+= " AND "
+                if len(excl_vars)>0:
+                    q+= "(" + excl_q + ")"
+            else:
+                q+= "0"
+
+            return q, incl_vars+excl_vars
+
+        returnList=[]
+        (q,v)= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur = cache.execute("CREATE TEMP TABLE excludedIds (pkgId TEXT)")
+            cur = cache.execute(q, v)
+        return returnList
+
+
     @catchSqliteException
     def _sql_MD(self, MD, repo, sql, *args):
         """ Exec SQL against an MD of the repo, return a cursor. """
@@ -994,8 +1072,44 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 result.append((pkg, ob['total']))
         return result
         
-    @catchSqliteException
+#    @catchSqliteException
     def returnObsoletes(self, newest=False):
+        """Returns a dict { (n,a,e,v,r): [(n,f,(e,v,r))] } of new:obsoleted 
+           packages"""
+        
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT 
+	packages.name, 
+	packages.arch, 
+	packages.epoch, 
+	packages.version, 
+	packages.release,
+	obsoletes.name,
+	obsoletes.flags,
+	obsoletes.epoch,
+	obsoletes.version,
+	obsoletes.release
+FROM packages, obsoletes
+WHERE
+	(packages.pkgId NOT IN (SELECT pkgId FROM excludedIds))
+	AND
+	(obsoletes.pkgKey = packages.pkgKey)
+"""
+
+            excl_vars= self._excludedIds
+            excl_q= " packages.pkgId NOT IN " + "(" + ",".join( ["?"] * len(excl_vars) ) + ")"
+
+            q="SELECT packages.name, packages.arch, packages.epoch, "\
+                "packages.version, packages.release, obsoletes.name, "\
+                "obsoletes.flags, obsoletes.epoch, obsoletes.version, "\
+                "obsoletes.release FROM packages, obsoletes WHERE "\
+                "(packages.pkgId NOT IN (SELECT pkgId FROM excludedIds)) "\
+                " AND (obsoletes.pkgKey = packages.pkgKey)"
+
+            return q
+
         if self._skip_all():
             return {}
 
@@ -1003,32 +1117,14 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             raise NotImplementedError()
 
         obsoletes = {}
-        for (rep,cache) in self.primarydb.items():
-            cur = cache.cursor()
-            executeSQL(cur, "select packages.name as name,\
-                packages.pkgKey as pkgKey,\
-                packages.arch as arch, packages.epoch as epoch,\
-                packages.release as release, packages.version as version,\
-                obsoletes.name as oname, obsoletes.epoch as oepoch,\
-                obsoletes.release as orelease, obsoletes.version as oversion,\
-                obsoletes.flags as oflags\
-                from obsoletes,packages where obsoletes.pkgKey = packages.pkgKey")
-            for ob in cur:
-                key = ( _share_data(ob['name']), _share_data(ob['arch']),
-                        _share_data(ob['epoch']), _share_data(ob['version']),
-                        _share_data(ob['release']))
-                if self._pkgExcludedRKT(rep, ob['pkgKey'], key):
-                    continue
-
-                (n,f,e,v,r) = ( _share_data(ob['oname']),
-                                _share_data(ob['oflags']),
-                                _share_data(ob['oepoch']),
-                                _share_data(ob['oversion']),
-                                _share_data(ob['orelease']))
-
-                key = _share_data(key)
-                val = _share_data((n,f,(e,v,r)))
-                obsoletes.setdefault(key,[]).append(val)
+        q= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur= cache.execute(q)
+            results= cur.fetchall()
+            for l in results:
+                l= list(l)
+                obsoletes.setdefault(tuple(l[:5]),[]).append(tuple( [ l[5],l[6],tuple(l[7:10]) ] ))
 
         return obsoletes
 
@@ -1512,7 +1608,37 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             self._pkgnames_loaded.update([po.name for po in returnList])
 
         return returnList
-                
+
+    def returnPackageTuples(self, ignore_case=False, patterns=None):
+        """Returns a list of n,a,e,v,r tuples with all packages minus excludes
+        """
+        
+        # Where should I initialise the _excludedIds list??? Obviously here is not best...
+        if self._excludedIds is None:
+            self._excludedIds= self._excludedIdsQuery()
+
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT name, arch, epoch, version, release FROM packages
+WHERE pkgId NOT IN (SELECT pkgId FROM excludedIds)
+	    """
+
+            q= "SELECT name, arch, epoch, version, release FROM packages "\
+               "WHERE pkgId NOT IN (SELECT pkgId FROM excludedIds)"
+
+            return q
+
+        returnList=[]
+        q= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur = cache.execute(q)
+            returnList.extend(cur.fetchall())
+        return [tuple(i) for i in returnList]
+            
+        
+
     def returnPackages(self, repoid=None, patterns=None, ignore_case=False):
         """Returns a list of packages, only containing nevra information. The
            packages are processed for excludes. Note that patterns is just


More information about the Yum mailing list