[Yum] yum performance

Dimitrios Apostolou jimis at gmx.net
Mon Aug 17 20:56:11 UTC 2009


On Mon, 17 Aug 2009, James Antill wrote:

> James Antill <james-yum at and.org> writes:
>
>>  If you just
>> stop the package creation, does that help? -- ie. have simplePkgList()
>> return the pkgtups without creating package objects first?
>
> I've just posted a patch to the yum-devel-list for review, which does
> this. It makes a significant difference to update and check-update,
> if you have time testing that on your machines and posting results to
> yum-devel-list would be great.

I'll post here because I just subscribed to that list and I can't reply to 
the thread.

I just tried your committed changes and indeed they make a big difference. 
Thanks for incorporating these ideas. The average times I see for 
check-update are about the following:

 		without-exclude	with-exclude
yum-original	26s		24s
yum-james	15s		20s
yum-jimis	12s		11s

For excludes I used "-x '*p*'", which is probably an extreme case. I also 
included my version just out of curiosity. I attach the diff for the 
version I used, which includes small doc fixes and properly overrides the 
simplePkgList() method as you suggested.


Dimitris

>
> -- 
> James Antill -- james at and.org
> _______________________________________________
> Yum mailing list
> Yum at lists.baseurl.org
> http://lists.baseurl.org/mailman/listinfo/yum
>
-------------- next part --------------
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index 3f9dc37..f88ab0d 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -411,6 +411,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         self._excludes = set() # of (repo, pkgKey)
         self._exclude_whitelist = set() # of (repo, pkgKey)
         self._all_excludes = {}
+        self._excludedIds = None
         self._search_cache = {
             'provides' : { },
             'requires' : { },
@@ -424,6 +425,85 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         self._pkgExcludeIds = {}
         self._pkgobjlist_dirty = False
 
+    def _excludedIdsQuery(self):
+        """Creates a table "excludedIds in each repo database that contains
+           all pkgIds of excluded packages. Currently it returns an empty list FIXME"""
+        
+        # TODO: sqlite GLOB is case sensitive so even though it's handy because of 
+        #	its wildcards, perhaps we should use LIKE and transform wildcards
+        def buildQuery():
+            """Return a query that inserts into a new table the excluded 
+               packages as given from the following query:
+
+SELECT pkgId FROM packages
+WHERE
+	NOT
+	(pkgName GLOB self._pkgExcluder[i][2].lower() 
+		(only if self._pkgExcluder[i][1]=="include.match")
+	)
+	AND
+	(
+		(repo = self._excludes[i][0] AND
+		pkgKey = self._excludes[i][1])
+		OR
+		repo IN (self._all_excludes[i])
+		OR
+		arch NOT IN (self._arch_allowed[i])
+		OR
+		(pkgName GLOB self._pkgExcluder[i][2].lower() 
+			(only if self._pkgExcluder[i][1]=="exclude.match")
+		)
+	)
+"""
+
+            import itertools
+
+            incl_vars= [ i[2].lower() for i in self._pkgExcluder if i[1]=="include.match" ]
+            incl_q1= " OR ".join( [" (name GLOB '?') "] * len(incl_vars) )
+            
+            excl_L=[]
+            # itertools.chain seems the most elegant way to flatten a nested list
+            excl_vars1= list(itertools.chain(*self._excludes))
+            excl_q1= " OR ".join( [" (repo = ? AND pkgKey = ?) "] * (len(excl_vars1)/2) )
+            if len(excl_vars1)>0:
+                excl_L+= [excl_q1]
+            excl_vars2= list(self._all_excludes)
+            excl_q2= "repo IN (" + ",".join( ["?"] * len(excl_vars2)  ) + ")"
+            if len(excl_vars2)>0:
+                excl_L+= [excl_q2]
+            excl_vars3= list(self._arch_allowed)
+            excl_q3= "arch NOT IN (" + ",".join( ["?"] * len(self._arch_allowed) ) + ")"
+            if len(excl_vars3)>0:
+                excl_L+= [excl_q3]
+            excl_vars4= [ i[2].lower() for i in self._pkgExcluder if i[1]=="exclude.match" ]
+            excl_q4= " OR ".join( [" (name GLOB ?) "] * len(excl_vars4) )
+            if len(excl_vars4)>0:
+                excl_L+= [excl_q4]
+            excl_q= " OR ".join(excl_L)
+            excl_vars= excl_vars1 + excl_vars2 + excl_vars3 + excl_vars4
+
+            q= "INSERT INTO excludedIds SELECT pkgId FROM packages WHERE "
+            if len(incl_vars)>0 or len(excl_vars)>0:
+                if len(incl_vars)>0:
+                    q+= " NOT (" + incl_q1 + ")"
+                    if len(excl_vars)>0:
+                        q+= " AND "
+                if len(excl_vars)>0:
+                    q+= "(" + excl_q + ")"
+            else:
+                q+= "0"
+
+            return q, incl_vars+excl_vars
+
+        returnList=[]
+        (q,v)= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur = cache.execute("CREATE TEMP TABLE excludedIds (pkgId TEXT)")
+            cur = cache.execute(q, v)
+        return returnList
+
+
     @catchSqliteException
     def _sql_MD(self, MD, repo, sql, *args):
         """ Exec SQL against an MD of the repo, return a cursor. """
@@ -725,19 +805,6 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             pkgkeys.append(pkgKey)
         return self._key2pkg[repo][data['pkgKey']]
 
-    def _pkgtupByKeyData(self, repo, pkgKey, data):
-        """ Like _packageByKeyData() but we don't create the package, we just
-            return the pkgtup. """
-        if self._pkgExcludedRKD(repo, pkgKey, data):
-            return None
-        if repo not in self._key2pkg:
-            self._key2pkg[repo] = {}
-            self._pkgname2pkgkeys[repo] = {}
-        if data['pkgKey'] in self._key2pkg.get(repo, {}):
-            return self._key2pkg[repo][data['pkgKey']].pkgtup
-        return (data['name'], data['arch'],
-                data['epoch'], data['version'], data['release'])
-
     def _packagesByName(self, pkgname):
         """ Load all pkgnames from cache, with a given name. """
         ret = []
@@ -1007,8 +1074,44 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 result.append((pkg, ob['total']))
         return result
         
-    @catchSqliteException
+#    @catchSqliteException
     def returnObsoletes(self, newest=False):
+        """Returns a dict { (n,a,e,v,r): [(n,f,(e,v,r))] } of new:obsoleted 
+           packages, minus excludes"""
+        
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT 
+	packages.name, 
+	packages.arch, 
+	packages.epoch, 
+	packages.version, 
+	packages.release,
+	obsoletes.name,
+	obsoletes.flags,
+	obsoletes.epoch,
+	obsoletes.version,
+	obsoletes.release
+FROM packages, obsoletes
+WHERE
+	(packages.pkgId NOT IN (SELECT pkgId FROM excludedIds))
+	AND
+	(obsoletes.pkgKey = packages.pkgKey)
+"""
+
+            excl_vars= self._excludedIds
+            excl_q= " packages.pkgId NOT IN " + "(" + ",".join( ["?"] * len(excl_vars) ) + ")"
+
+            q="SELECT packages.name, packages.arch, packages.epoch, "\
+                "packages.version, packages.release, obsoletes.name, "\
+                "obsoletes.flags, obsoletes.epoch, obsoletes.version, "\
+                "obsoletes.release FROM packages, obsoletes WHERE "\
+                "(packages.pkgId NOT IN (SELECT pkgId FROM excludedIds)) "\
+                " AND (obsoletes.pkgKey = packages.pkgKey)"
+
+            return q
+
         if self._skip_all():
             return {}
 
@@ -1016,32 +1119,14 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             raise NotImplementedError()
 
         obsoletes = {}
-        for (rep,cache) in self.primarydb.items():
-            cur = cache.cursor()
-            executeSQL(cur, "select packages.name as name,\
-                packages.pkgKey as pkgKey,\
-                packages.arch as arch, packages.epoch as epoch,\
-                packages.release as release, packages.version as version,\
-                obsoletes.name as oname, obsoletes.epoch as oepoch,\
-                obsoletes.release as orelease, obsoletes.version as oversion,\
-                obsoletes.flags as oflags\
-                from obsoletes,packages where obsoletes.pkgKey = packages.pkgKey")
-            for ob in cur:
-                key = ( _share_data(ob['name']), _share_data(ob['arch']),
-                        _share_data(ob['epoch']), _share_data(ob['version']),
-                        _share_data(ob['release']))
-                if self._pkgExcludedRKT(rep, ob['pkgKey'], key):
-                    continue
-
-                (n,f,e,v,r) = ( _share_data(ob['oname']),
-                                _share_data(ob['oflags']),
-                                _share_data(ob['oepoch']),
-                                _share_data(ob['oversion']),
-                                _share_data(ob['orelease']))
-
-                key = _share_data(key)
-                val = _share_data((n,f,(e,v,r)))
-                obsoletes.setdefault(key,[]).append(val)
+        q= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur= cache.execute(q)
+            results= cur.fetchall()
+            for l in results:
+                l= list(l)
+                obsoletes.setdefault(tuple(l[:5]),[]).append(tuple( [ l[5],l[6],tuple(l[7:10]) ] ))
 
         return obsoletes
 
@@ -1447,13 +1532,16 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         unmatched = misc.unique(unmatched)
         return exactmatch, matched, unmatched
 
-    def _setupPkgObjList(self, repoid=None, patterns=None, ignore_case=False):
-        """Setup need_full and patterns for _yieldSQLDataList, also see if
-           we can get away with just using searchNames(). """
+    @catchSqliteException
+    def _buildPkgObjList(self, repoid=None, patterns=None, ignore_case=False):
+        """Builds a list of packages, only containing nevra information. No
+           excludes are done at this stage. """
 
         if patterns is None:
             patterns = []
 
+        returnList = []
+        
         fields = ['name', 'sql_nameArch', 'sql_nameVerRelArch',
                   'sql_nameVer', 'sql_nameVerRel',
                   'sql_envra', 'sql_nevra']
@@ -1481,14 +1569,8 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 else:
                     tmp.append((pat, '='))
             if not need_full and not need_glob and patterns:
-                return (need_full, patterns, fields, True)
+                return self.searchNames(patterns)
             patterns = tmp
-        return (need_full, patterns, fields, False)
-
-    @catchSqliteException
-    def _yieldSQLDataList(self, repoid, patterns, fields, ignore_case):
-        """Yields all the package data for the given params. Excludes are done
-           at this stage. """
 
         for (repo,cache) in self.primarydb.items():
             if (repoid == None or repoid == repo.id):
@@ -1509,26 +1591,17 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 if pat_sqls:
                     qsql = _FULL_PARSE_QUERY_BEG + " OR ".join(pat_sqls)
                 executeSQL(cur, qsql, pat_data)
+                #  Note: If we are building the pkgobjlist, we don't exclude
+                # here, so that we can un-exclude later on ... if that matters.
                 for x in cur:
-                    yield (repo, x)
-
-    def _buildPkgObjList(self, repoid=None, patterns=None, ignore_case=False):
-        """Builds a list of packages, only containing nevra information.
-           Excludes are done at this stage. """
-
-        returnList = []
-
-        data = self._setupPkgObjList(repoid, patterns, ignore_case)
-        (need_full, patterns, fields, names) = data
-        if names:
-            return self.searchNames(patterns)
-
-        for (repo, x) in self._yieldSQLDataList(repoid, patterns, fields,
-                                                ignore_case):
-            po = self._packageByKeyData(repo, x['pkgKey'], x)
-            if po is None:
-                continue
-            returnList.append(po)
+                    exclude = not patterns
+                    if True: # NOTE: Can't unexclude things...
+                        exclude = True
+                    po = self._packageByKeyData(repo, x['pkgKey'], x,
+                                                exclude=exclude)
+                    if po is None:
+                        continue
+                    returnList.append(po)
         if not patterns and repoid is None:
             self.pkgobjlist = returnList
             self._pkgnames_loaded = set() # Save memory
@@ -1537,7 +1610,26 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             self._pkgnames_loaded.update([po.name for po in returnList])
 
         return returnList
-                
+
+    def simplePkgList(self, patterns=None, ignore_case=False):
+        """Returns a list of n,a,e,v,r tuples with all packages minus excludes
+        """
+        
+        # Where should I initialise the _excludedIds list??? Obviously here is not best...
+        if self._excludedIds is None:
+            self._excludedIds= self._excludedIdsQuery()
+
+        returnList=[]
+        q= "SELECT name, arch, epoch, version, release FROM packages "\
+           "WHERE pkgId NOT IN (SELECT pkgId FROM excludedIds)"
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur = cache.execute(q)
+            returnList.extend(cur.fetchall())
+        return [tuple(i) for i in returnList]
+            
+        
+
     def returnPackages(self, repoid=None, patterns=None, ignore_case=False):
         """Returns a list of packages, only containing nevra information. The
            packages are processed for excludes. Note that patterns is just
@@ -1573,32 +1665,6 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
 
         return returnList
 
-    def simplePkgList(self, patterns=None, ignore_case=False):
-        """Returns a list of pkg tuples (n, a, e, v, r), optionally from a
-           single repoid. """
-
-        if self._skip_all():
-            return []
-
-        internal_pkgoblist = hasattr(self, 'pkgobjlist')
-        if internal_pkgoblist:
-            return yumRepo.YumPackageSack.simplePkgList(self, patterns,
-                                                        ignore_case)
-
-        repoid = None
-        returnList = []
-        # Haven't loaded everything, so _just_ get the pkgtups...
-        data = self._setupPkgObjList(repoid, patterns, ignore_case)
-        (need_full, patterns, fields, names) = data
-        for (repo, x) in self._yieldSQLDataList(repoid, patterns, fields,
-                                                ignore_case):
-            # NOTE: Can't unexclude things...
-            pkgtup = self._pkgtupByKeyData(repo, x['pkgKey'], x)
-            if pkgtup is None:
-                continue
-            returnList.append(pkgtup)
-        return returnList
-
     @catchSqliteException
     def searchNevra(self, name=None, epoch=None, ver=None, rel=None, arch=None):        
         """return list of pkgobjects matching the nevra requested"""


More information about the Yum mailing list