[yum-git] 2 commits - yum/__init__.py yummain.py yum/sqlitesack.py

Seth Vidal skvidal at linux.duke.edu
Tue Jan 22 14:15:38 UTC 2008


 yum/__init__.py   |   99 ++++++++++++++++++++++++------------------------------
 yum/sqlitesack.py |   44 ++++++++++++++++++++++++
 yummain.py        |    3 +
 3 files changed, 91 insertions(+), 55 deletions(-)

New commits:
commit ff06d9f3546852ad305d8c41adda62387f363d69
Merge: e009052... fb42ce2...
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Tue Jan 22 09:15:33 2008 -0500

    Merge branch 'master' of ssh://login.linux.duke.edu/home/groups/yum/git/yum
    
    * 'master' of ssh://login.linux.duke.edu/home/groups/yum/git/yum: (29 commits)
      Two more "install kernel" test cases
      Fix for InstallKernel operation test.
      make str(po) show epoch:name-ver-rel.arch
       This might be the wrong fix. This makes sure that __len__ always returns a
      Fix doc. comment, repomd.xml isn't usually gotten here
      Fix spacing
      Fix variable name
      Fix function and argument names, due to usage
      Don't re-download when we have uncompressed data
      Remove updated/obsoleted txmbr when removing deps during skip-broken
      Add requiringPO as dep when updating for dependency (needed for skip-broken)
      Some more skip-broken tests
      Make the depsolver debug messages show full package name, instead of only name, to make it easier to track down problems
      make the depsolver errors show the full package names & repo
      make str(po) return foo-1.0-1.fc8.noarch, instead of foo - 1.0-1.fc8.noarch
      Docs for mdpolicy
      show related errmsg when printing packages with problems; it doesn't make much sense to write that foo has a depsolve problem without telling what the problem is :)
      Grouped the output of skipped packages, when using --skip-broken
      skip "skipping reposetup" debug message
      skip-broken: next iteration
      ...

commit e009052d5b4ed340572341a2fcd691c24672322f
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Tue Jan 22 09:14:57 2008 -0500

    - fix search lockup and inability to ctrl-c it
    - make search much faster by much-enhanced sql query
    - clean up unicode errors when piping to a file.

diff --git a/yum/__init__.py b/yum/__init__.py
index 8489562..efc3e8c 100644
--- a/yum/__init__.py
+++ b/yum/__init__.py
@@ -29,6 +29,8 @@ import glob
 import fnmatch
 import logging
 import logging.config
+import operator
+
 try:
     from iniparse.compat import ParsingError, ConfigParser
 except ImportError:
@@ -1349,77 +1351,64 @@ class YumBase(depsolve.Depsolve):
             else:
                 sql_fields.append(f)
 
-        scores = {}
-        my_sets = {}
         matched_values = {}
 
-        def __sortbyVal(x, y):
-            (k, v) = x
-            (k2, v2) = y
-            if v > v2:
-                return 1
-            if v < v2:
-                return -1
-            if v == v2:
-                return 0
-        
-        # go through each item in the criteria list
-        # figure out if it matches and what it matches
-        # tally up the scores for the pkgs
         # yield the results in order of most terms matched first
-        
+        sorted_lists = {}
+        tmpres = []
+        real_crit = []
         for s in criteria:
-            narrowed_list = []
-            my_sets[s] = []
-            if s.find('%') != -1:
-                continue
-            
-            for sack in self.pkgSack.sacks.values():
-                narrowed_list.extend(sack.searchPrimaryFields(sql_fields, s))
-        
-            for po in narrowed_list:
-                tmpvalues = []
+            if s.find('%') == -1:
+                real_crit.append(s)
+
+        for sack in self.pkgSack.sacks.values():
+            tmpres.extend(sack.searchPrimaryFieldsMultipleStrings(sql_fields, real_crit))
+
+        for (po, count) in tmpres:
+            # check the pkg for sanity
+            # pop it into the sorted lists
+            tmpvalues = []
+            if count not in sorted_lists: sorted_lists[count] = []
+            for s in real_crit:
                 for field in fields:
                     value = getattr(po, field)
                     if value and value.lower().find(s.lower()) != -1:
                         tmpvalues.append(value)
 
-                if len(tmpvalues) > 0:
-                    matched_values[po] = tmpvalues
-                    my_sets[s].append(po)
-                    
-            for po in self.rpmdb:
-                tmpvalues = []
+            if len(tmpvalues) > 0:
+                sorted_lists[count].append((po, tmpvalues))
+
+            
+        
+        for po in self.rpmdb:
+            tmpvalues = []
+            criteria_matched = 0
+            for s in real_crit:
+                matched_s = False
                 for field in fields:
                     value = getattr(po, field)
                     if value and value.lower().find(s.lower()) != -1:
+                        if not matched_s:
+                            criteria_matched += 1
+                            matched_s = True
+                        
                         tmpvalues.append(value)
 
-                if len(tmpvalues) > 0:
-                    matched_values[po] = tmpvalues
-                    my_sets[s].append(po)
-        
-        for pkg in matched_values:
-            if scores.has_key(pkg):
-                continue
-            count = 0
-            
-            for this_set in my_sets.itervalues():
-                if pkg in this_set:
-                    count += 1
-            
-            scores[pkg] = count
 
-        i = scores.items()
-        i.sort(__sortbyVal)
-        i.reverse()
+            if len(tmpvalues) > 0:
+                if criteria_matched not in sorted_lists: sorted_lists[criteria_matched] = []
+                sorted_lists[criteria_matched].append((po, tmpvalues))
+                
+
+        # close our rpmdb connection so we can ctrl-c, kthxbai                    
+        self.closeRpmDB()
         
-        for (pkg,count) in i:
-            if matched_values.has_key(pkg):
-                yield (pkg, matched_values[pkg])
-            else:
-                print pkg
-            
+        yielded = {}
+        for val in reversed(sorted(sorted_lists)):
+            for (po, matched) in sorted(sorted_lists[val], key=operator.itemgetter(0)):
+                if (po.name, po.arch) not in yielded:
+                    yield (po, matched)
+                    yielded[(po.name, po.arch)] = 1
 
 
     def searchPackages(self, fields, criteria, callback=None):
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index d5e2b0a..6675d01 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -423,6 +423,50 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 if self._pkgKeyExcluded(rep, ob['pkgKey']):
                     continue
                 result.append(self._packageByKey(rep, ob['pkgKey']))
+        return result    
+
+    @catchSqliteException
+    def searchPrimaryFieldsMultipleStrings(self, fields, searchstrings):
+        """search arbitrary fields from the primarydb for a multiple strings
+           return packages, number of items it matched as a list of tuples"""
+           
+        result = [] # (pkg, num matches)
+        if len(fields) < 1:
+            return result
+        
+       
+        unionstring = "select pkgKey, SUM(cumul) AS total from ( "
+        endunionstring = ")GROUP BY pkgKey ORDER BY total DESC"
+                
+        #SELECT pkgkey, SUM(cumul) AS total FROM (SELECT pkgkey, 1 
+        #AS cumul FROM packages WHERE description LIKE '%foo%' UNION ... ) 
+        #GROUP BY pkgkey ORDER BY total DESC;
+        selects = []
+        
+        # select pkgKey, 1 AS cumul from packages where description 
+        # like '%devel%' or description like '%python%' or description like '%ssh%'
+#        for f in fields:
+#            basestring = "select pkgKey, 1 AS cumul from packages where %s like '%%%s%%' " % (f,searchstrings[0]) 
+#            for s in searchstrings[1:]:
+#                basestring = "%s or %s like '%%%s%%' " % (basestring, f, s)
+#            selects.append(basestring)
+            
+        for s in searchstrings:         
+            basestring="select pkgKey,1 AS cumul from packages where %s like '%%%s%%' " % (fields[0], s)
+            for f in fields[1:]:
+                basestring = "%s or %s like '%%%s%%' " % (basestring, f, s)
+            selects.append(basestring)
+        
+        totalstring = unionstring + " UNION ALL ".join(selects) + endunionstring
+#        print totalstring
+        
+        for (rep,cache) in self.primarydb.items():
+            cur = cache.cursor()
+            executeSQL(cur, totalstring)
+            for ob in cur:
+                if self._pkgKeyExcluded(rep, ob['pkgKey']):
+                    continue
+                result.append((self._packageByKey(rep, ob['pkgKey']), ob['total']))
         return result
         
     @catchSqliteException
diff --git a/yummain.py b/yummain.py
index e3eb396..13215d3 100755
--- a/yummain.py
+++ b/yummain.py
@@ -31,6 +31,9 @@ import cli
 
 def main(args):
     """This does all the real work"""
+    if not sys.stdout.isatty():
+        import codecs
+        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
 
     def exUserCancel():
         logger.critical('\n\nExiting on user cancel')



More information about the Yum-cvs-commits mailing list