[yum-cvs] yum/yum __init__.py,1.305,1.306 sqlitesack.py,1.82,1.83

Seth Vidal skvidal at linux.duke.edu
Mon Mar 5 03:51:41 UTC 2007


Update of /home/groups/yum/cvs/yum/yum
In directory login1.linux.duke.edu:/tmp/cvs-serv26689/yum

Modified Files:
	__init__.py sqlitesack.py 
Log Message:

Clean up the 'yum provides' command by fixing searchAll(), implementing
searchFiles() properly, and reordering the tables in the sqlite queries
(jbowes showed me how sqlite is stupid about the ordering of the tables
you're selecting from).


Index: __init__.py
===================================================================
RCS file: /home/groups/yum/cvs/yum/yum/__init__.py,v
retrieving revision 1.305
retrieving revision 1.306
diff -u -r1.305 -r1.306
--- __init__.py	4 Mar 2007 21:37:58 -0000	1.305
+++ __init__.py	5 Mar 2007 03:51:39 -0000	1.306
@@ -24,6 +24,7 @@
 import time
 import sre_constants
 import glob
+import fnmatch
 import logging
 import logging.config
 from ConfigParser import ParsingError, ConfigParser
@@ -1227,67 +1228,51 @@
     def searchPackageProvides(self, args, callback=None):
         
         matches = {}
-
-        # search pkgSack - fully populate the worthwhile metadata to search
-        # if it even vaguely matches
-        self.verbose_logger.log(logginglevels.DEBUG_1,
-            'fully populating the necessary data')
-        for arg in args:
-            matched = False
-            globs = ['.*bin\/.*', '.*\/etc\/.*', '^\/usr\/lib\/sendmail$']
-            for glob in globs:
-                globc = re.compile(glob)
-                if globc.match(arg):
-                    matched = True
-            if not matched:
-                self.doSackFilelistPopulate()
-
         for arg in args:
-            # assume we have to search every package, unless we can refine the search set
-            where = self.pkgSack
-            
-            # this is annoying. If the user doesn't use any glob or regex-like
-            # or regexes then we can use the where 'like' search in sqlite
-            # if they do use globs or regexes then we can't b/c the string
-            # will no longer have much meaning to use it for matches
-            
-            if hasattr(self.pkgSack, 'searchAll'):
-                if not re.match('.*[\*,\[,\],\{,\},\?,\+,\%].*', arg):
-                    self.verbose_logger.log(logginglevels.DEBUG_1,
-                        'Using the like search')
-                    where = self.pkgSack.searchAll(arg, query_type='like')
-            
+            if not re.match('.*[\*\?\[\]].*', arg):
+                isglob = False
+                if arg[0] != '/':
+                    canBeFile = False
+                else:
+                    canBeFile = True
+            else:
+                isglob = True
+                canBeFile = True
+                
+            if not isglob:
+                usedDepString = True
+                where = self.returnPackagesByDep(arg)
+            else:
+                usedDepString = False
+                print time.time()
+                where = self.pkgSack.searchAll(arg, False)
+                print time.time()
             self.verbose_logger.log(logginglevels.DEBUG_1,
                 'Searching %d packages', len(where))
-            self.verbose_logger.log(logginglevels.DEBUG_1,
-                'refining the search expression of %s', arg) 
-            restring = misc.refineSearchPattern(arg)
-            self.verbose_logger.log(logginglevels.DEBUG_1,
-                'refined search: %s', restring)
-            try: 
-                arg_re = re.compile(restring, flags=re.I)
-            except sre_constants.error, e:
-                raise Errors.MiscError, \
-                  'Search Expression: %s is an invalid Regular Expression.\n' % arg
-
+            
             for po in where:
                 self.verbose_logger.log(logginglevels.DEBUG_2,
                     'searching package %s', po)
                 tmpvalues = []
                 
-                self.verbose_logger.log(logginglevels.DEBUG_2,
-                    'searching in file entries')
-                for filetype in po.returnFileTypes():
-                    for fn in po.returnFileEntries(ftype=filetype):
-                        if arg_re.search(fn):
-                            tmpvalues.append(fn)
+                
+                if usedDepString:
+                    tmpvalues.append(arg)
+
+                if isglob or canBeFile:
+                    self.verbose_logger.log(logginglevels.DEBUG_2,
+                        'searching in file entries')
+                    for thisfile in po.dirlist + po.filelist + po.ghostlist:
+                        if fnmatch.fnmatch(thisfile, arg):
+                            tmpvalues.append(thisfile)
                 
                 self.verbose_logger.log(logginglevels.DEBUG_2,
                     'searching in provides entries')
                 for (p_name, p_flag, (p_e, p_v, p_r)) in po.provides:
-                    if arg_re.search(p_name):
-                        prov = misc.prco_tuple_to_string((p_name, p_flag, (p_e, p_v, p_r)))
-                        tmpvalues.append(prov)
+                    prov = misc.prco_tuple_to_string((p_name, p_flag, (p_e, p_v, p_r)))
+                    if not usedDepString:
+                        if fnmatch.fnmatch(p_name, arg) or fnmatch.fnmatch(prov, arg):
+                            tmpvalues.append(prov)
 
                 if len(tmpvalues) > 0:
                     if callback:
@@ -1296,38 +1281,54 @@
         
         # installed rpms, too
         taglist = ['filelist', 'dirnames', 'provides_names']
-        arg_re = []
         for arg in args:
-            restring = misc.refineSearchPattern(arg)
-
-            try: reg = re.compile(restring, flags=re.I)
-            except sre_constants.error, e:
-                raise Errors.MiscError, \
-                 'Search Expression: %s is an invalid Regular Expression.\n' % arg
+            if not re.match('.*[\*\?\[\]].*', arg):
+                isglob = False
+                if arg[0] != '/':
+                    canBeFile = False
+                else:
+                    canBeFile = True
+            else:
+                isglob = True
+                canBeFile = True
             
-
-            for po in self.rpmdb:
-                tmpvalues = []
-                searchlist = []
-                for tag in taglist:
-                    tagdata = getattr(po, tag)
-                    if tagdata is None:
-                        continue
-                    if type(tagdata) is types.ListType:
-                        searchlist.extend(tagdata)
-                    else:
-                        searchlist.append(tagdata)
-                
-                for item in searchlist:
-                    if reg.search(item):
-                        tmpvalues.append(item)
-    
-                del searchlist
-    
-                if len(tmpvalues) > 0:
-                    if callback:
-                        callback(po, tmpvalues)
-                    matches[po] = tmpvalues
+            if not isglob:
+                where = self.returnInstalledPackagesByDep(arg)
+                usedDepString = True
+                for po in where:
+                    tmpvalues = []
+                    msg = 'Provides-match: %s' % arg
+                    tmpvalues.append(msg)
+
+                    if len(tmpvalues) > 0:
+                        if callback:
+                            callback(po, tmpvalues)
+                        matches[po] = tmpvalues
+
+            else:
+                usedDepString = False
+                where = self.rpmdb
+                
+                for po in where:
+                    searchlist = []
+                    tmpvalues = []
+                    for tag in taglist:
+                        tagdata = getattr(po, tag)
+                        if tagdata is None:
+                            continue
+                        if type(tagdata) is types.ListType:
+                            searchlist.extend(tagdata)
+                        else:
+                            searchlist.append(tagdata)
+                    
+                    for item in searchlist:
+                        if fnmatch.fnmatch(item, arg):
+                            tmpvalues.append(item)
+                
+                    if len(tmpvalues) > 0:
+                        if callback:
+                            callback(po, tmpvalues)
+                        matches[po] = tmpvalues
             
             
         return matches

Index: sqlitesack.py
===================================================================
RCS file: /home/groups/yum/cvs/yum/yum/sqlitesack.py,v
retrieving revision 1.82
retrieving revision 1.83
diff -u -r1.82 -r1.83
--- sqlitesack.py	4 Mar 2007 21:34:10 -0000	1.82
+++ sqlitesack.py	5 Mar 2007 03:51:39 -0000	1.83
@@ -262,72 +262,28 @@
     # Search packages that either provide something containing name
     # or provide a file containing name 
     def searchAll(self,name, query_type='like'):
-    
-        # This should never be called with a name containing a %
-        assert(name.find('%') == -1)
-        result = []
-        for (rep,cache) in self.primarydb.items():
-            cur = cache.cursor()
-            executeSQL(cur, "select DISTINCT packages.pkgId as pkgId from provides,packages where provides.name LIKE ? AND provides.pkgKey = packages.pkgKey", ("%%%s%%" % name,))
-            for ob in cur:
-                if self._excluded(rep, ob['pkgId']):
-                    continue
-                pkg = self.getPackageDetails(ob['pkgId'])
-                result.append((self.pc(rep,pkg)))
+        # this function is just silly and it reduces down to just this
+        return self.searchPrco(name, 'provides')
 
-            cur = cache.cursor()
-            executeSQL(cur, "select DISTINCT packages.pkgId as pkgId from files,packages where files.name LIKE ? and files.pkgKey = packages.pkgKey", ("%%%s%%" % name,))
-            for ob in cur.fetchall():
-                if self._excluded(rep,ob['pkgId']):
-                    continue
-                pkg = self.getPackageDetails(ob['pkgId'])
-                result.append(self.pc(rep,pkg))
-
-        for (rep,cache) in self.filelistsdb.items():
-            cur = cache.cursor()
-            (dirname,filename) = os.path.split(name)
-            # This query means:
-            # Either name is a substring of dirname or the directory part
-            # in name is a substring of dirname and the file part is part
-            # of filelist
-            executeSQL(cur, "select packages.pkgId as pkgId,\
-                filelist.dirname as dirname,\
-                filelist.filetypes as filetypes,\
-                filelist.filenames as filenames \
-                from packages,filelist where \
-                (filelist.dirname LIKE ? \
-                OR (filelist.dirname LIKE ? AND\
-                filelist.filenames LIKE ?))\
-                AND (filelist.pkgKey = packages.pkgKey)",
-                ("%%%s%%" % name, "%%%s%%" % dirname, "%%%s%%" % filename))
-
-            # cull the results for false positives
-            for ob in cur:
-                # Check if it is an actual match
-                # The query above can give false positives, when
-                # a package provides /foo/aaabar it will also match /foo/bar
-                if self._excluded(rep, ob['pkgId']):
-                    continue
-                real = False
-                for filename in decodefilenamelist(ob['filenames']):
-                    if (ob['dirname']+'/'+filename).find(name) != -1:
-                        real = True
-                if (not real):
-                    continue
-                pkg = self.getPackageDetails(ob['pkgId'])
-                result.append((self.pc(rep,pkg)))
-
-        result = misc.unique(result)
-        return result
 
     def searchFiles(self, name):
         """search primary if file will be in there, if not, search filelists, use globs, if possible"""
         
+        # optimizations:
+        # if it is not  glob, then see if it is in the primary.xml filelists, 
+        # if so, just use those for the lookup
+        
         glob = True
         if not re.match('.*[\*\?\[\]].*', name):
             glob = False
         
         pkgs = {}
+        if len(self.filelistsdb.keys()) == 0:
+            # grab repo object from primarydb and force filelists population in this sack using repo
+            # sack.populate(repo, mdtype, callback, cacheonly)
+            for (repo,cache) in self.primarydb.items():
+                self.populate(repo, mdtype='filelists')
+
         for (rep,cache) in self.filelistsdb.items():
             cur = cache.cursor()
 
@@ -455,11 +411,18 @@
     
     def searchPrco(self, name, prcotype):
         """return list of packages having prcotype name (any evr and flag)"""
-        
+        glob = True
+        if not re.match('.*[\*\?\[\]].*', name):
+            glob = False
+
         results = []
         for (rep,cache) in self.primarydb.items():
             cur = cache.cursor()
-            executeSQL(cur, "select packages.* from packages,%s where %s.name =? and %s.pkgKey=packages.pkgKey" % (prcotype,prcotype,prcotype), (name,))
+            if glob:
+                executeSQL(cur, "select packages.* from %s,packages where %s.name  glob ? and %s.pkgKey=packages.pkgKey" % (prcotype,prcotype,prcotype), (name,))
+            else:
+                executeSQL(cur, "select packages.* from %s,packages where %s.name =? and %s.pkgKey=packages.pkgKey" % (prcotype,prcotype,prcotype), (name,))
+        
             for x in cur:
                 if self._excluded(rep, x['pkgId']):
                     continue
@@ -467,74 +430,82 @@
         
         # If it's not a provides or a filename, we are done
         if prcotype != "provides" or name[0] != '/':
-            return results
-
+            if not glob:
+                return results
 
         # If it is a filename, search the primary.xml file info
         for (rep,cache) in self.primarydb.items():
             cur = cache.cursor()
-            executeSQL(cur, "select packages.* from files,packages where files.name = ? and files.pkgKey = packages.pkgKey" , (name,))
+            if glob:
+                executeSQL(cur, "select packages.* from files,packages where files.name glob ? and files.pkgKey = packages.pkgKey" , (name,))
+            else:
+                executeSQL(cur, "select packages.* from files,packages where files.name = ? and files.pkgKey = packages.pkgKey" , (name,))    
+                
             for x in cur:
                 if self._excluded(rep,x['pkgId']):
                     continue
                 results.append(self.pc(rep,x))
-
+        
         matched = 0
         globs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
-        for glob in globs:
-            globc = re.compile(glob)
+        for thisglob in globs:
+            globc = re.compile(thisglob)
             if globc.match(name):
                 matched = 1
 
-        if matched: # if its in the primary.xml files then skip the other check
+        if matched and not glob: # if its in the primary.xml files then skip the other check
             return results
 
-        #FIXME - sort this all out.
         # If it is a filename, search the files.xml file info
-        for (rep,cache) in self.filelistsdb.items():
-            cur = cache.cursor()
-            (dirname,filename) = os.path.split(name)
-            # FIXME: why doesn't this work???
-            if 0: # name.find('%') == -1: # no %'s in the thing safe to LIKE
-                executeSQL(cur, "select packages.pkgId as pkgId,\
-                    filelist.dirname as dirname,\
-                    filelist.filetypes as filetypes,\
-                    filelist.filenames as filenames \
-                    from packages,filelist where \
-                    (filelist.dirname LIKE ? \
-                    OR (filelist.dirname LIKE ? AND\
-                    filelist.filenames LIKE ?))\
-                    AND (filelist.pkgKey = packages.pkgKey)", (name,dirname,filename))
-            else: 
-                executeSQL(cur, "select packages.pkgId as pkgId,\
-                    filelist.dirname as dirname,\
-                    filelist.filetypes as filetypes,\
-                    filelist.filenames as filenames \
-                    from filelist,packages where dirname = ? AND filelist.pkgKey = packages.pkgKey" , (dirname,))
-
-            matching_ids = []
-            for res in cur:
-                if self._excluded(rep, res['pkgId']):
-                    continue
+        results.extend(self.searchFiles(name))
+        return results
+        
+        
+        #~ #FIXME - comment this all out below here
+        #~ for (rep,cache) in self.filelistsdb.items():
+            #~ cur = cache.cursor()
+            #~ (dirname,filename) = os.path.split(name)
+            #~ # FIXME: why doesn't this work???
+            #~ if 0: # name.find('%') == -1: # no %'s in the thing safe to LIKE
+                #~ executeSQL(cur, "select packages.pkgId as pkgId,\
+                    #~ filelist.dirname as dirname,\
+                    #~ filelist.filetypes as filetypes,\
+                    #~ filelist.filenames as filenames \
+                    #~ from packages,filelist where \
+                    #~ (filelist.dirname LIKE ? \
+                    #~ OR (filelist.dirname LIKE ? AND\
+                    #~ filelist.filenames LIKE ?))\
+                    #~ AND (filelist.pkgKey = packages.pkgKey)", (name,dirname,filename))
+            #~ else: 
+                #~ executeSQL(cur, "select packages.pkgId as pkgId,\
+                    #~ filelist.dirname as dirname,\
+                    #~ filelist.filetypes as filetypes,\
+                    #~ filelist.filenames as filenames \
+                    #~ from filelist,packages where dirname = ? AND filelist.pkgKey = packages.pkgKey" , (dirname,))
+
+            #~ matching_ids = []
+            #~ for res in cur:
+                #~ if self._excluded(rep, res['pkgId']):
+                    #~ continue
                 
-                #FIXME - optimize the look up here by checking for single-entry filenames
-                quicklookup = {}
-                for fn in decodefilenamelist(res['filenames']):
-                    quicklookup[fn] = 1
+                #~ #FIXME - optimize the look up here by checking for single-entry filenames
+                #~ quicklookup = {}
+                #~ for fn in decodefilenamelist(res['filenames']):
+                    #~ quicklookup[fn] = 1
                 
-                # If it matches the dirname, that doesnt mean it matches
-                # the filename, check if it does
-                if filename and not quicklookup.has_key(filename):
-                    continue
+                #~ # If it matches the dirname, that doesnt mean it matches
+                #~ # the filename, check if it does
+                #~ if filename and not quicklookup.has_key(filename):
+                    #~ continue
                 
-                matching_ids.append(str(res['pkgId']))
+                #~ matching_ids.append(str(res['pkgId']))
                 
             
-            pkgs = self._getListofPackageDetails(matching_ids)
-            for pkg in pkgs:
-                results.append(self.pc(rep,pkg))
+            #~ pkgs = self._getListofPackageDetails(matching_ids)
+            #~ for pkg in pkgs:
+                #~ results.append(self.pc(rep,pkg))
         
-        return results
+        #~ return results
 
     def searchProvides(self, name):
         """return list of packages providing name (any evr and flag)"""




More information about the Yum-cvs-commits mailing list