[yum-cvs] yum/yum mdparser.py, NONE, 1.1 sqlitecache.py, NONE, 1.1 sqlitesack.py, NONE, 1.1 __init__.py, 1.84, 1.85 repos.py, 1.65, 1.66

Gijs Hollestelle gijs at login.linux.duke.edu
Fri Feb 25 19:30:25 UTC 2005


Update of /home/groups/yum/cvs/yum/yum
In directory login:/tmp/cvs-serv15277/yum

Modified Files:
	__init__.py repos.py 
Added Files:
	mdparser.py sqlitecache.py sqlitesack.py 
Log Message:
Adding the sqlite stuff to yum HEAD.
Note that currently package excludes are non-functional and there are a number
of outstanding TODOs.



--- NEW FILE mdparser.py ---
import libxml2
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

#TODO: document everything here

class MDParser:
    """Stream packages out of a repodata XML file (primary, filelists or
    other) using libxml2's xmlTextReader.

    Iterating over an instance yields one handler object per <package>
    element: PrimaryEntry, FilelistsEntry or OtherEntry depending on the
    document's root element.
    """

    def __init__(self, filename):

        # Set up mapping of meta types to handler classes 
        handlers = {
            'metadata': PrimaryEntry,
            'filelists': FilelistsEntry,
            'otherdata': OtherEntry,
        }
            
        self.reader = libxml2.newTextReaderFilename(filename)
        self.total = None       # package count declared on the root element
        self.count = 0          # number of packages handed out so far
        self._handlercls = None

        # Read in type, set package node handler and get total number of
        # packages
        while self.reader.Read():
            # NodeType 1 == element start; skip whitespace/comments until
            # the root element is reached
            if self.reader.NodeType() != 1:
                continue

            # Read the metadata type and determine handler class
            metadatatype = self.reader.LocalName()
            self._handlercls = handlers.get(metadatatype, None)

            if not self._handlercls:
                raise ValueError('Unknown repodata type "%s" in %s' % (
                    metadatatype, filename))
           
            # Get the total number of packages
            # NOTE(review): GetAttribute returns None when the attribute
            # is missing, which raises TypeError, not ValueError -- confirm
            try:
                self.total = int(self.reader.GetAttribute('packages'))
            except ValueError: 
                pass
        
            # Only the root element is inspected here; <package> elements
            # are consumed lazily in next()
            break

        # Handle broken input (empty document: the loop never ran)
        if not self._handlercls:
            raise ValueError('no valid repository metadata found in %s' % (
                filename))

    def __iter__(self):
        return self

    def next(self):
        """Return the handler for the next <package> element; raise
        StopIteration at end of input (Python 2 iterator protocol)."""
        while self.reader.Read():

            name = self.reader.LocalName()
            if name != 'package':
                continue

            self.count += 1
            # The handler's constructor consumes the reader up to and
            # including the matching </package> end tag
            return self._handlercls(self.reader)

        raise StopIteration


class BaseEntry:
    """Common base class for parsed package entries.

    Scalar package attributes live in the self._p dict; the class acts
    as a read-only mapping over it and supplies helpers for pulling
    attributes and text content off a libxml2 text reader.
    """

    def __init__(self, reader):
        self._p = {} 

    def __getitem__(self, k):
        '''Access entry data. 

        Returns None by default to avoid having to set defaults for all
        attributes.
        '''
        if self._p.has_key(k):
            return self._p[k]
        return None

    def keys(self):
        return self._p.keys()

    def values(self):
        return self._p.values()

    def has_key(self, k):
        return self._p.has_key(k)

    def __str__(self):
        # Render as sorted key=value lines; mainly useful for debugging
        out = StringIO()
        keys = self.keys()
        keys.sort()
        for k in keys:
            out.write('%s=%s\n' % (k, self[k]))
        return out.getvalue()

    def _props(self, reader, keyprefix=''):
        # Return the current element's attributes as a dict with each key
        # prefixed by *keyprefix*; the reader is moved back onto the
        # element before returning
        if not reader.HasAttributes(): return {}
        propdict = {}
        reader.MoveToFirstAttribute()
        while 1:
            propdict[keyprefix+reader.LocalName()] = reader.Value()
            if not reader.MoveToNextAttribute(): break
        reader.MoveToElement()
        return propdict
        
    def _value(self, reader):
        # Concatenate the text nodes (NodeType 3) directly inside the
        # current element, stopping at the first non-text node
        if reader.IsEmptyElement(): return ''
        val = ''
        while reader.Read():
            if reader.NodeType() == 3: val += reader.Value()
            else: break
        return val

    def _propswithvalue(self, reader, keyprefix=''):
        # Attributes plus the element's text content under '<prefix>value'
        out = self._props(reader, keyprefix)
        out[keyprefix+'value'] = self._value(reader)
        return out

    def _getFileEntry(self, reader):
        # Parse a <file [type="..."]>path</file> element; the type
        # defaults to 'file' when the attribute is absent
        type = 'file'
        props = self._props(reader)
        if props.has_key('type'): type = props['type']
        value = self._value(reader)
        return (type, value)

class PrimaryEntry(BaseEntry):
    """One <package> entry from primary.xml metadata.

    Scalar fields end up in self._p (see BaseEntry), dependency data in
    self.prco ({'provides'|'requires'|...: [attr dicts]}) and the file
    list in self.files ({path: filetype}).
    """

    def __init__(self, reader):

        BaseEntry.__init__(self, reader)

        # Avoid excess typing :)
        p = self._p

        self.prco = {}
        self.files = {}

        # Consume the reader up to and including </package> (NodeType 15
        # is an element end tag)
        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == 'package':
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()

            if name in ('name', 'arch', 'summary', 'description', 'url', 
                    'packager'): 
                p[name] = self._value(reader)

            elif name == 'version': 
                # epoch/ver/rel come in as attributes of <version>
                p.update(self._props(reader))

            elif name in ('time', 'size'):
                # Stored as e.g. time_file, time_build, size_package, ...
                p.update(self._props(reader, name+'_'))

            elif name in ('checksum', 'location'): 
                # Attributes plus text content, e.g. checksum_type and
                # checksum_value
                p.update(self._propswithvalue(reader, name+'_'))
            
            elif name == 'format': 
                self.setFormat(reader)

        # The package's checksum doubles as its unique id
        p['pkgid'] = p['checksum_value']

    def setFormat(self, reader):
        """Parse the <format> sub-element (rpm-specific data, prco and
        the file list)."""

        # Avoid excessive typing :)
        p = self._p

        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == 'format':
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()

            if name in ('license', 'vendor', 'group', 'buildhost',
                        'sourcerpm'):
                p[name] = self._value(reader)

            elif name in ('provides', 'requires', 'conflicts', 
                          'obsoletes'):
                self.setPrco(reader)

            elif name == 'header-range':
                # Becomes rpm_header_start / rpm_header_end
                p.update(self._props(reader, 'rpm_header_'))

            elif name == 'file':
                (type, value) = self._getFileEntry(reader)
                self.files[value] = type

    def setPrco(self, reader):
        """Collect the <entry> attribute dicts of one provides/requires/
        conflicts/obsoletes element into self.prco."""
        members = []
        myname = reader.LocalName()
        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == myname:
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()
            members.append(self._props(reader))
        self.prco[myname] = members
        
        
class FilelistsEntry(BaseEntry):
    """One <package> entry from filelists.xml metadata: just the pkgid
    and the complete file list ({path: filetype})."""

    def __init__(self, reader):
        BaseEntry.__init__(self, reader)
        self._p['pkgid'] = reader.GetAttribute('pkgid')
        self.files = {}

        # Consume the reader up to and including </package>
        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == 'package':
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()
            if name == 'file':
                (type, value) = self._getFileEntry(reader)
                self.files[value] = type
                
class OtherEntry(BaseEntry):
    """One <package> entry from other.xml metadata: the pkgid plus a
    list of changelog dicts (author/date attributes and the entry text
    under 'value')."""

    def __init__(self, reader):
        BaseEntry.__init__(self, reader)

        self._p['pkgid'] = reader.GetAttribute('pkgid')
        self._p['changelog'] = []
        # Consume the reader up to and including </package>
        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == 'package':
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()
            if name == 'changelog':
                entry = self._props(reader)
                entry['value'] = self._value(reader)
                self._p['changelog'].append(entry)




def test():
    import sys

    parser = MDParser(sys.argv[1])

    for pkg in parser:
        print '-' * 40
        print pkg
        pass

    print parser.total
    print parser.count

if __name__ == '__main__':
    test()

--- NEW FILE sqlitecache.py ---
#!/usr/bin/python -tt
#
# TODO
# - Add support for multiple checksums per rpm (is this required??)
# - Store filetypes as one char per type instead of a string
# - display the name of the repository when caching metadata
# - don't use print for output, use yum's output functions instead
# - don't use | as a separator for files; use / or, to be cleaner (but
#   slower), use cPickle.dumps to store an array of filenames in a string

import os
import sys
import mdcache
import sqlite
import time
import mdparser

# This version refers to the internal structure of the sqlite cache files
# increasing this number forces all caches of a lower version number
# to be re-generated
dbversion = '4'

# TODO
# We probably don't really need to subclass RepodataParser anymore, just use
# the same interface. sqlitecache.py and mdparser.py combined replaces all the
# functionality from mdcache.py. We should aim for removal of mdcache.py.
class RepodataParserSqlite(mdcache.RepodataParser):
    
    def loadCache(self,filename):
        """Load cache from filename, check if it is valid and that dbversion 
        matches the required dbversion.

        Returns a (checksum, db) tuple; raises sqlite.DatabaseError when
        the cache is unusable so the caller regenerates it."""
        db = sqlite.connect(filename)
        cur = db.cursor()
        cur.execute("select * from db_info")
        info = cur.fetchone()
        # If info is not in there this is an incomplete cache file
        # (this could happen when the user hits ctrl-c or kills yum
        # when the cache is being generated or updated)
        if (not info):
            raise sqlite.DatabaseError, "Incomplete database cache file"

        # Now check the database version
        if (info['dbversion'] != dbversion):
            print "Warning your cache file is version %s, we need %s" % (info['dbversion'],dbversion)
            raise sqlite.DatabaseError, "Older version of yum sqlite"
        # This appears to be a valid database, return checksum value and 
        # database object
        return (info['checksum'],db)
        
    def getFilename(self,location):
        """Return the sqlite cache path for the metadata file at
        *location* (simply the location with '.sqlite' appended)."""
        return '%s.sqlite' % (location)
            
    def getDatabase(self, location, cachetype):
        """Return (db, dbchecksum) for the metadata file at *location*.

        An existing valid cache is reused; on any failure (missing file,
        broken or outdated cache) a fresh empty database of *cachetype*
        is created and dbchecksum comes back empty, which forces the
        caller to repopulate it."""
        filename = self.getFilename(location)
        dbchecksum = ''
        # First try to open an existing database
        try:
            # Probe for existence so a missing cache raises IOError; close
            # the probe handle immediately instead of leaking it until
            # garbage collection (the original never closed it)
            f = open(filename)
            f.close()
            (dbchecksum,db) = self.loadCache(filename)
        except (IOError,sqlite.DatabaseError,KeyError):
            # If it doesn't exist, create it
            db = self.makeSqliteCacheFile(filename,cachetype)
        return (db,dbchecksum)

    def _getbase(self, location, checksum, metadatatype):
        """Open the cache for *location* and regenerate it from the XML
        metadata when its stored checksum differs from *checksum*.
        Returns the open database object."""
        (db, dbchecksum) = self.getDatabase(location, metadatatype)
        # db should now contain a valid database object, check if it is
        # up to date
        if (checksum != dbchecksum):

            print "%s sqlite cache needs updating, reading in metadata" % (
                    metadatatype)
            parser = mdparser.MDParser(location)
            self.updateSqliteCache(db, parser, checksum, metadatatype)
        db.commit()
        return db

    def getPrimary(self, location, checksum):
        """Load primary.xml.gz from an sqlite cache and update it 
           if required.  Returns the open database object."""
        return self._getbase(location, checksum, 'primary')

    def getFilelists(self, location, checksum):
        """Load filelist.xml.gz from an sqlite cache and update it if 
           required.  Returns the open database object."""
        return self._getbase(location, checksum, 'filelists')

    def getOtherdata(self, location, checksum):
        """Load other.xml.gz from an sqlite cache and update it if required.
           Returns the open database object."""
        return self._getbase(location, checksum, 'other')
         
    def createTablesFilelists(self,db):
        """Create the required tables for filelists metadata in the sqlite 
           database.

        The filenames/filetypes columns hold '|'-delimited strings (one
        row per directory) rather than one row per file."""
        cur = db.cursor()
        self.createDbInfo(cur)
        # This table is needed to match pkgKeys to pkgIds
        cur.execute("""CREATE TABLE packages(
            pkgKey INTEGER PRIMARY KEY,
            pkgId TEXT)
        """)
        cur.execute("""CREATE TABLE filelist(
            pkgKey INTEGER,
            dirname TEXT,
            filenames TEXT,
            filetypes TEXT)
        """)
        cur.execute("CREATE INDEX keyfile ON filelist (pkgKey)")
        cur.execute("CREATE INDEX pkgId ON packages (pkgId)")
    
    def createTablesOther(self,db):
        """Create the required tables for other.xml.gz metadata in the sqlite 
           database (one changelog row per entry, keyed on pkgKey)."""
        cur = db.cursor()
        self.createDbInfo(cur)
        # This table is needed to match pkgKeys to pkgIds
        cur.execute("""CREATE TABLE packages(
            pkgKey INTEGER PRIMARY KEY,
            pkgId TEXT)
        """)
        cur.execute("""CREATE TABLE changelog(
            pkgKey INTEGER,
            author TEXT,
            date TEXT,
            changelog TEXT)
        """)
        cur.execute("CREATE INDEX keychange ON changelog (pkgKey)")
        cur.execute("CREATE INDEX pkgId ON packages (pkgId)")
        
    def createTablesPrimary(self,db):
        """Create the required tables for primary metadata in the sqlite 
           database: the packages table (columns generated from
           PackageToDBAdapter.COLUMNS), one table per prco kind, and the
           files table."""

        cur = db.cursor()
        self.createDbInfo(cur)
        # The packages table contains most of the information in primary.xml.gz

        # Build the column list dynamically so the schema always matches
        # what PackageToDBAdapter will feed into insertHash()
        q = 'CREATE TABLE packages(\n' \
            'pkgKey INTEGER PRIMARY KEY,\n'

        cols = []
        for col in PackageToDBAdapter.COLUMNS:
            cols.append('%s TEXT' % col)
        q += ',\n'.join(cols) + ')'

        cur.execute(q)

        # Create requires, provides, conflicts and obsoletes tables
        # to store prco data
        for t in ('requires','provides','conflicts','obsoletes'):
            cur.execute("""CREATE TABLE %s (
              name TEXT,
              flags TEXT,
              epoch TEXT,
              version TEXT,
              release TEXT,
              pkgKey TEXT)
            """ % (t))
        # Create the files table to hold all the file information
        cur.execute("""CREATE TABLE files (
            name TEXT,
            type TEXT,
            pkgKey TEXT)
        """)
        # Create indexes for faster searching
        cur.execute("CREATE INDEX packagename ON packages (name)")
        cur.execute("CREATE INDEX providesname ON provides (name)")
        cur.execute("CREATE INDEX packageId ON packages (pkgId)")
        db.commit()
    
    def createDbInfo(self,cur):
        # Create the db_info table, this contains sqlite cache metadata:
        # the cache schema version and the checksum of the source XML file
        cur.execute("""CREATE TABLE db_info (
            dbversion TEXT,
            checksum TEXT)
        """)

    def insertHash(self,table,hash,cursor):
        """Insert the key/value pairs of *hash* as one row of *table*
        and return the new row's rowid."""
        # SQL-quote every value: None becomes NULL, embedded single
        # quotes are doubled per SQL convention
        quoted = []
        for val in hash.values():
            if (val == None):
                quoted.append("NULL")
            else:
                quoted.append("'%s'" % (str(val).replace("'","''")))
        query = "INSERT INTO %s (%s) VALUES (%s)" % (
                table, ",".join(hash.keys()), ",".join(quoted))
        cursor.execute(query)
        return cursor.lastrowid
             
    def makeSqliteCacheFile(self, filename, cachetype):
        """Create an initial database in the given filename"""

        # If it exists, remove it as we were asked to create a new one
        if (os.path.exists(filename)):
            print "Warning file already exists, removing old version"
            os.unlink(filename)

        # Try to create the databse in filename, or use in memory when
        # this fails
        # TODO in memory sqlite probably is not worth the overhead of creating
        # the database, maybe we should fall back to using old skool pickle
        # caches if this fails, i.e. raise some exception that yum
        # will catch and will cause it to fall back to non-sqlite for
        # this repo
        try:
            f = open(filename,'w')
            db = sqlite.connect(filename) 
        except IOError:
            print "Warning could not create sqlite cache file, using in memory cache instead"
            db = sqlite.connect(":memory:")

        # The file has been created, now create the tables and indexes
        if (cachetype == 'primary'):
            self.createTablesPrimary(db)
        elif (cachetype == 'filelists'):
            self.createTablesFilelists(db)
        elif (cachetype == 'other'):
            self.createTablesOther(db)
        else:
            raise sqlite.DatabaseError, "Sorry don't know how to store %s" % (cachetype)
        return db

    def addPrimary(self, pkgId, package, cur):
        """Add a package to the primary cache.

        *package* is a PrimaryEntry; its scalar fields go into the
        packages table (via PackageToDBAdapter), its prco data into the
        per-kind tables and its file list into the files table."""
        # Store the package info into the packages table
        pkgKey = self.insertHash('packages', PackageToDBAdapter(package), cur)

        # Now store all prco data, linked back to the package via pkgKey
        for ptype in package.prco:
            for entry in package.prco[ptype]:
                data = {
                    'pkgKey': pkgKey,
                    'name': entry.get('name'),
                    'flags': entry.get('flags'),
                    'epoch': entry.get('epoch'),
                    'version': entry.get('ver'),
                    'release': entry.get('rel'),
                }
                self.insertHash(ptype,data,cur)
        
        # Now store all file information
        for f in package.files:
            data = {
                'name': f,
                'type': package.files[f],
                'pkgKey': pkgKey,
            }
            self.insertHash('files',data,cur)

    def addFilelists(self, pkgId, package,cur):
        """Add a package to the filelists cache.

        Files are grouped per directory; each directory becomes one
        filelist row whose filenames/filetypes columns are '|'-delimited
        strings of the form '|a|b|'."""
        pkginfo = {'pkgId': pkgId}
        pkgKey = self.insertHash('packages',pkginfo, cur)
        dirs = {}
        for (filename,filetype) in package.files.iteritems():
            (dirname,filename) = (os.path.split(filename))
            if (dirs.has_key(dirname)):
                dirs[dirname]['files'] += filename+'|'
                dirs[dirname]['types'] += filetype+'|'
            else:
                dirs[dirname] = {}
                dirs[dirname]['files'] = '|'+filename+'|'
                dirs[dirname]['types'] = '|'+filetype+'|'

        for (dirname,dir) in dirs.items():
            data = {
                'pkgKey': pkgKey,
                'dirname': dirname,
                'filenames': dir['files'],
                'filetypes': dir['types']
            }
            self.insertHash('filelist',data,cur)

    def addOther(self, pkgId, package,cur):
        """Add a package's changelog entries to the other-data cache."""
        pkgKey = self.insertHash('packages', {'pkgId': pkgId}, cur)
        for entry in package['changelog']:
            self.insertHash('changelog', {
                'pkgKey': pkgKey,
                'author': entry.get('author'),
                'date': entry.get('date'),
                'changelog': entry.get('value'),
            }, cur)

    def updateSqliteCache(self, db, parser, checksum, cachetype):
        """Update the sqlite cache by making it fit the packages described
        in dobj (info that has been read from primary.xml metadata) afterwards
        update the checksum of the database to checksum"""
       
        t = time.time()
        delcount = 0
        newcount = 0

        # We start be removing the old db_info, as it is no longer valid
        cur = db.cursor()
        cur.execute("DELETE FROM db_info") 

        # First create a list of all pkgIds that are in the database now
        cur.execute("SELECT pkgId, pkgKey from packages")
        currentpkgs = {}
        for pkg in cur.fetchall():
            currentpkgs[pkg['pkgId']] = pkg['pkgKey']

        if (cachetype == 'primary'):
            deltables = ("packages","files","provides","requires", 
                        "conflicts","obsoletes")
        elif (cachetype == 'filelists'):
            deltables = ("packages","filelist")
        elif (cachetype == 'other'):
            deltables = ("packages","changelog")
        else:
            raise sqlite.DatabaseError,"Unknown type %s" % (cachetype)
        
        # Add packages that are not in the database yet and build up a list of
        # all pkgids in the current metadata

        all_pkgIds = {}
        for package in parser:

            if self.callback is not None:
                self.callback(parser.count, parser.total, "Caching")

            pkgId = package['pkgId']
            all_pkgIds[pkgId] = 1

            # This package is already in the database, skip it now
            if (currentpkgs.has_key(pkgId)):
                continue

            # This is a new package, lets insert it
            newcount += 1
            if cachetype == 'primary':
                self.addPrimary(pkgId, package, cur)
            elif cachetype == 'filelists':
                self.addFilelists(pkgId, package, cur)
            elif cachetype == 'other':
                self.addOther(pkgId, package, cur)

        # Remove those which are not in dobj
        delpkgs = []
        for (pkgId, pkgKey) in currentpkgs.items():
            if not all_pkgIds.has_key(pkgId):
                delcount += 1
                delpkgs.append(str(pkgKey))
        delpkgs = "("+",".join(delpkgs)+")"
        for table in deltables:
            cur.execute("DELETE FROM "+table+ " where pkgKey in %s" % delpkgs)

        cur.execute("INSERT into db_info (dbversion,checksum) VALUES (%s,%s)",
                (dbversion,checksum))
        db.commit()
        print "Added %s new packages, deleted %s old in %.2f seconds" % (
                newcount, delcount, time.time()-t)
        return db


class PackageToDBAdapter:

    '''
    Adapt a PrimaryEntry instance to suit the sqlite database. 

    This hides certain attributes and converts some column names in order to
    decouple the parser implementation from the sqlite database schema.
    '''

    # Database column name -> parser attribute name, for the columns whose
    # names differ between the schema and the parsed entry
    NAME_MAPS = {
        'pkgId': 'pkgid',
        'rpm_package': 'package',
        'version': 'ver',
        'release': 'rel',
        'rpm_license': 'license',
        'rpm_vendor': 'vendor',
        'rpm_group': 'group',
        'rpm_buildhost': 'buildhost',
        'rpm_sourcerpm': 'sourcerpm',
        }
    
    # Columns of the packages table, in schema order
    COLUMNS = (
            'pkgId',
            'name',
            'arch',
            'version',
            'epoch',
            'release',
            'summary',
            'description',
            'url',
            'time_file',
            'time_build',
            'rpm_license',
            'rpm_vendor',
            'rpm_group',
            'rpm_buildhost',
            'rpm_sourcerpm',
            'rpm_header_start',
            'rpm_header_end',
            'rpm_packager',
            'size_package',
            'size_installed',
            'size_archive',
            'location_href',
            'checksum_type',
            'checksum_value',
            )

    def __init__(self, package):
        self._pkg = package

    def __getitem__(self, k):
        # Translate the column name when a mapping exists, then defer to
        # the wrapped package entry
        return self._pkg[self.NAME_MAPS.get(k, k)]

    def keys(self):
        return self.COLUMNS

    def values(self):
        return [self[column] for column in self.keys()]


--- NEW FILE sqlitesack.py ---
#!/usr/bin/python -tt
#
# Implementation of the YumPackageSack class that uses an sqlite backend
#


import os
import os.path
import types
import repos
from packages import YumAvailablePackage
from repomd import mdUtils

# Simple subclass of YumAvailablePackage that can load 'simple headers' from
# the database when they are requested
class YumAvailablePackageSqlite(YumAvailablePackage):
    """YumAvailablePackage subclass that lazily pulls its 'simple'
    header data, file lists and changelog out of the owning sqlite sack
    the first time they are requested."""

    def __init__(self, pkgdict, repoid):
        YumAvailablePackage.__init__(self,pkgdict,repoid)
        self.sack = pkgdict.sack        # owning YumSqlitePackageSack
        self.pkgId = pkgdict.pkgId
        self.simple['id'] = self.pkgId
        self.changelog = None
    
    def loadChangelog(self):
        """Fetch this package's changelog entries from the sack."""
        self.changelog = self.sack.getChangelog(self.pkgId)

    def returnSimple(self, varname):
        """Return a simple attribute, loading the full package details
        from the database on the first miss."""
        if (not self.simple.has_key(varname) and not hasattr(self,'dbusedsimple')):
            # Make sure we only try once to get the stuff from the database
            self.dbusedsimple = 1
            details = self.sack.getPackageDetails(self.pkgId)
            self.importFromDict(details,self.simple['repoid'])

        return YumAvailablePackage.returnSimple(self,varname)

    def loadFiles(self):
        """Load the file lists from the database, at most once."""
        if (hasattr(self,'dbusedfiles')):
            return
        # Mark as loaded so we only hit the database once; previously
        # this flag was never set, so every call re-queried the database
        self.dbusedfiles = 1
        self.files = self.sack.getFiles(self.pkgId)
            
    def returnFileEntries(self, ftype='file'):
        self.loadFiles()
        return YumAvailablePackage.returnFileEntries(self,ftype)
    
    def returnFileTypes(self):
        self.loadFiles()
        return YumAvailablePackage.returnFileTypes(self)
        

class YumSqlitePackageSack(repos.YumPackageSack):
    """ Implementation of a PackageSack that uses sqlite cache instead of fully
    expanded metadata objects to provide information """

    def __init__(self, packageClass):
        # Just init as usual and create dicts to hold the open sqlite
        # database objects, keyed by repoid, one dict per metadata type
        repos.YumPackageSack.__init__(self,packageClass)
        self.primarydb = {}
        self.filelistsdb = {}
        self.otherdb = {}
        
    def buildIndexes(self):
        # We don't need these; the sqlite files carry their own indexes
        return

    def _checkIndexes(self, failure='error'):
        # No in-memory indexes to verify for the sqlite backend
        return

    def addDict(self, repoid, datatype, dataobj, callback=None):
        if datatype == 'metadata':
            if (self.primarydb.has_key(repoid)):
              return
            self.added[repoid] = ['primary']
            self.primarydb[repoid] = dataobj
        elif datatype == 'filelists':
            if (self.filelistsdb.has_key(repoid)):
              return
            self.added[repoid] = ['filelists']
            self.filelistsdb[repoid] = dataobj
            print "Added filelists for %s" % (repoid)
        elif datatype == 'otherdata':
            if (self.otherdb.has_key(repoid)):
              return
            self.added[repoid] = ['otherdata']
            self.otherdb[repoid] = dataobj
            print "Added otherdata for %s" % (repoid)
        else:
            # We can not handle this yet...
            raise "Sorry sqlite does not support %s" % (datatype)
    
    def getChangelog(self,pkgId):
        """Return the changelog entries for pkgId as a list of dicts with
        'author', 'date' and 'value' keys, searching all other-data
        caches."""
        result = []
        # Changelog data lives in the otherdata caches, not the filelists
        # caches; the original iterated self.filelistsdb, never created a
        # cursor (NameError on 'cur') and read the non-existent column
        # 'data' instead of 'date'
        for (rep,cache) in self.otherdb.items():
            cur = cache.cursor()
            cur.execute("select * from packages,changelog where packages.pkgId = %s and packages.pkgKey = changelog.pkgKey",pkgId)
            for ob in cur.fetchall():
                result.append({ 'author': ob['author'],
                                'value': ob['changelog'],
                                'date': ob['date']
                              })
        return result

    # Get all files for a certain pkgId from the filelists.xml metadata
    def getFiles(self,pkgId):
        """Return {filetype: [paths]} for pkgId from the first filelists
        cache that knows the package, or {} when none does."""
        for (rep,cache) in self.filelistsdb.items():
            found = False
            result = {}
            cur = cache.cursor()
            cur.execute("select * from packages,filelist where packages.pkgId = %s and packages.pkgKey = filelist.pkgKey", pkgId)
            for ob in cur.fetchall():
                found = True
                dirname = ob['filelist.dirname']
                # The stored format is '|name1|name2|'; splitting on '|'
                # yields an empty string at both ends, so strip exactly one
                # element from each side.  The previous [1:-2] slice
                # silently dropped the last real entry of every directory.
                filetypes = ob['filelist.filetypes'].split('|')[1:-1]
                filenames = ob['filelist.filenames'].split('|')[1:-1]
                while(filenames):
                    filename = dirname+'/'+filenames.pop()
                    filetype = filetypes.pop()
                    result.setdefault(filetype,[]).append(filename)
            if (found):
                return result    
        return {}
            
    def returnObsoletes(self):
        """Return a dict mapping the (name, arch, epoch, version, release)
        of each obsoleting package to a list of its obsoletes entries as
        (name, flags, (epoch, version, release)) tuples, gathered across
        all primary caches."""
        obsoletes = {}
        for (rep,cache) in self.primarydb.items():
            cur = cache.cursor()
            cur.execute("select * from obsoletes,packages where obsoletes.pkgKey = packages.pkgKey")
            for ob in cur.fetchall():
                # Columns are table-qualified because two tables are joined
                key = ( ob['packages.name'],ob['packages.arch'],
                        ob['packages.epoch'],ob['packages.version'],
                        ob['packages.release'])
                (n,f,e,v,r) = ( ob['obsoletes.name'],ob['obsoletes.flags'],
                                ob['obsoletes.epoch'],ob['obsoletes.version'],
                                ob['obsoletes.release'])

                obsoletes.setdefault(key,[]).append((n,f,(e,v,r)))

        return obsoletes

    def getPackageDetails(self,pkgId):
        """Return a db2class() object for the first package matching pkgId
        in any primary cache; falls through to an implicit None when no
        cache knows the package."""
        for (rep,cache) in self.primarydb.items():
            cur = cache.cursor()
            cur.execute("select * from packages where pkgId = %s",pkgId)
            for ob in cur.fetchall():
                pkg = self.db2class(ob)
                return pkg
    
    def searchProvides(self, name):
        """return list of package providing the name (any evr and flag)

        Searches the provides tables first; when *name* looks like a file
        path (starts with '/') it additionally searches the primary file
        table and the full filelists caches."""
        provides = []
        # First search the provides cache
        for (rep,cache) in self.primarydb.items():
            cur = cache.cursor()
            cur.execute("select * from provides where name = %s" , (name))
            provs = cur.fetchall()
            for res in provs:
                cur.execute("select * from packages where pkgKey = %s" , (res['pkgKey']))
                for x in cur.fetchall():
                    pkg = self.db2class(x)
                    # Add this provides to prco otherwise yum doesn't understand
                    # that it matches
                    # NOTE(review): attribute access (res.name etc.) relies
                    # on the pysqlite row type supporting it -- confirm
                    pkg.prco = {'provides': 
                      [
                      { 'name': res.name,
                        'flags': res.flags,
                        'rel': res.release,
                        'ver': res.version,
                        'epoch': res.epoch
                      }
                      ]
                    }
                    provides.append(self.pc(pkg,rep))

        # If it's not a filename, we are done
        if (name.find('/') != 0):
            return provides

        # If it is a filename, search the file table of the primary cache
        # (primary.xml only carries a subset of all files)
        for (rep,cache) in self.primarydb.items():
            cur = cache.cursor()
            cur.execute("select * from files where name = %s" , (name))
            provs = cur.fetchall()
            for res in provs:
                cur.execute("select * from packages where pkgKey = %s" , (res['pkgKey']))
                for x in cur.fetchall():
                    pkg = self.db2class(x)
                    pkg.files = {name: res['type']}
                    provides.append(self.pc(pkg,rep))

        # Finally search the full filelists caches: match on dirname and
        # then filter on the '|'-delimited filenames column
        for (rep,cache) in self.filelistsdb.items():
            cur = cache.cursor()
            (dirname,filename) = os.path.split(name)
            cur.execute("select * from filelist,packages where dirname = %s AND filelist.pkgKey = packages.pkgKey" , (dirname))
            provs = cur.fetchall()
            for res in provs:
                # If it matches the dirname, that doesnt mean it matches
                # the filename, check if it does
                if (filename and res['filelist.filenames'].find('|%s|' % filename) == -1):
                    continue
                # If it matches we only know the packageId
                pkg = self.getPackageDetails(res['packages.pkgId'])
                provides.append(self.pc(pkg,rep))
        return provides
                
    # TODO this seems a bit ugly and hackish
    def db2class(self,db,nevra_only=False):
      """Convert a database row *db* into a lightweight object shaped
      like the dicts YumAvailablePackage expects (nevra, hdrange,
      location, checksum, time, size, info, ...).

      With nevra_only=True only the cheap nevra/sack/pkgId fields are
      filled in, for bulk listing operations."""
      class tmpObject:
        pass
      y = tmpObject()
      # NOTE(review): mixes attribute access (db.name) with item access
      # (db['description']); relies on the pysqlite row supporting both
      y.nevra = (db.name,db.epoch,db.version,db.release,db.arch)
      y.sack = self
      y.pkgId = db.pkgId
      if (nevra_only):
        return y
      y.hdrange = {'start': db.rpm_header_start,'end': db.rpm_header_end}
      y.location = {'href': db.location_href,'value':''}
      y.checksum = {'pkgid': 'YES','type': db.checksum_type, 
                    'value': db.checksum_value }
      y.time = {'build': db.time_build, 'file': db.time_file }
      y.size = {'package': db.size_package, 'archive': db.size_archive, 'installed': db.size_installed }
      y.info = {'summary': db.summary, 'description': db['description'],
                'packager': db.rpm_packager, 'group': db.rpm_group,
                'buildhost': db.rpm_buildhost, 'sourcerpm': db.rpm_sourcerpm,
                'url': db.url, 'vendor': db.rpm_vendor, 'license': db.rpm_license }
      return y

    def simplePkgList(self, repoid=None):
        """returns a list of pkg tuples (n, a, e, v, r) optionally from a single repoid

        The unfiltered result is memoized on self.simplelist."""
        if (hasattr(self,'simplelist') and not repoid):
            return self.simplelist 
        simplelist = []
        for (rep,cache) in self.primarydb.items():
            # 'rep' is the repoid of this cache; the original compared
            # against the undefined name 'x', raising NameError whenever
            # a repoid filter was given
            if (repoid == None or repoid == rep):
                cur = cache.cursor()
                cur.execute("select name,epoch,version,release,arch from packages")
                simplelist.extend([(pkg.name, pkg.arch, pkg.epoch, pkg.version, pkg.release) for pkg in cur.fetchall()])
        if (not repoid):
            self.simplelist = simplelist
        return simplelist

    def returnNewestByNameArch(self, naTup=None):
        """Return the newest package (by EVR comparison) matching the
        (name, arch) tuple naTup; without naTup defer to the parent
        implementation."""
        # If naTup is set do it from the database otherwise use our parent's
        # returnNewestByNameArch
        if (not naTup):
           return repos.YumPackageSack.returnNewestByNameArch(self, naTup)

        # First find all packages that fulfill naTup
        allpkg = []
        for (rep,cache) in self.primarydb.items():
            cur = cache.cursor()
            cur.execute("select pkgId,name,epoch,version,release,arch from packages where name=%s and arch=%s",naTup)
            res = [self.pc(self.db2class(x,True),rep) for x in cur.fetchall()] 
            allpkg.extend(res)
        # Now find the newest one
        # NOTE(review): pop() raises IndexError when nothing matched --
        # callers apparently guarantee at least one match; confirm
        newest = allpkg.pop()
        for pkg in allpkg:
            (e2, v2, r2) = newest.returnEVR()
            (e,v,r) = pkg.returnEVR()
            rc = mdUtils.compareEVR((e,v,r), (e2, v2, r2))
            if (rc > 0):
                newest = pkg
        return newest

    def returnPackages(self, repoid=None):
        """Returns a list of packages, only containing nevra information,
        optionally restricted to a single repoid."""
        returnList = []
        for (rep,cache) in self.primarydb.items():
            # 'rep' is the repoid of this cache; the original compared
            # against the undefined name 'x', raising NameError whenever
            # a repoid filter was given
            if (repoid == None or repoid == rep):
                cur = cache.cursor()
                cur.execute("select pkgId,name,epoch,version,release,arch from packages")
                res = [self.pc(self.db2class(x,True),rep) for x in cur.fetchall()] 
                returnList.extend(res)
        return returnList

    def searchNevra(self, name=None, epoch=None, ver=None, rel=None, arch=None):        
        """return list of pkgobjects matching the nevra requested"""
        returnList = []
        nevra = (name,epoch,ver,rel,arch)
        # Search all repositories
        # NOTE(review): 'nevra' above is unused, and a None field becomes
        # "= NULL" in SQL which never matches -- all five fields appear to
        # be expected; verify against callers
        for (rep,cache) in self.primarydb.items():
            cur = cache.cursor()
            cur.execute("select * from packages WHERE name = %s AND epoch = %s AND version = %s AND release = %s AND arch = %s" , (name,epoch,ver,rel,arch))
            returnList.extend([self.pc(self.db2class(x),rep) for x in cur.fetchall()]
            )
        return returnList

Index: __init__.py
===================================================================
RCS file: /home/groups/yum/cvs/yum/yum/__init__.py,v
retrieving revision 1.84
retrieving revision 1.85
diff -u -r1.84 -r1.85
--- __init__.py	14 Feb 2005 06:00:20 -0000	1.84
+++ __init__.py	25 Feb 2005 19:30:23 -0000	1.85
@@ -51,6 +51,8 @@
         depsolve.Depsolve.__init__(self)
         self.localdbimported = 0
         self.repos = repos.RepoStorage() # class of repositories
+        if (not self.repos.sqlite):
+            self.log(1,"Warning, could not load sqlite, falling back to pickle")
     
     def log(self, value, msg):
         """dummy log stub"""

Index: repos.py
===================================================================
RCS file: /home/groups/yum/cvs/yum/yum/repos.py,v
retrieving revision 1.65
retrieving revision 1.66
diff -u -r1.65 -r1.66
--- repos.py	21 Feb 2005 15:46:35 -0000	1.65
+++ repos.py	25 Feb 2005 19:30:23 -0000	1.66
@@ -90,9 +90,23 @@
     def __init__(self):
         self.repos = {} # list of repos by repoid pointing a repo object 
                         # of repo options/misc data
-        self.pkgSack = YumPackageSack(YumAvailablePackage)
         self.callback = None # progress callback used for populateSack() for importing the xml files
         self.cache = 0
+        # Check to see if we can import sqlite stuff
+        try:
+            import sqlitecache
+            import sqlitesack
+        except ImportError:
+            self.sqlite = False
+        else:
+            self.sqlite = True
+            self.sqlitecache = sqlitecache
+            
+        if (self.sqlite):
+            self.pkgSack = sqlitesack.YumSqlitePackageSack(sqlitesack.YumAvailablePackageSqlite)
+        else:
+            self.pkgSack = YumPackageSack(YumAvailablePackage)
+            
     
     def __str__(self):
         return self.repos.keys()
@@ -194,10 +208,13 @@
             data = ['metadata', 'filelists', 'otherdata']
         else:
             data = [ with ]
-        
+         
         for repo in myrepos:
             if not hasattr(repo, 'cacheHandler'):
-                repo.cacheHandler = mdcache.RepodataParser(storedir=repo.cachedir, callback=callback)
+                if (self.sqlite):
+                    repo.cacheHandler = self.sqlitecache.RepodataParserSqlite(storedir=repo.cachedir, callback=callback)
+                else:
+                    repo.cacheHandler = mdcache.RepodataParser(storedir=repo.cachedir, callback=callback)
             for item in data:
                 if self.pkgSack.added.has_key(repo.id):
                     if item in self.pkgSack.added[repo.id]:




More information about the Yum-cvs-commits mailing list