[Yum-devel] quick & dirty pysqlite2 experiments
Pekka Pietikainen
pp at ee.oulu.fi
Wed Feb 8 23:07:08 UTC 2006
After being dared to submit patches on #fedora-devel I decided to quickly
do just that. These patches will eat your dog etc. and need quite a bit of
thought, but just to get some ideas out...
Cache creation time went down from 20 to 15 secs with this, the SELECT stuff
did slow down a bit, but my benchmark was probably pretty bad (just a time
yum list), went 6 to 9 secs for whatever reason.
TODO: (at least)
- Check all the select statements. In general no sql escaping needs to be done
manually, just use ? in the SQL and give a tuple of parameters to the
execute.
- For mass inserts, executemany() could be worth a try. Something like:
def insertHashMany(self,table,keys, values_iterator, cursor):
"""Insert several hashes into a database table"""
query = "INSERT INTO %s (%s) VALUES (%s)" % (table,','.join(keys),",".join(len(keys)*'?'))
cursor.executemany(query, values_iterator())
return cursor.lastrowid
keys=('pkgKey','name','flags','epoch','version','release')
for ptype in package.prco:
def prco_iterator():
for entry in package.prco[ptype]:
yield((pkgKey, entry.get('name'),
entry.get('flags'), entry.get('epoch'),
entry.get('ver'), entry.get('rel')))
self.insertHashMany(ptype,keys, prco_iterator, cur)
(didn't seem to make things much faster, maybe 14.5 vs. 15 secs for a 4000
package insert, I didn't convert all of the code though.
- Just make the db2class stuff a row factory, and drop that hacky foo-class :D
- I just noticed the LIKE '%%?%%' changes I did are totally fubar :D
Not sure if you can do
execute('SELECT ... WHERE x LIKE ?', ('%FOO%'))
if that works it's the most robust way, definately.
All in all, an upgrade should allow for cleaner and faster code, but it
will require some work and probably refactoring too.
Please Cc: on replies, I'm not on the list
diff -urN orig/sqlitecache.py pysqlite2/sqlitecache.py
--- orig/sqlitecache.py 2006-02-09 00:03:00.000000000 +0200
+++ pysqlite2/sqlitecache.py 2006-02-09 00:15:22.000000000 +0200
@@ -19,7 +19,8 @@
# - Add support for multiple checksums per rpm (not required)
import os
-import sqlite
+#import sqlite
+from pysqlite2 import dbapi2 as sqlite
import time
import mdparser
from sqlitesack import encodefiletypelist,encodefilenamelist
@@ -46,6 +47,7 @@
matches the required dbversion"""
db = sqlite.connect(filename)
cur = db.cursor()
+ cur.row_factory = sqlite.Row
cur.execute("select * from db_info")
info = cur.fetchone()
# If info is not in there this is an incompelete cache file
@@ -195,23 +197,8 @@
keys = hash.keys()
values = hash.values()
- query = "INSERT INTO %s (" % (table)
- query += ",".join(keys)
- query += ") VALUES ("
- # Quote all values by replacing None with NULL and ' by ''
- for x in values:
- if (x == None):
- query += "NULL,"
- else:
- try:
- query += "'%s'," % (x.replace("'","''"))
- except AttributeError:
- query += "'%s'," % x
- # Remove the last , from query
- query = query[:-1]
- # And replace it with )
- query += ")"
- cursor.execute(query.encode('utf8'))
+ query = "INSERT INTO %s (%s) VALUES (%s)" % (table,','.join(keys),",".join(len(keys)*'?'))
+ cursor.execute(query, tuple(values))
return cursor.lastrowid
def makeSqliteCacheFile(self, filename, cachetype):
@@ -319,6 +306,7 @@
# We start be removing the old db_info, as it is no longer valid
cur = db.cursor()
+ cur.row_factory = sqlite.Row
cur.execute("DELETE FROM db_info")
# First create a list of all pkgIds that are in the database now
@@ -372,7 +360,7 @@
for table in deltables:
cur.execute("DELETE FROM "+table+ " where pkgKey in %s" % delpkgs)
- cur.execute("INSERT into db_info (dbversion,checksum) VALUES (%s,%s)",
+ cur.execute("INSERT into db_info (dbversion,checksum) VALUES (?,?)",
(dbversion,checksum))
db.commit()
self.log(2, "Added %s new packages, deleted %s old in %.2f seconds" % (
diff -urN orig/sqlitesack.py pysqlite2/sqlitesack.py
--- orig/sqlitesack.py 2006-02-09 00:02:53.000000000 +0200
+++ pysqlite2/sqlitesack.py 2006-02-09 00:15:22.000000000 +0200
@@ -25,7 +25,20 @@
import repos
from packages import YumAvailablePackage
from repomd import mdUtils, mdErrors
+from pysqlite2 import dbapi2 as sqlite
+class foo:
+ def __init__(self):
+ pass
+ def __getitem__(self, item):
+ return getattr(self, item)
+
+def class_factory(cursor, row):
+ d = foo()
+ for idx, col in enumerate(cursor.description):
+ setattr(d,col[0],row[idx])
+ return d
+
# Simple subclass of YumAvailablePackage that can load 'simple headers' from
# the database when they are requested
class YumAvailablePackageSqlite(YumAvailablePackage):
@@ -128,7 +141,7 @@
cur = cache.cursor()
cur.execute("select changelog.date as date,\
changelog.author as author,\
- changelog.changelog as changelog from packages,changelog where packages.pkgId = %s and packages.pkgKey = changelog.pkgKey",pkgId)
+ changelog.changelog as changelog from packages,changelog where packages.pkgId = ? and packages.pkgKey = changelog.pkgKey",pkgId)
for ob in cur.fetchall():
result.append(( ob['date'],
ob['author'],
@@ -144,10 +157,10 @@
for (rep, cache) in self.primarydb.items():
cur = cache.cursor()
for prco in result.keys():
- cur.execute("select %s.name as name, %s.version as version,\
- %s.release as release, %s.epoch as epoch, %s.flags as flags\
- from packages,%s\
- where packages.pkgId = %s and packages.pkgKey = %s.pkgKey", prco, prco, prco, prco, prco, prco, pkgId, prco)
+ cur.execute("select ?.name as name, ?.version as version,\
+ ?.release as release, ?.epoch as epoch, ?.flags as flags\
+ from packages,?\
+ where packages.pkgId = ? and packages.pkgKey = ?.pkgKey", prco, prco, prco, prco, prco, prco, pkgId, prco)
for ob in cur.fetchall():
name = ob['name']
version = ob['version']
@@ -166,7 +179,7 @@
cur.execute("select filelist.dirname as dirname,\
filelist.filetypes as filetypes,\
filelist.filenames as filenames from packages,filelist\
- where packages.pkgId = %s and packages.pkgKey = filelist.pkgKey", pkgId)
+ where packages.pkgId = ? and packages.pkgKey = filelist.pkgKey", pkgId)
for ob in cur.fetchall():
found = True
dirname = ob['dirname']
@@ -192,7 +205,7 @@
quotename = name.replace("'","''")
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- cur.execute("select DISTINCT packages.pkgId as pkgId from provides,packages where provides.name LIKE '%%%s%%' AND provides.pkgKey = packages.pkgKey" % quotename)
+ cur.execute("select DISTINCT packages.pkgId as pkgId from provides,packages where provides.name LIKE '%%%s%%' AND provides.pkgKey = packages.pkgKey" % quotename)
for ob in cur.fetchall():
if (self.excludes[rep].has_key(ob['pkgId'])):
continue
@@ -211,9 +224,9 @@
filelist.filetypes as filetypes,\
filelist.filenames as filenames \
from packages,filelist where \
- (filelist.dirname LIKE '%%%s%%' \
- OR (filelist.dirname LIKE '%%%s%%' AND\
- filelist.filenames LIKE '%%%s%%'))\
+ (filelist.dirname LIKE '%%?%%' \
+ OR (filelist.dirname LIKE '%%?%%' AND\
+ filelist.filenames LIKE '%%?%%'))\
AND (filelist.pkgKey = packages.pkgKey)" % (quotename,dir,filename))
for ob in cur.fetchall():
# Check if it is an actual match
@@ -235,6 +248,7 @@
obsoletes = {}
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
+ cur.row_factory = sqlite.Row
cur.execute("select packages.name as name,\
packages.pkgId as pkgId,\
packages.arch as arch, packages.epoch as epoch,\
@@ -262,7 +276,7 @@
def getPackageDetails(self,pkgId):
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- cur.execute("select * from packages where pkgId = %s",pkgId)
+ cur.execute("select * from packages where pkgId = ?",pkgId)
for ob in cur.fetchall():
pkg = self.db2class(ob)
return pkg
@@ -272,7 +286,7 @@
results = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
- cur.execute("select * from %s where name = %s" , (prcotype, name))
+ cur.execute("select * from ? where name = ?" , (prcotype, name))
prcos = cur.fetchall()
for res in prcos:
cur.execute("select * from packages where pkgKey = %s" , (res['pkgKey']))
@@ -371,7 +385,7 @@
'value': db.checksum_value }
y.time = {'build': db.time_build, 'file': db.time_file }
y.size = {'package': db.size_package, 'archive': db.size_archive, 'installed': db.size_installed }
- y.info = {'summary': db.summary, 'description': db['description'],
+ y.info = {'summary': db.summary, 'description': db.description,
'packager': db.rpm_packager, 'group': db.rpm_group,
'buildhost': db.rpm_buildhost, 'sourcerpm': db.rpm_sourcerpm,
'url': db.url, 'vendor': db.rpm_vendor, 'license': db.rpm_license }
@@ -383,11 +397,12 @@
for (rep,cache) in self.primarydb.items():
if (repoid == None or repoid == rep):
cur = cache.cursor()
+ cur.row_factory=sqlite.Row
cur.execute("select pkgId,name,epoch,version,release,arch from packages")
for pkg in cur.fetchall():
- if (self.excludes[rep].has_key(pkg.pkgId)):
+ if (self.excludes[rep].has_key(pkg['pkgId'])):
continue
- simplelist.append((pkg.name, pkg.arch, pkg.epoch, pkg.version, pkg.release))
+ simplelist.append((pkg['name'], pkg['arch'], pkg['epoch'], pkg['version'], pkg['release']))
return simplelist
@@ -402,6 +417,7 @@
allpkg = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
+ cur.row_factory=class_factory
cur.execute("select pkgId,name,epoch,version,release,arch from packages where name=%s and arch=%s",naTup)
for x in cur.fetchall():
if (self.excludes[rep].has_key(x.pkgId)):
@@ -423,6 +439,7 @@
allpkg = []
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
+ cur.row_factory = class_factory
cur.execute("select pkgId,name,epoch,version,release,arch from packages where name=%s", name)
for x in cur.fetchall():
if (self.excludes[rep].has_key(x.pkgId)):
@@ -440,6 +457,7 @@
for (rep,cache) in self.primarydb.items():
if (repoid == None or repoid == rep):
cur = cache.cursor()
+ cur.row_factory = class_factory
cur.execute("select pkgId,name,epoch,version,release,arch from packages")
for x in cur.fetchall():
if (self.excludes[rep].has_key(x.pkgId)):
@@ -472,6 +490,7 @@
# Search all repositories
for (rep,cache) in self.primarydb.items():
cur = cache.cursor()
+ cur.row_factory = class_factory
#cur.execute("select * from packages WHERE name = %s AND epoch = %s AND version = %s AND release = %s AND arch = %s" , (name,epoch,ver,rel,arch))
cur.execute(q)
for x in cur.fetchall():
@@ -490,6 +509,7 @@
querystring = tmpstring[:last]
for (rep, cache) in self.primarydb.items():
cur = cache.cursor()
+ cur.row_factory = class_factory
cur.execute(querystring)
for x in cur.fetchall():
obj = self.pc(self.db2class(x), rep)
More information about the Yum-devel
mailing list