[yum-git] Branch 'yum-3_2_X' - 2 commits - yum/misc.py
Seth Vidal
skvidal at linux.duke.edu
Mon Sep 15 20:48:10 UTC 2008
yum/misc.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 49 insertions(+), 1 deletion(-)
New commits:
commit 14a0123336784314311f6ce019c40cdbf7b51088
Merge: 876bdf5... 3bde1ea...
Author: Seth Vidal <skvidal at fedoraproject.org>
Date: Mon Sep 15 16:47:00 2008 -0400
Merge branch 'yum-3_2_X' of ssh://login.linux.duke.edu/home/groups/yum/git/yum into yum-3_2_X
* 'yum-3_2_X' of ssh://login.linux.duke.edu/home/groups/yum/git/yum:
Make sure we get two packages with the same nevra, but different pkgKey's
Fix the testcases to not die due to missing persistdir
Speedup for includepkgs, use a set for main in test
Don't use parsePacakges where just returnPackages() dtrt. now
Make rpmdb.returnPackages dtrt. like sqlitesack, always doing the minimization
Patterns max constant change, adding PATTERNS_INDEXED_MAX and comments.
commit 876bdf5559ed483ce75446eb6112742b0523cab8
Author: Seth Vidal <skvidal at fedoraproject.org>
Date: Mon Sep 15 16:45:13 2008 -0400
- add crazy utf8 conversion routine for outputting xml for metadata
- if/when non-utf8 filenames/metadata content becomes good and illegal then
we should be able to clean this all out
diff --git a/yum/misc.py b/yum/misc.py
index 2f2b27b..2a64bea 100644
--- a/yum/misc.py
+++ b/yum/misc.py
@@ -621,10 +621,58 @@ def seq_max_split(seq, max_entries):
num -= max_entries
ret.append(seq[beg:])
return ret
+
+def _ugly_utf8_string_hack(item):
+    """Return item as an XML-safe UTF-8 byte string.
+
+    This is backward compat for handling non-utf8 filenames and content
+    inside packages. :(  Content that xml can cope with once re-encoded,
+    but that isn't really kosher.
+
+    item may be:
+      - None: '' is returned.
+      - a unicode object: returned unchanged (and unfiltered).
+      - a byte string: tried as ASCII, then UTF-8, then a few ISO-8859
+        charsets; the first charset that round-trips is re-encoded as
+        UTF-8.  Control bytes that XML 1.0 forbids are then dropped.
+
+    Side effect: prints a notice to stdout when a non-UTF-8 charset had
+    to be used.
+    """
+    # if we're anything obvious - do them first
+    if item is None:
+        return ''
+    elif isinstance(item, unicode):
+        return item
+
+    # this handles any bogon formats we see
+    # du records that item is known-good text, so any high bytes in it
+    # belong to UTF-8 sequences and must be kept.
+    du = False
+    try:
+        unicode(item, 'ascii')
+        du = True
+    except UnicodeError:
+        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
+        for enc in encodings:
+            try:
+                x = unicode(item, enc)
+            except UnicodeError:
+                continue
+            if x.encode(enc) == item:
+                if enc != 'utf-8':
+                    print '\n%s encoding on %s\n' % (enc, item)
+                # Do not return here: fall through so the control-byte
+                # filter below also runs on non-ASCII input.  Stripping
+                # bytes < 0x20 cannot damage UTF-8, whose multi-byte
+                # sequences only use bytes >= 0x80.
+                item = x.encode('utf-8')
+                du = True
+                break
+
+    # Kill bytes (or libxml will die) not in the small byte portion of:
+    # http://www.w3.org/TR/REC-xml/#NT-Char
+    # we allow high bytes, if it passed the utf8 check above. Eg.
+    # good chars = #x9 | #xA | #xD | [#x20-...]
+    # so the bad ones are 0-8, 11, 12 and 14-31 (range() excludes its end,
+    # hence range(0, 9) to cover byte 8).
+    bad_small_bytes = set(range(0, 9) + [11, 12] + range(14, 32))
+    kept = []
+    for char in item:
+        byte = ord(char)
+        if byte in bad_small_bytes:
+            continue # Just ignore these bytes...
+        elif not du and byte > 127:
+            kept.append('?') # byte by byte equiv of escape
+        else:
+            kept.append(char)
+    return ''.join(kept)
+
def to_xml(item, attrib=False):
import xml.sax.saxutils
- item = to_utf8(item) # verify this does enough conversion
+ item = _ugly_utf8_string_hack(item)
+ item = to_utf8(item)
item = item.rstrip()
if attrib:
item = xml.sax.saxutils.escape(item, entities={'"':"&quot;"})
More information about the Yum-cvs-commits
mailing list