[Yum-devel] [PATCH] Fix main speed issue in to_xml(), slows down new createrepo a lot. BZ 716235.
Zdenek Pavlas
zpavlas at redhat.com
Thu Nov 15 11:02:52 UTC 2012
> + return unicode(item, 'utf-8')
ACK, when amended with the following:
> bad_small_bytes = range(0, 8) + [11, 12] + range(14, 32)
Byte 0x08 should be removed as well: range(0, 8) stops at 0x07, so the old list lets 0x08 through.
diff --git a/yum/misc.py b/yum/misc.py
index 072c99b..9d3be16 100644
--- a/yum/misc.py
+++ b/yum/misc.py
@@ -897,6 +897,12 @@ def seq_max_split(seq, max_entries):
ret.append(seq[beg:])
return ret
+_bad_small_bytes = {}
+for i in range(0x20):
+ if chr(i) not in '\t\n\r':
+ _bad_small_bytes[i] = None
+del i
+
def _ugly_utf8_string_hack(item):
"""hands back a unicoded string"""
# this is backward compat for handling non-utf8 filenames
@@ -911,7 +917,7 @@ def _ugly_utf8_string_hack(item):
# this handles any bogon formats we see
try:
- return unicode(item, 'utf-8')
+ return unicode(item, 'utf-8').translate(_bad_small_bytes)
except UnicodeError:
encodings = ['iso-8859-1', 'iso-8859-15', 'iso-8859-2']
for enc in encodings:
@@ -932,9 +938,8 @@ def _ugly_utf8_string_hack(item):
# we allow high bytes, if it passed the utf8 check above. Eg.
# good chars = #x9 | #xA | #xD | [#x20-...]
newitem = ''
- bad_small_bytes = range(0, 8) + [11, 12] + range(14, 32)
for char in item:
- if ord(char) in bad_small_bytes:
+ if ord(char) in _bad_small_bytes:
pass # Just ignore these bytes...
elif ord(char) > 127:
newitem = newitem + '?' # byte by byte equiv of escape
More information about the Yum-devel mailing list