[yum-commits] urlgrabber/grabber.py urlgrabber/mirror.py

skvidal at osuosl.org skvidal at osuosl.org
Fri Sep 3 19:45:30 UTC 2010


 urlgrabber/grabber.py |   26 ++++++++++++++++++--------
 urlgrabber/mirror.py  |    5 +++--
 2 files changed, 21 insertions(+), 10 deletions(-)

New commits:
commit b8b51530f3522a17e84f7319f773d4fadc887c9e
Author: Toshio Kuratomi <toshio at fedoraproject.org>
Date:   Fri Sep 3 15:45:10 2010 -0400

    Patch to allow url arguments to be of type unicode
    
    Description of problem:
    
    I am working on some code that handed a url as a unicode string to urlgrabber.
    This led to a problem with two sections of code.  One is in mirror.py where
    we're only allowing byte strings.  The other is in grabber.py which hands off
    the unicode strings to pycurl.  pycurl only understands byte strings and raises
    an error if a unicode string is given to it.
    
    We need to transform any unicode strings into byte strings before they get to
    curl.  Since we're doing that, we might as well allow unicode strings in
    mirror.py and transform those to byte strings as well.

diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 5e40d45..decefed 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -545,6 +545,22 @@ def _(st):
 #                 END MODULE INITIALIZATION
 ########################################################################
 
+########################################################################
+#                 UTILITY FUNCTIONS
+########################################################################
+
+# These functions are meant to be utilities for the urlgrabber library to use.
+
+def _to_utf8(obj, errors='replace'):
+    '''convert 'unicode' to an encoded utf-8 byte string '''
+    # stolen from yum.i18n
+    if isinstance(obj, unicode):
+        obj = obj.encode('utf-8', errors)
+    return obj
+
+########################################################################
+#                 END UTILITY FUNCTIONS
+########################################################################
 
 
 class URLGrabError(IOError):
@@ -680,6 +696,7 @@ class URLParser:
           opts.quote = 0     --> do not quote it
           opts.quote = None  --> guess
         """
+        url = _to_utf8(url)
         quote = opts.quote
         
         if opts.prefix:
@@ -1274,7 +1291,7 @@ class PyCurlFileObject(object):
         #posts - simple - expects the fields as they are
         if opts.data:
             self.curl_obj.setopt(pycurl.POST, True)
-            self.curl_obj.setopt(pycurl.POSTFIELDS, self._to_utf8(opts.data))
+            self.curl_obj.setopt(pycurl.POSTFIELDS, _to_utf8(opts.data))
             
         # our url
         self.curl_obj.setopt(pycurl.URL, self.url)
@@ -1693,13 +1710,6 @@ class PyCurlFileObject(object):
             return True
         return False
         
-    def _to_utf8(self, obj, errors='replace'):
-        '''convert 'unicode' to an encoded utf-8 byte string '''
-        # stolen from yum.i18n
-        if isinstance(obj, unicode):
-            obj = obj.encode('utf-8', errors)
-        return obj
-        
     def read(self, amt=None):
         self._fill_buffer(amt)
         if amt is None:
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
index dad410b..8731aed 100644
--- a/urlgrabber/mirror.py
+++ b/urlgrabber/mirror.py
@@ -90,7 +90,7 @@ CUSTOMIZATION
 import random
 import thread  # needed for locking to make this threadsafe
 
-from grabber import URLGrabError, CallbackObject, DEBUG
+from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
 
 def _(st): 
     return st
@@ -263,7 +263,8 @@ class MirrorGroup:
     def _parse_mirrors(self, mirrors):
         parsed_mirrors = []
         for m in mirrors:
-            if type(m) == type(''): m = {'mirror': m}
+            if isinstance(m, basestring):
+                m = {'mirror': _to_utf8(m)}
             parsed_mirrors.append(m)
         return parsed_mirrors
     


More information about the Yum-commits mailing list