[yum-commits] urlgrabber/grabber.py
skvidal at osuosl.org
skvidal at osuosl.org
Wed Nov 11 23:14:40 UTC 2009
urlgrabber/grabber.py | 26 +++++++++++++++++++++++---
1 file changed, 23 insertions(+), 3 deletions(-)
New commits:
commit 179fc2fcb291a7a1951b2bdda94f1ff443e83591
Author: Seth Vidal <skvidal at fedoraproject.org>
Date: Wed Nov 11 18:13:45 2009 -0500
When we follow a redirect via the Location header we might break, especially if we are redirected wrongly.
I think this fixes some of the zaniness.
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 7b7f979..fc28922 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -1053,6 +1053,7 @@ class PyCurlFileObject():
self._prog_running = False
self._error = (None, None)
self.size = 0
+ self._hdr_ended = False
self._do_open()
@@ -1087,8 +1088,12 @@ class PyCurlFileObject():
def _hdr_retrieve(self, buf):
if self._over_max_size(cur=len(self._hdr_dump),
max_size=self.opts.max_header_size):
- return -1
+ return -1
try:
+ if self._hdr_ended:
+ self._hdr_dump = ''
+ self._hdr_ended = False
+
self._hdr_dump += buf
# we have to get the size before we do the progress obj start
# but we can't do that w/o making it do 2 connects, which sucks
@@ -1104,7 +1109,16 @@ class PyCurlFileObject():
s = parse150(buf)
if s:
self.size = int(s)
-
+
+ if buf.lower().find('location') != -1:
+ location = ':'.join(buf.split(':')[1:])
+ location = location.strip()
+ self.scheme = urlparse.urlsplit(location)[0]
+ self.url = location
+
+ if len(self._hdr_dump) != 0 and buf == '\n\n':
+ self._hdr_ended = True
+
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
@@ -1136,6 +1150,7 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
+ self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
if DEBUG:
self.curl_obj.setopt(pycurl.VERBOSE, True)
@@ -1291,7 +1306,12 @@ class PyCurlFileObject():
raise err
elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
+ if self.scheme in ['http', 'https']:
+ msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
+ elif self.scheme in ['ftp']:
+ msg = 'FTP Error %s : %s ' % (self.http_code, self.url)
+ else:
+ msg = "Unknown Error: URL=%s , scheme=%s" % (self.url, self.scheme)
else:
msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
code = errcode
More information about the Yum-commits
mailing list