[yum-commits] urlgrabber/grabber.py
skvidal at osuosl.org
Tue Sep 22 22:07:41 UTC 2009
urlgrabber/grabber.py | 72 ++++++++++++++++++++++++++++++++++++++------------
1 file changed, 55 insertions(+), 17 deletions(-)
New commits:
commit 725b104a9669125c26be307e1977ff7f9ed99f81
Author: Seth Vidal <skvidal at fedoraproject.org>
Date: Tue Sep 22 18:04:47 2009 -0400
handle endless-data problems safely:
"A malicious server could cause libcurl
to download an infinite amount of data, potentially causing all of memory or
disk to be filled. Setting the CURLOPT_MAXFILESIZE_LARGE option is not
sufficient to guard against this. Instead, the app should monitor the
amount of data received within the write or progress callback and abort once
the limit is reached."
Doing this meant restructuring a good bit of the error handling, but it now works for both
endless headers and endless content.
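The fix leans on libcurl's callback contract: a header, write, or progress callback that returns an unexpected value makes libcurl abort the transfer, which pycurl surfaces as a pycurl.error the grabber can translate into a URLGrabError. Here is a minimal standalone sketch of that pattern, separate from the patch itself (Python 2 style to match grabber.py's era; the URL and the 10MB cap are illustrative only):

    import pycurl

    MAX_SIZE = 10 * 1024 * 1024   # illustrative cap: 10MB
    received = [0]                # mutable cell so the callback can update it

    def write_cb(buf):
        received[0] += len(buf)
        if received[0] > MAX_SIZE:
            # returning anything other than len(buf) tells libcurl to
            # abort; perform() then raises pycurl.error with code 23
            # (CURLE_WRITE_ERROR)
            return -1
        # a real client would write buf to its destination here

    c = pycurl.Curl()
    c.setopt(pycurl.URL, 'http://example.com/somefile')
    c.setopt(pycurl.WRITEFUNCTION, write_cb)
    try:
        c.perform()
    except pycurl.error, e:
        if e.args[0] == 23:
            print 'aborted: server sent more than %d bytes' % MAX_SIZE
    c.close()

The patch below applies the same idea in _hdr_retrieve and _progress_update, with one twist: it records its own (errcode, message) pair in self._error first, so the exception handler can tell a size abort (reported as 63, matching libcurl's CURLE_FILESIZE_EXCEEDED) apart from an ordinary write failure.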
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 8a50d22..643c1c5 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -875,7 +875,10 @@ class URLGrabberOptions:
         self.ssl_cert = None # client cert
         self.ssl_cert_type = 'PEM' # (or DER)
         self.ssl_key_pass = None # password to access the key
-
+        self.size = None # if we know how big the thing we're getting is going
+                         # to be.
+        self.max_header_size = 2097152 #2mb seems reasonable for maximum header size
+

     def __repr__(self):
         return self.format()
@@ -1476,6 +1479,7 @@ class PyCurlFileObject():
         self._amount_read = 0
         self._reget_length = 0
         self._prog_running = False
+        self._error = (None, None)
         self.size = 0
         self._do_open()
@@ -1509,6 +1513,9 @@ class PyCurlFileObject():
             return -1

     def _hdr_retrieve(self, buf):
+        if self._over_max_size(cur=len(self._hdr_dump),
+                               max=self.opts.max_header_size):
+            return -1
         try:
             self._hdr_dump += buf
             # we have to get the size before we do the progress obj start
@@ -1646,10 +1653,16 @@ class PyCurlFileObject():
             # to other URLGrabErrors from
             # http://curl.haxx.se/libcurl/c/libcurl-errors.html
             # this covers e.args[0] == 22 pretty well - which will be common
-            code = self.http_code
-            if e.args[0] == 23 and code >= 200 and code < 299:
+
+            code = self.http_code
+            errcode = e.args[0]
+            if self._error[0]:
+                errcode = self._error[0]
+
+            if errcode == 23 and code >= 200 and code < 299:
                 err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
                 err.url = self.url
+
                 # this is probably wrong but ultimately this is what happens
                 # we have a legit http code and a pycurl 'writer failed' code
                 # which almost always means something aborted it from outside
@@ -1658,22 +1671,22 @@ class PyCurlFileObject():
                 # figure out what aborted the pycurl process FIXME
                 raise KeyboardInterrupt

-            elif e.args[0] == 28:
+            elif errcode == 28:
                 err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
                 err.url = self.url
                 raise err
-            elif e.args[0] == 35:
+            elif errcode == 35:
                 msg = _("problem making ssl connection")
                 err = URLGrabError(14, msg)
                 err.url = self.url
                 raise err
-            elif e.args[0] == 37:
+            elif errcode == 37:
                 msg = _("Could not open/read %s") % (self.url)
                 err = URLGrabError(14, msg)
                 err.url = self.url
                 raise err

-            elif e.args[0] == 42:
+            elif errcode == 42:
                 err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
                 err.url = self.url
                 # this is probably wrong but ultimately this is what happens
@@ -1684,23 +1697,32 @@ class PyCurlFileObject():
                 # figure out what aborted the pycurl process FIXME
                 raise KeyboardInterrupt

-            elif e.args[0] == 58:
+            elif errcode == 58:
                 msg = _("problem with the local client certificate")
                 err = URLGrabError(14, msg)
                 err.url = self.url
                 raise err
-            elif e.args[0] == 60:
+            elif errcode == 60:
                 msg = _("client cert cannot be verified or client cert incorrect")
                 err = URLGrabError(14, msg)
                 err.url = self.url
                 raise err
+            elif errcode == 63:
+                if self._error[1]:
+                    msg = self._error[1]
+                else:
+                    msg = _("Max download size exceeded on %s") % (self.url)
+                err = URLGrabError(14, msg)
+                err.url = self.url
+                raise err
+

             elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
                 msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
             else:
-                msg = 'PYCURL ERROR %s - "%s"' % (e.args[0], str(e.args[1]))
-                code = e.args[0]
+                msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+                code = errcode

             err = URLGrabError(14, msg)
             err.code = code
             err.exception = e
@@ -1925,13 +1947,29 @@ class PyCurlFileObject():
             return

     def _progress_update(self, download_total, downloaded, upload_total, uploaded):
-        try:
-            if self._prog_running:
-                downloaded += self._reget_length
-                self.opts.progress_obj.update(downloaded)
-        except KeyboardInterrupt:
-            return -1
+        if self._over_max_size(cur=self._amount_read):
+            return -1
+        try:
+            if self._prog_running:
+                downloaded += self._reget_length
+                self.opts.progress_obj.update(downloaded)
+        except KeyboardInterrupt:
+            return -1
+
+    def _over_max_size(self, cur, max=None):
+
+        if not max:
+            max = self.size
+        if self.opts.size: # if we set an opts size use that, no matter what
+            max = self.opts.size
+        if cur > max + max*.10:
+
+            msg = _("Downloaded more than max size for %s: %s > %s") \
+                   % (self.url, cur, max)
+            self._error = (63, msg)
+            return True
+        return False

     def _to_utf8(self, obj, errors='replace'):
         '''convert 'unicode' to an encoded utf-8 byte string '''
         # stolen from yum.i18n
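For callers, the upshot is two new options: max_header_size, which caps response headers at 2MB by default, and size, an expected byte count that arms the same check for the body. A usage sketch, assuming urlgrabber at this revision (the URL, filename, and byte count are made up; size is passed through to URLGrabberOptions like any other keyword option):

    from urlgrabber.grabber import urlgrab, URLGrabError

    try:
        # size is the expected length in bytes, e.g. taken from repo
        # metadata; the transfer aborts once it runs more than 10% over
        urlgrab('http://example.com/repodata/primary.sqlite.bz2',
                'primary.sqlite.bz2', size=2500000)
    except URLGrabError, e:
        print 'download failed: %s' % e

Note the grace margin in _over_max_size: with size=2500000 the abort only fires past 2750000 bytes (max + max*.10), so a server that is slightly off about its content length is not punished.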