[yum-commits] 2 commits - urlgrabber/grabber.py

skvidal at osuosl.org skvidal at osuosl.org
Wed Jul 29 20:24:19 UTC 2009


 urlgrabber/grabber.py |   81 +++++++++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 37 deletions(-)

New commits:
commit d57b5943de46544398cad2e03cfe01f9c841e150
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Wed Jul 29 16:22:02 2009 -0400

    - add range support
    - get rid of the .part file thing - it makes range-regets harder than they need to be
    - make sure regets behave

diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 16f2428..75dbbb5 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -1460,6 +1460,7 @@ class PyCurlFileObject():
         self._ttime = time.time()
         self._tsize = 0
         self._amount_read = 0
+        self._reget_length = 0
         self._prog_running = False
         self.size = 0
         self._do_open()
@@ -1475,15 +1476,16 @@ class PyCurlFileObject():
         raise AttributeError, name
 
     def _retrieve(self, buf):
-        if self._amount_read == 0:
+        if not self._prog_running:
             if self.opts.progress_obj:
+                size  = self.size + self._reget_length
                 self.opts.progress_obj.start(self._prog_reportname, 
                                              urllib.unquote(self.url), 
                                              self._prog_basename, 
-                                             size=self.size,
+                                             size=size,
                                              text=self.opts.text)
                 self._prog_running = True
-                self.opts.progress_obj.update(0)
+                self.opts.progress_obj.update(self._amount_read)
 
         self._amount_read += len(buf)
         self.fo.write(buf)
@@ -1538,33 +1540,44 @@ class PyCurlFileObject():
             self.curl_obj.setopt(pycurl.VERBOSE, True)
         if opts.user_agent:
             self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
-        if opts.http_headers:
-            headers = []
-            for (tag, content) in opts.http_headers:
-                headers.append('%s:%s' % (tag, content))
-            self.curl_obj.setopt(pycurl.HTTPHEADER, headers)
         
         # maybe to be options later
         self.curl_obj.setopt(pycurl.FOLLOWLOCATION, 1)
         self.curl_obj.setopt(pycurl.MAXREDIRS, 5)
         self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, 30)
-
+        
+        # timeouts
         timeout = 300
         if opts.timeout:
             timeout = int(opts.timeout)
         self.curl_obj.setopt(pycurl.TIMEOUT, timeout)
+        # ssl options
+        if self.scheme == 'https':
+            if opts.ssl_ca_cert: # this may do ZERO with nss  according to curl docs
+                self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
 
-        if opts.ssl_ca_cert: # this may do ZERO with nss  according to curl docs
-            self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
-        
+        #headers:
+        if opts.http_headers and self.scheme in ('http', 'https'):
+            headers = []
+            for (tag, content) in opts.http_headers:
+                headers.append('%s:%s' % (tag, content))
+            self.curl_obj.setopt(pycurl.HTTPHEADER, headers)
+
+        # ranges:
+        if opts.range or opts.reget:
+            range_str = self._build_range()
+            if range_str:
+                self.curl_obj.setopt(pycurl.RANGE, range_str)
+            
         # throttle/bandwidth
         if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
             self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
             
         # proxy settings
         
-        # magic ftp settings
-    
+        
+        # username/password/auth settings
+        
         # our url
         self.curl_obj.setopt(pycurl.URL, self.url)
         
@@ -1587,7 +1600,6 @@ class PyCurlFileObject():
             err = URLGrabError(14, msg)
             err.code = self.http_code
             err.exception = e
-            # XXX should we rename the .part file? or leave it?
             raise err
             
     def _do_open(self):
@@ -1595,32 +1607,20 @@ class PyCurlFileObject():
         self.reget_time = None
         self.curl_obj = _curl_cache
         self.curl_obj.reset() # reset all old settings away, just in case
+        # setup any ranges
         self._set_opts()
 
         return self.fo
 
-    def _add_headers(self, req):
-        #XXXX
-        return
-        
-        try: req_type = req.get_type()
-        except ValueError: req_type = None
-        if self.opts.http_headers and req_type in ('http', 'https'):
-            for h, v in self.opts.http_headers:
-                req.add_header(h, v)
-        if self.opts.ftp_headers and req_type == 'ftp':
-            for h, v in self.opts.ftp_headers:
-                req.add_header(h, v)
-
-    def _build_range(self, req):
-        #XXXX
-        return
+    def _add_headers(self):
+        pass
         
+    def _build_range(self):
         self.reget_time = None
         self.append = False
         reget_length = 0
         rt = None
-        if have_range and self.opts.reget and type(self.filename) == type(''):
+        if self.opts.reget and type(self.filename) == type(''):
             # we have reget turned on and we're dumping to a file
             try:
                 s = os.stat(self.filename)
@@ -1632,6 +1632,7 @@ class PyCurlFileObject():
 
                 # Set initial length when regetting
                 self._amount_read = reget_length    
+                self._reget_length = reget_length # set where we started from, too
 
                 rt = reget_length, ''
                 self.append = 1
@@ -1648,7 +1649,10 @@ class PyCurlFileObject():
 
         if rt:
             header = range_tuple_to_header(rt)
-            if header: req.add_header('Range', header)
+            if header:
+                return header.split('=')[1]
+
+
 
     def _make_request(self, req, opener):
         #XXXX
@@ -1722,9 +1726,9 @@ class PyCurlFileObject():
             else: mode = 'wb'
 
             if DEBUG: DEBUG.info('opening local file "%s" with mode %s' % \
-                                 (self.filename + ".part", mode))
+                                 (self.filename, mode))
             try:
-                self.fo = open(self.filename + '.part', mode)
+                self.fo = open(self.filename, mode)
             except IOError, e:
                 err = URLGrabError(16, _(\
                   'error opening local file from %s, IOError: %s') % (self.url, e))
@@ -1743,8 +1747,6 @@ class PyCurlFileObject():
             # if we're a filename - move the file to final location
             self.fo.flush()
             self.fo.close()
-            # XXX - try except and behave quasi-sanely?
-            os.rename(self.filename + '.part', self.filename)
             mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
             if mod_time != -1:
                 os.utime(self.filename, (mod_time, mod_time))
@@ -1820,6 +1822,7 @@ class PyCurlFileObject():
 
     def _progress_update(self, download_total, downloaded, upload_total, uploaded):
             if self._prog_running:
+                downloaded += self._reget_length
                 self.opts.progress_obj.update(downloaded)
 
     def read(self, amt=None):
commit c77251a0cc193f7b6b0442a59432bf7d623ae293
Author: Seth Vidal <skvidal at fedoraproject.org>
Date:   Wed Jul 29 12:42:50 2009 -0400

    implement throttle/bandwidth controls in pycurl
    tested with the progress call back - seems to work very well

diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 15803fb..16f2428 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -1557,6 +1557,10 @@ class PyCurlFileObject():
         if opts.ssl_ca_cert: # this may do ZERO with nss  according to curl docs
             self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
         
+        # throttle/bandwidth
+        if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
+            self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
+            
         # proxy settings
         
         # magic ftp settings


More information about the Yum-commits mailing list