[Yum-devel] [PATCH] implement reget=check_timestamp
Zdeněk Pavlas
zpavlas at redhat.com
Fri Jun 22 10:02:30 UTC 2012
---
urlgrabber/grabber.py | 28 +++++++++++++++++-----------
1 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 071146c..3127b48 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -107,10 +107,8 @@ GENERAL ARGUMENTS (kwargs)
begin 100 bytes into the requested file.
'check_timestamp' -- the timestamp of the server file will be
- compared to the timestamp of the local file. ONLY if the
- local file is newer than or the same age as the server file
- will reget be used. If the server file is newer, or the
- timestamp is not returned, the entire file will be fetched.
+ compared to the timestamp of the local file. If the server
+ file is newer, the entire file will be fetched.
NOTE: urlgrabber can do very little to verify that the partial
file on disk is identical to the beginning of the remote file.
@@ -1195,8 +1193,6 @@ class PyCurlFileObject(object):
self.append = False
self.reget_time = None
self.opts = opts
- if self.opts.reget == 'check_timestamp':
- raise NotImplementedError, "check_timestamp regets are not implemented in this ver of urlgrabber. Please report this."
self._complete = False
self._rbuf = ''
self._rbufsize = 1024*8
@@ -1232,6 +1228,9 @@ class PyCurlFileObject(object):
text=self.opts.text)
self._prog_running = True
self.opts.progress_obj.update(self._amount_read)
+ if self.opts.reget == 'check_timestamp' and self._amount_read == 0:
+ # replace instead of appending
+ self.fo.truncate(0)
self._amount_read += len(buf)
self.fo.write(buf)
@@ -1361,14 +1360,21 @@ class PyCurlFileObject(object):
self.curl_obj.setopt(pycurl.SSLKEYPASSWD, opts.ssl_key_pass)
#headers:
- if opts.http_headers and self.scheme in ('http', 'https'):
- headers = []
- for (tag, content) in opts.http_headers:
- headers.append('%s:%s' % (tag, content))
+ if self.scheme in ('http', 'https'):
+ headers = ['%s: %s' % i for i in opts.http_headers or []]
+ if opts.reget == 'check_timestamp':
+ try: s = os.stat(self.filename)
+ except OSError: pass
+ else:
+ headers.append(time.strftime(
+ 'If-Modified-Since: %a, %d %b %Y %H:%M:%S +0000',
+ time.gmtime(s[stat.ST_MTIME])
+ ))
+ self.append = 1 # Don't truncate
self.curl_obj.setopt(pycurl.HTTPHEADER, headers)
# ranges:
- if opts.range or opts.reget:
+ if opts.range or opts.reget == 'simple':
range_str = self._build_range()
if range_str:
self.curl_obj.setopt(pycurl.RANGE, range_str)
--
1.7.4.4
More information about the Yum-devel
mailing list