[yum-commits] Branch 'yum-3_2_X' - 7 commits - docs/yum.conf.5 yum/__init__.py yum/config.py yum/metalink.py yum/misc.py yum/repoMDObject.py yum/yumRepo.py

James Antill james at osuosl.org
Mon Oct 6 04:44:10 UTC 2008


 docs/yum.conf.5     |   14 ++++
 yum/__init__.py     |    3 -
 yum/config.py       |    3 +
 yum/metalink.py     |   25 +++++++-
 yum/misc.py         |   79 +++++++++++++++++++++++---
 yum/repoMDObject.py |   11 +++
 yum/yumRepo.py      |  155 +++++++++++++++++++++++++++++++++++++++++++++++-----
 7 files changed, 263 insertions(+), 27 deletions(-)

New commits:
commit b1047995c923b4e7c9c0ea3dec7e87c0304af97a
Author: James Antill <james at and.org>
Date:   Mon Oct 6 00:44:02 2008 -0400

    Add documentation for metalink

diff --git a/docs/yum.conf.5 b/docs/yum.conf.5
index 183944b..40148a6 100644
--- a/docs/yum.conf.5
+++ b/docs/yum.conf.5
@@ -258,7 +258,9 @@ lower the value of this option. You can also change from the default of using
 seconds to using days, hours or minutes by appending a d, h or m respectively.
 The default is 1.5 hours, to compliment yum-updatesd running once an hour.
 It's also possible to use the word "never", meaning that the metadata will
-never expire.
+never expire. Note that when using a metalink file the metalink must always
+be newer than the metadata for the repository, due to the validation, so this
+timeout also applies to the metalink file.
 
 .IP \fBmirrorlist_expire \fR
 Time (in seconds) after which the mirrorlist locally cached will expire. 
@@ -343,6 +345,16 @@ you've been warned.
 You can use HTTP basic auth by prepending "user:password@" to the server
 name in the baseurl line.  For example: "baseurl=http://user:passwd@example.com/".
 
+.IP \fBmetalink\fR
+Specifies a URL to a metalink file for the repomd.xml. A list of mirrors for
+the entire repository is generated by converting the mirrors for the
+repomd.xml file to a baseurl. The metalink file also contains the latest
+timestamp from the data in the repomd.xml, the length of the repomd.xml and
+checksum data. This data is checked against any downloaded repomd.xml file
+and all of the information from the metalink file must match. This can be used
+instead of or with the \fBbaseurl\fR option. Substitution variables, described
+below, can be used with this option. This option disables the mirrorlist option.
+
 .IP \fBmirrorlist\fR
 Specifies a URL to a file containing a list of baseurls. This can be used
 instead of or with the \fBbaseurl\fR option. Substitution variables, described
commit bfc81bda00f4322d0fe81b35a07ed23b536a9d64
Author: James Antill <james at and.org>
Date:   Sat Oct 4 19:18:43 2008 -0400

     Undeprecate metadataCurrent(), and use it for metalink.xml and repomd.xml
       We can't have a newer repomd.xml than we have a metalink file, or it fails.
    
     Download/use metalink data if it's configured.
       It overrides mirrorlist, so you can use both for older versions of yum
       Check repomd.xml against metalink data, if we have it.
         Failover to next mirror, if it fails (like repomd.xml gpg signing).

diff --git a/yum/yumRepo.py b/yum/yumRepo.py
index e3e2a1f..e851d16 100644
--- a/yum/yumRepo.py
+++ b/yum/yumRepo.py
@@ -34,6 +34,7 @@ import sqlitesack
 from yum import config
 from yum import misc
 from constants import *
+import metalink
 
 import logging
 import logginglevels
@@ -240,6 +241,8 @@ class YumRepository(Repository, config.RepoConf):
         self.yumvar = {} # empty dict of yumvariables for $string replacement
         self._proxy_dict = {}
         self.metadata_cookie_fn = 'cachecookie'
+        self._metadataCurrent = None
+        self._metalink = None
         self.groups_added = False
         self.http_headers = {}
         self.repo_config_age = 0 # if we're a repo not from a file then the
@@ -354,7 +357,7 @@ class YumRepository(Repository, config.RepoConf):
                 'gpgcheck', 'repo_gpgcheck', # FIXME: gpgcheck => pkgs_gpgcheck
                 'includepkgs', 'keepalive', 'proxy',
                 'proxy_password', 'proxy_username', 'exclude',
-                'retries', 'throttle', 'timeout', 'mirrorlist',
+                'retries', 'throttle', 'timeout', 'mirrorlist', 'metalink',
                 'cachedir', 'gpgkey', 'pkgdir', 'hdrdir']
         vars.sort()
         for attr in vars:
@@ -516,6 +519,7 @@ class YumRepository(Repository, config.RepoConf):
         self._preload_md_from_system_cache('repomd.xml')
         self._preload_md_from_system_cache('cachecookie')
         self._preload_md_from_system_cache('mirrorlist.txt')
+        self._preload_md_from_system_cache('metalink.xml')
 
 
     def baseurlSetup(self):
@@ -527,12 +531,17 @@ class YumRepository(Repository, config.RepoConf):
         """go through the baseurls and mirrorlists and populate self.urls
            with valid ones, run  self.check() at the end to make sure it worked"""
 
+        self.baseurl = self._replace_and_check_url(self.baseurl)
+
         mirrorurls = []
+        if self.metalink and not self.mirrorlistparsed:
+            # FIXME: This is kind of lying to API callers
+            mirrorurls.extend(list(self.metalink_data.urls()))
+            self.mirrorlistparsed = True
         if self.mirrorlist and not self.mirrorlistparsed:
             mirrorurls.extend(self._getMirrorList())
             self.mirrorlistparsed = True
 
-        self.baseurl = self._replace_and_check_url(self.baseurl)
         self.mirrorurls = self._replace_and_check_url(mirrorurls)
         self._urls = self.baseurl + self.mirrorurls
         # if our mirrorlist is just screwed then make sure we unlink a mirrorlist cache
@@ -580,6 +589,53 @@ class YumRepository(Repository, config.RepoConf):
                     fset=lambda self, value: setattr(self, "_urls", value),
                     fdel=lambda self: setattr(self, "_urls", None))
 
+    def _getMetalink(self):
+        if not self._metalink:
+            self.metalink_filename = self.cachedir + '/' + 'metalink.xml'
+            local = self.metalink_filename + '.tmp'
+            if not self._metalinkCurrent():
+                url = misc.to_utf8(self.metalink)
+                try:
+                    ug = URLGrabber(bandwidth = self.bandwidth,
+                                    retry = self.retries,
+                                    throttle = self.throttle,
+                                    progress_obj = self.callback,
+                                    proxies=self.proxy_dict)
+                    ug.opts.user_agent = default_grabber.opts.user_agent
+                    result = ug.urlgrab(url, local, text=self.id + "/metalink")
+
+                except urlgrabber.grabber.URLGrabError, e:
+                    if not os.path.exists(self.metalink_filename):
+                        msg = ("Cannot retrieve metalink for repository: %s. "
+                               "Please verify its path and try again" % self )
+                        raise Errors.RepoError, msg
+                    #  Now, we have an old usable metalink, so we can't move to
+                    # a newer repomd.xml ... or checksums won't match.
+                    print "Could not get metalink %s error was \n%s" %(url, e)
+                    self._metadataCurrent = True
+
+            if not self._metadataCurrent:
+                try:
+                    self._metalink = metalink.MetaLinkRepoMD(result)
+                    shutil.move(result, self.metalink_filename)
+                except metalink.MetaLinkRepoErrorParseFail, e:
+                    # Downloaded file failed to parse, revert (ditto above):
+                    print "Could not parse metalink %s error was \n%s"%(url, e)
+                    self._metadataCurrent = True
+                    try:
+                        os.unlink(result)
+                    except:
+                        pass
+
+            if self._metadataCurrent:
+                self._metalink = metalink.MetaLinkRepoMD(self.metalink_filename)
+
+        return self._metalink
+
+    metalink_data = property(fget=lambda self: self._getMetalink(),
+                             fset=lambda self, value: setattr(self, "_metalink",
+                                                              value),
+                             fdel=lambda self: setattr(self, "_metalink", None))
 
     def _getFile(self, url=None, relative=None, local=None, start=None, end=None,
             copy_local=None, checkfunc=None, text=None, reget='simple', cache=True):
@@ -716,17 +772,35 @@ class YumRepository(Repository, config.RepoConf):
                         cache=cache,
                         )
 
-
-
     def metadataCurrent(self):
         """Check if there is a metadata_cookie and check its age. If the
         age of the cookie is less than metadata_expire time then return true
-        else return False"""
-        warnings.warn('metadataCurrent() will go away in a future version of Yum.\n \
-                       please use withinCacheAge() instead.',
-                Errors.YumFutureDeprecationWarning, stacklevel=2)
-
-        return self.withinCacheAge(self.metadata_cookie, self.metadata_expire)
+        else return False. This result is cached, so that metalink/repomd.xml
+        are synchronized."""
+        if self._metadataCurrent is None:
+            self._metadataCurrent = self.withinCacheAge(self.metadata_cookie,
+                                                        self.metadata_expire)
+        return self._metadataCurrent
+
+    #  The problem is that the metalink _cannot_ be newer than the repomd.xml
+    # or the checksums can be off.
+    #  Also see _getMetalink()
+    def _metalinkCurrent(self):
+        if self._metadataCurrent is not None:
+            return self._metadataCurrent
+
+        if self.cache and not os.path.exists(self.metalink_filename):
+            raise Errors.RepoError, 'Cannot find metalink.xml file for %s' %self
+
+        if self.cache:
+            self._metadataCurrent = True
+        elif not os.path.exists(self.metalink_filename):
+            self._metadataCurrent = False
+        elif self.withinCacheAge(self.metadata_cookie, self.metadata_expire):
+            self._metadataCurrent = True
+        else:
+            self._metadataCurrent = False
+        return self._metadataCurrent
 
     def withinCacheAge(self, myfile, expiration_time):
         """check if any file is older than a certain amount of time. Used for
@@ -787,8 +861,7 @@ class YumRepository(Repository, config.RepoConf):
         """ Should we cache the current repomd.xml """
         if self.cache and not os.path.exists(local):
             raise Errors.RepoError, 'Cannot find repomd.xml file for %s' % self
-        if self.cache or self.withinCacheAge(self.metadata_cookie,
-                                             self.metadata_expire):
+        if self.cache or self.metadataCurrent():
             return True
         return False
 
@@ -916,6 +989,58 @@ class YumRepository(Repository, config.RepoConf):
             return False
         return True
 
+    def _checkRepoMetalink(self, repoXML=None, metalink_data=None):
+        """ Check the repomd.xml against the metalink data, if we have it. """
+
+        def _chk_repomd(repomd):
+            verbose_logger.log(logginglevels.DEBUG_4, "checking repomd %d> %d",
+                               repoXML.timestamp, repomd.timestamp)
+            if repoXML.timestamp != repomd.timestamp:
+                return False
+            if repoXML.length != repomd.size:
+                return False
+
+            #  MirrorManager isn't generating sha256 yet, and we should probably
+            # not require all of the checksums we produce.
+            done = set()
+            for checksum in repoXML.checksums:
+                if checksum not in repomd.chksums:
+                    continue
+
+                if repoXML.checksums[checksum] != repomd.chksums[checksum]:
+                    return False
+                done.add(checksum)
+
+            #  Only allow approved checksums, might want to not "approve" of
+            # sha1/md5
+            for checksum in ('sha512', 'sha256', 'sha1', 'md5'):
+                if checksum in done:
+                    return True
+
+            return False
+
+        if repoXML is None:
+            repoXML = self._repoXML
+        if metalink_data is None:
+            metalink_data = self.metalink_data
+
+        if _chk_repomd(metalink_data.repomd):
+            return True
+
+        # FIXME: We probably want to skip to the first mirror which has the
+        # latest repomd.xml, but say "if we can't find one, use the newest old
+        # repomd.xml" ... alas. that's not so easy to do in urlgrabber atm.
+        for repomd in self.metalink_data.old_repomds:
+            if _chk_repomd(repomd):
+                verbose_logger.log(logginglevels.DEBUG_2,
+                                   "Using older repomd.xml\n"
+                                   "  Latest: %s\n"
+                                   "  Using: %s" %
+                                   (time.ctime(metalink_data.repomd.timestamp),
+                                    time.ctime(repomd.timestamp)))
+                return True
+        return False
+
     def _commonLoadRepoXML(self, text, mdtypes=None):
         """ Common LoadRepoXML for instant and group, returns False if you
             should just return. """
@@ -1157,10 +1282,14 @@ class YumRepository(Repository, config.RepoConf):
                 raise URLGrabError(-1, 'repomd.xml signature could not be verified for %s' % (self))
 
         try:
-            repoMDObject.RepoMD(self.id, filepath)
+            repoXML = repoMDObject.RepoMD(self.id, filepath)
         except Errors.RepoMDError, e:
             raise URLGrabError(-1, 'Error importing repomd.xml for %s: %s' % (self, e))
 
+        if self.metalink and not self._checkRepoMetalink(repoXML):
+            raise URLGrabError(-1, 'repomd.xml does not match metalink for %s' %
+                               self)
+
 
     def checkMD(self, fn, mdtype, openchecksum=False):
         """check the metadata type against its checksum"""
commit 52ce04b20d73be5e829a0f7c5efad0fc7ca93a07
Author: James Antill <james at and.org>
Date:   Sat Oct 4 19:15:40 2008 -0400

    Add metalink config. option to each repo.

diff --git a/yum/config.py b/yum/config.py
index b269308..471303a 100644
--- a/yum/config.py
+++ b/yum/config.py
@@ -677,6 +677,7 @@ class RepoConf(BaseConfig):
     enabled = Inherit(YumConf.enabled)
     baseurl = UrlListOption()
     mirrorlist = UrlOption()
+    metalink   = UrlOption()
     mediaid = Option()
     gpgkey = UrlListOption()
     exclude = ListOption() 
@@ -700,6 +701,8 @@ class RepoConf(BaseConfig):
     http_caching = Inherit(YumConf.http_caching)
     metadata_expire = Inherit(YumConf.metadata_expire)
     mirrorlist_expire = Inherit(YumConf.mirrorlist_expire)
+    # NOTE: metalink expire _must_ be the same as metadata_expire, due to the
+    #       checksumming of the repomd.xml.
     mdpolicy = Inherit(YumConf.mdpolicy)
     cost = IntOption(1000)
     
commit d547d43efac40d950a3470dce172decf09f0dbdf
Author: James Antill <james at and.org>
Date:   Sat Oct 4 19:15:15 2008 -0400

    Fix metalink exceptions, and make it easier to use for yumRepo

diff --git a/yum/metalink.py b/yum/metalink.py
index 9aa210a..5a57511 100755
--- a/yum/metalink.py
+++ b/yum/metalink.py
@@ -20,19 +20,19 @@
 # Parse the new MirrorManager metalink output:
 
 import sys
-print >>sys.stderr, "Warning: Relying on the API to be stable is not recommended, yet."
-
 import os
 import time
 from urlgrabber.progress import format_number
 
+import Errors
+
 try:
     from xml.etree import cElementTree
 except ImportError:
     import cElementTree
 xmlparse = cElementTree.parse
 
-class MetaLinkRepoErrorParseFail:
+class MetaLinkRepoErrorParseFail(Errors.YumBaseError):
     """ An exception thrown for an unparsable MetaLinkRepoMD file. """
     pass
 
@@ -177,6 +177,8 @@ class MetaLinkRepoMD:
         self.repomd = None
         self.old_repomds = []
         self.mirrors = []
+        if not os.path.exists(filename):
+            raise MetaLinkRepoErrorParseFail, "File %s does not exist" %filename
         root = xmlparse(filename)
 
         for elem in root.findall(__ML_FILE_ELEMENT__):
@@ -211,6 +213,23 @@ class MetaLinkRepoMD:
         if len(self.mirrors) < 1:
             raise MetaLinkRepoErrorParseFail, "No mirror"
 
+    def urls(self):
+        """ Iterate plain urls for the mirrors, like the old mirrorlist. """
+        for mirror in self.mirrors:
+            url = mirror.url
+
+            # This is what yum supports atm. ... no rsync etc.
+            if not (url.startswith("http:") or url.startswith("ftp:") or
+                    url.startswith("file:") or url.startswith("https:")):
+                continue
+
+            #  The mirror urls in the metalink file are for repomd.xml so it
+            # gives a list of mirrors for that one file, but we want the list
+            # of mirror baseurls. Joy of reusing other people's stds. :)
+            if not url.endswith("/repodata/repomd.xml"):
+                continue
+            yield url[:-len("/repodata/repomd.xml")]
+
     def __str__(self):
         ret = str(self.repomd)
         done = False
commit 3c217c2c10c8218fddb740f9854eeba93bc838b4
Author: James Antill <james at and.org>
Date:   Sat Oct 4 19:14:18 2008 -0400

    Cleanup metalink data

diff --git a/yum/__init__.py b/yum/__init__.py
index cb86717..6580daa 100644
--- a/yum/__init__.py
+++ b/yum/__init__.py
@@ -1342,7 +1342,8 @@ class YumBase(depsolve.Depsolve):
         return self._cleanFiles(exts, 'cachedir', 'sqlite')
 
     def cleanMetadata(self):
-        exts = ['xml.gz', 'xml', 'cachecookie', 'mirrorlist.txt']
+        exts = ['xml.gz', 'xml', 'cachecookie', 'mirrorlist.txt',
+                'metalink.xml']
         return self._cleanFiles(exts, 'cachedir', 'metadata') 
 
     def cleanExpireCache(self):
commit d2a77c2f5c2716904bb91aac317c0d0c9590ec02
Author: James Antill <james at and.org>
Date:   Sat Oct 4 15:13:48 2008 -0400

    Add length/checksums to the RepoMD object

diff --git a/yum/repoMDObject.py b/yum/repoMDObject.py
index d1d8cfb..0ec8a04 100755
--- a/yum/repoMDObject.py
+++ b/yum/repoMDObject.py
@@ -22,6 +22,7 @@ iterparse = cElementTree.iterparse
 from Errors import RepoMDError
 
 import sys
+from misc import AutoFileChecksums
 
 def ns_cleanup(qn):
     if qn.find('}') == -1: return qn 
@@ -72,6 +73,8 @@ class RepoMD:
         self.timestamp = 0
         self.repoid = repoid
         self.repoData = {}
+        self.checksums = {}
+        self.length    = 0
         
         if type(srcfile) == type('str'):
             # srcfile is a filename string
@@ -80,6 +83,8 @@ class RepoMD:
             # srcfile is a file object
             infile = srcfile
         
+        infile = AutoFileChecksums(infile, ['md5', 'sha1', 'sha256'],
+                                   ignore_missing=True)
         parser = iterparse(infile)
         
         try:
@@ -95,6 +100,9 @@ class RepoMD:
                             self.timestamp = nts
                     except:
                         pass
+
+            self.checksums = infile.checksums.hexdigests()
+            self.length    = len(infile.checksums)
         except SyntaxError, e:
             raise RepoMDError, "Damaged repomd.xml file"
             
@@ -112,6 +120,9 @@ class RepoMD:
         """dump fun output"""
 
         print "file timestamp: %s" % self.timestamp
+        print "file length   : %s" % self.length
+        for csum in sorted(self.checksums):
+            print "file checksum : %s/%s" % (csum, self.checksums[csum])
         for ft in sorted(self.fileTypes()):
             thisdata = self.repoData[ft]
             print '  datatype: %s' % thisdata.type
commit eff63495586ebef6d578e371fbf3a0a469220a86
Author: James Antill <james at and.org>
Date:   Sat Oct 4 15:12:55 2008 -0400

    Expand on checksum() to allow multiple checksums over a single file

diff --git a/yum/misc.py b/yum/misc.py
index c9ae758..801fc0f 100644
--- a/yum/misc.py
+++ b/yum/misc.py
@@ -172,6 +172,71 @@ def unique(s):
             u.append(x)
     return u
 
+class Checksums:
+    """ Generate checksum(s), on given pieces of data. Producing the
+        Length and the result(s) when complete. """
+
+    def __init__(self, checksums=None, ignore_missing=False):
+        self._checksums = checksums
+        if self._checksums is None:
+            self._checksums = ['sha256']
+        self._sumalgos = []
+        self._sumtypes = []
+        self._len = 0
+
+        done = set()
+        for sumtype in self._checksums:
+            if sumtype in done:
+                continue
+
+            if sumtype in _available_checksums:
+                sumalgo = hashlib.new(sumtype)
+            elif ignore_missing:
+                continue
+            else:
+                raise MiscError, 'Error Checksumming, bad checksum type %s' % sumtype
+            done.add(sumtype)
+            self._sumtypes.append(sumtype)
+            self._sumalgos.append(sumalgo)
+
+    def __len__(self):
+        return self._len
+
+    def update(self, data):
+        self._len += len(data)
+        for sumalgo in self._sumalgos:
+            sumalgo.update(data)
+
+    def read(self, fo, size=2**16):
+        data = fo.read(size)
+        self.update(data)
+        return data
+
+    def hexdigests(self):
+        ret = {}
+        for sumtype, sumdata in zip(self._sumtypes, self._sumalgos):
+            ret[sumtype] = sumdata.hexdigest()
+        return ret
+
+    def hexdigest(self, checksum):
+        return self.hexdigests()[checksum]
+
+
+class AutoFileChecksums:
+    """ Generate checksum(s), on given file/fileobject. Pretending to be a file
+        object (overrides read). """
+
+    def __init__(self, fo, checksums, ignore_missing=False):
+        self._fo       = fo
+        self.checksums = Checksums(checksums, ignore_missing)
+
+    def __getattr__(self, attr):
+        return getattr(self._fo, attr)
+
+    def read(self, size=-1):
+        return self.checksums.read(self._fo, size)
+
+
 def checksum(sumtype, file, CHUNK=2**16):
     """takes filename, hand back Checksum of it
        sumtype = md5 or sha/sha1/sha256/sha512 (note sha == sha1)
@@ -187,20 +252,16 @@ def checksum(sumtype, file, CHUNK=2**16):
 
         if sumtype == 'sha':
             sumtype = 'sha1'
-        if sumtype in _available_checksums:
-            sumalgo = hashlib.new(sumtype)
-        else:
-            raise MiscError, 'Error Checksumming file, bad checksum type %s' % sumtype
-        chunk = fo.read
-        while chunk: 
-            chunk = fo.read(CHUNK)
-            sumalgo.update(chunk)
+
+        data = Checksums([sumtype])
+        while data.read(fo, CHUNK):
+            pass
 
         if type(file) is types.StringType:
             fo.close()
             del fo
             
-        return sumalgo.hexdigest()
+        return data.hexdigest(sumtype)
     except (IOError, OSError), e:
         raise MiscError, 'Error opening file for checksum: %s' % file
 


More information about the Yum-commits mailing list