From valentina at osuosl.org Fri Mar 24 14:32:06 2017 From: valentina at osuosl.org (valentina at osuosl.org) Date: Fri, 24 Mar 2017 14:32:06 +0000 (UTC) Subject: [Rpm-metadata] 8 commits - createrepo/__init__.py docs/modifyrepo.1 modifyrepo.py Message-ID: <20170324143206.5E25E2347CB@yum.osuosl.org> createrepo/__init__.py | 79 ++++++++++++++++++++++++++++--------------------- docs/modifyrepo.1 | 2 - modifyrepo.py | 3 + 3 files changed, 50 insertions(+), 34 deletions(-) New commits: commit 22e266964b977f796c5096f9658f00b3bfee5454 Merge: 6b76c6d 9dd33d9 Author: Valentina Mukhamedzhanova Date: Fri Mar 24 15:30:30 2017 +0100 Merge pull request #5 from dmnks/bz1287714 modifyrepo: docs: remove compat compress type. BZ 1287714 commit 9dd33d94185b9f78de865af227ec682f81ecab21 Author: Michal Domonkos Date: Fri Mar 24 13:35:16 2017 +0100 modifyrepo: docs: remove compat compress type. BZ 1287714 We never supported "compat" here as it only makes sense in the createrepo domain (it means use a different method for the xml files (gz) and for the sqlite files (bzip2) -- we don't generate sqlite files in modifyrepo). diff --git a/docs/modifyrepo.1 b/docs/modifyrepo.1 index ae2cea5..abd5357 100644 --- a/docs/modifyrepo.1 +++ b/docs/modifyrepo.1 @@ -24,7 +24,7 @@ Compress the new repodata before adding it to the repo. This is used by default. Do not compress the new repodata before adding it to the repo. .IP "\fB\-\-compress-type \fP" -Specify which compression type to use: compat (default), xz (may not be available), gz, bz2. +Specify which compression type to use: gz, xz (may not be available), bz2. .IP "\fB\-s, \-\-checksum \fP" Specify the checksum type to use. commit 6b76c6dba6cf87771fa537aca226698cee275620 Merge: 51c00ac c626c54 Author: Valentina Mukhamedzhanova Date: Thu Mar 23 17:17:37 2017 +0100 Merge pull request #4 from dmnks/bz1287685 modifyrepo: handle empty file with LZMA. 
BZ 1287685 commit c626c54074b36edb090254fe6f4985bc20893436 Author: Michal Domonkos Date: Thu Mar 23 16:05:16 2017 +0100 modifyrepo: handle empty file with LZMA. BZ 1287685 When trying to compress an empty string with LZMA, we will get the unfriendly "LZMA.error: unknown error!". Let's handle this case ourselves and raise a more user-friendly error instead. diff --git a/modifyrepo.py b/modifyrepo.py index 34b0902..ade5607 100755 --- a/modifyrepo.py +++ b/modifyrepo.py @@ -125,6 +125,9 @@ class RepoMetadata: else: raise MDError, 'invalid metadata type' + if not md and self.compress_type == 'xz': + raise MDError, 'LZMA does not support compressing empty files' + ## Compress the metadata and move it into the repodata mdtype = self._get_mdtype(mdname, mdtype) destmd = os.path.join(self.repodir, mdname) commit 51c00ac3fcc725698e0e43fdd95ee1466b786584 Merge: 659d74b 2f83221 Author: Valentina Mukhamedzhanova Date: Thu Mar 9 16:56:27 2017 +0100 Merge pull request #2 from dmnks/bz1125437 createrepo: allow xz and bz2 for xml files. BZ 1125437 commit 659d74b825c18d0d78509e192c9554671eda83bf Merge: 364dfa5 19711d9 Author: Valentina Mukhamedzhanova Date: Wed Feb 22 12:32:26 2017 +0100 Merge pull request #3 from dmnks/bz1406418-fixup Add safety check for oldData attribute commit 19711d9fd87e39a8bb8ca2c4caf99ec7aaf192d1 Author: Michal Domonkos Date: Mon Feb 20 14:49:17 2017 +0100 Add safety check for oldData attribute We may not have it set up yet at the point of calling the cleanup method. diff --git a/createrepo/__init__.py b/createrepo/__init__.py index 9e89afc..8f021e0 100644 --- a/createrepo/__init__.py +++ b/createrepo/__init__.py @@ -1484,7 +1484,7 @@ class MetaDataGenerator: def _cleanup_update_tmp_dir(self): - if self.conf.update: + if self.conf.update and hasattr(self, 'oldData'): self.oldData.cleanup() commit 2f832212a0ba5fc8d2ac448b0053ae34b134352d Author: Michal Domonkos Date: Fri Nov 25 18:39:35 2016 +0100 createrepo: allow xz and bz2 for xml files. 
BZ 1125437 Currently, we don't honor --compress-type for the primary/filelists/other.xml files and always force gz because libxml2 (used by yum-metadata-parser) didn't use to support anything other than gz. This has been worked around in yum since then by decompressing the files first before passing them to y-m-p (commit cfe43e8). If we do the same in createrepo (which uses y-m-p to generate the sqlite files), we can enable these additional compress types for primary/filelists/other.xml -- and that's what this commit does. Note that libxml2 also natively supports xz in addition to gz so we only need to do the decompression for bz2. diff --git a/createrepo/__init__.py b/createrepo/__init__.py index 9e89afc..6d56ff8 100644 --- a/createrepo/__init__.py +++ b/createrepo/__init__.py @@ -447,11 +447,10 @@ class MetaDataGenerator: def _setupPrimary(self): # setup the primary metadata file - # FIXME - make this be conf.compress_type once y-m-p is fixed - fpz = self.conf.primaryfile + '.' + 'gz' + fpz = self.conf.primaryfile + '.' + self.conf.compress_type primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, fpz) - fo = compressOpen(primaryfilepath, 'w', 'gz') + fo = compressOpen(primaryfilepath, 'w', self.conf.compress_type) fo.write('<?xml version="1.0" encoding="UTF-8"?>\n') fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common"' \ ' xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">' % @@ -460,11 +459,10 @@ class MetaDataGenerator: def _setupFilelists(self): # setup the filelist file - # FIXME - make this be conf.compress_type once y-m-p is fixed - fpz = self.conf.filelistsfile + '.' + 'gz' + fpz = self.conf.filelistsfile + '.' + self.conf.compress_type filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir, fpz) - fo = compressOpen(filelistpath, 'w', 'gz') + fo = compressOpen(filelistpath, 'w', self.conf.compress_type) fo.write('<?xml version="1.0" encoding="UTF-8"?>\n') fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists"' \ ' packages="%s">' % self.pkgcount) @@ -472,11 +470,10 @@ class MetaDataGenerator: def _setupOther(self): # setup the other file - # FIXME - make this be conf.compress_type once y-m-p is fixed - fpz = self.conf.otherfile + '.' 
+ 'gz' + fpz = self.conf.otherfile + '.' + self.conf.compress_type otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, fpz) - fo = compressOpen(otherfilepath, 'w', 'gz') + fo = compressOpen(otherfilepath, 'w', self.conf.compress_type) fo.write('<?xml version="1.0" encoding="UTF-8"?>\n') fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other"' \ ' packages="%s">' % @@ -1217,21 +1214,34 @@ class MetaDataGenerator: rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None) for (rpm_file, ftype) in workfiles: - # when we fix y-m-p and non-gzipped xml files - then we can make this just add - # self.conf.compress_type - if ftype in ('other', 'filelists', 'primary'): - rpm_file = rpm_file + '.' + 'gz' - elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression: + unpath = os.path.join(repopath, rpm_file) + if (ftype in ('other', 'filelists', 'primary') + or (rpm_file.find('.') != -1 and rpm_file.split('.')[-1] + not in _available_compression)): rpm_file = rpm_file + '.' + self.conf.compress_type complete_path = os.path.join(repopath, rpm_file) zfo = compressOpen(complete_path) + dfo = None + if (self.conf.compress_type == 'bz2' and self.conf.database and + ftype in ('other', 'filelists', 'primary')): + # yum-metadata-parser doesn't understand bz2 so let's write the + # decompressed data to a file and pass that via gen_func + # instead of the compressed version + dfo = open(unpath, 'w') # This is misc.checksum() done locally so we can get the size too. 
data = misc.Checksums([sumtype]) - while data.read(zfo, 2**16): - pass + while True: + chunk = data.read(zfo, 2**16) + if not chunk: + break + if dfo is not None: + dfo.write(chunk) uncsum = data.hexdigest(sumtype) unsize = len(data) zfo.close() + if dfo is not None: + dfo.close() + csum = misc.checksum(sumtype, complete_path) timestamp = os.stat(complete_path)[8] @@ -1244,21 +1254,29 @@ class MetaDataGenerator: self.callback.log("Starting %s db creation: %s" % (ftype, time.ctime())) + gen_func = None if ftype == 'primary': - #FIXME - in theory some sort of try/except here - # TypeError appears to be raised, sometimes :( - rp.getPrimary(complete_path, csum) - + gen_func = rp.getPrimary elif ftype == 'filelists': - #FIXME and here - rp.getFilelists(complete_path, csum) - + gen_func = rp.getFilelists elif ftype == 'other': - #FIXME and here - rp.getOtherdata(complete_path, csum) + gen_func = rp.getOtherdata + if gen_func is not None: + if dfo is None: + #FIXME - in theory some sort of try/except here + # TypeError appears to be raised, sometimes :( + gen_func(complete_path, csum) + else: + #FIXME and here + gen_func(unpath, uncsum) + os.unlink(unpath) if ftype in ['primary', 'filelists', 'other']: - tmp_result_name = '%s.xml.gz.sqlite' % ftype + if dfo is None: + compress_ext = '.%s' % self.conf.compress_type + else: + compress_ext = '' + tmp_result_name = '%s.xml%s.sqlite' % (ftype, compress_ext) tmp_result_path = os.path.join(repopath, tmp_result_name) good_name = '%s.sqlite' % ftype resultpath = os.path.join(repopath, good_name) @@ -1323,13 +1341,8 @@ class MetaDataGenerator: data.openchecksum = (sumtype, uncsum) if self.conf.unique_md_filenames: - if ftype in ('primary', 'filelists', 'other'): - compress = 'gz' - else: - compress = self.conf.compress_type - main_name = '.'.join(rpm_file.split('.')[:-1]) - res_file = '%s-%s.%s' % (csum, main_name, compress) + res_file = '%s-%s.%s' % (csum, main_name, self.conf.compress_type) orig_file = os.path.join(repopath, 
rpm_file) dest_file = os.path.join(repopath, res_file) os.rename(orig_file, dest_file)