diff -Nur --exclude=CVS --exclude '.#*' --exclude '*.pyc' createrepo.x/genpkgmetadata.py createrepo.y/genpkgmetadata.py --- createrepo.x/genpkgmetadata.py 2007-03-26 12:58:40.000000000 -0400 +++ createrepo.y/genpkgmetadata.py 2007-03-26 13:06:33.000000000 -0400 @@ -30,6 +30,7 @@ import shutil import dumpMetadata +import readMetadata from dumpMetadata import _gzipOpen __version__ = '0.4.8' @@ -61,6 +62,7 @@ -h, --help = show this help -V, --version = output version -p, --pretty = output xml files in pretty format. + --update = update existing metadata (if present) -d, --database = generate the sqlite databases. """) @@ -122,6 +124,18 @@ """all the heavy lifting for the package metadata""" # rpms we're going to be dealing with + if self.cmds['update']: + #build the paths + basefile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['primaryfile']) + flfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['filelistsfile']) + otherfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['otherfile']) + opts = { + 'verbose' : self.cmds['verbose'], + 'pkgdir' : os.path.normpath(os.path.join(self.cmds['basedir'], directory)) + } + #and scan the old repo + self.oldData = readMetadata.MetadataIndex(self.cmds['outputdir'], + basefile, flfile, otherfile, opts) files = self.getFileList(self.cmds['basedir'], directory, '.rpm') files = self.trimRpms(files) self.pkgcount = len(files) @@ -172,61 +186,76 @@ self.otherfile.write('\n' % self.pkgcount) + def _getNodes(self, file, directory, current): + basenode = None + filesnode = None + othernode = None + try: + rpmdir= os.path.join(self.cmds['basedir'], directory) + mdobj = dumpMetadata.RpmMetaData(self.ts, rpmdir, file, self.cmds) + except dumpMetadata.MDError, e: + errorprint('\n%s - %s' % (e, file)) + return None + try: + basenode = dumpMetadata.generateXML(self.basedoc, self.baseroot, self.formatns, mdobj, self.cmds['sumtype']) + except dumpMetadata.MDError, e: + errorprint(_('\nAn error occurred creating primary metadata: %s') % e) + return None + try: + filesnode = dumpMetadata.fileListXML(self.filesdoc, self.filesroot, mdobj) + except dumpMetadata.MDError, e: + errorprint(_('\nAn error occurred creating filelists: %s') % e) + return None + try: + othernode = dumpMetadata.otherXML(self.otherdoc, self.otherroot, mdobj) + except dumpMetadata.MDError, e: + errorprint(_('\nAn error occurred: %s') % e) + return None + return basenode,filesnode,othernode + def writeMetadataDocs(self, files, directory, current=0): for file in files: current+=1 - try: - rpmdir= os.path.join(self.cmds['basedir'], directory) - mdobj = dumpMetadata.RpmMetaData(self.ts, rpmdir, file, self.cmds) - if not self.cmds['quiet']: - if self.cmds['verbose']: - print '%d/%d - %s' % (current, len(files), file) - else: - sys.stdout.write('\r' + ' ' * 80) - sys.stdout.write("\r%d/%d - %s" % (current, self.pkgcount, file)) - sys.stdout.flush() - except dumpMetadata.MDError, e: - errorprint('\n%s - %s' % (e, file)) - continue - else: - try: - node = dumpMetadata.generateXML(self.basedoc, self.baseroot, self.formatns, mdobj, self.cmds['sumtype']) - except dumpMetadata.MDError, e: - errorprint(_('\nAn error occurred creating primary metadata: %s') % e) - continue + recycled = False + sep = '-' + if self.cmds['update']: + #see if we can pull the nodes from the old repo + nodes = self.oldData.getNodes(file) + if nodes is not None: + recycled = True + sep = '*' + if not recycled: + #scan rpm files + nodes = self._getNodes(file, directory, current) + if nodes is None: + return + basenode, filenode, othernode = nodes + del nodes + if not self.cmds['quiet']: + if self.cmds['verbose']: + print '%d/%d %s %s' % (current, self.pkgcount, sep, file) else: - output = node.serialize('UTF-8', self.cmds['pretty']) - self.basefile.write(output) - self.basefile.write('\n') - node.unlinkNode() - node.freeNode() - del node + sys.stdout.write('\r' + ' ' * 80) + sys.stdout.write("\r%d/%d %s %s" % (current, self.pkgcount, sep, file)) + sys.stdout.flush() + if basenode is None: + continue - try: - node = dumpMetadata.fileListXML(self.filesdoc, self.filesroot, mdobj) - except dumpMetadata.MDError, e: - errorprint(_('\nAn error occurred creating filelists: %s') % e) - continue - else: - output = node.serialize('UTF-8', self.cmds['pretty']) - self.flfile.write(output) - self.flfile.write('\n') + for node, outfile in ((basenode,self.basefile), + (filenode,self.flfile), + (othernode,self.otherfile)): + if node is None: + break + output = node.serialize('UTF-8', self.cmds['pretty']) + outfile.write(output) + outfile.write('\n') + if not recycled: + #recycled nodes can be multiply referenced node.unlinkNode() node.freeNode() - del node + if recycled: + self.oldData.freeNodes(file) - try: - node = dumpMetadata.otherXML(self.otherdoc, self.otherroot, mdobj) - except dumpMetadata.MDError, e: - errorprint(_('\nAn error occurred: %s') % e) - continue - else: - output = node.serialize('UTF-8', self.cmds['pretty']) - self.otherfile.write(output) - self.otherfile.write('\n') - node.unlinkNode() - node.freeNode() - del node return current @@ -377,6 +406,7 @@ cmds['checkts'] = False cmds['mdtimestamp'] = 0 cmds['split'] = False + cmds['update'] = False cmds['outputdir'] = "" cmds['database'] = False cmds['file-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$'] @@ -387,7 +417,7 @@ 'quiet', 'verbose', 'cachedir=', 'basedir=', 'baseurl=', 'groupfile=', 'checksum=', 'version', 'pretty', 'split', 'outputdir=', - 'noepoch', 'checkts', 'database']) + 'noepoch', 'checkts', 'database', 'update']) except getopt.error, e: errorprint(_('Options Error: %s.') % e) usage() @@ -445,6 +475,8 @@ elif arg in ['-c', '--cachedir']: cmds['cache'] = True cmds['cachedir'] = a + elif arg == '--update': + cmds['update'] = True elif arg in ['-C', '--checkts']: cmds['checkts'] = True elif arg == '--basedir': diff -Nur --exclude=CVS --exclude '.#*' --exclude '*.pyc' createrepo.x/Makefile createrepo.y/Makefile --- createrepo.x/Makefile 2007-03-26 12:58:40.000000000 -0400 +++ createrepo.y/Makefile 2007-03-26 13:05:15.000000000 -0400 @@ -41,6 +41,7 @@ MODULES = $(srcdir)/genpkgmetadata.py \ $(srcdir)/dumpMetadata.py \ + $(srcdir)/readMetadata.py \ $(srcdir)/modifyrepo.py .SUFFIXES: .py .pyc diff -Nur --exclude=CVS --exclude '.#*' --exclude '*.pyc' createrepo.x/readMetadata.py createrepo.y/readMetadata.py --- createrepo.x/readMetadata.py 1969-12-31 19:00:00.000000000 -0500 +++ createrepo.y/readMetadata.py 2006-09-21 15:12:37.000000000 -0400 @@ -0,0 +1,199 @@ +#!/usr/bin/python -t + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Copyright 2006 Red Hat + +import os +import sys +import libxml2 +import pprint +import stat + +def errorprint(stuff): + print >> sys.stderr, stuff + +def _(args): + """Stub function for translation""" + return args + +class MetadataIndex(object): + + def __init__(self, outputdir, basefile, filelistfile, otherfile, opts=None): + if opts is None: + opts = {} + self.opts = opts + self.outputdir = outputdir + self.files = {'base' : basefile, + 'filelist' : filelistfile, + 'other' : otherfile} + self.scan() + + def scan(self): + """Read in and index old repo data""" + self.basenodes = {} + self.filesnodes = {} + self.othernodes = {} + self.pkg_ids = {} + if self.opts.get('verbose'): + print _("Scanning old repo data") + for file in self.files.values(): + if not os.path.exists(file): + #cannot scan + errorprint(_("Previous repo file missing: %s") % file) + return + root = libxml2.parseFile(self.files['base']).getRootElement() + self._scanPackageNodes(root, self._handleBase) + if self.opts.get('verbose'): + print _("Indexed %i base nodes" % len(self.basenodes)) + root = libxml2.parseFile(self.files['filelist']).getRootElement() + self._scanPackageNodes(root, self._handleFiles) + if self.opts.get('verbose'): + print _("Indexed %i filelist nodes" % len(self.filesnodes)) + root = libxml2.parseFile(self.files['other']).getRootElement() + self._scanPackageNodes(root, self._handleOther) + if self.opts.get('verbose'): + print _("Indexed %i other nodes" % len(self.othernodes)) + #reverse index pkg ids to track references + self.pkgrefs = {} + for relpath, pkgid in self.pkg_ids.iteritems(): + self.pkgrefs.setdefault(pkgid,[]).append(relpath) + + def _scanPackageNodes(self, root, handler): + node = root.children + while node is not None: + if node.type != "element": + node = node.next + continue + if node.name == "package": + handler(node) + node = node.next + + def _handleBase(self, node): + top = node + node = node.children + pkgid = None + mtime = None + size = None + relpath = None + while node is not None: + if node.type != "element": + node = node.next + continue + if node.name == "checksum": + pkgid = node.content + elif node.name == "time": + mtime = int(node.prop('file')) + elif node.name == "size": + size = int(node.prop('package')) + elif node.name == "location": + relpath = node.prop('href') + node = node.next + if relpath is None: + print _("Incomplete data for node") + return + if pkgid is None: + print _("pkgid missing for %s") % relpath + return + if mtime is None: + print _("mtime missing for %s") % relpath + return + if size is None: + print _("size missing for %s") % relpath + return + filepath = os.path.join(self.opts['pkgdir'], relpath) + try: + st = os.stat(filepath) + except OSError: + #file missing -- ignore + return + if not stat.S_ISREG(st.st_mode): + #ignore non files + return + #check size and mtime + if st.st_size != size: + if self.opts.get('verbose'): + print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath) + return + if st.st_mtime != mtime: + if self.opts.get('verbose'): + print _("Modification time changed for %s") % filepath + return + #otherwise we index + self.basenodes[relpath] = top + self.pkg_ids[relpath] = pkgid + + def _handleFiles(self, node): + pkgid = node.prop('pkgid') + if pkgid: + self.filesnodes[pkgid] = node + + def _handleOther(self, node): + pkgid = node.prop('pkgid') + if pkgid: + self.othernodes[pkgid] = node + + def getNodes(self, relpath): + """Return base, filelist, and other nodes for file, if they exist + + Returns a tuple of nodes, or None if not found + """ + bnode = self.basenodes.get(relpath,None) + if bnode is None: + return None + pkgid = self.pkg_ids.get(relpath,None) + if pkgid is None: + print _("No pkgid found for: %s") % relpath + return None + fnode = self.filesnodes.get(pkgid,None) + if fnode is None: + return None + onode = self.othernodes.get(pkgid,None) + if onode is None: + return None + return bnode, fnode, onode + + def freeNodes(self,relpath): + #causing problems + """Free up nodes corresponding to file, if possible""" + bnode = self.basenodes.get(relpath,None) + if bnode is None: + print "Missing node for %s" % relpath + return + bnode.unlinkNode() + bnode.freeNode() + del self.basenodes[relpath] + pkgid = self.pkg_ids.get(relpath,None) + if pkgid is None: + print _("No pkgid found for: %s") % relpath + return None + del self.pkg_ids[relpath] + dups = self.pkgrefs.get(pkgid) + dups.remove(relpath) + if len(dups): + #still referenced + return + del self.pkgrefs[pkgid] + for nodes in self.filesnodes, self.othernodes: + node = nodes.get(pkgid) + if node is not None: + node.unlinkNode() + node.freeNode() + del nodes[pkgid] + + +if __name__ == "__main__": + #test code - attempts to read a repo in working directory + idx = MetadataIndex(".", "repodata/primary.xml.gz", "repodata/filelists.xml.gz", + "repodata/other.xml.gz", {'verbose':1})