#!/bin/env python
"""Guess a Fedora-style name/version/release from upstream tarball names.

An upstream file name such as ``foo-1.1-beta1.zip`` is split into a name and
a version string; the version string is cut into typed chunks (text, number,
date) and reassembled as ``version-release``, with pre-release material moved
into a ``0.1.<...>`` release and post-release material into a ``1.<...>``
release.  Helpers for taking apart ``.src.rpm`` file names and comparing
version/release pairs are included as well.
"""

import sys
import re
import string

seps = ['-', '_', '.']
debug = 0

# Chunk kinds stored as the second element of every (text, kind) chunk.
_TEXT = 0
_NUMBER = 1
_DATE = 2


def _debug(*parts):
    """Print the space-joined parts when the module-level ``debug`` is set."""
    if debug:
        # A single pre-joined string prints identically on Python 2 and 3.
        print(' '.join(str(part) for part in parts))


#define the split between name and version
#as where the first occurrence of a
#-, or _, or . is followed by a number
def is_name_version_split_loc(tarball, i):
    """Return True if ``tarball[i]`` is a separator followed by a digit.

    A separator in the last position has nothing after it and is therefore
    never a split location (previously this raised IndexError).
    """
    return (tarball[i] in seps
            and i + 1 < len(tarball)
            and tarball[i + 1].isdigit())


def strip_trailing_sep(text):
    """Return ``text`` with every trailing separator character removed."""
    return text.rstrip(''.join(seps))


def is_simple_pre_nonnumeric(nonnumeric):
    """Return True if the text is exactly rc/beta/alpha/dev (any case)."""
    return nonnumeric.lower() in ('rc', 'beta', 'alpha', 'dev')


def is_pre_nonnumeric(nonnumeric):
    """Return True if the text marks a pre-release.

    That is either a bare rc/beta/alpha/dev, or one of the known prefixes
    followed by something ``int()`` accepts (``rc2``, ``test3``, ``d01``).
    A bare ``test`` or ``pre`` with no number does not qualify.
    """
    lowered = nonnumeric.lower()
    if is_simple_pre_nonnumeric(lowered):
        return True
    for pre in ('rc', 'alpha', 'beta', 'test', 'pre', 'd0', 'dev'):
        if not lowered.startswith(pre):
            continue
        try:
            int(lowered[len(pre):])
        except ValueError:
            continue
        return True
    return False


def normalize_known_nonnumerics(nonnumeric):
    """Return the canonical spelling of a known tag, else the text unchanged.

    ``alpha``/``beta`` are lower-cased and ``+cvs``/``+svn`` lose their plus
    sign; anything else is returned exactly as given.
    """
    lowered = nonnumeric.lower()
    if lowered in ('alpha', 'beta'):
        return lowered
    if lowered in ('+cvs', 'cvs'):
        return 'cvs'
    if lowered in ('+svn', 'svn'):
        return 'svn'
    return nonnumeric


def is_okver_nonnumeric(nonnumeric):
    """Return True if the text contains exactly one non-digit character."""
    return sum(1 for char in nonnumeric if not char.isdigit()) == 1


def is_post_nonnumeric(nonnumeric):
    """Return True if the text is ``+cvs`` or ``+svn`` (any case)."""
    return nonnumeric.lower() in ('+cvs', '+svn')


def has_ambiguous_nonnumeric(chunks):
    """Return True if some text chunk is neither ok-in-version, pre nor post."""
    for chunk in chunks:
        if chunk[1] != _TEXT:
            continue
        text = chunk[0]
        if (is_okver_nonnumeric(text) or is_post_nonnumeric(text)
                or is_pre_nonnumeric(text)):
            continue
        return True
    return False


def easydateable(version):
    """Return True for an empty version or the placeholders ``0``/``0.0``."""
    return len(version) == 0 or version == "0" or version == "0.0"


def isrunawayprelease(version, date, nonnumeric):
    """Return a truthy value for a dated ``beta`` of version ``0`` or ``0.1``.

    The result is the raw value of the ``and`` chain, not necessarily a bool.
    """
    return ((version == '0.1' or version == '0')
            and nonnumeric and nonnumeric.lower() == 'beta' and date)


class NVRException(Exception):
    """Raised when a file name cannot be split into name and version."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class PrePostError(Exception):
    """Raised when pre- vs. post-release status cannot be guessed."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


def is_pre_vr(vr):
    """Return True if ``vr`` is one of the placeholder versions 0, 0.0, 0.1."""
    return vr == '0' or vr == '0.0' or vr == '0.1'


def has_date(chunks):
    """Return True if any chunk is a date chunk."""
    return any(chunk[1] == _DATE for chunk in chunks)


def ishexdigit(char):
    """Return True if ``char`` occurs in ``string.hexdigits``."""
    return char in string.hexdigits


#allow 1.1f but enforce 1.1.foo
#don't allow d01/d02
def issimplehexstring(strng):
    """Return True if ``strng`` is at most two characters, all hex digits.

    The original compared ``filter(...)`` with the string, which only works
    on Python 2 where ``filter`` on a str returns a str.
    """
    if len(strng) > 2:
        return False
    return all(ishexdigit(char) for char in strng)


def first_nonsimple_index(chunks, ispost=-1):
    """Return how many leading chunks belong to the base version.

    Counting stops at the first date chunk, or at the first text chunk that
    is not a short hex string (any text chunk at all when ``ispost`` is 0).
    """
    lastchunk = 0
    for i, chunk in enumerate(chunks):
        if chunk[1] == _DATE:
            break
        if chunk[1] == _TEXT and (not issimplehexstring(chunk[0])
                                  or ispost == 0):
            break
        lastchunk = i + 1
    return lastchunk


def fix_broken_ver(vr):
    """Repair an empty or oddly zero-prefixed base version.

    ``''`` becomes ``'0'``; ``'01'`` becomes ``'0.1'``; a leading zero ahead
    of a later dot (``'01.2'``) is dropped; everything else is unchanged.
    """
    if len(vr) == 0:
        return '0'
    if len(vr) > 1 and vr[0] == '0':
        pos = vr.find('.')
        if pos == -1:
            return vr[0] + '.' + vr[1:]
        if pos != 1:
            return vr[1:]
    return vr


def generate_fedora_vr(chunks, ispost=-1):
    """Assemble a ``version-release`` string from typed chunks.

    ``chunks`` is a sequence of ``(text, kind)`` pairs as produced by
    split_version_date_nonnumeric.  ``ispost`` is -1 (unknown), 0
    (pre-release) or 1 (post-release); pre/post markers found in the chunks
    override the value passed in.

    Raises PrePostError when the status is still unknown and the chunks
    contain ambiguous text, or a date next to a non-placeholder version.
    """
    # The date/text reordering below swaps elements in place; work on a copy
    # so the caller's list is left untouched.
    chunks = list(chunks)

    lastchunk = first_nonsimple_index(chunks, ispost)
    _debug('pre/post status is', ispost, 'lastchunk is', lastchunk)

    vr = ''
    for i in range(lastchunk):
        if len(vr) > 0:
            #allow 1.1f but enforce 1.1.foo
            glued = (i > 0 and chunks[i][1] == _TEXT
                     and (len(chunks[i][0]) == 1
                          or issimplehexstring(chunks[i][0]))
                     and chunks[i - 1][1] == _NUMBER)
            if not glued:
                vr = vr + '.'
        vr = vr + chunks[i][0]
    vr = fix_broken_ver(vr)
    _debug('base vr is', vr)
    _debug('lastchunk is', lastchunk)

    if ispost == -1 and is_pre_vr(vr) and has_date(chunks):
        # A dated placeholder version: drop bare rc/beta/alpha/dev markers
        # and collapse the base version to '0' so the date carries the
        # ordering.
        chunks = [chunk for chunk in chunks
                  if not (chunk[1] == _TEXT
                          and is_simple_pre_nonnumeric(chunk[0]))]
        lastchunk = first_nonsimple_index(chunks, ispost)
        vr = '0'

    # Explicit markers in the text win over the caller's hint; the last
    # marker found decides.
    for chunk in chunks:
        if chunk[1] == _TEXT:
            if is_pre_nonnumeric(chunk[0]):
                ispost = 0
            elif is_post_nonnumeric(chunk[0]):
                ispost = 1
    _debug('pre/post status is', ispost)

    if has_ambiguous_nonnumeric(chunks) and ispost not in [1, 0]:
        raise PrePostError("Can't guess if pre/post due to nonnumeric text in" + str(chunks))
    if has_date(chunks) and ispost not in [1, 0] and not is_pre_vr(vr):
        raise PrePostError("Can't guess if pre/post due to a date")

    if ispost == 0 and len(chunks) > lastchunk:
        vr = vr + '-' + '0.1'
    elif ispost == 1:
        vr = vr + '-' + '1'
    _debug('vr is now', vr)

    # Move each date in front of a text chunk that directly precedes it.
    for i in range(lastchunk + 1, len(chunks)):
        if chunks[i][1] == _DATE and chunks[i - 1][1] == _TEXT:
            chunks[i - 1], chunks[i] = chunks[i], chunks[i - 1]

    for i in range(lastchunk, len(chunks)):
        # Text directly after a date is glued on without a dot.  The i > 0
        # guard stops chunks[i - 1] from wrapping round to the last chunk.
        follows_date = (i > 0 and chunks[i - 1][1] == _DATE
                        and chunks[i][1] == _TEXT)
        if len(vr) > 0 and not follows_date:
            vr = vr + '.'
        vr = vr + normalize_known_nonnumerics(chunks[i][0])
    _debug('and vr is now', vr)

    if vr.find('-') == -1:
        vr = vr + '-1'
    _debug('vr is finally', vr)
    return vr


def determine_type(chunk, isnumber):
    """Return ``(text, kind)`` for one raw chunk.

    Any run of exactly eight digits is treated as a date; if it looks like
    DDMMYYYY it is reordered to YYYYMMDD.  Other chunks are stripped of
    surrounding whitespace and typed as number or text from ``isnumber``.
    """
    number = int(isnumber)
    if len(chunk) == 8 and chunk.isdigit():
        #make an effort to fix a reversed date
        if int(chunk[0:4]) < 1900 and int(chunk[4:8]) > 1900:
            chunk = chunk[4:8] + chunk[2:4] + chunk[0:2]
        number = _DATE
    else:
        chunk = chunk.strip()
    return chunk, number


def split_version_date_nonnumeric(version):
    """Split a version string into a list of ``(text, kind)`` chunks.

    ``-`` and ``_`` are treated as ``.``.  The first date-like run
    (YYYY[sep]MM[sep]DD) is squeezed into a single eight-digit chunk, after
    zero-padding a single-digit day when one is followed by a non-digit.
    """
    version = version.replace('-', '.').replace('_', '.')

    # Zero-pad a single-digit day so the date pattern below can match it.
    m = re.match(r'.*?(\d\d\d\d[-_\.]\d\d[-_\.]\d)[^\d]', version)
    if m:
        found = m.group(1)
        version = (version[:m.start(1)] + found[0:-1] + '0' + found[-1]
                   + version[m.end(1):])

    m = re.match(r'.*?(\d\d\d\d[-_\.]?\d\d[-_\.]?\d\d)', version)
    if m:
        date = m.group(1)
        for sep in seps:
            date = date.replace(sep, '')
        pre = version[:m.start(1)]
        if len(pre) > 1 and pre[-1] != '.':
            pre = pre + '.'
        post = version[m.end(1):]
        if len(post) > 1 and post[0] != '.':
            post = '.' + post
        version = pre + date + post

    chunks = []
    chunk = ''
    isnumber = True
    for c in version:
        if chunk == '':
            isnumber = c.isdigit()
        if isnumber and c.isdigit():
            chunk = chunk + c
        elif not isnumber and c != '.':
            # A text chunk swallows everything up to the next dot.
            chunk = chunk + c
        elif c == '.':
            chunks.append(determine_type(chunk, isnumber))
            chunk = ''
        else:
            # A non-digit ends a numeric chunk and starts a text chunk.
            chunks.append(determine_type(chunk, isnumber))
            chunk = c
            isnumber = c.isdigit()
    chunks.append(determine_type(chunk, isnumber))

    _debug(chunks)
    return chunks


def split_name_version(tarball, prefix):
    """Return ``(name, version)`` for a tarball name, or ``(None, None)``.

    The name must start with ``prefix`` and end with a known suffix.  The
    name runs up to the first separator followed by a digit, searching from
    the end of the prefix; the version is whatever lies between that
    separator and the suffix.
    """
    version = None
    name = None

    # Order matters: the first suffix that matches wins.
    suffixes = [
        '_source.tar.bz2', '_source.tar.gz', '_source.tar.zip',
        '_src.bz2', '_src.gz', '_src.zip',
        '.tar.bz2', '.tar.gz', '.tgz',
        '-pack.zip', '.zip',
        '-fx+tb+sm.xpi', '-fx+zm+tb.xpi', '-fx+tb.xpi', '-tb+fx+sm.xpi',
        '-fx.xpi', '.xpi',
        '.oxt',
    ]

    if tarball.startswith(prefix):
        for suffix in suffixes:
            if not tarball.endswith(suffix):
                continue
            end = len(tarball) - len(suffix)
            split = end
            for i in range(len(prefix), end):
                if is_name_version_split_loc(tarball, i):
                    split = i
                    break
            name = prefix + tarball[len(prefix):split]
            version = tarball[len(name) + 1:end]
            break

    return name, version


def make_nvr(origname, prefix='', ispost=-1):
    """Return ``(name, version, release)`` guessed from an upstream file name.

    Raises NVRException if the name has no recognised suffix or prefix, and
    PrePostError if the pre/post-release status cannot be determined.
    """
    _debug('origname is', origname)
    name, version = split_name_version(origname, prefix)
    _debug('name', 'version', name, version)
    if name is None or version is None:
        raise NVRException('Unrecognized format for ' + origname)
    chunks = split_version_date_nonnumeric(version)
    _debug('chunks', chunks)
    vr = generate_fedora_vr(chunks, ispost).split('-')
    _debug('nvr', vr)
    return name, vr[0], vr[1]


def get_nvr(srcrpm, ispost=-1):
    """Return ``(name, version, release)`` from a ``.src.rpm`` file name.

    Returns None when the name does not end in ``.src.rpm`` or does not
    contain the two dashes that separate name, version and release.  With
    ``ispost == 1`` the first dot-separated component of the release is
    replaced by ``1``.
    """
    suffix = '.src.rpm'
    if not srcrpm.endswith(suffix):
        return None

    parts = srcrpm[:-len(suffix)].rsplit('-', 2)
    if len(parts) != 3:
        return None
    name, version, release = parts

    if ispost == 1:
        postbit = release.find('.')
        # Without a dot the whole release is the first component.
        release = '1' if postbit == -1 else '1' + release[postbit:]

    _debug('making', name, version, release, 'from', srcrpm)
    return name, version, release


def scrub_disttag(release):
    """Return ``release`` with the known ``.fcN`` dist tags removed."""
    knowntags = [
        '.fc7', '.fc8', '.fc9', '.fc10', '.fc11', '.fc12', '.fc13',
        '.fc14', '.fc15', '.fc16', '.fc17',
    ]
    for tag in knowntags:
        release = release.replace(tag, '')
    return release


def undo_prerelease(release):
    """Reset the counter of a ``0.N.rest`` pre-release to ``0.1.rest``.

    Releases that do not start with ``0`` or have fewer than three
    dot-separated components are returned unchanged.
    """
    chunks = release.split('.')
    # The original tested ``chunks > 2`` (list against int) where the
    # number of components was meant.
    if len(chunks) > 2 and chunks[0] == '0':
        release = '.'.join([chunks[0], '1'] + chunks[2:])
    return release


def rpmsplit(instr):
    """Split a string into maximal runs of digits and of letters.

    Every non-alphanumeric character ends the current run, so consecutive
    separators yield empty strings.
    """
    chunks = []
    chunk = ''
    mode = False
    for c in instr:
        if chunk == '':
            mode = c.isdigit()
        if not c.isalnum():
            chunks.append(chunk)
            chunk = ''
        elif mode == c.isdigit():
            chunk = chunk + c
        else:
            chunks.append(chunk)
            chunk = c
            mode = c.isdigit()
    chunks.append(chunk)
    return chunks


def compare_key(a, b):
    """Compare two version or release strings; return 1, -1 or 0.

    The strings are split with rpmsplit and compared piece by piece:
    numbers numerically, text lexically, and a number beats text.  If all
    shared pieces are equal, the string with more pieces is the greater.
    """
    abits = rpmsplit(a)
    bbits = rpmsplit(b)

    for abit, bbit in zip(abits, bbits):
        try:
            anum = int(abit)
            aisnum = True
        except ValueError:
            anum = None
            aisnum = False
        try:
            bnum = int(bbit)
            bisnum = True
        except ValueError:
            bnum = None
            bisnum = False

        if aisnum and bisnum:
            if anum != bnum:
                return 1 if anum > bnum else -1
        elif not aisnum and not bisnum:
            if abit != bbit:
                return 1 if abit > bbit else -1
        else:
            return 1 if aisnum else -1

    if len(abits) > len(bbits):
        return 1
    if len(bbits) > len(abits):
        return -1
    return 0


def compare_vr(aver, bver, arel, brel):
    """Compare by version first, then by release; return 1, -1 or 0."""
    ret = compare_key(aver, bver)
    if ret == 0:
        ret = compare_key(arel, brel)
    return ret


if __name__ == "__main__":
    upstreamnames = [
        'hunspell-1.1.tar.gz',
        'italiano_2_4_2007_09_01.zip',
        'upper_sorbian_spelling_dictionary-0.0.20060327.2-fx+tb+sm.xpi',
        'foo-1.1-beta1.zip',
        'foo-2009.10.10-beta-1.oxt',
    ]
    #'th_ro_RO.3.3-test3.zip'

    # The comparison below uses the last upstream name that parsed.
    version = release = None
    for origname in upstreamnames:
        try:
            name, version, release = make_nvr(origname)
        except PrePostError:
            continue
        print('%s %s %s' % (name, version, release))

    srcrpms = [
        'hunspell-it-2.4-0.99.20070901.fc8.src.rpm',
    ]
    for rpm in srcrpms:
        rpmname, rpmversion, rpmrelease = get_nvr(rpm)
        rpmrelease = scrub_disttag(rpmrelease)
        rpmrelease = undo_prerelease(rpmrelease)
        print('%s %s %s' % (rpmname, rpmversion, rpmrelease))
        if version is not None:
            print(compare_vr(rpmversion, version, rpmrelease, release))