#!/usr/bin/env python
# Original thanks to David Fraser
# Caolan McNamara
"""Print the cvs diff of a child workspace (CWS) registered in EIS.

The child workspace is looked up in the EIS web application, the master
workspace name and the module list are scraped from its CWS page, and
``cvs rdiff`` is run for every module between the ``<tag>_ANCHOR`` tag and
the workspace tag.

Usage: oooextractcws childworkspace [cvsserver]
"""

import os
import subprocess
import sys

try:
    # Python 2 module names, which this script was originally written against.
    import urllib2
    import cookielib
    import cgi  # not used below; kept so the Python 2 import list is unchanged
    from HTMLParser import HTMLParser
    from httplib import HTTPConnection
    from urllib import quote
    from urlparse import parse_qs
except ImportError:
    # Python 3 homes of the same APIs, bound to the names used below.
    import urllib.request as urllib2
    import http.cookiejar as cookielib
    from html.parser import HTMLParser
    from http.client import HTTPConnection
    from urllib.parse import parse_qs, quote


def _to_text(raw):
    """Return *raw* as the interpreter's native ``str`` type.

    On Python 2 network reads already yield ``str`` and are returned as is.
    On Python 3 they yield ``bytes``, which the string handling below cannot
    consume, so they are decoded here.
    """
    if isinstance(raw, str):
        return raw
    # NOTE(review): the page encoding is not known from this script; UTF-8
    # with replacement keeps the ASCII markup that is parsed intact.
    return raw.decode('utf-8', 'replace')


class cws:
    """Plain record holding what was scraped for one child workspace.

    modules -- list of module names found on the CWS page
    master  -- master workspace name found on the CWS page
    """

    def __init__(self, modules, master):
        self.modules = modules
        self.master = master


class EISScraper(HTMLParser):
    """HTML parser that pulls the master name and module list from a CWS page.

    After feeding a page:

    master  -- taken from the text of the first <h3>: the second line of that
               text, up to the first '/', stripped ('' if no <h3> was seen)
    modules -- dict whose keys are the stripped text of the first <td> of
               each <tr> inside the <tbody> that follows an <h4> whose text
               contains 'Modules'
    """

    # Parser states.  The numeric values are the ones the parser has always
    # stored in ``self.state``.
    IDLE = -1          # nothing of interest is open
    IN_TITLE = 0       # an <h3> was opened while master was still unknown
    IN_HEADING = 1     # an <h4> was opened; its text decides what follows
    MODULES_FOUND = 2  # the 'Modules' heading was seen; waiting for <tbody>
    IN_TABLE = 3       # inside the modules <tbody>, between rows / cells
    IN_ROW = 4         # inside a <tr>; the next <td> is the module cell
    IN_CELL = 5        # inside that <td>; the next text chunk is the name

    def __init__(self):
        HTMLParser.__init__(self)
        self.state = self.IDLE
        self.modules = {}
        self.master = ''

    def handle_starttag(self, tag, attrs):
        if tag == 'h3' and self.master == '':
            self.state = self.IN_TITLE
        elif tag == 'h4':
            # Any <h4> restarts heading detection, whatever the prior state.
            self.state = self.IN_HEADING
        elif tag == 'tbody' and self.state == self.MODULES_FOUND:
            self.state = self.IN_TABLE
        elif tag == 'tr' and self.state == self.IN_TABLE:
            self.state = self.IN_ROW
        elif tag == 'td' and self.state == self.IN_ROW:
            self.state = self.IN_CELL

    def handle_data(self, data):
        if self.state == self.IN_TITLE and self.master == '':
            # The name is expected on the second line of the heading text,
            # before the first '/'.  Text without a newline raises
            # IndexError here, exactly as it always did.
            self.master = data.split('\n')[1].split('/')[0].strip()
        elif self.state == self.IN_HEADING and 'Modules' in data:
            self.state = self.MODULES_FOUND
        elif self.state == self.IN_CELL:
            self.modules[data.strip()] = True
            # Back to table level: the remaining cells of this row are
            # skipped because <td> is only honoured in the IN_ROW state.
            self.state = self.IN_TABLE

    def handle_endtag(self, tag):
        if tag == 'h4' and self.state == self.IN_HEADING:
            self.state = self.IDLE
        elif tag == 'tr' and self.state == self.IN_ROW:
            self.state = self.IN_TABLE
        elif tag == 'td' and self.state == self.IN_CELL:
            self.state = self.IN_TABLE
        elif tag == 'table' and self.state == self.IN_TABLE:
            self.state = self.IDLE


class EIS:
    """Small client for the EIS web application.

    Creating an instance installs a process-wide urllib opener that carries
    the cookie jar, performs a guest logon and saves the cookies to
    *cookiefile*.  Pages fetched through ``cacheurl`` are kept in
    ``self.cache`` keyed by URL.
    """

    def __init__(self, cookiefile="eis.lwp"):
        self.cookiefile = cookiefile
        self.cookiejar = cookielib.LWPCookieJar()
        if os.path.isfile(self.cookiefile):
            self.cookiejar.load(self.cookiefile)
        opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookiejar))
        urllib2.install_opener(opener)
        self.cache = {}
        self.login()

    def login(self):
        """Fetch the guest logon page and save the resulting cookies."""
        urllib2.urlopen(
            "http://eis.services.openoffice.org/EIS2/GuestLogon").read()
        self.cookiejar.save(self.cookiefile)

    def cacheurl(self, url):
        """Return the body of *url*, fetching it at most once per instance.

        An HTTP 401 triggers one new logon followed by one retry; any other
        HTTP error, or a failure of the retry, propagates to the caller.
        """
        if url in self.cache:
            return self.cache[url]
        try:
            contents = urllib2.urlopen(url).read()
        except urllib2.HTTPError as e:
            if e.code != 401:
                raise
            self.login()
            contents = urllib2.urlopen(url).read()
        contents = _to_text(contents)
        self.cache[url] = contents
        return contents

    def findcws(self, cws):
        """Return the numeric EIS id for the workspace named *cws*, or None.

        The search result page is scanned line by line for links of the form
        'cws.ShowCWS?...'; the ``Id`` query parameter of the last such link
        is returned.  Note that the parameter name hides the module-level
        ``cws`` class inside this method.
        """
        thiscwsid = None
        # The name is percent-encoded so that characters such as ' ' or '&'
        # cannot break or extend the query string.
        searchurl = (
            "http://eis.services.openoffice.org/EIS2/cws.SearchCWS"
            "?DATE_NULL_Integrated_After=&DATE_NULL_DueDateBefore="
            "&INT_NULL_Priority=&Name=" + quote(cws, safe='') +
            "&SRC_Step=Search&INT_NULL_IsHelpRelevant=&RSV_NoWait=true"
            "&DATE_NULL_DueDateAfter=&TaskId=&DATE_NULL_Integrated_Before="
            "&INT_NULL_IsUIRelevant=")
        milestoneresults = self.cacheurl(searchurl)
        startmark, endmark = "'cws.ShowCWS?", "'"
        for line in milestoneresults.replace("\r", "").split("\n"):
            # cws.ShowCWS?Path=SRC680%2Fm54%2Fdba15&Id=1431
            if startmark in line:
                # partition() keeps the whole remainder when the closing
                # quote is missing instead of chopping its last character.
                cwsargs = line.partition(startmark)[2].partition(endmark)[0]
                thiscwsid = int(parse_qs(cwsargs)["Id"][0])
        return thiscwsid

    def getcwsid(self, cwsname):
        """Return the EIS id of *cwsname*; raise ValueError if none is found."""
        somecwsid = self.findcws(cwsname)
        if somecwsid is not None:
            return somecwsid
        raise ValueError("no id found for cws %s" % cwsname)

    def getcwsurl(self, cwsname):
        """Return the (cached) contents of the CWS page for *cwsname*."""
        cwsid = self.getcwsid(cwsname)
        return self.cacheurl(
            "http://tools.services.openoffice.org/EIS2/cws.ShowCWS"
            "?Id=%d&Section=All" % cwsid)

    def getcwsdetails(self, cwsname):
        """Fetch and scrape the CWS page for *cwsname*; return a ``cws``.

        The page is requested over a plain HTTPConnection, i.e. without the
        cookie jar and without going through ``self.cache``.
        """
        cwsid = self.getcwsid(cwsname)
        conn = HTTPConnection("tools.services.openoffice.org")
        try:
            conn.request("GET", "/EIS2/cws.ShowCWS?Id=%d&Section=All" % cwsid)
            page = _to_text(conn.getresponse().read())
        finally:
            # Release the socket even if the request or the read fails.
            conn.close()
        scraper = EISScraper()
        scraper.feed(page)
        scraper.close()
        return cws(list(scraper.modules), scraper.master)


def main(argv):
    """Run the tool for the command line *argv* (``argv[0]`` is ignored).

    With fewer than two entries the usage text is printed and nothing else
    happens.  Otherwise ``argv[1]`` is the child workspace and the optional
    ``argv[2]`` the cvs root to diff against.
    """
    if len(argv) < 2:
        print('Usage: oooextractcws childworkspace [cvsserver]')
        print('e.g. oooextractcws jaxpapi ' +
              ':pserver:cmc@localhost:2401/shared/data/helm/cvs/repository')
        return

    if len(argv) > 2:
        cvsserver = argv[2]
    else:
        cvsserver = ':pserver:anoncvs@anoncvs.services.openoffice.org:2401/cvs'
    childws = argv[1]

    info = EIS().getcwsdetails(childws)
    masterws = info.master

    lowertag = 'cws' + '_' + masterws.lower() + '_' + childws
    uppertag = lowertag.upper() + '_ANCHOR'

    for module in info.modules:
        # An argument list (no shell) means neither the command line values
        # nor the scraped module names can be interpreted as shell syntax.
        command = ['cvs', '-q', '-d' + cvsserver, 'rdiff', '-kk', '-u',
                   '-r' + uppertag, '-r' + lowertag, module]
        try:
            subprocess.call(command)
        except OSError as err:
            # cvs itself could not be started, so no later module can work.
            sys.stderr.write('cannot run cvs: %s\n' % err)
            break


if __name__ == '__main__':
    main(sys.argv)