#!/bin/env python
"""Portage File List (PFL) collector.

Collects the file lists of the installed packages that come from the
``gentoo`` repository, serialises them as an XML document and uploads the
(bzip2 compressed) document to upload.portagefilelist.de.

This script targets Python 2 (it relies on ``httplib``, ``ConfigParser`` and
on ``str`` being a byte string).  The syntax is kept forward compatible where
that is free, but the runtime semantics are those of Python 2.
"""

# portage api
from portage.dbapi.vartree import vartree
from portage.dbapi.porttree import portdbapi
from portage import config
from portage.versions import catpkgsplit
from portage import dblink

# xml
import bz2

# http
import httplib, mimetypes

import sys
import os
import io
from tempfile import mkstemp
import ConfigParser
from time import time

# proxy
import re
import commands

VERSION = '2.1'

# debug mode is switched on by the presence of a marker file in the cwd
DEBUG = os.path.exists('./debugpfl')

HOME = os.getenv('HOME')
if os.getuid() == 0:
    INFOFILE = '/var/lib/pfl/pfl.info'
else:
    # expanduser() uses $HOME when it is set and falls back to the password
    # database otherwise, so an unset $HOME no longer yields 'None/.pfl.info'
    INFOFILE = os.path.join(os.path.expanduser('~'), '.pfl.info')

UUIDFILE = '/proc/sys/kernel/random/uuid'

if DEBUG:
    import portage
    print('Portage Version:  %s' % portage.VERSION)


class fakedom:
    """Simulate the DOM model for XML to save memory.

    Requires entities to be created in the final XML order.  The typical
    call sequence is ``newelem`` -> ``newattr``* -> (``newvalue`` | ``indent``
    ... children ... ``outdent``) and finally ``close_xml``.

    Opening tags are *staged* by ``newelem`` and only written once an
    attribute, a value or a child element follows; an element that never
    receives any of those is dropped from the output.
    """

    debug = 0

    # internal flag to signify if elements attributes were written but the
    # opening tag was not finalized (no '>' written yet)
    in_attrs = 0

    # '&' has to stay first, otherwise the '&' of the other entities would
    # be escaped a second time
    xml_replace = [('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ('"', '&quot;')]

    def __init__(self, debug=0):
        """Create an empty document.

        debug -- when true the output is pretty printed (newlines and tab
                 indentation), otherwise it is written without whitespace.
        """
        self.debug = debug
        # All mutable state is per instance; as class attributes the buffer
        # and the stacks would be shared between all fakedom objects.
        # bytearray of the final xml document
        # NOTE(review): the buffer starts with a bare newline; presumably an
        # XML declaration is intended here -- confirm against the server.
        self.bytes = bytearray(b'\n')
        # names of open tags, for closing tag purposes (None marks a level
        # opened by indent() that holds no element yet)
        self.nested_elements = []
        # holding place for staged opening tags (bytearray while staged,
        # 0 once committed, None for an empty level)
        self.staging_elements = []
        self.in_attrs = 0

    def xml_escape(self, x):
        """Return x with the XML special characters replaced by entities."""
        for (k, v) in self.xml_replace:
            x = x.replace(k, v)
        return x

    def indent(self):
        """Indent one level: following elements become children."""
        if self.in_attrs:
            self.bytes += ('>\n' if self.debug else '>')
            self.in_attrs = 0
        self.nested_elements.append(None)
        self.staging_elements.append(None)

    def outdent(self):
        """Revert one indent, closing the element on top of the stack.

        A closing tag is only written when the opening tag was committed;
        a still staged element is silently discarded.
        """
        if not self.nested_elements:
            return
        name = self.nested_elements[-1]
        committed = not isinstance(self.staging_elements[-1], bytearray)
        if committed and name is not None:
            closing = '</' + name + '>'
            if self.debug:
                self.bytes += (('>\n' if self.in_attrs else '')
                               + ('\t' * (len(self.nested_elements) - 1))
                               + closing + '\n')
            else:
                self.bytes += ('>' if self.in_attrs else '') + closing
            self.in_attrs = 0
        self.nested_elements.pop()
        self.staging_elements.pop()

    def newelem(self, elem):
        """Create new element in current level, closing any other element
        in this level."""
        self.outdent()
        name = elem.encode('utf8')
        if self.debug:
            staged = bytearray(('\t' * len(self.nested_elements)) + '<' + name)
        else:
            staged = bytearray('<' + name)
        self.staging_elements.append(staged)
        self.nested_elements.append(name)

    def close_xml(self):
        """Close all elements gracefully and with proper indentation."""
        while self.nested_elements:
            self.outdent()

    def commit_prestaging(self):
        """Helper: commit all but the last staged element.

        Those elements evidently have children, so their opening tags are
        written out completely.
        """
        for i, staged in enumerate(self.staging_elements[:-1]):
            if not isinstance(staged, bytearray):
                continue
            self.bytes += staged + ('>\n' if self.debug else '>')
            self.staging_elements[i] = 0

    def newattr(self, name, value):
        """Add an attribute to the current element.

        Must not be called after the element's opening tag is fully
        committed to the bytearray; raises Exception in that case.
        """
        self.commit_prestaging()
        if len(self.staging_elements) == 0 or (not self.in_attrs and not isinstance(self.staging_elements[-1], bytearray)):
            raise Exception("newattr not called immediately after newelem or other newattr")
        self.in_attrs = 1
        if isinstance(self.staging_elements[-1], bytearray):
            # first attribute: write '<name' but leave the tag open
            self.bytes += self.staging_elements[-1]
            self.staging_elements[-1] = 0
        self.bytes += ' ' + name.encode('utf8') + '="' + self.xml_escape(value.encode('utf8')) + '"'

    def newvalue(self, value):
        """Write a standalone value between the current element's opening
        and closing tags."""
        self.commit_prestaging()
        if isinstance(self.staging_elements[-1], bytearray):
            self.bytes += self.staging_elements[-1] + ('>\n' if self.debug else '>')
            self.staging_elements[-1] = 0
        elif self.in_attrs:
            self.bytes += ('>\n' if self.debug else '>')
            self.in_attrs = 0
        self.bytes += (('\t' * len(self.staging_elements)) if self.debug else '') + self.xml_escape(value.encode('utf8')) + ('\n' if self.debug else '')


class PortageMangle(object):
    """Reads the installed package database and renders it as PFL XML."""

    _xml = None
    _xml_category = None
    _xml_package = None
    _xml_files = None
    _xml_uses = None

    _settings = None
    _portdbapi = None
    _vardbapi = None

    def __init__(self):
        """Set up the portage config/db objects and open the <pfl> root."""
        # main portage config object, lots of interresting stuff in there
        self._settings = config()
        self._settings['PORTDIR_OVERLAY'] = ''  # does not matter for me

        self._xml = fakedom(DEBUG)
        self._xml.newelem('pfl')
        self._xml.newattr('xmlns', "http://www.portagefilelist.de/xsd/collect")

        self._portdbapi = portdbapi(mysettings=self._settings)
        self._vardbapi = vartree(root=self._settings['ROOT'], settings=self._settings).dbapi

    def get_wellknown_cpvs(self, since):
        """Return ``[count, {category: {package: [version, ...]}}]``.

        Only packages installed from the 'gentoo' repository whose merge
        timestamp is not older than ``since`` are included.
        """
        # search for pkgs from known repositories
        wellknown = {}
        wellknown_count = 0

        # category, package, version of all installed packages
        for cpv in self._vardbapi.cpv_all():
            c, p, v, r = catpkgsplit(cpv)
            if r != 'r0':
                v = '%s-%s' % (v, r)

            # repository and timestamp of merge, fetched in one lookup
            # (assumes _mtime_ is numeric like `since` -- TODO confirm)
            repo, mergedstamp = self._vardbapi.aux_get(cpv, ['repository', '_mtime_'])

            if repo == 'gentoo' and mergedstamp >= since:
                wellknown.setdefault(c, {}).setdefault(p, []).append(v)
                wellknown_count = wellknown_count + 1

        return [wellknown_count, wellknown]

    def get_contents(self, c, p, v):
        """Return the contents mapping of the installed package c/p-v."""
        dbl = dblink(c, '%s-%s' % (p, v), self._settings['ROOT'], self._settings)
        return dbl.getcontents()

    def collect_into_xml(self, since, uuid):
        """Write the XML for all packages merged since ``since``.

        Returns the name of the written file, or None when there is
        nothing to collect.  In debug mode the plain XML is written to the
        current directory, otherwise a bzip2 compressed file in /tmp.
        """
        count, cpvs = self.get_wellknown_cpvs(since)

        # nothing to do
        if count == 0:
            return None

        workingon = 0
        self._xml.indent()
        for c in cpvs:
            if DEBUG and c != 'sys-kernel':
                continue

            self._xml.newelem("category")
            self._xml.newattr("name", c)
            self._xml.indent()

            for p in cpvs[c]:
                if DEBUG and p != 'gentoo-sources':
                    continue

                for v in cpvs[c][p]:
                    workingon = workingon + 1
                    print('working on (%d of %d) %s/%s-%s' % (workingon, count, c, p, v))

                    self._xml.newelem('package')

                    contents = self.get_contents(c, p, v)

                    # no files -> this package does not matter; the staged
                    # <package> tag is dropped by the next newelem/outdent
                    if len(contents) == 0:
                        continue

                    mergedstamp, use, iuse, keywords = self._vardbapi.aux_get(
                        '%s/%s-%s' % (c, p, v),
                        ['_mtime_', 'USE', 'IUSE', 'KEYWORDS'])
                    use = use.split()
                    iuse = set(iuse.split())
                    keywords = set(keywords.split())

                    us = []
                    for u in use:
                        if u in iuse:
                            us.append(u)
                        if u in keywords or '~' + u in keywords:
                            self._xml.newattr('arch', u)

                    self._xml.newattr('name', p)
                    self._xml.newattr('timestamp', str(mergedstamp))
                    self._xml.newattr('version', v)

                    self._xml.indent()
                    self._xml.newelem("files")
                    self._xml.indent()
                    for f in contents:
                        self._xml.newelem("file")
                        self._xml.newattr("type", contents[f][0])
                        self._xml.newvalue(f)
                    self._xml.outdent()  # end of <file> children

                    self._xml.newelem("uses")
                    self._xml.indent()
                    for u in us:
                        self._xml.newelem("use")
                        self._xml.newvalue(u)
                    self._xml.outdent()  # end of <use> children
                    self._xml.outdent()  # end of <package> children

            self._xml.outdent()  # end of <category> children

        self._xml.close_xml()

        if DEBUG:
            xmlfile = './%s.pfl.xml' % uuid
        else:
            xmlfile = '/tmp/%s.pfl.xml.bz2' % uuid

        print('writing xml file %s ...' % xmlfile)

        # binary mode: the payload is raw bytes / a bzip2 stream
        with open(xmlfile, 'wb') as xmlf:
            if DEBUG:
                xmlf.write(self._xml.bytes)
            else:
                xmlf.write(bz2.compress(self._xml.bytes))

        return xmlfile


class PFLConfigParser(ConfigParser.RawConfigParser):
    """RawConfigParser with a default for get() and auto-created sections."""

    def get(self, section, option, default=None):
        """Return the option value, or ``default`` when it is not set.

        Raises ConfigParser.NoOptionError when the option is missing and
        no default (None) was given.
        """
        if self.has_option(section, option):
            return ConfigParser.RawConfigParser.get(self, section, option)
        if default is not None:
            return default
        raise ConfigParser.NoOptionError(option, section)

    def set(self, section, option, value):
        """Set an option, creating the section first when necessary."""
        if not self.has_section(section):
            self.add_section(section)

        ConfigParser.RawConfigParser.set(self, section, option, value)


class HTTP(object):
    """Minimal multipart/form-data uploader (mainly by dma147)."""

    def post_multipart(self, host, port, selector, fields, files):
        """POST fields and files to http://host:port/selector.

        host, port -- target server; port is a string
        selector   -- request path
        fields     -- sequence of (name, value) form fields
        files      -- sequence of (name, filename) files to upload

        Honours $http_proxy.  Returns the response body, or the status
        message when there is no body.  Raises Exception with a user
        readable message when the request cannot be sent.
        """
        chost = host + ":" + port
        content_type, body = self.encode_multipart_formdata(fields, files)

        proxy_url = os.environ.get('http_proxy', '').strip('/')

        if proxy_url != "":
            if not re.findall(r'\b(http)', proxy_url):
                sys.stderr.write("Your http_proxy setting is wrong!\n")
                sys.stderr.write("The string \"http://\" is missed.\n")
                sys.stderr.write("Try something like \"http://your.proxy.com:port\".\n")
                sys.stderr.write("I'll continue here by adding the missed string to our\n")
                sys.stderr.write("temporary proxy-setting. You should change $http_proxy\n")
                sys.stderr.write("by yoursef.\n")
                proxy_url = "http://" + proxy_url

            # httplib splits "host:port" itself, exactly as for chost below,
            # so the environment value never has to go through a shell
            proxy_netloc = re.sub(r'\b(http:\/\/)', r'', proxy_url)

            try:
                h = httplib.HTTP(proxy_netloc)
            except Exception:
                raise Exception("Could not connect to "+chost+"! Reason unknown.")

            # a proxy needs the absolute URI; keep the port unless it is
            # the default one
            target = host if port == '80' else chost
            try:
                h.putrequest("POST", "http://" + target + selector)
            except Exception:
                raise Exception("Could not set up the put request to the server! Reason unknown.")
        else:
            try:
                h = httplib.HTTP(chost)
            except Exception:
                raise Exception("Could not connect to "+chost+"! Reason unknown.")

            try:
                h.putrequest('POST', selector)
            except Exception:
                raise Exception("Could not set up the put request to the server! Reason unknown.")

        h.putheader('Host', host)
        h.putheader('Content-Type', content_type)
        h.putheader('Content-Length', str(len(body)))
        h.putheader('Keep-Alive', '300')
        h.putheader('Connection', 'close')

        try:
            h.endheaders()
        except Exception:
            raise Exception("No internet-connection available! Please dialup.")

        try:
            h.send(body)
        except Exception:
            raise Exception("Could not send the data to the server! Reason unknown.\n")

        errcode, errmsg, headers = h.getreply()

        if h.file:
            return h.file.read()
        else:
            return errmsg

    def encode_multipart_formdata(self, fields, files):
        """Build a multipart/form-data request body.

        Returns ``(content_type, body)``; the files are read from disk.
        """
        BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
        CRLF = '\r\n'
        L = []
        for (key, value) in fields:
            L.append('--' + BOUNDARY)
            L.append('Content-Disposition: form-data; name="%s"' % key)
            L.append('')
            L.append(value)
        for (key, filename) in files:
            L.append('--' + BOUNDARY)
            L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
            L.append('Content-Type: %s' % self.get_content_type(filename))
            L.append('')
            L.append(self._read_file(filename))
        L.append('--' + BOUNDARY + '--')
        L.append('')
        body = CRLF.join(L)
        content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
        return content_type, body

    def get_content_type(self, filename):
        """Guess the MIME type from the file name."""
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    def _read_file(self, file):
        """Return the raw content of ``file``."""
        # binary mode: the upload is a bzip2 stream
        with open(file, 'rb') as f:
            return f.read()


class PFL(object):
    """Drives one collect-and-upload run and keeps track of the last run."""

    _lastrun = 0
    _config = None

    def __init__(self):
        self._read_config()

    def _finish(self, xmlfile, success=True):
        """Record a successful run in INFOFILE and remove the xml file.

        The xml file is kept in debug mode.
        """
        if success:
            if DEBUG:
                self._config.set('PFL', 'lastrun', 0)
            else:
                self._config.set('PFL', 'lastrun', int(time()))
            self._config.set('PFL', 'version', VERSION)

            with open(INFOFILE, 'w') as hconfig:
                self._config.write(hconfig)

        if xmlfile and os.path.isfile(xmlfile) and not DEBUG:
            print('deleting xml file %s ...' % xmlfile)
            os.unlink(xmlfile)

    def _read_config(self):
        """Load INFOFILE when it exists."""
        self._config = PFLConfigParser()
        if os.path.isfile(INFOFILE):
            self._config.read(INFOFILE)

    def _last_run(self):
        """Return the timestamp of the last successful run.

        Returns 0 (collect everything) when there was no run yet or when
        the last run was done by another PFL version.
        """
        if self._config.get('PFL', 'version', 'noversion') == 'noversion':
            return 0
        if self._config.get('PFL', 'version') != VERSION:
            print('new PFL version - I will collect all packages')
            return 0
        return int(self._config.get('PFL', 'lastrun', 0))

    def do_job(self):
        """Collect the package data and upload it (unless in debug mode)."""
        pm = PortageMangle()

        # use uuid
        uuid = self._config.get('PFL', 'uuid', 'nouuid')
        if uuid == 'nouuid':
            with open(UUIDFILE, 'r') as f:
                uuid = f.readline().strip()
            self._config.set('PFL', 'uuid', uuid)

        xmlfile = pm.collect_into_xml(self._last_run(), uuid)

        if xmlfile is None:
            print('nothing to collect. If this is wrong, set PFL/lastrun in %s to 0' % INFOFILE)
        elif DEBUG:
            print('should upload %s, now' % xmlfile)
        else:
            h = HTTP()
            try:
                print('uploading xml file %s ...' % xmlfile)
                serverversion = h.post_multipart('upload.portagefilelist.de', '80', '/data.php', [], [('foo', xmlfile)])
                if serverversion != VERSION:
                    sys.stderr.write('newest version of PFL is: %s\nIt is highly recommend to update to the latest PFL version!\n' % serverversion)
            except Exception as e:
                sys.stderr.write("%s\n" % e)
                self._finish(xmlfile, False)
                return

        self._finish(xmlfile, True)


pfl = PFL()
pfl.do_job()

# TODO: INFOFILE (rm tempfile, lastrun, exclude etc.), bz2, fileinfo (sym destination...)