#! /usr/bin/python -tt # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Library General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # # Copyright Red Hat Inc. 2008 # # Author: James Antill # Based on: http://avahi.org/wiki/PythonPublishExample # http://users.ecs.soton.ac.uk/rds204/yum-avahi/repos_announce.py import dbus import gobject import avahi from dbus.mainloop.glib import DBusGMainLoop import os import sys import glob import types import time import BaseHTTPServer from yum.misc import Checksums __conf_use_xattr_store__ = True if __conf_use_xattr_store__: try: import xattr except ImportError: __conf_use_xattr_store__ = False __NAME__ = 'Checksumed Data %s' % os.uname()[1] __DNS__ = "_checksum_data._tcp" # See: http://www.dns-sd.org/ServiceTypes.html __DOMAIN__ = '' # auto __HOST__ = '' # auto __PORT__ = 1666 __TEXT__ = 'Serves data over HTTP using checksumming and length as input' __file_matchers__ = ["/var/lib/checksum-data/*", "/var/lib/checksum-data/*/*", "/var/lib/mock/cache/*/yum_cache/*/*", "/var/lib/mock/cache/*/yum_cache/*/packages/*", "/var/cache/yum/*/*", "/var/cache/yum/*/packages/*" ] def file_data(fname): try: st = os.stat(fname) return (st.st_size, int(st.st_mtime)) except OSError: return (-1, -1) def checksum(fname, checksums=['sha1', 'md5', 'sha256']): """ Takes filename, hand back Checksums object of it. """ # Borrowed from yum.misc ... CHUNK = 2**16 if type(fname) not in types.StringTypes: fo = fname # assume it's a file-like-object else: fo = open(fname, 'r', CHUNK) data = Checksums(checksums) while data.read(fo, CHUNK): pass if type(fname) is types.StringType: fo.close() del fo return data def register_service(): """ Register the checksumed object service. """ bus = dbus.SystemBus() server = dbus.Interface(bus.get_object(avahi.DBUS_NAME, avahi.DBUS_PATH_SERVER), avahi.DBUS_INTERFACE_SERVER) path = server.EntryGroupNew() group = dbus.Interface(bus.get_object(avahi.DBUS_NAME, path), avahi.DBUS_INTERFACE_ENTRY_GROUP) # FIXME: Need to see if the name collides, hopefully without # having to loop and respond to events... group.AddService(avahi.IF_UNSPEC, avahi.PROTO_INET, dbus.UInt32(0), # flags __NAME__, __DNS__, __DOMAIN__, __HOST__, dbus.UInt16(__PORT__), avahi.string_array_to_txt_array([__TEXT__])) group.Commit() def _getxattr(fname, attr): """ Work around stupid python rasing exceptions. """ try: return xattr.getxattr(fname, attr) except IOError, e: if e.errno == 61: # No data available return None raise def _setxattr(fname, attr, val): """ Work around stupid python rasing exceptions. """ try: xattr.setxattr(fname, attr, val) return True except IOError, e: if e.errno == 13: # Permission denied return False raise def _safe_int(val): """ Work around stupid python rasing exceptions. """ try: return int(val) except ValueError: return None class ChecksumedData: """ A piece of data, with a name, and a set of checksums. """ def _load_cached_checksum_data(self): """ Try and load cached checksum data from xattrs. """ if not __conf_use_xattr_store__: return False ctime = _getxattr(self.fname, "user.checksum_mtime") if ctime is None: return False clen = _getxattr(self.fname, "user.checksum_len") if clen is None: return False if (self.len, self.mtime) != (_safe_int(clen), _safe_int(ctime)): return False self.sha256 = _getxattr(self.fname, "user.checksum_sha256") if self.sha256 is None: return False self.sha1 = _getxattr(self.fname, "user.checksum_sha1") if self.sha1 is None: return False self.md5 = _getxattr(self.fname, "user.checksum_md5") if self.md5 is None: return False return True def _save_cached_checksum_data(self): """ Try and save cached checksum data to xattrs. """ if not __conf_use_xattr_store__: return False for key, val in (("checksum_mtime", str(self.mtime)), ("checksum_len", str(self.len)), ("checksum_sha256", self.sha256), ("checksum_sha1", self.sha1), ("checksum_md5", self.md5)): if not _setxattr(self.fname, "user." + key, val): return False return True def __init__(self, fname): self.fname = fname self.basename = os.path.basename(fname) (self.len, self.mtime) = file_data(fname) if self._load_cached_checksum_data(): return checksums = checksum(fname) for csum in ('sha1', 'sha256', 'md5'): setattr(self, csum, checksums.hexdigest(csum)) self._save_cached_checksum_data() def __eq__(self, other): if not other: return False if self.sha256 != other.sha256: return False if self.sha1 != other.sha1: return False if self.md5 != other.md5: return False if self.len != other.len: return False return True def __ne__(self, other): return not (self == other) def __hash__(self): return int(self.sha256, 16) def __str__(self): return """\ Name: %s len: %u md5: %s sha1: %s sha256: %s """ % (self.fname, self.len, self.md5, self.sha1, self.sha256) def file_valid(self): return file_data(self.fname) == (self.len, self.mtime) def add_checksum_data(fname, startup='\r'): if not os.path.isfile(fname): return count = len(__sha256_data__) print "%-79.79s%s" % ("Loading[%d]: %s" % (count + 1, fname), startup), data = ChecksumedData(fname) __sha256_data__.setdefault(data.sha256, set()).add(data) __sha1_data__.setdefault(data.sha1, set()).add(data) __md5_data__.setdefault(data.md5, set()).add(data) __len_data__.setdefault(data.len, set()).add(data) __fn__[data.fname] = data def del_checksum_data(data): print "Removing:", data.fname __sha256_data__[data.sha256].discard(data) __sha1_data__[data.sha1].discard(data) __md5_data__[data.md5].discard(data) __len_data__[data.len].discard(data) del __fn__[data.fname] def load_checksum_data(): global __sha256_data__, __sha1_data__, __md5_data__, __len_data__, __fn__ __sha256_data__ = {} __sha1_data__ = {} __md5_data__ = {} __len_data__ = {} __fn__ = {} global last_load last_load = time.time() for fmatch in __file_matchers__: for fname in glob.glob(fmatch): add_checksum_data(fname) print "\nLoaded Objects:", len(__sha256_data__) for store in (__sha256_data__, __sha1_data__, __md5_data__): for data in store: if len(store[data]) > 1: print "Checksum %x has %u hits" % (data, len(store[data])) if False: for data in __len_data__: if len(__len_data__[data]) > 1: print "Length %d has %u hits" % (data, len(__len_data__[data])) def reload_checksum_data(): global last_load last_load = time.time() for fmatch in __file_matchers__: for fname in glob.glob(fmatch): if fname in __fn__ and not __fn__[fname].file_valid(): del_checksum_data(__fn__[fname]) if fname not in __fn__: add_checksum_data(fname, startup='') print "Re-Loaded Objects:", len(__sha256_data__) load_checksum_data() def _conv_data(user_data, key): if False: pass elif key in ('sha256', 'sha1', 'md5'): val = user_data[key] elif key == 'len': val = _safe_int(user_data['len']) else: assert False, "Bad key" return val def find_data1(user_data, key): val = _conv_data(user_data, key) return eval("__%s_data__" % key).get(val, set()) def find_data(user_data): ret = None for key in ('sha256', 'sha1', 'md5', 'len'): if key not in user_data: continue data = find_data1(user_data, key) if ret is None: ret = data else: ret.intersection_update(data) if not ret: return None if ret is None: return None assert len(ret) >= 1 for data in list(ret): # FIXME: Should checksum again? ... to make _sure_ it hasn't changed if not data.file_valid(): ret.discard(data) del_checksum_data(data) if not ret: return None return ret def path2data(path): """Given a HTTP "path" turn it into a dict of key/val.""" vals = path.strip('/').split('/') if len(vals) % 2: return {} data = {} while vals: key = vals.pop(0) val = vals.pop(0) if key in data and data[key] != val: return {} if key in ('sha256', 'sha1', 'md5', 'len'): # Allow "future proofing", really need non-HTTP to always do the # right thing here. data[key] = val return data register_service() def _ret_http_err_code(s, code): s.send_response(code) s.send_header("Content-Type", "application/octet-stream") s.send_header("Content-Length", "0") s.end_headers() if time.time() - last_load > (60 * 30): reload_checksum_data() def _ret_http_ok_code(s, data): s.send_response(200) s.send_header("Content-Type", "application/octet-stream") s.send_header("Content-Length", data.len) # Maybe put the the Content-MD5 header here ... stupid format though s.end_headers() # FIXME: This should DIE ... HTTP is evil class HTTP_Handler(BaseHTTPServer.BaseHTTPRequestHandler): def do_HEAD(s): """Respond to a HEAD request.""" user_data = path2data(s.path) data = find_data(user_data) # print "JDBG: HEAD:", s.path, user_data, data if data is None: _ret_http_err_code(s, 404) return assert len(data) >= 1 if len(data) > 1: _ret_http_err_code(s, 300) return data = list(data)[0] _ret_http_ok_code(s, data) def do_GET(s): """Respond to a GET request.""" user_data = path2data(s.path) data = find_data(user_data) # print "JDBG: GET:", s.path, user_data, data if data is None: _ret_http_err_code(s, 404) return assert len(data) >= 1 if len(data) > 1: _ret_http_err_code(s, 300) return data = list(data)[0] _ret_http_ok_code(s, data) # FIXME: This probably sucks... r = open(data.fname, "r") s.wfile.write(r.read()) r.close() if time.time() - last_load > (60 * 30): reload_checksum_data() httpd = BaseHTTPServer.HTTPServer( ("", __PORT__), HTTP_Handler ) httpd.server_version = 'ChecksumedData/0.1' print "Server starting..." try: httpd.serve_forever() except KeyboardInterrupt: pass