#!/usr/bin/env python2.7 # ---------------------------------- # # Module pylib_eprots # # Functions (and classes) for manipulating E result protocols (this is # probably fairly specialized and not that useful for others... # # Copyright 2003-2005 Stephan Schulz, schulz@eprover.org # # This code is part of the support structure for the equational # theorem prover E. Visit # # http://www.eprover.org # # for more information. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program ; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, # MA 02111-1307 USA # # The original copyright holder can be contacted as # # Stephan Schulz (I4) # Technische Universitaet Muenchen # Institut fuer Informatik # Boltzmannstrasse 3 # Garching bei Muenchen # Germany # # or via email (address above). import re import string import sys from UserList import UserList import pylib_basics import pylib_io import pylib_discretize InfiniteTime = 1000000000 EmptyProtSetException = "Cannot get required information from empty protocol set" train_number_re = re.compile("_[0-9]*\.") def break_prot_line(line): """ Take an E protocoll line and convert it into an array of values of the appropriate type (string, float or int) that is returned. """ if line[0] == "#": return None parts = string.split(line, None) for i in range(0, len(parts)): try: p = int(parts [i]) except ValueError: try: p = float(parts[i]) except ValueError: p = parts[i] parts[i] = p return parts def get_relevant_parts(line): """ Accept an E protocol line and return a tuple of the three relevant elements: Problem name, status, and time. """ parts = break_prot_line(line) if parts: return (parts[0],parts[1],parts[2], parts[3]) return None class process_line: """ Class of functional objects converting one E protocol line into another by rounding the time as specified by a rounding function. """ def __init__(self, round_fun): self.round_fun = round_fun; def __call__(self,line): print line if line[0] == "#": return line parts = string.split(line, None, 4) return "%-29s %s %8.3f %-10s %85s" % (parts[0], parts[1], self.round_fun(float(parts[2])), parts[3],parts[4]) def compare_entries(entry1, entry2): """ Compare two tuples of the form (status, time, reason) so that failures are smaller than successes and better times are smaller than larger ones. """ res = cmp(entry1[0], entry2[0]) if res!=0: return -1*res; res = cmp(entry1[1], entry2[1]) if res!=0: return res; return cmp(entry1[2], entry2[2]) class eprotocol: """ Data type for storing (the relevant parts of) a standard E test protocol. """ def __init__(self, name=None, silent=False): self.name = "Unknown" self.data = {} self.proofs = 0 self.models = 0 self.successes = 0 self.succ_time = 0.0 self.entries = 0 self.comments = "" self.silent = silent if name: self.parse(name) def __cmp__(self, other): """ Comparison of two protocols. Smaller is better, i.e. more successes or same number of successes in less time. """ tmp = other.successes - self.successes if tmp!=0: return tmp tmp = self.succ_time- other.succ_time return pylib_basics.sign(tmp) def insert_entry(self, entry, state, time, reason): self.data[entry] = (state, time, reason) self.entries += 1 if state == "T": self.proofs += 1 self.successes += 1 self.succ_time += time elif state == "N": self.models += 1 self.successes += 1 self.succ_time += time def insert_line(self, line): tmp = get_relevant_parts(line) if tmp: self.insert_entry(tmp[0], tmp[1], tmp[2], tmp[3]); else: self.comments += line def parse(self, file): if not self.silent: sys.stderr.write("Parsing "+file+".\n") f = pylib_io.flexopen(file,'r') l = f.readlines() pylib_io.flexclose(f) self.name = file for line in l: self.insert_line(line) def filter(self, filter_re): """ Return a new protocol with the same name containing just problems where the name matches the filtering regexp. """ res = eprotocol(None, self.silent); iter_list = self.data.keys() iter_list.sort() for i in iter_list: if filter_re.search(i): l_entry = self.data[i] res.insert_entry(i, l_entry[0], l_entry[1], l_entry[2]); return res def collect_sample(self): """ Consider names to consist of a class part and a running number of the form '_XXX' directly before the suffix. Return a dictionary associating every class with a list of all results. """ res = {} for i in self.data.keys(): entry = self.data[i] mo = train_number_re.search(i); key = i[:mo.start()]; try: res[key].append(entry) except KeyError: res[key] = []; res[key].append(entry) for i in res.keys(): res[i].sort(compare_entries); return res def collect_medians(self): """ For each class, compute the median value. Return a sorted list of class/value tuples. If the median is no sucess, generate no pair! """ tmp = self.collect_sample() res = []; for i in tmp.keys(): entry = tmp[i]; value = entry[int((len(entry)-1)/2)]; if value[0]!='F': res.append( (i, value[1])); res.sort(); return res; def collect_times(self, success_only=False): """ Return a list of the run times for all problems (or all successfully solved problems) in the protocol. """ if success_only: return [i[1] for i in self.data.values() if i[0]!='F'] else: return [i[1] for i in self.data.values()] def repr_entry(self, key): """ Return a representation of the entry for a single key. """ tmp = self.data[key]; return "%-38s %s %8.3f %s" % (key, tmp[0], tmp[1], tmp[2]) def __repr__(self): res = self.comments; res +="""# Proofs: %5d # Models: %5d # Successed: %5d # Time: %-8.3f """ % (self.proofs, self.models, self.successes, self.succ_time) tmpkeys = self.data.keys() tmpkeys.sort() for i in tmpkeys: res += self.repr_entry(i) res += "\n" return res; def eval_problem(self, problem, round_fun=pylib_discretize.no_round): """ Return a tuple (time, solutions, succ_time), where time is the time for a successful proof attempt at problem (or InfiniteTime for failure), solutions is the number of solutions in the protocol, and succ_time is the sum of all solution times in the protocol. Unknown problems are treated as failures. """ try: state, time = self.data[problem]; if state == "F": return (InfiniteTime, self.successes, self.succ_time) else: return (round_fun(time), self.successes, self.succ_time) except KeyError: return (InfiniteTime, self.successes, self.succ_time) def get_status(self, problem): (state, time) = self.data[problem] return state def eval_is_better(e1, e2): """ Return true if e1 is better than e2 """ if e1[0] < e2[0]: return True elif e1[0] == e2[0]: if e1[1] > e2[1]: return True elif e1[1] == e2[1]: if e1[2] > e2[2]: return True return False class classification(UserList): def __init__(self, data=[]): UserList.__init__(self) for i in data: self.append(i) self.hash = {} def append(self, new): if len(new)!=3: raise TypeError UserList.append(self,new) self.hash[new[0]] = new[2] def printout(self, prefix = ""): self.sort() for i in self: print "%s%-29s : %s : %s" % (prefix, i[0], i[1], i[2]) def parse(self, file): f = pylib_io.flexopen(file,"r") l = f.readlines() pylib_io.flexclose(f) for i in l: if i.startswith("#"): continue tmp = string.split(i, ":"); if len(tmp) == 4: del(tmp[0]) tmp = map(string.strip, tmp) self.append((tmp[0], tmp[1], tmp[2])) def classify(self, prob): return self.hash[prob] class eprot_set: """ Class for storing an arbitrary number of E protocols and answering interesting questions about them. """ def __init__(self, names=[]): self.protlist = [] self.sorted = True; self.parse(names) def insert(self,prot): self.sorted = False; self.protlist.append(prot) def parse(self, names, silent=False): for name in names: prot = eprotocol(name, silent) self.insert(prot) def __repr__(self): res = "" sep = "[" for i in self.protlist: res += sep res += i.name sep = "\n " return res+"]" def sort(self): if not self.sorted: self.protlist.sort() def find_class(self, problem, round_fun=pylib_discretize.no_round): self.sort() try: res = self.protlist[0] reseval = (InfiniteTime, 10000000, 0) except IndexError: raise EmptyProtSetException for i in self.protlist: eval = i.eval_problem(problem, round_fun) if eval_is_better(eval, reseval): res = i reseval = eval return res def make_classification(self,round_fun=pylib_discretize.no_round): self.sort() try: source = self.protlist[0] except IndexError: raise EmptyProtSetException res = classification() for i in source.data.keys(): prot = self.find_class(i,round_fun) status = prot.get_status(i) res.append((i, status, prot.name)) return res class featurelist(UserList): def __init__(self, data=[]): UserList.__init__(self) for i in data: self.append(i) def parse(self, file): f = pylib_io.flexopen(file,"r") l = f.readlines() pylib_io.flexclose(f) for i in l: if i.startswith("#"): continue tmp = string.split(i, ":"); name = tmp[0].strip() featurestring = (tmp[1].strip())[1:-1] features = map(string.strip,string.split(featurestring, ",")) if len(tmp) == 3: # Old style features add_features = tmp[2].strip() features.append(add_features[0]) features.append(add_features[1]) features.append(add_features[2]) features.append(add_features[4]) features.append(add_features[9]) self.append((name, features)) def printout(self): self.sort() for i in self: print i[0],":", string.join(i[1],",")