#!/usr/bin/env python2.7 """ tptp_analyse.py 0.1 Usage: tptp_analyse.py [options] ... Parse a list of TPTP file name occurances and File lines and generate an overview of which problem is in which version of TPTP. Options: -h Print this help. -s Parse sourcefile and obtain problem statistics from the given sources. Copyright 2009 Stephan Schulz, schulz@eprover.org This code is part of the support structure for the equational theorem prover E. Visit http://www.eprover.org for more information. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program ; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA The original copyright holder can be contacted as Stephan Schulz (I4) Technische Universitaet Muenchen Institut fuer Informatik Boltzmannstrasse 3 Garching bei Muenchen Germany or via email (address above). """ import sys import re import string import getopt import pylib_io from pylib_maths import * def tptp_rep(sample, element): if element in sample: return element else: return " - " def header_extract_int(line): """ Given a TPTP header syntax line, extract the numerical value. """ line = line[14:] tmp = line.split(":") res = int(tmp[1].split()[0]) return res class problem_desc(object): def __init__(self, filename): fp = pylib_io.flexopen(filename, "r") desc = fp.readlines(4096) pylib_io.flexclose(fp) self.specsize = 0 self.atomno = 0 self.predno = 0 self.funcno = 0 if "+" in filename: self.fof = True else: self.fof = False self.filename = filename self.name = filename.split("/")[-1] for i in desc: pos = i.find("Number of clauses") if pos != -1: self.specsize = header_extract_int(i) pos = i.find("Number of formulae") if pos != -1: self.specsize = header_extract_int(i) pos = i.find("Number of literals") if pos != -1: self.atomno = header_extract_int(i) pos = i.find("Number of atoms") if pos != -1: self.atomno = header_extract_int(i) pos = i.find("Number of predicates") if pos != -1: self.predno = header_extract_int(i) pos = i.find("Number of functors") if pos != -1: self.funcno = header_extract_int(i) def __str__(self): return ("(%s:%d, %d, %d, %d)")%(self.name, self.specsize, self.atomno, self.predno, self.funcno) class tptp_store(object): """ Represent data about the problems in different versions of the TPTP. """ def __init__(self): self.problems = {} self.tptps = set() self.prob_names = None self.prob_desc = {} def prob_list(self): if not self.prob_names: self.prob_names = self.problems.keys() self.prob_names.sort() return self.prob_names def add(self, line): tmp = line.split("%") probdesc = tmp[0] tmp = probdesc.split("/") tptp = tmp[1][5:] prob = tmp[4][:-1].replace(":", ".") if not prob in self.problems: self.problems[prob] = set() self.prob_names = None self.problems[prob].add(tptp) self.tptps.add(tptp) def prob_rep(self, order, prob): tptps = self.problems[prob] presence_rep = map(lambda x:tptp_rep(tptps,x), order) presence_rep.insert(0, prob) return " ".join(presence_rep) def __str__(self): order = list(self.tptps) order.sort() replist = [self.prob_rep(order, prob) for prob in self.prob_list()] replist.append("") return "\n".join(replist) def is_present(self, problem, tptp): try: return tptp in self.problems[problem] except KeyError: return False def extract(self, tptp): """ Return the list of all problems in the given TPTP. """ return filter(lambda x:self.is_present(x, tptp), self.prob_list()) def tptps_list(self): """ Return an ordered list of TPTPs represented in the store. """ res = list(self.tptps) res.sort() return res def parse_prob_data(self, file): fp = pylib_io.flexopen(file, "r") for line in fp: tmp = problem_desc(line[:-1]) self.prob_desc[tmp.name] = tmp pylib_io.flexclose(fp) def tptp_name(self, tptp): return tptp def prob_no(self, tptp): return len(self.extract(tptp)) def fof_proportion(self, tptp): probs = filter(lambda x:self.prob_desc[x].fof, self.extract(tptp)) return round(float(len(probs))/self.prob_no(tptp), 2) def ax_no(self, tptp): l = [self.prob_desc[x].specsize for x in self.extract(tptp)] return round(mean(l), 2) def atom_no(self, tptp): l = [self.prob_desc[x].atomno for x in self.extract(tptp)] return round(mean(l), 2) def pred_no(self, tptp): l = [self.prob_desc[x].predno for x in self.extract(tptp)] return round(mean(l), 2) def func_no(self, tptp): l = [self.prob_desc[x].funcno for x in self.extract(tptp)] return round(mean(l), 2) def stats(self, fname, feature_extractor): res = [fname] for i in self.tptps_list(): res.append(feature_extractor(i)) return res if __name__ == '__main__': parse_data = None opts, args = getopt.gnu_getopt(sys.argv[1:], "hvs:", ["Verbose"]) for option, optarg in opts: if option == "-h": print __doc__ sys.exit() elif option == "-v" or option =="--verbose": pylib_io.Verbose = 1 elif option == "-s": parse_data = optarg else: sys.exit("Unknown option "+ option) if len(args)<1: print __doc__ sys.exit() store = tptp_store() for arg in args: fp = pylib_io.flexopen(arg, "r") for line in fp: store.add(line) pylib_io.flexclose(fp) print store