#!/usr/bin/env python
"""Classify fingerprint files using a previously trained model file.

The model file named by -m/--model is parsed into a list of feature names
and a list of per-tier [liblinear model, class list] pairs. Every "*.6fp"
file found under each remaining command-line path is then vectorized and
run through each tier's model, and the predictions are printed to stdout.
"""

from __future__ import print_function

import getopt
import sys
import tempfile

import liblinearutil as ll

from fp import *
from vectorize import *


def usage(f=sys.stdout):
    """Write the command-line help text to the file object f."""
    print("""\
Usage: %s -m [FP_FILENAME]...
Classify fingerprints using a previously trained model.

  -m, --model=MODEL_FILENAME  use the model in MODEL_FILENAME (required).\
""" % sys.argv[0], file=f)


class options(object):
    """Namespace for command-line settings; filled in by option parsing."""

    # Path given with -m/--model; None until the option has been seen.
    model_filename = None


def line_iter(f):
    """Yield each line of f stripped of surrounding whitespace, skipping
    lines that are empty after stripping."""
    for line in f:
        line = line.strip()
        if line != "":
            yield line


def _section_lines(lines, end_marker):
    """Yield lines from the iterator `lines` up to, but not including, the
    first line whose whitespace-split words equal `end_marker`.

    The terminating line is consumed. Raises ValueError if the input runs
    out before the terminator is seen.
    """
    for line in lines:
        if line.split() == end_marker:
            return
        yield line
    raise ValueError("model file ended before %r" % " ".join(end_marker))


def parse_model(f):
    """Parse a model description from the open file object f.

    Recognized lines (after stripping; blank lines are ignored):
      "begin features" ... "end features"    one feature name per line
      "tier N"                               starts tier N (numbered from 1)
      "class A | B | ..."                    appends a class tuple to the
                                             current tier
      "begin liblinear" ... "end liblinear"  liblinear model text for the
                                             current tier
    Any other line is ignored.

    Returns (feature_names, models), where models is a list with one
    [model, classes] entry per tier: model is whatever ll.load_model
    returns (None if the tier had no liblinear section) and classes is a
    list of tuples of strings.

    Raises ValueError if the features section appears twice, tiers are not
    numbered consecutively from 1, a section is not terminated, or a
    "class"/"liblinear" entry appears before any "tier" line.
    """
    feature_names = []
    models = []

    lines = line_iter(f)
    # The nested section readers pull from the same iterator, so this loop
    # resumes right after each section's terminating line.
    for line in lines:
        parts = line.split()
        if parts == ["begin", "features"]:
            if feature_names:
                raise ValueError("model file has more than one features section")
            feature_names.extend(_section_lines(lines, ["end", "features"]))
        elif parts == ["begin", "liblinear"]:
            if not models:
                raise ValueError("liblinear section appears before any tier")
            # ll.load_model takes a filename, so the embedded model text is
            # copied to a temporary file first. Text mode so that print()
            # can write str to it on both Python 2 and Python 3.
            model_file = tempfile.NamedTemporaryFile(mode="w")
            try:
                for model_line in _section_lines(lines, ["end", "liblinear"]):
                    print(model_line, file=model_file)
                model_file.flush()
                model = ll.load_model(model_file.name)
            finally:
                # Closing also deletes the temporary file, even on failure.
                model_file.close()
            models[-1][0] = model
        elif parts[0] == "tier":
            tier = int(parts[1])
            if tier != len(models) + 1:
                raise ValueError("expected tier %d, found tier %d"
                                 % (len(models) + 1, tier))
            models.append([None, []])
        elif parts[0] == "class":
            if not models:
                raise ValueError("class line appears before any tier")
            # Re-join the words so that each "|"-separated component may
            # itself contain spaces.
            osclass = tuple(name.strip()
                            for name in " ".join(parts[1:]).split("|"))
            models[-1][1].append(osclass)

    return feature_names, models


def parse_model_file(filename):
    """Open filename and return parse_model's result for its contents."""
    with open(filename) as f:
        return parse_model(f)


opts, args = getopt.gnu_getopt(sys.argv[1:], "m:", ["model="])
for o, a in opts:
    if o in ("-m", "--model"):
        options.model_filename = a

if options.model_filename is None:
    usage(sys.stderr)
    sys.exit(1)

feature_names, models = parse_model_file(options.model_filename)

for path in args:
    for fp_filename in find_files(path, "*.6fp"):
        rs = ResponseSet()
        rs.parse_file(fp_filename)
        print()
        print("==", fp_filename, "==")
        print("osclass:",
              "None" if rs.osclass is None else " | ".join(rs.osclass))
        for tier, (model, osclasses) in enumerate(models):
            features = vectorize(feature_names, rs)
            # Features with no value are passed to the model as -1.
            features = [-1 if z is None else z for z in features]
            try:
                # "-b 1" asks liblinear for per-class probability estimates.
                # NOTE(review): the [-1] label list is presumably a
                # placeholder for the unknown true label -- confirm.
                p_label, p_acc, p_val = ll.predict([-1], [features], model, "-b 1")
                p_label = int(p_label[0])
                p_val = p_val[0]
                # NOTE(review): assumes labels are indices into both p_val
                # and osclasses -- confirm against the training script.
                confidence = p_val[p_label]
                guesses = [(osclasses[z[0]], z[1]) for z in enumerate(p_val)]
            except TypeError:
                # NOTE(review): presumably raised when probability output
                # is unavailable; fall back to a plain prediction with no
                # ranked guesses -- confirm.
                p_label, p_acc, p_val = ll.predict([-1], [features], model)
                p_label = int(p_label[0])
                confidence = 1.0
                guesses = []
            osclass = osclasses[p_label]
            osclass_str = " | ".join(osclass)
            print("tier %d prediction: %5.2f%% %s"
                  % (tier + 1, confidence * 100.0, osclass_str))
            # List every class, most probable first.
            for guess in sorted(guesses, key=lambda z: z[1], reverse=True):
                print(" %5.2f%% %s" % (guess[1] * 100, " | ".join(guess[0])))