#!/usr/bin/env python import getopt import math import sys import numpy as np sys.path.append("liblinear-1.8/python") import liblinearutil as ll import common import parse import vectorize def usage(f = sys.stdout): print >> f, """\ Usage: %s -m [FP_FILENAME]... Classify fingerprints using a previously trained model. -h, --help show this help. -m, --model=MODEL_FILENAME use the model in MODEL_FILENAME (required).\ """ % sys.argv[0] class options (object): model_filename = None def format_accuracy(n, d): assert n <= d if d == 0: return "%3d/%-3d --%%" % (n, d) pct = 100.0 * n / d return "%3d/%-3d %6.2f%%" % (n, d, pct) def apply_scale(features, scale_params): if scale_params is None: return features n = len(features) for i in range(n): mn, mx = scale_params[i] if features[i] >= 0: if mn == mx: denom = 1.0 else: denom = mx - mn features[i] = (features[i] - mn) / denom return list(features) def novelty_of(features, model, index): means = model.means[index] variances = model.variances[index] sum = 0.0 for f, m, v in zip(features, means, variances): d = common.clamp(f - m) if v == 0.0: v = 0.01 sum += d * d / v; return math.sqrt(sum) opts, args = getopt.gnu_getopt(sys.argv[1:], "hm:", ["help", "model="]) for o, a in opts: if o == "-h" or o == "--help": usage() sys.exit() elif o == "-m" or o == "--model": options.model_filename = a if options.model_filename is None: usage(sys.stderr) exit(1) model = parse.parse_model_file(options.model_filename) accuracy = [0, 0] def logit(x): if x < -500: return 0.0 return 1.0 / (1.0 + math.exp(-x)) for fp_filename, fp_f in common.find_files_or_stdin(args, "*.6fp"): rs = parse.parse_6fp(fp_f) print print "==", fp_filename, "==" print "nmapclasses:", ", ".join([parse.format_nmapclass(nc) for nc in rs.desc.nmapclasses]) ll_model = model.model features = vectorize.vectorize(model.feature_names, rs) features = map(lambda z: (z is vectorize.MISSING or z is vectorize.UNKNOWN) and -1 or z, features) features = apply_scale(features, model.scale_params) p_label, p_acc, p_val = ll.predict([-1], [features], ll_model) p_label = int(p_label[0]) p_val = p_val[0] p_val = [logit(x) for x in p_val] guesses = list(enumerate(p_val)) desc = model.descs[p_label] correct = None accuracy[1] += 1 if set(rs.desc.nmapclasses).issubset(set(desc.nmapclasses)): accuracy[0] += 1 correct = True else: correct = False print "predictions%s" % (correct == False and " (incorrect)" or "") for n, prob in sorted(guesses, key = lambda z: z[1], reverse = True): desc = model.descs[n] novelty = novelty_of(features, model, n) print "%2d. %5.2f%% %6.2f %s" % (n, prob * 100, novelty, desc.nmapname) print "Accuracy %s" % format_accuracy(*accuracy)