#!/usr/bin/env python
"""Dump a parsed liblinear model as C/C++ source code on standard output."""

import getopt
import sys
import tempfile
import string

import sys
sys.path.append("liblinear-1.8/python")
import liblinearutil as ll

import common
import parse
import vectorize

# liblinear solver id of the Crammer-Singer multi-class SVM (MCSVM_CS). With
# that solver a two-class model still stores one weight column per class.
_MCSVM_CS = 4

# Characters that may be copied verbatim into a C string literal. Unlike
# string.printable this deliberately leaves out tab, newline, carriage return,
# vertical tab and form feed: a raw newline inside a literal is invalid C.
_C_PLAIN_CHARS = frozenset(
    string.digits + string.ascii_letters + string.punctuation + " ")


def usage(f = sys.stdout):
    """Write the command-line help text to the file object f."""
    f.write("""\
Usage: %s -m MODEL_FILENAME
Saves a liblinear model in C struct format.
  -h, --help                    show this help.
  -m, --model=MODEL_FILENAME    use the model in MODEL_FILENAME (required).
""" % sys.argv[0])


class options (object):
    """Namespace for the values of the command-line options."""
    # Path given with -m/--model; None until the option has been seen.
    model_filename = None


def c_quote(s):
    """Return s as a double-quoted C string literal.

    Double quotes and backslashes are backslash-escaped. Every character
    outside the plain ASCII digits, letters, punctuation and space is written
    as a three-digit octal escape of ord(c).
    """
    r = []
    for c in s:
        if c == "\"":
            c = "\\\""
        elif c == "\\":
            c = "\\\\"
        elif c not in _C_PLAIN_CHARS:
            # Always three digits, so a following literal digit can never be
            # absorbed into the escape sequence.
            # NOTE(review): assumes ord(c) <= 0o377 (byte strings); a wider
            # code point does not fit in one octal escape.
            c = "\\%03o" % ord(c)
        r.append(c)
    return '"' + "".join(r) + '"'


def c_quote_or_null(s):
    """Return the C token NULL when s is None, else s as a C string literal."""
    if s is None:
        return "NULL"
    return c_quote(s)


def format_osclass(osclass):
    """Format a four-element OS class as a C brace initializer.

    None elements become NULL; the others become quoted string literals.
    """
    assert len(osclass) == 4, osclass
    return "{" + ", ".join(c_quote_or_null(x) for x in osclass) + "}"


def _write_scale_table(f, model, w_size):
    """Write the FPscale array: one (a, b) pair per weight row."""
    f.write("""\
/* Scale parameters are pairs (a, b). A scaled value x' is calculated from a,
b, and an observed x by x' = (x + a) * b. */
""")
    f.write("double FPscale[][2] = {\n")
    if model.scale_params:
        for i in range(w_size):
            mn, mx = model.scale_params[i]
            a = -mn
            # A constant feature has zero range; use a unit factor rather
            # than dividing by zero.
            if mn == mx:
                b = 1.0
            else:
                b = 1.0 / (mx - mn)
            f.write("\t{ %10g, %10g },\t/* %s */\n"
                    % (a, b, model.feature_names[i]))
    else:
        # No scaling recorded: emit the identity transform for every row.
        for i in range(w_size):
            f.write("\t{ 0.0, 1.0 },\n")
    f.write("};\n")
    f.write("\n")


def _write_per_class_table(f, name, descs, rows, w_size):
    """Write a double NAME[][w_size] array with one commented row per desc."""
    f.write("double %s[][%d] = {\n" % (name, w_size))
    for desc, row in zip(descs, rows):
        f.write("\t/* %s */\n" % desc.nmapname)
        f.write("\t{ ")
        assert len(row) == w_size, (len(row), w_size)
        f.write("".join("%+.8f, " % v for v in row))
        f.write("},\n")
    f.write("};\n")
    f.write("\n")


def _write_weights(f, model, w_size, nr_w):
    """Write the flat _w coefficient array, one commented line per feature."""
    f.write("/* Regression coefficients. Columns are classes, rows are features. */\n")
    f.write("static double _w[] = {\n")
    for i in range(w_size):
        f.write("\t/* %s */\n" % model.feature_names[i])
        f.write("\t")
        for j in range(nr_w):
            f.write("%+.8f, " % model.model.w[i * nr_w + j])
        f.write("\n")
    f.write("};\n")
    f.write("\n")


def _write_model_struct(f, model):
    """Write the _labels array and the FPModel struct initializer."""
    f.write("static int _labels[] = {"
            + ", ".join(str(x) for x in range(model.model.nr_class))
            + "};\n")
    f.write("\n")
    f.write("/* C = %f */\n" % model.model.param.C)
    f.write("struct model FPModel = {\n")
    f.write("\t{%d},\n" % model.model.param.solver_type)
    f.write("\t%d,\n" % model.model.nr_class)
    f.write("\t%d,\n" % model.model.nr_feature)
    f.write("\t_w,\n")
    f.write("\t_labels,\n")
    f.write("\t%.8f\n" % model.model.bias)
    f.write("};\n")
    f.write("\n")


def _write_match_loader(f, descs):
    """Write the C++ load_fp_matches() function that rebuilds every match."""
    # The container holds FingerMatch objects (see push_back below); without
    # the template argument the generated C++ does not compile.
    f.write("std::vector<FingerMatch> load_fp_matches() {\n")
    f.write("\tstd::vector<FingerMatch> matches;\n")
    for i, desc in enumerate(descs):
        f.write("\t{\n")
        f.write("\t\tFingerMatch match;\n")
        f.write("\t\tmatch.line = %d;\n" % i)
        f.write("\t\tmatch.OS_name = (char *) %s;\n" % c_quote(desc.nmapname))
        for nmapclass in desc.nmapclasses:
            f.write("\t\t{\n")
            f.write("\t\t\tOS_Classification osclass = %s;\n"
                    % format_osclass(nmapclass.nmapclass))
            for cpe in nmapclass.cpe:
                f.write("\t\t\tosclass.cpe.push_back(%s);\n" % c_quote(cpe))
            f.write("\t\t\tmatch.OS_class.push_back(osclass);\n")
            f.write("\t\t}\n")
        f.write("\t\tmatches.push_back(match);\n")
        f.write("\t}\n")
    f.write("\treturn matches;\n")
    f.write("}\n")


def save_model_c_struct(f, model):
    """Write a liblinear model as a C struct.

    f is a writable text file object. model must provide:
      model.model         -- object with bias, nr_feature, nr_class, w and
                             param (which has solver_type and C)
      model.scale_params  -- per-row (min, max) pairs, or a false value
      model.feature_names -- names indexed like the weight rows
      model.descs         -- per-class descriptions (nmapname, nmapclasses)
      model.means         -- per-class rows of feature means
      model.variances     -- per-class rows of feature variances

    Raises AssertionError if a mean or variance row does not have exactly one
    entry per weight row.
    """
    # A non-negative bias adds one extra row after the real features.
    if model.model.bias >= 0:
        w_size = model.model.nr_feature + 1
    else:
        w_size = model.model.nr_feature
    # Two-class models keep a single weight column, except under MCSVM_CS.
    # The solver type is read from param, as in the struct emitted below.
    if model.model.nr_class == 2 and model.model.param.solver_type != _MCSVM_CS:
        nr_w = 1
    else:
        nr_w = model.model.nr_class

    f.write("/* This file is automatically generated. */\n")
    f.write("\n")
    f.write("#include \"FingerPrintResults.h\"\n")
    f.write("#include \"linear.h\"\n")
    f.write("\n")

    _write_scale_table(f, model, w_size)

    _write_per_class_table(f, "FPmean", model.descs, model.means, w_size)

    f.write("""\
/* Variances are per-feature only, not considering correlation. Rows of this
array can be thought of as diagonals of a covariance matrices, under the
assumption that feature values are independent (which is not true in general).
This is an approximation to avoid having to store an n^2 covariance matrix per
OS class. */
""")
    _write_per_class_table(f, "FPvariance", model.descs, model.variances,
                           w_size)

    _write_weights(f, model, w_size, nr_w)
    _write_model_struct(f, model)
    _write_match_loader(f, model.descs)


try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "hm:", ["help", "model="])
except getopt.GetoptError as err:
    # Report bad options the same way as a missing -m, not with a traceback.
    sys.stderr.write("%s\n" % err)
    usage(sys.stderr)
    sys.exit(1)
for o, a in opts:
    if o in ("-h", "--help"):
        usage()
        sys.exit()
    elif o in ("-m", "--model"):
        options.model_filename = a

# The model file is mandatory and no positional arguments are accepted.
if options.model_filename is None or args:
    usage(sys.stderr)
    sys.exit(1)

model = parse.parse_model_file(options.model_filename)
save_model_c_struct(sys.stdout, model)