#!/usr/bin/env python
"""Render covariance matrices of fingerprint feature vectors as PNG images.

One image is written for every group read from the groups file, plus one for
all feature vectors taken together. Pmwiki markup that refers to the images
is printed on standard output.
"""

import datetime
import getopt
import sys

import numpy as np
import matplotlib.image as mpimg

import common
import impute
import parse
import vectorize


def usage(f=sys.stdout):
    """Write the command-line help text to the file object f."""
    f.write("""\
Usage: %s -g GROUP_FILENAME -s SET_FILENAME [--scale]
Make graphics of covariance matrices for each OS class and for the set of all
fingerprints as a whole. Each file is named cov-DATE-ID.png. Output Pmwiki
markup.

  -g, --group=GROUP_FILENAME  read groups from GROUP_FILENAME (required).
  -s, --set=SET_FILENAME      use the set of features in SET_FILENAME (required).
  --scale                     scale feature vectors to the range [0, 1].
""" % sys.argv[0])


class options(object):
    """Namespace holding the values of the command-line options."""
    # Path given with -g/--group; None until the option has been parsed.
    group_filename = None
    # Path given with -s/--set; None until the option has been parsed.
    set_filename = None
    # True when --scale was given.
    scale = False


def scale(features):
    """Rescale every column of a 2-D feature matrix, in place.

    Only entries that are >= 0 take part: they determine the column minimum
    and maximum and are mapped to (value - min) / (max - min). Entries below
    zero are left untouched (presumably they mark unassigned values; confirm
    against the impute module). A column whose minimum equals its maximum is
    divided by 1.0 so that no division by zero happens.

    Returns the tuple (features, s_min, s_max), where s_min and s_max hold
    the per-column minimum and maximum that were used (0.0 for a column that
    has no entry >= 0).
    """
    _, n_cols = features.shape
    s_min = np.zeros(n_cols)
    s_max = np.zeros(n_cols)
    for i in range(n_cols):
        column = features[:, i]
        assigned = column >= 0
        if assigned.any():
            mn = column[assigned].min()
            mx = column[assigned].max()
        else:
            mn = 0.0
            mx = 0.0
        s_min[i] = mn
        s_max[i] = mx
        # A constant column would otherwise divide by zero.
        denom = 1.0 if mn == mx else mx - mn
        features[assigned, i] = (column[assigned] - mn) / denom
    return features, s_min, s_max


def prepare_features(groups):
    """Impute and scale features, and assign them back to groups.

    The feature vectors of all groups are stacked into one matrix, passed
    through impute.impute and, when options.scale is set, through scale().
    The rows of the result are then handed back to the groups in order, one
    row per entry of each group's rs_list.

    Returns a list of (min, max) pairs, one per feature column, when scaling
    was applied, otherwise None.
    """
    feature_list = [features for group in groups for features in group.features]
    feature_matrix = np.vstack(feature_list)
    feature_matrix = impute.impute(feature_matrix)
    if options.scale:
        feature_matrix, s_min, s_max = scale(feature_matrix)
        scale_params = list(zip(s_min, s_max))
    else:
        scale_params = None
    rows = iter(feature_matrix)
    for group in groups:
        group.features = [next(rows) for _ in group.rs_list]
    return scale_params


def feature_cov(a):
    """Return the covariance matrix of the columns of the 2-D array a.

    Rows are observations and columns are variables. For a single
    observation the result is a w-by-w matrix with NaN on the diagonal and
    zero elsewhere.
    """
    h, w = a.shape
    if h == 1:
        # np.cov gives the scalar variance of the elements of a 1 by n matrix.
        return np.diag(np.array([np.nan] * w))
    else:
        return np.cov(a, rowvar=0)


try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "g:s:",
                                   ["group=", "set=", "scale"])
except getopt.GetoptError as err:
    sys.stderr.write("%s\n" % err)
    usage(sys.stderr)
    sys.exit(1)
for o, a in opts:
    if o == "-g" or o == "--group":
        options.group_filename = a
    elif o == "-s" or o == "--set":
        options.set_filename = a
    elif o == "--scale":
        options.scale = True

# The usage text declares both file names to be required.
if options.set_filename is None or options.group_filename is None:
    usage(sys.stderr)
    sys.exit(1)

feature_names = parse.parse_feature_set_file(options.set_filename)
labels, groups_list = parse.parse_groups_file(options.group_filename)

# First pass: turn every element of every group into a feature vector.
feature_list = []
for group_elem in groups_list:
    for rs in group_elem:
        features = vectorize.vectorize(feature_names, rs)
        feature_list.append(features)

# Second pass: build the Group objects, pairing each element with its vector.
f_i = iter(feature_list)
groups = []
for group_elem in groups_list:
    group = parse.Group()
    for rs in group_elem:
        group.add_rs(rs, next(f_i))
    groups.append(group)

scale_params = prepare_features(groups)

DATE_STR = datetime.datetime.now().strftime("%Y%m%d")

covs = []
for group in groups:
    covs.append(feature_cov(np.array(group.features)))
cov_all = feature_cov(np.vstack([group.features for group in groups]))

# Use one color range for all images so that they can be compared; NaN
# entries are replaced by nan_to_num before the extremes are taken.
covs_finite = np.nan_to_num(covs)
cov_all_finite = np.nan_to_num(cov_all)
vmin = min(np.min(covs_finite), np.min(cov_all_finite))
vmax = max(np.max(covs_finite), np.max(cov_all_finite))


def emit(label, count, fn):
    """Print the Pmwiki heading and attachment markup for one image file."""
    if count == 1:
        count_str = "(1 sample)"
    else:
        count_str = "(%d samples)" % count
    sys.stdout.write("!!! %s %s\n\nAttach:%s\n\n" % (label, count_str, fn))


fn = "cov-%s-all.png" % (DATE_STR)
emit("All fingerprints", len(feature_list), fn)
mpimg.imsave(fn, cov_all, vmin=vmin, vmax=vmax)

for n, (label, cov, group) in enumerate(zip(labels, covs, groups)):
    fn = "cov-%s-%02d.png" % (DATE_STR, n)
    emit(label, len(group.features), fn)
    mpimg.imsave(fn, cov, vmin=vmin, vmax=vmax)