#!/usr/bin/env python

import getopt
import sys
import tempfile

import numpy as np
import matplotlib.pyplot as plt

sys.path.append("liblinear-1.8/python")
import liblinearutil as ll

import common
import impute
import parse
import vectorize

# Raise NumPy's summarization threshold so that arrays of up to 10000
# elements are printed in full instead of being abbreviated.
np.set_printoptions(threshold=10000)

def usage(f = sys.stdout):
    """Write the command-line help text to a file-like object.

    f -- writable text stream that receives the message. Defaults to the
         sys.stdout object that was current when this function was defined.
    """
    # f.write() replaces the Python 2-only "print >> f" statement, which
    # raises TypeError on Python 3; the emitted text still ends with exactly
    # one newline. The synopsis line lists both required options (-g and -s);
    # positional arguments are not read by this script, so none are shown.
    f.write("""\
Usage: %s -g <GROUP_FILENAME> -s <SET_FILENAME> [--scale]
Make a visualization of the projection of the principal components of
feature vectors.

  -g, --group=GROUP_FILENAME  read groups from GROUP_FILENAME (required).
  -s, --set=SET_FILENAME      use the set of features in SET_FILENAME (required).
  --scale                     scale feature vectors to the range [0, 1].
""" % sys.argv[0])

class options(object):
    # Namespace for the command-line settings; the values are read and
    # written directly on the class, which is never instantiated here.

    # Set to True by --scale: scale the feature vectors before the PCA.
    scale = False
    # Path given with -s/--set (required).
    set_filename = None
    # Path given with -g/--group (required).
    group_filename = None

def scale(features):
    """Rescale the non-negative entries of each column to [0, 1], in place.

    Negative entries are treated as unassigned: they are ignored when the
    column range is determined and are left untouched. A column whose
    assigned entries are all equal is divided by 1.0 after the minimum is
    subtracted, so those entries become 0.

    features -- 2-D array, modified in place.

    Returns (features, s_min, s_max), where s_min[i] and s_max[i] are the
    smallest and largest assigned values found in column i before scaling
    (both 0.0 when the column has no assigned entry).
    """
    _, n_cols = features.shape
    s_min = np.zeros(n_cols)
    s_max = np.zeros(n_cols)
    for col in range(n_cols):
        # Basic slicing yields a view, so writes below land in `features`.
        column = features[:, col]
        assigned = column >= 0
        if assigned.any():
            lo = column[assigned].min()
            hi = column[assigned].max()
        else:
            lo = hi = 0.0
        s_min[col] = lo
        s_max[col] = hi
        # Avoid dividing by zero for constant (or entirely unassigned) columns.
        span = 1.0 if lo == hi else hi - lo
        column[assigned] = (column[assigned] - lo) / span
    return features, s_min, s_max

def prepare_features(groups):
    """Impute and optionally scale all feature vectors, then hand them back.

    The feature vectors of every group are stacked into one matrix, passed
    through impute.impute() and, when options.scale is set, through scale().
    The processed rows are then written back to each group's "features"
    attribute in the original order.

    groups -- sequence of objects with a "features" attribute (a list of
              equal-length feature vectors); the attribute is replaced.

    Returns a list of (min, max) pairs, one per feature column, when
    options.scale is set, otherwise None.
    """
    # Remember how many rows each group contributes so that exactly those
    # rows are given back, independent of any other per-group attribute.
    counts = [len(group.features) for group in groups]
    feature_list = [features for group in groups for features in group.features]

    feature_matrix = np.vstack(feature_list)
    feature_matrix = impute.impute(feature_matrix)
    if options.scale:
        feature_matrix, s_min, s_max = scale(feature_matrix)
        # list() keeps this a reusable list on Python 3, where zip() returns
        # a one-shot iterator.
        scale_params = list(zip(s_min, s_max))
    else:
        scale_params = None

    # next(it) works on Python 2.6+ and 3; it.next() exists on Python 2 only.
    rows = iter(feature_matrix)
    for group, count in zip(groups, counts):
        group.features = [next(rows) for _ in range(count)]

    return scale_params

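# NOTE: commented-out sketch of a PCA computation. It refers to the module as
# "numpy" although this file imports it as "np"; it appears to be an earlier
# draft of do_pca() below and is kept for reference only.
# M = numpy.transpose(numpy.vstack(x))
# M -= numpy.mean(M, axis = 0)
# M = numpy.nan_to_num(M / numpy.std(M, axis = 0))
#
# U, d, Vh = numpy.linalg.svd(M, full_matrices = False)
#
# dim_2 = U[:,:2] * d[:2]
# coords = numpy.dot(numpy.transpose(M), dim_2)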

def do_pca(groups):
    """Project every feature vector onto the first two principal components.

    All feature vectors are stacked into one matrix (one vector per row),
    each column is centred and divided by its standard deviation, and the
    singular value decomposition of the result provides the coordinates.

    groups -- sequence of objects with a "features" attribute holding
              equal-length numeric feature vectors. The groups are not
              modified.

    Returns a list parallel to groups; entry k is a list with one coordinate
    array (at most two components) per feature vector of groups[k], in order.

    Raises ValueError (from np.vstack) when there is no feature vector at all.
    """
    counts = [len(g.features) for g in groups]
    rows = [row for g in groups for row in g.features]

    # np.vstack needs a real sequence (generators are rejected by current
    # NumPy). The float copy makes the in-place centring valid for integer
    # input and keeps the caller's arrays untouched.
    X = np.vstack(rows).astype(float)
    X -= np.mean(X, axis = 0)
    # A constant column has zero standard deviation; the resulting 0/0 is
    # mapped to 0 by nan_to_num, so the warnings would only be noise.
    with np.errstate(divide = "ignore", invalid = "ignore"):
        X = np.nan_to_num(X / np.std(X, axis = 0))

    U, d, _ = np.linalg.svd(X, full_matrices = False)
    coords = U[:, :2] * d[:2]

    # Split the coordinate rows back into per-group lists.
    pca_groups = []
    start = 0
    for count in counts:
        pca_groups.append(list(coords[start:start + count]))
        start += count

    return pca_groups

# Parse the command line into the options namespace. An unknown option or a
# missing option argument is reported together with the usage text instead of
# an uncaught getopt.GetoptError traceback.
try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "g:s:", ["group=", "set=", "scale"])
except getopt.GetoptError as err:
    sys.stderr.write("%s\n" % err)
    usage(sys.stderr)
    sys.exit(1)

for o, a in opts:
    if o in ("-g", "--group"):
        options.group_filename = a
    elif o in ("-s", "--set"):
        options.set_filename = a
    elif o == "--scale":
        options.scale = True

# Both file names are required. sys.exit() is used because the bare exit()
# helper is only installed by the site module.
if options.set_filename is None or options.group_filename is None:
    usage(sys.stderr)
    sys.exit(1)

# Read the feature names and the groups named on the command line.
feature_names = parse.parse_feature_set_file(options.set_filename)
groups = parse.parse_groups_file(options.group_filename)

# Build one feature vector per entry of each group's rs_list.
for group in groups:
    group.features = [vectorize.vectorize(feature_names, rs) for rs in group.rs_list]

# Impute (and optionally scale) the vectors, then project them with PCA.
scale_params = prepare_features(groups)
pca_groups = do_pca(groups)

# Scatter colour for each group, looked up by the value of
# group.desc.nmapclasses[0].nmapclass[1] (presumably the OS family name --
# verify against the parse module). Unlisted values are drawn in black.
COLORS = {
    "Linux": "#888888",
    "Windows": "lightgreen",
    "AIX": "blue",
    "OpenBSD": "orange",
    "FreeBSD": "coral",
    "Mac OS X": "cornflowerblue",
    "iOS": "cornflowerblue",
    "OpenIndiana": "purple",
    "OpenSolaris": "purple",
}

# Draw one scatter series per group, then show the figure.
for group, pca_group in zip(groups, pca_groups):
    if len(pca_group) == 0:
        # Nothing to draw; an empty list would become a 1-D array and the
        # column indexing below would raise IndexError.
        continue
    pca_group = np.asarray(pca_group)
    color = COLORS.get(group.desc.nmapclasses[0].nmapclass[1], "black")
    plt.scatter(pca_group[:,0], pca_group[:,1], c = color)
plt.show()