#!/usr/bin/env python

import sys
import os
import subprocess
import difflib
import math
import re

import parse
import predict
import unwrap
import vectorize
import train


def is_exe(fpath):
    """Return True if fpath is an existing regular file that may be executed."""
    if not os.path.isfile(fpath):
        return False
    return os.access(fpath, os.X_OK)


class Clipboard(object):
    """Pipe text into a clipboard command-line tool.

    Unless an explicit command is given, the directories listed in PATH are
    searched for xsel, xclip, clip and pbcopy (in that order of preference)
    and the first one found is used.  If none is found a notice is printed
    and copy() silently does nothing.
    """

    # Class-level default so that copy() is a safe no-op whenever no
    # command was selected (previously the attribute was simply missing).
    cmd = None

    # (executable name, shell command line) pairs, most preferred first.
    _CANDIDATES = (
        ("xsel", "xsel -i"),
        ("xclip", "xclip -i"),
        ("clip", "clip"),  # Windows
        ("pbcopy", "pbcopy"),  # OS X
    )

    def __init__(self, override=None):
        """Select the clipboard command.

        override -- shell command line to use instead of searching PATH.
        """
        if override:
            self.cmd = override
            return
        # Fall back to the platform default search path when PATH is unset.
        search_path = os.environ.get("PATH", os.defpath)
        paths = tuple(p.strip('"') for p in search_path.split(os.pathsep))
        for exe, cmd in self._CANDIDATES:
            for path in paths:
                if is_exe(os.path.join(path, exe)):
                    self.cmd = cmd
                    return
        print("No clipboard program (xsel, xclip, clip, pbcopy) found.")

    def copy(self, inp):
        """Feed inp to the clipboard command's stdin; no-op without a command."""
        if not self.cmd:
            return
        # self.cmd is a command line (possibly with arguments), hence shell=True.
        clip = subprocess.Popen(self.cmd, shell=True, stdin=subprocess.PIPE)
        clip.communicate(inp)

def show_print(gname, classline, fp, clip):
    print gname, classline, unwrap.format_fp(fp, rle=True, strip=False)
    clip.copy("{}{}cpe \n\nprint\n{}{}".format(
        gname,
        classline,
        gname.replace("group", "#", 1),
        unwrap.format_fp(fp, rle=True, strip=True)
        ))

def show_predictions(correct, guesses, groups):
    """Print the ten best guesses, one per line.

    correct -- when equal to False the heading is marked "(incorrect)"
    guesses -- sequence of (group index, probability, description, novelty)
    groups  -- indexable by group index; each entry has an rs_list

    Each row shows the group index, the probability as a percentage, the
    novelty, the number of entries in the group's rs_list and the
    description's nmapname.
    """
    # Equality (not identity) on purpose: a value such as None does not
    # compare equal to False and therefore adds no suffix.
    if correct == False:
        suffix = " (incorrect)"
    else:
        suffix = ""
    print("predictions%s" % suffix)
    for group_index, probability, description, novelty in guesses[:10]:
        member_count = len(groups[group_index].rs_list)
        row = (group_index, probability * 100, novelty, member_count,
               description.nmapname)
        print("%2d.  %5.2f%% %6.2f %2d %s" % row)

# Makes features use the hex values that you might expect from the IPv4 side
def hexify(feature, name=None):
    """Format a feature value as an uppercase hexadecimal string.

    feature -- the value to format (number or numeric string)
    name    -- feature name; "TCP_ISR" selects the logarithmic encoding

    For "TCP_ISR" the value is read as a float and rendered as
    8 * log2(value) truncated to an integer; values below 1 give "0".
    Every other feature is converted to an integer and rendered directly.

    A value that cannot be converted (non-numeric string, None, infinity,
    NaN, ...) is returned unchanged.
    """
    # Only conversion failures are tolerated; anything else (for example
    # KeyboardInterrupt) must propagate.
    unconvertible = (TypeError, ValueError, OverflowError)
    if name == "TCP_ISR":
        try:
            t = float(feature)
        except unconvertible:
            return feature
        if t < 1:
            return "0"
        try:
            scaled = int(8 * math.log(t, 2))
        except unconvertible:
            # Infinity and NaN pass float() but cannot be truncated.
            return feature
        return "{:X}".format(scaled)
    try:
        t = int(feature)
    except unconvertible:
        return feature
    return "{:X}".format(t)


def show_diff(feature_names, features, references):
    """Print a table of the features that differ from at least one reference.

    feature_names -- names of the features, index-aligned with features
    features      -- vectorized observation
    references    -- items to compare against; each is vectorized with
                     vectorize.vectorize(feature_names, ...)

    A row (feature name, observed value, one column per reference) is
    printed only for features whose raw observed value differs from the
    value in some reference.  Values are shown through hexify().
    """
    ref_vectors = tuple(vectorize.vectorize(feature_names, ref) for ref in references)
    print("{0: <20} {1: <16} {2:}".format("feature", "observation", "references"))
    for idx, fname in enumerate(feature_names):
        observed = hexify(features[idx], fname)
        if not any(features[idx] != vec[idx] for vec in ref_vectors):
            continue
        columns = " ".join(
            "{0: <16}".format(hexify(vec[idx], fname)) for vec in ref_vectors)
        print("{0: <20} {1: <16} {2}".format(fname, observed, columns))

def tcp_opt_str(features, probe):
    """Build the TCP option string for one probe.

    features -- mapping from feature name to value
    probe    -- probe name used as the key prefix, e.g. "S1"

    The features "<probe>.TCP_OPT_0" .. "<probe>.TCP_OPT_15" are read in
    order until one is MISSING or UNKNOWN.  Each value contributes:
    0 -> "L", 1 -> "N", 2 -> "M" + MSS, 3 -> "W" + window scale,
    4 -> "S", 8 -> "T__", anything else -> "?".
    """
    fixed_letters = (
        (0, "L"),
        (1, "N"),
        (4, "S"),
        (8, "T__"),  # We don't vectorize the TSval and TSecr
    )
    pieces = []
    for slot in range(16):
        opt = features["{0}.TCP_OPT_{1}".format(probe, slot)]
        if opt == vectorize.MISSING or opt == vectorize.UNKNOWN:
            break
        if opt == 2:
            token = "M{0}".format(hexify(features["{0}.TCP_MSS".format(probe)]))
        elif opt == 3:
            token = "W{0}".format(hexify(features["{0}.TCP_WSCALE".format(probe)]))
        else:
            token = "?"
            for code, letter in fixed_letters:
                if opt == code:
                    token = letter
                    break
        pieces.append(token)
    return "".join(pieces)

def cc_str(features):
    """Return the congestion-control letter for the TECN probe.

    Reads "TECN.TCP_FLAG_E" and "TECN.TCP_FLAG_C" from features:
    both set -> "S", only E -> "Y", neither -> "N", only C -> "O".
    """
    ece_set = features["TECN.TCP_FLAG_E"] == 1
    cwr_set = features["TECN.TCP_FLAG_C"] == 1
    if ece_set:
        return "S" if cwr_set else "Y"
    return "O" if cwr_set else "N"

def _reply_hlim(rs, probe):
    """Return the hop limit of the reply to probe in rs, or 0 if there is none."""
    resp = vectorize.get_reply(rs, probe)
    if resp and resp.p:
        return resp.p.hlim
    return 0


def _reserved_marker(f, probe):
    """Return "R" if any of the probe's TCP_FLAG_RES8..RES11 features is 1, else ""."""
    for bit in (8, 9, 10, 11):
        if f["{}.TCP_FLAG_RES{}".format(probe, bit)] == 1:
            return "R"
    return ""


def show_v4fp(feature_names, features, rs):
    """Print the observation as SEQ/OPS/WIN/ECN/T1..T7 lines.

    feature_names -- names of the features, index-aligned with features
    features      -- vectorized observation
    rs            -- response set; used to look up the reply packets

    In each test line, T is the hop limit taken from the reply packet
    (0 when no reply is available) and TG is the probe's HLIM feature.
    """
    f = dict(zip(feature_names, features))
    print("SEQ(ISR={0})".format(hexify(f["TCP_ISR"], "TCP_ISR")))

    syn_probes = ["S{}".format(i) for i in range(1, 7)]
    print("OPS(O1={}%O2={}%O3={}%O4={}%O5={}%O6={})".format(
        *[tcp_opt_str(f, probe) for probe in syn_probes]))
    print("WIN(W1={}%W2={}%W3={}%W4={}%W5={}%W6={})".format(
        *[hexify(f["{}.TCP_WINDOW".format(probe)]) for probe in syn_probes]))

    # NOTE(review): ECN and T1 only test for UNKNOWN while T2-T7 also test
    # for MISSING; kept as found -- confirm the difference is intended.
    if f["TECN.PLEN"] == vectorize.UNKNOWN:
        print("ECN(R=N)")
    else:
        print("ECN(R=Y%T={t:x}%TG={tg}%W={w}%O={o}%CC={cc}%Q={r})".format(
            t=_reply_hlim(rs, "TECN"),
            tg=hexify(f["TECN.HLIM"]),
            w=hexify(f["TECN.TCP_WINDOW"]),
            o=tcp_opt_str(f, "TECN"),
            cc=cc_str(f),
            r=_reserved_marker(f, "TECN"),
            # URGP not in nmap.set
        ))

    if f["S1.PLEN"] == vectorize.UNKNOWN:
        print("T1(R=N)")
    else:
        print("T1(R=Y%T={t:x}%TG={tg}%Q={r})".format(
            t=_reply_hlim(rs, "S1"),
            tg=hexify(f["S1.HLIM"]),
            r=_reserved_marker(f, "S1"),
        ))

    for p in range(2, 8):
        probe = "T{}".format(p)
        plen = f["{}.PLEN".format(probe)]
        if plen == vectorize.UNKNOWN or plen == vectorize.MISSING:
            print("T{}(R=N)".format(p))
            continue
        print("T{p}(R=Y%T={t:x}%TG={tg}%W={w}%O={o}%Q={r})".format(
            t=_reply_hlim(rs, probe),
            p=p,
            tg=hexify(f["{}.HLIM".format(probe)]),
            w=hexify(f["{}.TCP_WINDOW".format(probe)]),
            o=tcp_opt_str(f, probe),
            r=_reserved_marker(f, probe),
            # URGP not in nmap.set
        ))

if __name__ == "__main__":
    # Collect the fingerprint from stdin: the "Fingerprint" and "Class"
    # header lines plus the run of "OS:" lines with their continuations.
    gname = ""
    classline = ""
    rawfp = []
    cont = False
    for line in sys.stdin:
        if line.startswith("Fingerprint "):
            gname = line.replace("Fingerprint", "group", 1)
        elif line.startswith("Class "):
            classline = line.replace("Class", "nmapclass", 1)
        elif cont or line.startswith("OS:"):
            line = line.strip()
            if line == "":
                break
            line = re.sub(r' *OS:', "", line)
            rawfp.append(line)
            # A chunk not ending in ')' is continued on the next input line.
            cont = not line.endswith(')')
        elif rawfp:
            # First unrelated line after the fingerprint ends the input.
            break

    fp = parse.parse_nmapfp_raw("".join(rawfp))
    rs = parse.ResponseSet()
    rs.parse_nmapfp_string(unwrap.format_fp(fp))

    model = parse.parse_model_file("nmap.model")
    feature_names = parse.parse_feature_set_file("nmap.set")
    groups = parse.parse_groups_file("nmap.groups")
    clip = Clipboard()

    # Vectorize
    features = vectorize.vectorize(feature_names, rs)

    # Print and copy the fingerprint
    show_print(gname, classline, fp, clip)

    # Do prediction
    correct, guesses = predict.predict(rs, model)
    show_predictions(correct, guesses, groups)

    # stdin carried the fingerprint, so the interactive commands are read
    # from the controlling terminal instead.
    sys.stdin = open("/dev/tty", "r")
    while True:
        try:
            choice = raw_input("(#) diff, (c)opy, (i)pv4 view, (t)rain, (r)eload, (p)redict, (q)uit? ").strip().lower()
        except EOFError:
            # End of input on the terminal: finish the prompt line and quit.
            print("")
            choice = 'q'
        if len(choice) > 0 and choice[0].isdigit():
            # "<group>" or "<group> <offset>"; bad input must not kill the loop.
            parts = choice.split()
            try:
                if len(parts) > 2:
                    raise ValueError("too many arguments")
                gnum = int(parts[0])
                i = int(parts[1]) if len(parts) == 2 else 0
                rs_list = groups[gnum].rs_list
            except (ValueError, IndexError, KeyError):
                print("Invalid group number or offset")
                continue
            # Limit to diffing with 5 prints
            # TODO: Show the 5 closest-matching prints?
            show_diff(feature_names, features, rs_list[i:i+5])
        elif choice == 'c':
            show_print(gname, classline, fp, clip)
        elif choice in ('t', 'r'):
            print("Reloading")
            feature_names = parse.parse_feature_set_file("nmap.set")
            groups = parse.parse_groups_file("nmap.groups")
            # The feature set may have changed on disk: re-vectorize the
            # observation so it stays index-aligned with feature_names.
            features = vectorize.vectorize(feature_names, rs)
            if choice == 't':
                # ./train.py -c 100 -s nmap.set -g nmap.groups --scale
                for group in groups:
                    group.features = []
                    for grs in group.rs_list:
                        gfeat = vectorize.vectorize(feature_names, grs)
                        group.features.append(gfeat)
                scale_params = train.prepare_features(groups, True)
                print("Training.")
                gen_model = train.train(groups, 100)
                with open("nmap.model", "w") as mf:
                    train.save_model(mf, feature_names, groups, gen_model, scale_params)
            model = parse.parse_model_file("nmap.model")
            correct, guesses = predict.predict(rs, model)
        elif choice == 'p':
            show_predictions(correct, guesses, groups)
        elif choice == 'i':
            show_v4fp(feature_names, features, rs)
        elif choice == 'q':
            print("Goodbye")
            sys.exit(0)
        else:
            print("Unknown")