#!/usr/bin/python

"""
tester.py

Runs various Nmap command lines, compares the output and timing, testing for
regressions.

This program requires Unix signals and calls Unix commands, so it won't work
under Windows. It might take quite a lot of free space in its current
directory.

Do NOT run it from within a directory which contains a file or directory named
"nmap" or "nmap-nsock-scan"! Both are removed recursively in the cleanup stage.
"""

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
import xml.etree.ElementTree as ET
import subprocess
import datetime
import time
import signal
import pprint
import os
import sys
import StringIO
import atexit
import pipes
import gzip
import re
import smtplib

# Sender and recipients of the e-mailed test report.
EMAIL_SENDER = "sender@example.org"
EMAIL_TO = ["receiver1@somedomain.org", "some@other-receiver.tld"]

# Use the gnuplot found on PATH if it can be executed at all; otherwise fall
# back to a copy in the user's home directory. The version banner is thrown
# away through os.devnull instead of an unread pipe: the subprocess
# documentation warns against combining call() with stdout=PIPE.
try:
    with open(os.devnull, "w") as _devnull:
        subprocess.call(["gnuplot", "--version"], stdout=_devnull)
    GNUPLOT_PATH = "gnuplot"
except OSError:
    GNUPLOT_PATH = os.path.expanduser("~/bin/gnuplot")

# Directory (relative to the working directory) that receives the logs and
# plots; it carries today's date.
LOGFILE_DIR = "logs/" + str(datetime.date.today())

# Timing columns that end up in the data files and on the plot.
WATCHED = ['cwnd', 'probes_active', 'ssthresh']

# Header line of a timing dump; group 1 is the number of seconds given in
# parentheses.
FIRST_LINE_RE = r'^\*\*TIMING STATS\*\* \((.*?)s\).*$'

# Any following line of a timing dump: a name, an optional
# "(<complete>/<incomplete> incomplete)" part and three slash-separated
# groups of 6, 3 and 3 values (numbers or '*').
NOTFIRST_LINE_RE = ' '.join([
    r'^   ([^ ]+)( \(([^/]+)/([^ ]+) incomplete\))?:',
    '/'.join(6 * [r'([-0-9*.]+)']),
    '/'.join(3 * [r'([-0-9*.]+)']),
    '/'.join(3 * [r'([-0-9*.]+)']),
])


# Remember the real stdout and stderr, then point both of them at a single
# in-memory buffer so that everything the program prints is collected as a
# string.
stdout_copy, stderr_copy = sys.stdout, sys.stderr

f = StringIO.StringIO()
sys.stdout = sys.stderr = f

# The MIME message the program's output gets wrapped in.
msg = MIMEMultipart()

# At exit, wrap f's contents in the MIME message and e-mail it.
def flush_f():
    """
    Sends everything captured in f, together with the files listed in the
    global "filenames" (if it exists), as one e-mail through the SMTP server
    on localhost.

    The real stdout is restored first, and stderr is pointed at it as well.
    An attachment that cannot be read is skipped with a note on stdout. If
    the message cannot be handed to the SMTP server, it is printed to stdout
    instead, so that the collected report is not lost.
    """

    # Rewind the f StringIO so that f.read() will read all the data.
    f.seek(0)

    # Restore stdout and stderr - this will let us print to the program's
    # standard output. Note that we overwrite stderr with the stdout - this
    # will let us see the error messages in crontab e-mails.
    sys.stdout = stdout_copy
    sys.stderr = stdout_copy

    # Add the captured output as the message body.
    msg.attach(MIMEText(f.read(), "plain"))

    # "filenames" is only defined once the main section has started running
    # tests, hence the lookup through globals(). We use "unknown" to trick
    # Thunderbird into thinking that a non-PNG file is binary.
    for filename in globals().get('filenames', []):
        if filename.endswith(".png"):
            attachment = MIMEBase("image", "png")
        else:
            attachment = MIMEBase("application", "unknown")
        try:
            # Binary mode: the payload is base64-encoded below and must not
            # go through any text translation.
            with open(filename, "rb") as attached_file:
                attachment.set_payload(attached_file.read())
        except EnvironmentError as e:
            print("Could not attach %s: %s" % (filename, e))
            continue
        attachment.add_header('Content-Disposition', 'attachment',
                              filename=filename)
        encoders.encode_base64(attachment)
        msg.attach(attachment)

    msg['From'] = EMAIL_SENDER
    msg['To'] = ', '.join(EMAIL_TO)
    msg['Subject'] = "Nmap regression testing results"

    try:
        server = smtplib.SMTP('localhost')
        server.set_debuglevel(1)
        server.sendmail(EMAIL_SENDER, EMAIL_TO, msg.as_string())
        server.quit()
    except (smtplib.SMTPException, EnvironmentError) as e:
        # Delivery failed; fall back to dumping the message so that the
        # results of a (possibly hours long) run survive.
        print("Could not send the report by e-mail (%s); printing it "
              "instead." % e)
        print(msg.as_string())
atexit.register(flush_f)


def makefilename(cmd):
    """
    Builds a file name out of the current date and time followed by cmd,
    with every '/' replaced by '__' and every space replaced by '_'.
    """
    name = "%s %s" % (datetime.datetime.today(), cmd)
    for unwanted, substitute in (('/', '__'), (' ', '_')):
        name = name.replace(unwanted, substitute)
    return name

def test_cmd(cmd, read_timeout=60*60):
    """
    Runs a specified Nmap command line and parses its XML output.

    The command is run through the shell with "-oX <logfile>" appended; its
    stdout and stderr are piped through gzip into a second file. Both files
    are created under LOGFILE_DIR.

    Returns three values:
      * the elapsed wall-clock time in seconds,
      * a list with one dict per <port> element found in the XML output,
        which merges the attributes of the port's <state> element, of the
        <port> element itself and of the host's first <address> element
        (on a name clash the later one wins),
      * a list with the paths of the XML log and of the gzipped stdout log.

    If the command takes longer than read_timeout seconds (one hour by
    default), the whole command pipeline is killed. After a timeout or a
    non-zero exit status, -1, {}, [] is returned instead.
    """
    if not os.path.exists(LOGFILE_DIR):
        os.makedirs(LOGFILE_DIR)
    logfile_path = "%s/%s.xml" % (LOGFILE_DIR, makefilename(cmd))
    stdout_path = "%s/%s-stdout.txt.gz" % (LOGFILE_DIR, makefilename(cmd))

    cmd += ' -oX %s 2>&1 | gzip' % pipes.quote(logfile_path)

    # We might run into an infinite loop which would block the script. Let's
    # create a timeout using a Unix alarm() function.
    def read_timed_out(*args):
        raise RuntimeError()

    started = time.time()
    stdout_file = open(stdout_path, "wb")
    try:
        # The command is a shell pipeline, so give it a process group of its
        # own: this lets us kill the shell, nmap and gzip together.
        p = subprocess.Popen(cmd, stdout=stdout_file, shell=True,
                             preexec_fn=os.setsid)
        signal.signal(signal.SIGALRM, read_timed_out)
        signal.alarm(read_timeout)

        timed_out = False
        try:
            p.wait()
        except RuntimeError:
            timed_out = True
        finally:
            # Always cancel the alarm, otherwise it could fire later on in
            # unrelated code.
            signal.alarm(0)
            # Still running means a timeout or an interruption (e.g.
            # Ctrl-C); do not leave the pipeline behind.
            if p.poll() is None:
                try:
                    os.killpg(p.pid, signal.SIGKILL)
                except OSError:
                    pass
                p.wait()
    finally:
        stdout_file.close()

    # NOTE(review): with a plain "sh" pipeline the exit status is presumably
    # that of the last stage (gzip), not of nmap - confirm whether that is
    # good enough as a failure check.
    if timed_out or p.returncode != 0:
        return -1, {}, []

    elapsed = time.time() - started

    # parse the XML output
    with open(logfile_path, 'r') as xml_file:
        t = ET.fromstring(xml_file.read())

    ret = []
    for host_node in t.findall(".//host"):
        address_node = host_node.findall(".//address")[0]
        for port_node in host_node.findall(".//port"):
            port_dict = dict(port_node.findall('state')[0].items())
            port_dict.update(port_node.items())
            port_dict.update(address_node.items())
            ret.append(port_dict)
    return elapsed, ret, [logfile_path, stdout_path]


def parse_timing(timing):
    """
    Parses one block of Nmap's "TIMING STATS" lines.

    The first line has to match FIRST_LINE_RE and supplies the time stamp;
    every following non-blank line has to match NOTFIRST_LINE_RE. Returns a
    dict keyed by the name each line starts with. Each value is a dict with
    'current_time' plus one entry per label: the first four are kept as
    matched, the remaining ones are converted to float, or to None where the
    line has '*'.
    """
    def as_number(text):
        if text == '*':
            return None
        return float(text)

    field_names = ['group', 'ignored', 'num_complete', 'num_incomplete',
                   'probes_active', 'freshportsleft', 'retry_stack',
                   'outstanding', 'retranwait', 'onbench', 'cwnd',
                   'ssthresh', 'delay', 'timeout', 'srtt', 'rttvar']

    rows = timing.split('\n')
    timestamp = float(re.match(FIRST_LINE_RE, rows[0]).group(1))

    parsed = {}
    for row in rows[1:]:
        if not row.strip():
            continue
        fields = re.match(NOTFIRST_LINE_RE, row).groups()
        entry = {'current_time': timestamp}
        for position, (name, raw) in enumerate(zip(field_names, fields)):
            # Only the values after the name/count columns are numeric.
            entry[name] = as_number(raw) if position > 3 else raw
        parsed[fields[0]] = entry
    return parsed


def end_of_timing(data, hosts, timing_str, title_prefix):
    """
    Called once a complete timing block has been collected in timing_str.

    Parses the block, adds every name found in it to hosts and, for each
    name and each column in WATCHED, appends a "<time> <value>" line to the
    file "<title_prefix>-<name>-<column>.txt". The files are opened on first
    use and kept in the data dict, keyed by file name.
    """
    parsed = parse_timing(timing_str)
    for host in parsed:
        hosts.add(host)
    for host, stats in parsed.items():
        for column in WATCHED:
            sample = (stats['current_time'], stats[column])
            path = "%s-%s-%s.txt" % (title_prefix, host, column)
            try:
                out = data[path]
            except KeyError:
                out = data[path] = open(path, "w")
            out.write("%s %s\n" % sample)


def read_data(input_file, title_prefix):
    """
    Reads lines from input_file and extracts the timing blocks: a line
    starting with "**TIMING STATS** " plus the lines starting with three
    spaces that directly follow it. Every complete block is passed to
    end_of_timing(), which writes the watched values to data files whose
    names start with title_prefix.

    Returns the set of hosts found in the timing information. The data
    files are closed before returning, so they are complete on disk.
    """
    hosts = set()
    data = {}
    block = []    # lines of the timing block being collected, if any

    try:
        for line in input_file:
            if line.startswith('**TIMING STATS** '):
                # A new header directly after a block ends that block.
                if block:
                    end_of_timing(data, hosts, ''.join(block), title_prefix)
                block = [line]
            elif block and line.startswith('   '):
                block.append(line)
            elif block:
                end_of_timing(data, hosts, ''.join(block), title_prefix)
                block = []
        # Do not lose a block that runs up to the end of the input.
        if block:
            end_of_timing(data, hosts, ''.join(block), title_prefix)
    finally:
        for data_file in data.values():
            data_file.close()

    return hosts


def write_graph(f, hosts, title_prefix, last=True):
    """
    Writes the entries of a gnuplot "plot" command to f: one line per
    combination of a column in WATCHED and a host in hosts, each plotting
    the data file "<title_prefix>-<host>-<column>.txt".

    Every line except the final one ends with ", \\" so that the command
    continues on the next line; the final line gets it as well when last is
    false, i.e. when more entries are going to follow.
    """
    series = []
    for column in WATCHED:
        for host in hosts:
            series.append("%s-%s-%s" % (title_prefix, host, column))

    final_index = len(series) - 1
    for index, name in enumerate(series):
        entry = '"%s.txt" using 1:2 title "%s" with lines ' % (name, name)
        if index != final_index or not last:
            entry += ', \\'
        f.write(entry + '\n')


def run_test(args, print_graph=False):
    """
    Runs a test for the given command line arguments with both Nmap builds
    (./nmap and ./nmap-nsock-scan) through test_cmd(). Prints the time each
    invocation took and compares the port scanning results, printing a diff
    (or both result lists if no diff can be produced) when they differ.

    If print_graph is true, additionally plots the watched timing variables
    of both runs with gnuplot. Returns a list with the path of the plot, or
    an empty list if no plot was made (not requested, or a scan failed).
    """

    print("Testing 'nmap -n -Pn -sT %s'" % args)

    trunk_time, trunk_results, trunk_logfilenames = test_cmd((
        "./nmap/nmap -n -Pn --unprivileged %s " +
        "-sT -d4") % args)
    print("%s\t => Trunk time" % trunk_time)

    nsock_time, nsock_results, nsock_logfilenames = test_cmd((
        "./nmap-nsock-scan/nmap -n -Pn --unprivileged %s " +
        "-sT -d4") % args)
    print("%s\t => nsock time" % nsock_time)

    if trunk_results == nsock_results:
        print("The results are consistent.")
    else:
        print("The results are inconsistent.")
        # datadiff is optional; fall back to dumping both results if it is
        # missing or cannot diff them.
        try:
            import datadiff
            print(datadiff.diff(trunk_results, nsock_results))
        except Exception:
            pprint.pprint(trunk_results)
            pprint.pprint(nsock_results)

    if not print_graph:
        return []

    # A failed scan comes back without log file names; there is nothing to
    # plot then.
    if len(trunk_logfilenames) < 2 or len(nsock_logfilenames) < 2:
        print("Not plotting: at least one of the scans failed.")
        return []

    script = open("gnuplot.tmp", "w")
    try:
        script.write("set term png\n")
        script.write('set xlabel "Time (s)"\n')
        script.write('set ylabel "Timing variable"\n')
        script.write('plot ')

        runs = (("trunk", trunk_logfilenames, False),
                ("nsock", nsock_logfilenames, True))
        for title_prefix, logfilenames, is_last in runs:
            # Index 1 is the gzipped stdout of the scan.
            stdout_log = gzip.open(logfilenames[1], "r")
            try:
                hosts = read_data(stdout_log, title_prefix)
            finally:
                stdout_log.close()
            write_graph(script, hosts, title_prefix, last=is_last)
    finally:
        script.close()

    p = subprocess.Popen([GNUPLOT_PATH, "gnuplot.tmp"],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    plotfile_path = "%s/%s.png" % (LOGFILE_DIR, makefilename(args))
    gnuplot_output, gnuplot_stderr = p.communicate()
    print(gnuplot_stderr)
    # gnuplot writes the PNG image to its stdout; store it as binary data.
    with open(plotfile_path, "wb") as plotfile:
        plotfile.write(gnuplot_output)
    return [plotfile_path]


def pull_and_build_branch(branch, directory):
    """
    Checks out https://svn.nmap.org/<branch> with svn, then runs
    "./configure" and "make" inside directory. The output of all these
    commands is written to the file "buildlog" in the current directory,
    which is overwritten by each call.

    If any of the steps fails, exits the program with a message that
    contains the command line, its return code and the last lines of the
    build log.
    """
    cmd = ("( svn co 'https://svn.nmap.org/%s'"
           " && cd %s && ./configure && make ) >buildlog 2>&1") % (branch,
                                                                   directory)
    # The shell redirects all output to buildlog, so there is nothing to
    # capture through a pipe here.
    returncode = subprocess.call(cmd, shell=True)
    if returncode != 0:
        tail_lines = 50
        try:
            with open("buildlog") as buildlog:
                output = ''.join(buildlog.readlines()[-tail_lines:])
        except EnvironmentError:
            output = ''
        sys.exit("'%s' failed with returncode %d: %s" % (cmd, returncode,
                                                         output))

if __name__ == "__main__":

    pull_and_build_branch("nmap", "nmap")
    pull_and_build_branch("nmap-exp/d33tah/nmap-nsock-scan", "nmap-nsock-scan")

    # flush_f() looks "filenames" up in the module globals, so the list is
    # extended after every single test rather than assembled at the end.
    filenames = []
    for test_args, want_graph in (
            ("localhost", False),
            ("scanme.nmap.org", True),
            ("127.0.0.128/31 -p-", False),
            ("localhost -p1-10 --max-rate=1", False),
            ("localhost -p1-10 --scan-delay=1s", False)):
        filenames.extend(run_test(test_args, print_graph=want_graph))

    # Other ideas for tests:
    #
    # * simulating congestion by dropping 10% of packets and instead of looking
    #   for inconsistencies, measure how many open ports were found (and how
    #   many retries were attempted - maybe also to make sure that limits are
    #   respected?)
    #
    # * scanme.nmap.org --top-ports=10?
    # * scanme.nmap.org -F?
    # * scanme.nmap.org --top-ports=10000?
    # * scanme.nmap.org -p-?
    #
    # * I got interesting results while scanning scanme.nmap.org along with
    #   8.8.8.8 - my branch is much slower than SVN trunk.
    #
    # * -S
    # * -e
    # * TCP lingering
    # * --ip-options
    # * --proxies
    #
    # * progress info (maybe measure how much off the predictions were?)
    # * retransmissions
    # * timeouts
    # * rate limiting
    # * netem simulations
    #
    # * --min-rate
    # * --scan-delay
    # * --max-scan-delay
    # * --min-rate and --max-rate (effectively 1/--scan delay)

    # Cleanup stage: remove both checkouts.
    subprocess.call(["rm", "-rf", "./nmap", "./nmap-nsock-scan"])