import hashlib
import struct

from scapy.all import *


def get_reply(rs, probe_name):
    """Return only the first reply to *probe_name* (if any), otherwise None.

    ``rs`` is indexed by probe name. A stored value of None and an empty
    sequence are both treated as "no reply".
    """
    packets = rs[probe_name]
    if packets is None or len(packets) == 0:
        return None
    return packets[0]


def find_tcp(packet):
    """Return the first TCP layer found in *packet*, or None.

    Starts at *packet* itself and follows the ``.payload`` chain until a TCP
    layer is reached or the chain ends on a falsy payload. A falsy *packet*
    (including None) yields None.
    """
    payload = packet
    while payload and not isinstance(payload, TCP):
        payload = payload.payload
    if not payload:
        return None
    return payload


def vectorize_tcp_window(ip):
    """Return the ``window`` field of the TCP layer in *ip*, or None."""
    tcp = find_tcp(ip)
    if tcp is None:
        return None
    return tcp.window


def make_vectorize_tcp_flag(flag):
    """Build a feature function that tests one bit of the TCP flags word.

    *flag* is a bit mask applied to the low 12 bits of the big-endian 16-bit
    word at byte offset 12 of the serialized TCP layer. The returned function
    takes a packet and returns 1 if any masked bit is set, 0 if not, and None
    when the packet contains no TCP layer.
    """
    def fn(ip):
        tcp = find_tcp(ip)
        if tcp is None:
            return None
        # Get the reserved bits too: read the raw header word instead of the
        # parsed flags field. bytes() rather than str() so that the packet is
        # serialized to a bytes-like object on Python 3 as well (on Python 2
        # bytes is str, so the result is unchanged there).
        flags, = struct.unpack_from(">H", bytes(tcp), 12)
        flags = flags & 0xfff
        if flags & flag == 0:
            return 0
        else:
            return 1
    return fn


# Shorthand tests that just need to get a value from a single response.
INDIVIDUAL_TESTS = {
    "TCPWINDOW": vectorize_tcp_window,
    "TCPFLAG_F": make_vectorize_tcp_flag(1 << 0),
    "TCPFLAG_S": make_vectorize_tcp_flag(1 << 1),
    "TCPFLAG_R": make_vectorize_tcp_flag(1 << 2),
    "TCPFLAG_P": make_vectorize_tcp_flag(1 << 3),
    "TCPFLAG_A": make_vectorize_tcp_flag(1 << 4),
    "TCPFLAG_U": make_vectorize_tcp_flag(1 << 5),
    "TCPFLAG_E": make_vectorize_tcp_flag(1 << 6),
    "TCPFLAG_C": make_vectorize_tcp_flag(1 << 7),
    "TCPFLAG_RES8": make_vectorize_tcp_flag(1 << 8),
    "TCPFLAG_RES9": make_vectorize_tcp_flag(1 << 9),
    "TCPFLAG_RES10": make_vectorize_tcp_flag(1 << 10),
    "TCPFLAG_RES11": make_vectorize_tcp_flag(1 << 11),
}


def vectorize_hash(rs):
    """Return a 16-bit integer derived from the MD5 of ``rs.osclass``.

    Every part of ``rs.osclass`` is fed to a single MD5 hash in iteration
    order; the first two bytes of the digest are returned as a big-endian
    unsigned integer. When ``rs.osclass`` is None the digest of empty input
    is used.
    """
    h = hashlib.md5()
    if rs.osclass is not None:
        for part in rs.osclass:
            # hashlib only accepts bytes-like input on Python 3, so encode
            # text parts first. On Python 2 str is bytes and this branch is
            # never taken. Assumes text parts are meant to be hashed as
            # UTF-8 -- TODO confirm against whatever populates osclass.
            if isinstance(part, str) and not isinstance(part, bytes):
                part = part.encode("utf-8")
            h.update(part)
    digest = h.digest()
    n, = struct.unpack_from(">H", digest)
    return n


# These functions take a whole ResponseSet.
COMBINED_TESTS = { "HASH": vectorize_hash, } def vectorize_unit(feature_name, rs): parts = feature_name.split(".") indiv = INDIVIDUAL_TESTS.get(parts[-1]) if indiv is not None: assert(len(parts) == 2) ip = get_reply(rs, parts[0]) return indiv(ip) combined = COMBINED_TESTS.get(feature_name) if combined is not None: return combined(rs) raise ValueError("Don't know how to vectorize feature %s" % repr(feature_name)) def vectorize(feature_set, response_set): rs = response_set vector = [] for feature_name in feature_set: vector.append(vectorize_unit(feature_name, rs)) return vector