--- -- Formula functions for various calculations. -- -- The library lets scripts to use common mathematical functions to compute percentages, -- averages, entropy, randomness and other calculations. Scripts that generate statistics -- and metrics can also make use of this library. -- -- Functions included: -- -- calcPwdEntropy - Calculate the entropy of a password. A random -- password's information entropy, H, is given by the formula: H = L * (logN) / (log2), -- where N is the number of possible symbols and L is the number of symbols in the -- password. Based on https://en.wikipedia.org/wiki/Password_strength -- -- looksRandom - Returns true if the value looks random. -- -- EditDistance - Finds the edit distance between two strings or tables. -- Edit distance is the minimum number of edits needed to transform one string or table -- into the other. The implementation was taken from Brett Smith: https://gist.github.com/Nayruden/427389 -- -- Parameters: -- s - A *string* or *table*. -- t - Another *string* or *table* to compare against s. -- lim - An *optional number* to limit the function to a maximum edit distance. If specified -- and the function detects that the edit distance is going to be larger than limit, limit -- is returned immediately. -- -- Returns: -- -- A *number* specifying the minimum edits it takes to transform s into t or vice versa. Will -- not return a higher number than lim, if specified. -- -- Example: -- -- :editDistance( "Tuesday", "Teusday" ) -- One transposition. -- :editDistance( "kitten", "sitting" ) -- Two substitutions and a deletion. -- -- returns... -- -- :1 -- :3 -- -- Notes: -- -- * Complexity is O( (#t+1) * (#s+1) ) when lim isn't specified. -- * This function can be used to compare array-like tables as easily as strings. -- * The algorithm used is Damerau–Levenshtein distance, which calculates edit distance based -- off number of subsitutions, additions, deletions, and transpositions. -- * Source code for this function is based off the Wikipedia article for the algorithm -- . -- * This function is case sensitive when comparing strings. -- * If this function is being used several times a second, you should be taking advantage of -- the lim parameter. -- * Using this function to compare against a dictionary of 250,000 words took about 0.6 -- seconds on my machine for the word "Teusday", around 10 seconds for very poorly -- spelled words. Both tests used lim. -- -- @copyright Same as Nmap--See http://nmap.org/book/man-legal.html --- local stdnse = require "stdnse" local table = require "table" _ENV = stdnse.module("formulas", stdnse.seeall) calcPwdEntropy = function(value) local total, hasdigit, haslower, hasupper, hasspaces = 0, 0, 0, 0, false if string.find(value, "%d") then hasdigit = 1 end if string.find(value, "%l") then haslower = 1 end if string.find(value, "%u") then hasupper = 1 end if string.find(value, ' ') then hasspaces = true end -- The values 10, 26, 26 have been taken from Wikipedia's entropy table. local total = hasdigit * 10 + hasupper * 26 + haslower * 26 local entropy = math.floor(math.log(total) * #value / math.log(2)) return entropy end -- A chi-square test for the null hypothesis that the members of data are drawn -- from a uniform distribution over num_cats categories. local function chi2(data, num_cats) local bins = {} local x2, delta, expected for _, x in ipairs(data) do bins[x] = bins[x] or 0 bins[x] = bins[x] + 1 end expected = #data / num_cats x2 = 0.0 for _, n in pairs(bins) do delta = n - expected x2 = x2 + delta * delta end x2 = x2 / expected return x2 end -- Split a string into a sequence of bit strings of the given length. -- splitbits("abc", 5) --> {"01100", "00101", "10001", "00110"} -- Any short final group is omitted. local function splitbits(s, n) local seq local _, bits = bin.unpack("B" .. #s, s) seq = {} for i = 1, #bits - n, n do seq[#seq + 1] = bits:sub(i, i + n - 1) end return seq end -- chi-square cdf table at 0.95 confidence for different degrees of freedom. -- >>> import scipy.stats, scipy.optimize -- >>> scipy.optimize.newton(lambda x: scipy.stats.chi2(dof).cdf(x) - 0.95, dof) local CHI2_CDF = { [3] = 7.8147279032511738, [15] = 24.99579013972863, [255] = 293.2478350807001, } function looksRandom(data) local x2 -- Because our sample is so small (only 16 bytes), do a chi-square -- goodness of fit test across groups of 2, 4, and 8 bits. If using only -- 8 bits, for example, any sample whose bytes are all different would -- pass the test. Using 2 bits will tend to catch things like pure -- ASCII, where one out of every four samples never has its high bit -- set. x2 = chi2(splitbits(data, 2), 4) if x2 > CHI2_CDF[3] then return false end x2 = chi2(splitbits(data, 4), 16) if x2 > CHI2_CDF[15] then return false end x2 = chi2({string.byte(data, 1, -1)}, 256) if x2 > CHI2_CDF[255] then return false end return true end --[[ Function: editDistance Finds the edit distance between two strings or tables. Edit distance is the minimum number of edits needed to transform one string or table into the other. Revisions: v1.00 - Initial. ]] function editDistance( s, t, lim ) local s_len, t_len = #s, #t -- Calculate the sizes of the strings or arrays if lim and math.abs( s_len - t_len ) >= lim then -- If sizes differ by lim, we can stop here return lim end -- Convert string arguments to arrays of ints (ASCII values) if type( s ) == "string" then s = { string.byte( s, 1, s_len ) } end if type( t ) == "string" then t = { string.byte( t, 1, t_len ) } end local min = math.min -- Localize for performance local num_columns = t_len + 1 -- We use this a lot local d = {} -- (s_len+1) * (t_len+1) is going to be the size of this array -- This is technically a 2D array, but we're treating it as 1D. Remember that 2D access in the -- form my_2d_array[ i, j ] can be converted to my_1d_array[ i * num_columns + j ], where -- num_columns is the number of columns you had in the 2D array assuming row-major order and -- that row and column indices start at 0 (we're starting at 0). for i=0, s_len do d[ i * num_columns ] = i -- Initialize cost of deletion end for j=0, t_len do d[ j ] = j -- Initialize cost of insertion end for i=1, s_len do local i_pos = i * num_columns local best = lim -- Check to make sure something in this row will be below the limit for j=1, t_len do local add_cost = (s[ i ] ~= t[ j ] and 1 or 0) local val = min( d[ i_pos - num_columns + j ] + 1, -- Cost of deletion d[ i_pos + j - 1 ] + 1, -- Cost of insertion d[ i_pos - num_columns + j - 1 ] + add_cost -- Cost of substitution, it might not cost anything if it's the same ) d[ i_pos + j ] = val -- Is this eligible for tranposition? if i > 1 and j > 1 and s[ i ] == t[ j - 1 ] and s[ i - 1 ] == t[ j ] then d[ i_pos + j ] = min( val, -- Current cost d[ i_pos - num_columns - num_columns + j - 2 ] + add_cost -- Cost of transposition ) end if lim and val < best then best = val end end if lim and best >= lim then return lim end end return d[ #d ] end return _ENV