author = "Patrick Donnelly "
categories = {"intrusive", "default"}; -- FIXME
runlevel = 2; -- Run after most basic scripts
description = [[
Crawls a web server and reports summary statistics about the objects it
finds: the total number of objects seen and the most common file extensions.
]];

-- NOTE(review): no script argument is read anywhere in this file; the spider's
-- thread_parallelism is hard-coded in action(). Confirm whether the argument
-- documented below is still planned.

---
-- @args parallelism blah blah

local assert = assert;
local pairs = pairs;
local setmetatable = setmetatable;
local type = type;

local table = require "table";
local nmap = require "nmap";
local http = require "http";

-- Service names this script applies to, split by whether they imply SSL.
-- Built once here rather than on every portrule() call.
local PLAIN_SERVICES = { ["http"] = true, ["http-alt"] = true };
local SSL_SERVICES = { ["https"] = true, ["https-alt"] = true };

--- Decides whether the script runs against a port.
-- Accepts TCP ports whose detected service is one of the HTTP(S) service
-- names above, unless the port needs SSL and this Nmap build lacks it.
-- @param host Host table (unused).
-- @param port Port table; reads <code>protocol</code>, <code>service</code>
--             and <code>version.service_tunnel</code>.
-- @return true if the script should run, false otherwise.
function portrule (host, port)
  local service = port.service;

  if port.protocol ~= "tcp" then
    return false;
  end
  if not (PLAIN_SERVICES[service] or SSL_SERVICES[service]) then
    return false;
  end

  -- Don't bother running on SSL ports if we don't have SSL.
  local needs_ssl = SSL_SERVICES[service]
    or port.version.service_tunnel == "ssl";
  if needs_ssl and not nmap.have_ssl() then
    return false;
  end
  return true;
end

-- Accumulator for crawl statistics. The fields below are prototype defaults
-- that instances read through __index until they assign their own value.
-- Only number_objects is currently updated; the per-type counters are unused
-- placeholders kept for compatibility.
local Output = {
  number_objects = 0,
  number_html = 0,
  number_css = 0,
  number_php = 0,
  number_jpg = 0,
};

--- Creates a new, empty statistics accumulator.
-- @return An Output instance whose <code>extensions</code> table reads as 0
--         for any extension not yet seen.
function Output:new ()
  local object = {
    -- Missing keys read as 0 so add_path can increment unconditionally.
    extensions = setmetatable({}, {__index = function (t, k) return 0 end}),
  };
  return setmetatable(object, {__index = Output, __metatable = Output});
end

--- Records one discovered path.
-- Increments the object count and, if the path ends in ".<alphanumerics>",
-- the counter for that (lower-cased) extension.
-- @param path Path string; anything else trips the assertion.
function Output:add_path (path)
  assert(type(path) == "string");
  self.number_objects = self.number_objects + 1;

  local extension = path:match("%.(%w+)$"); -- extension?
  if extension then
    extension = extension:lower();
    self.extensions[extension] = self.extensions[extension] + 1;
  end

  -- Ideas:
  --   Most common prefix in basename
  --   Most common suffix in basename
  --   Most common word or phrase in basename
  --   [DONE] Print number of objects for each type of extension if > 0
  --   Page with most images?
  --   Page that links to the most objects
  --   Number of offsite links
  --   Most common offsite link
  --   Pages crawled (w/ ratio to # of objects seen)
  --   Most commonly linked page
end

--- Renders the collected statistics as text.
-- Lists the most common extensions (5 per verbosity level, or 3 when that
-- product is not positive) and folds the rest into one summary line.
-- @return Multi-line string, each line terminated by "\n".
function Output:dump ()
  local out = {"Number of objects seen "..self.number_objects..".\n"};

  -- Extension Statistics
  local sorted = {};
  for extension, number in pairs(self.extensions) do
    sorted[#sorted+1] = { extension = extension, number = number };
  end
  -- Most frequent first; ties are broken by name so the output does not
  -- depend on pairs() order or on table.sort being unstable.
  table.sort(sorted, function (e1, e2)
    if e1.number ~= e2.number then
      return e1.number > e2.number;
    end
    return e1.extension < e2.extension;
  end);

  local printed = nmap.verbosity() * 5;
  if printed <= 0 then printed = 3 end
  if printed > #sorted then printed = #sorted end

  for i = 1, printed do
    out[#out+1] = sorted[i].number.." "..sorted[i].extension..
      " objects seen.\n";
  end

  if #sorted > printed then
    -- Summarize exactly the entries that were not printed above.
    local remaining, number = #sorted - printed, 0;
    for i = printed + 1, #sorted do
      number = number + sorted[i].number;
    end
    out[#out+1] = number.." other objects with "..remaining..
      " different extensions.\n";
  end

  return table.concat(out);
end

--- Script entry point: crawls the target and reports statistics.
-- NOTE(review): http.spider.all is not defined in this file; it is presumably
-- an iterator factory whose iterator yields path, from_path, from_response
-- and a nil/false path when the crawl is exhausted. Confirm against the http
-- library in use.
-- @param host Host table; <code>name</code> is used as the report header.
-- @param port Port table, passed through to the spider.
-- @return The host name on the first line followed by Output:dump()'s text.
function action (host, port)
  local spider = http.spider.all(host, port,
    {max_requests = 100, thread_parallelism = 3, sleep = 5},
    "/", "/index.html");
  local output = Output:new();

  while true do
    -- Only the path is needed; the remaining results are discarded.
    local path = spider();
    if not path then break end
    output:add_path(path);
  end

  return host.name.."\n"..output:dump();
end