description = [[ This script will crawl a web server and display a list of all the files found. This script is useful to determine all the possible file-targets/attack-surface when auditing web applications. ]] --- -- @usage -- nmap -p80 --script http-sitemap-dump -- -- @output --- author = "Paulino Calderon" license = "Same as Nmap--See http://nmap.org/book/man-legal.html" categories = {"discovery", "intrusive"} require "http" require "shortport" require "stdnse" portrule = shortport.http local DEFAULT_PATH = "/" local OPT_PATH = stdnse.get_script_args("http-sitemap-dump.basepath") or DEFAULT_PATH local output_lns={} local links_list = {} local visited_links = {} --Checks if URL is an absolute address --@param url URL String --@return True if "http://" is found local function is_url_absolute(url) if string.find(url, "http://") then return true else return false end end --Checks if given string is a relative url --@param url URL String --@return True if url is a relative url local function is_url_relative(url) if is_url_absolute(url) then return false end return true end --Returns the url including the script name without parameters. --@param uri URL String --@return URL without parameters local function remove_query(uri) local url_frags, abs_url url_frags = url.parse(uri) if url_frags.scheme and url_frags.authority and url_frags.path then abs_url = url_frags.scheme.."://"..url_frags.authority..url_frags.path else abs_url = uri end return abs_url end --Checks if link is anchored () --Example: "#linkPointingInsideOfDocument" --@param url URL String --@return True if url is an anchored link local function is_link_anchored(url) if string.sub(url, 1, 1) == "#" then return true end return false end --Checks if link is local. --@param url_parts --@param host --@return True if link is local local function is_link_local(url_parts, host) if url_parts.authority and not(url_parts.authority == stdnse.get_hostname(host) or url_parts.authority == "www."..stdnse.get_hostname(host)) then return false end return true end --Checks if link is malformed --This function looks for: --*Links that are too long --*Links containing html code --*Links with mailto tags -- --@param url URL String --@return True if link seems malformed local function is_link_malformed(url) --check for links that are too long if string.len(url)>100 then return true end --check if brackets are found (indicating html code in link) if string.find(url, "[<>]") ~= nil then return true end --check for mailto tag if string.find(url, "mailto:") ~= nil then return true end --check if its a javascript action if string.find(url, "javascript:") ~= nil then return true end return false end --Checks if a link is crawable --Criteria: --*Must be a local link --*Must be valid --*Must be a link pointing outside a document --@param uri URL String --@param host Host table --@return True if link meets criteria to be crawlable local function is_link_crawlable(uri, host) local url_frags url_frags = url.parse(uri) if not(is_link_local(url_frags, host)) or is_link_anchored(uri) or is_link_malformed(uri) then return false end return true end --Parses the href attribute of the tags inside the body --@param body HTML Body --@return Table of href links found in document local function get_href_links(body) local href_links = {} body = string.lower(body) for l in string.gfind(body, 'href%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]') do table.insert(href_links, l) end return href_links end --Checks if url contains a blacklisted extension --Maybe whitelist approach will work better --@param ext Url extension --@return True if the url contains a invalid extension local function is_url_extension_blacklisted(ext) local banned_extensions = {} if ext then ext = string.lower(ext) end for _, banned_ext in pairs(banned_extensions) do if ext == banned_ext then return true end end return false end --Gets current path of URL --@param url URL String --@return Path string excluding OPT_PATH local function get_current_path(uri, host) local base_path_frags, base_path_frags_num, path_frags, path_frags_num local current_path="" base_path_frags = url.parse_path("http://"..stdnse.get_hostname(host)..OPT_PATH) path_frags = url.parse_path(uri) base_path_frags_num = #base_path_frags path_frags_num = #path_frags for i = base_path_frags_num+1, path_frags_num-1, 1 do current_path = current_path..path_frags[i].."/" end return current_path end --Extracts file extension from URL --@param uri URL String --@return URL Extension local function get_url_extension(uri) local page_ext, ext_offset, url_frags -- Parse file extension if available url_frags=url.parse(uri) if url_frags ~= nil then ext_offset = string.find(url_frags.path, "%.(.*)") if ext_offset ~= nil then page_ext = string.sub(url_frags.path, ext_offset) else page_ext = "" end end return page_ext end --Downloads a page and stores its information in the global table "links_list" --@param host Host table --@param port Port number --@param url URL String local function download_page(host, port, uri) local page_ext, ext_offset, page_resp -- Parse file extension if available page_ext = get_url_extension(uri) -- Checks if url ext is blacklisted to save requests if is_url_extension_blacklisted(page_ext) then stdnse.print_debug(2, "Skipping %s", uri) return false else if uri ~= nil then stdnse.print_debug(2, "HTTP GET %s", uri) end --Append trailing path if missing if uri == "http://"..stdnse.get_hostname(host) or uri == "http://www."..stdnse.get_hostname(host) then uri = uri .. "/" end page_resp = http.get(host, port, uri) end --301,302,303 Redirections if page_resp.status == 301 or page_resp.status == 302 or page_resp.status == 303 then local new_target stdnse.print_debug(2, "HTTP REDIRECTION %s DETECTED", page_resp.status) if page_resp.header["location"] and not(visited_links[page_resp.header["location"]]) then new_target = page_resp.header["location"].."/" stdnse.print_debug(2, "Redirecting to: %s", new_target) -- Parse file extension if available page_ext = get_url_extension(new_target) -- Checks if url ext is blacklisted to minimize requests if is_url_extension_blacklisted(page_ext) then stdnse.print_debug(2, "Skipping %s", new_target) return false else return new_target end end end stdnse.print_debug(3, "%s returned:\n %s", uri, page_resp.body) -- Store page info in crawled list links_list[uri] = {["uri"]=uri, ["status"]=page_resp.status, ["ext"]=page_ext, ["content"]=page_resp.body} return true end --Crawls given URL until it find all local links --@param uri URL --@param options Options table --@return Table of crawled pages and its information local function crawl(uri, cur_path, options) local href_links, url_parts local hostname_str = stdnse.get_hostname(options["host"]) stdnse.print_debug(2, "Crawling %s", uri) if not(is_link_crawlable(uri, options["host"])) then stdnse.print_debug(2, "Ignoring uri: %s", uri) return end --Normalize urls by only using absolute urls if is_url_relative(uri) then uri = url.absolute("http://"..hostname_str..cur_path, uri) end uri = remove_query(uri) cur_path = get_current_path(uri, options["host"]) --Download URI and extract links local download_page_res = download_page(options["host"], options["port"], uri) if not(download_page_res) then return -- if a redirect was detected then update current path elseif type(download_page_res) == "string" then local new_target = remove_query(download_page_res) if visited_links[new_target] == nil then cur_path = get_current_path(new_target, options["host"]) visited_links[new_target] = true crawl( new_target, cur_path, options) end return end href_links = get_href_links(links_list[uri]["content"]) --Iterate through link list for i, href_link in ipairs(href_links) do stdnse.print_debug(2, "HREF tag found: %s", href_link) if is_url_relative(href_link) then href_link = url.absolute("http://"..hostname_str.."/"..OPT_PATH..cur_path, href_link) end if href_link == "http://www."..hostname_str or href_link == "http://"..hostname_str then href_link = href_link .. "/" end --Recursive crawl when a link hasn't been visited if visited_links[href_link] == nil then visited_links[href_link]=true crawl( href_link, cur_path, options) end end end --Returns a list with all the crawled pages and its information --@return Table of crawled pages local function get_page_list() return links_list end --[[ --MAIN --]] action = function(host, port) local options, pages --Sets options and starts crawler options = {host = host, port = port, allow_remote=false} crawl(OPT_PATH, "", options) --Iterate through page list to find php files and send the attack vector pages = get_page_list() for _,pg in pairs(pages) do if pg["status"] then output_lns[#output_lns + 1] = pg["uri"] end end if #output_lns > 0 then return stdnse.strjoin("\n", output_lns) end end