-- TODO validate results - make sure we're not catching false positives...
description = [[
Performs an IP address search at Bing.com to discover hostnames serving content
indexed by Bing e.g. web vhosts and subdomains. A list of hostnames associated
with a target IP address may be useful in the information gathering phase of a
pentest. Discovered host names are appended to a file and a summary is printed
at the end of the scan. This script queries the mobile bing interface which
yields smaller http responses than the standard web interface.
Important
At the moment, parallelism of script instances is prevented by way of mutex.
This is necessary to avoid behaviour which might be perceived as abuse and so
you are advised not to use this script with large numbers of targets if you
require scan times to be short. As a rule-of-thumb, scan length may be 1s per
search result page per target.
Default Behaviours
There are no script arguments defined for this script, but there are some
variables that may readily be changed.
MAXRESULTS = 250
- the number of search result entries to be parsed
is not necessarily the maximum number of unique host names collected due to
duplicates in the results i.e. the number collected is likely to be much less.
This number represents 25 pages of results showing 10 entries each (although the
actual number of results per page may vary) and if the scan result reports that
25 pages were parsed you may wish to increase MAXRESULTS
until it
parses less than it could have.
OUTFILE = 'scripts/data/ipsearch-$target.list'
- the relative path
to the file to which discovered host names are written. The token
$target
will be replaced with the host name or IP address given to
the nmap command. Make sure that the scripts/data/ directory exists or the file
will not be created. If the file exists, it will be appended to.
PATTERN
and SUBPATTERN
- the html markup of Bing.com
IP search results may change at any time (just like the hyperlinks on any of
microsoft's websites do) and this pattern may need to be changed to suit.
PATTERN
contains a single capture and the value captured will be
further parsed using SUBPATTERN
to, hopefully, extract a valid host
name.
]]
---
-- @usage
-- nmap -n -Pn -sn --script ipsearch <target>
--
-- @output
-- Host script results:
-- | ipsearch: parsed 3 pages and dumped 2 host names to:
-- |_scripts/data/ipsearch-insecure.org.list
--
-- Host script results:
-- | ipsearch: parsed 11 pages and dumped 59 host names to:
-- |_scripts/data/ipsearch-xn--8-qeuua7h2gr823avwxa.com.list
-- Script metadata fields.
id = 'ipsearch'
author = 'jah'
-- NOTE(review): runlevel is given as a string rather than a number - confirm
-- that the NSE version in use accepts this.
runlevel = '1'
categories = {'discovery', 'external'}
-- search results parsed - not necessarily the number of valid, unique host names
local MAXRESULTS = 250
-- file pattern to which discovered host names are written; the literal token
-- $target is substituted with the target name by write()
local OUTFILE = 'scripts/data/ipsearch-$target.list'
-- the pattern used to capture host names from search result pages
local PATTERN = 'class="s15">%s*= MAXRESULTS)
mutex 'done'
result.numpages = pagenum -- first page was pagenum zero
return num_results > 0 and summary(result, host) or nil
end
---
-- Sends a single GET request to bing, parses the result and stores unique host
-- names found in the response.
--
-- @param o table of options and values passed from action. The fields used
--          here are: req (the URL to fetch), http (options handed to
--          http.get_url), uniq (running count of unique host names, which is
--          incremented here) and flagged (set to true when a page adds no new
--          host names, reset to false when one does).
-- @param r table in which parsed search results will be stored. Each unique
--          host name is stored as both key and value.
-- @return true (always).
-- @return number of search result entries parsed in the response to a single
--         request (valid host name or not). This will be zero when there are
--         error conditions or when a second consecutive page yields no new
--         host names.
function do_req( o, r )
  -- Only used for diagnostics in the debug messages below.
  local url = tostring(o.req)
  local count = 0
  local uniq_before = o.uniq

  local response = http.get_url( o.req, o.http )
  if not response then
    stdnse.print_debug(1, 'ipsearch: nil response to %s', url)
    return true, 0
  end
  if type( response.status ) ~= 'number' then
    stdnse.print_debug(1, 'ipsearch: non-numeric Status in response to %s', url)
    return true, 0
  end
  if type( response.body ) ~= 'string' then
    stdnse.print_debug(1, 'ipsearch: non-string Body in response to %s', url)
    return true, 0
  end
  if response.status ~= 200 then
    stdnse.print_debug(2, 'ipsearch: HTTP Status %d in response to %s',
      response.status, url)
    return true, 0
  end

  -- look for hostnames in the response body using PATTERN
  -- try and extract just the valid host names
  for h in response.body:gmatch(PATTERN) do
    local canon = canonicalise(h)
    count = count + 1 -- we're counting 'search results' not valid host names
    local valid = canon:match('target=http://([a-zA-Z0-9%.%-_]+)/')
    local len = valid and #valid or 0
    if len >= 3 and len <= 255 then
      if r[valid] == nil then
        r[valid] = valid -- unique hosts
        o.uniq = o.uniq + 1
        o.flagged = false
      end
    else
      stdnse.print_debug('ipsearch: cannot handle hostname: %s',
        replace_nonprint(canon))
    end
  end

  -- is bing being weird? sometimes it will show the same results on every page.
  if o.uniq == uniq_before then
    -- no more hosts added from this page
    if o.flagged == true then
      return true, 0 -- second consec time it happened
    end
    o.flagged = true -- try one more page
  end

  return true, count
end
---
-- Checks that there are some results to write to a file and generates a short
-- summary of them for printing to the host script results table.
--
-- The numpages field is removed from r so that only host name entries remain.
-- Host names equal to host.targetname or host.name are not reported since they
-- are already known.
--
-- @param r table of results; host name entries plus a numpages field holding
--          the number of result pages parsed.
-- @param host target host table (ip, targetname and name are read).
-- @return string summary for printing, or nil if there were no new host names
--         or the results could not be written to file.
function summary( r, host )
  local numpages = r.numpages
  r.numpages = nil -- leave only host name entries for the loop below

  local t = {}
  local target_seen = false
  for _, hostname in pairs(r) do
    if hostname ~= host.targetname and hostname ~= host.name then
      t[#t+1] = hostname
    else
      target_seen = true
    end
  end

  if #t < 1 then
    if target_seen then
      stdnse.print_debug(
        'ipsearch: The Scan Target Name was the only result for IP search:%s',
        host.ip
      )
    else
      stdnse.print_debug(2, 'ipsearch: No results for IP search:%s', host.ip)
    end
    return nil
  end

  -- local: this must not leak into the global environment shared by the script
  local filename = write(t, host.targetname or host.ip)
  if filename == nil then
    stdnse.print_debug(
      'ipsearch: Unable to write results to file for IP search:%s\nResults\n%s',
      host.ip, table.concat(t, '\n')
    )
    return nil
  end

  return ('parsed %d pages and dumped %d host name%s to:\n%s'):format(
    numpages, #t, #t>1 and 's' or '', filename
  )
end
---
-- Writes the supplied list of results to a file.
--
-- An existing file (as located by nmap.fetchfile) is appended to; otherwise
-- the file is created relative to the directory in which nmap-services is
-- found.
--
-- @param t table of host names to write to file.
-- @param target string target name which forms part of the name of the file to
--          be written to if the $target token is present in the OUTFILE
--          variable.
-- @return string filepath that was written to or nil if there was an io error.
function write( t, target )
  -- A function replacement inserts the target literally; a plain replacement
  -- string would treat any '%' in it (e.g. an IPv6 zone id) as a capture
  -- reference.
  local name = tostring(target)
  local fname = OUTFILE:gsub('%$target', function() return name end)

  local f, err
  local filepath = nmap.fetchfile(fname)
  if filepath then
    f, err = io.open(filepath, 'a')
    if not f then
      stdnse.print_debug('ipsearch: Error opening %s for appending: %s.',
        fname, tostring(err))
      return nil
    end
  else
    local services = nmap.fetchfile('nmap-services')
    if not services then
      stdnse.print_debug('ipsearch: Error creating %s for writing: %s.',
        fname, 'unable to locate nmap-services')
      return nil
    end
    -- sub(1,-14) strips the 13 character file name 'nmap-services', leaving
    -- the directory it was found in (including the trailing separator).
    f, err = io.open(services:sub(1,-14) .. fname, 'w')
    if not f then
      stdnse.print_debug('ipsearch: Error creating %s for writing: %s.',
        fname, tostring(err))
      return nil
    end
  end

  local ok, werr = f:write( table.concat(t, '\n') .. '\n' )
  f:close()
  if not ok then
    stdnse.print_debug('ipsearch: Error writing to %s: %s.',
      fname, tostring(werr))
    return nil
  end
  return fname
end
---
-- Returns a copy of the supplied string in which every byte outside the range
-- 32 to 126 is replaced by a four character \xHH escape (upper case hex).
--
-- @param s string to be made printable.
-- @param len optional number; processing stops after the first character that
--          brings the length of the output to len or more.
-- @return string containing only printable characters.
-- @see banner.replace_nonprint
function replace_nonprint( s, len )
  local limit = type(len) == 'number' and len or nil
  local pieces = {}
  local width = 0

  for i = 1, #s do
    local b = s:byte(i)
    if b >= 32 and b <= 126 then
      pieces[#pieces+1] = s:sub(i, i)
      width = width + 1
    else
      -- left-pad with a zero and keep the last two hex digits
      local hex = '0' .. stdnse.tohex(b):upper()
      pieces[#pieces+1] = '\\x' .. hex:sub(-2,-1)
      width = width + 4
    end
    if limit and width >= limit then
      break
    end
  end

  return table.concat(pieces)
end
---
-- Canonicalises the supplied string using one or more decoders which are passed
-- to string.gsub as their second argument.
--
-- Currently decodes Percent encoding only. e.g. %25252f is '/' encoded thrice
-- and the return value will be the canonical string '/'. A '%' that is not
-- followed by two hexadecimal digits is left as it is.
--
-- @param s string to be canonicalised.
-- @return canonicalised string.
function canonicalise(s)
  local current, previous = s, nil
  repeat
    previous = current
    -- %x%x only matches genuine hex pairs, so the decoder is never handed
    -- something like '%zz'. The parentheses discard gsub's match count.
    current = (current:gsub('%%%x%x', percentDecode))
  until current == previous -- a pass that changes nothing means fully decoded
  return current
end
---
-- Decoder function which accepts a percent encoded character (e.g. '%2f') and
-- returns the decoded character (e.g. '/').
--
-- @param s string numeric hexadecimal percent entity
-- @return string decoded character, or s unchanged when the text after the
--         leading character is not a hexadecimal number in the range 0 to 255.
function percentDecode(s)
  local ord = tonumber(s:sub(2,-1), 16)
  -- string.char raises on nil or out-of-range values; hand the input back
  -- instead so that a malformed entity is simply not decoded.
  if not ord or ord < 0 or ord > 255 then
    return s
  end
  return string.char(ord)
end