description = [[
This script crawls through the website to find any rss or atom feeds.
The script, by default, spiders and searches within forty pages. For large web
applications make sure to increase httpspider's maxpagecount
value.
Please, note that the script will become more intrusive though.
]]
---
-- @usage nmap -p80 --script http-feed.nse <target>
--
-- @output
-- PORT STATE SERVICE REASON
-- 80/tcp open http syn-ack
-- | http-feed:
-- | Spidering limited to: maxpagecount=40; withinhost=some-random-page.com
-- | Found the following feeds:
-- | RSS (version 2.0): http://www.some-random-page.com/2011/11/20/feed/
-- | RSS (version 2.0): http://www.some-random-page.com/2011/12/04/feed/
-- | RSS (version 2.0): http://www.some-random-page.com/category/animalsfeed/
-- | RSS (version 2.0): http://www.some-random-page.com/comments/feed/
-- |_ RSS (version 2.0): http://www.some-random-page.com/feed/
---
categories = {"discovery", "intrusive"}
author = "George Chatzisofroniou"
license = "Same as Nmap--See http://nmap.org/book/man-legal.html"
local http = require "http"
local shortport = require "shortport"
local stdnse = require "stdnse"
local table = require "table"
local string = require "string"
local httpspider = require "httpspider"
-- Port rule: selects TCP ports 80 and 443, or ports whose service is detected
-- as "http" or "https", when they are in the "open" state.
portrule = shortport.port_or_service( {80, 443}, {"http", "https"}, "tcp", "open")
-- Signatures used to recognise a feed document, keyed by feed type name.
-- For each feed type:
--   search  - list of Lua patterns tried against a response body; the capture
--             holds whatever sits between the root tag name and its closing
--             '>' (i.e. the root element's attributes)
--   version - Lua pattern that extracts the value of a version attribute
--             from that capture
-- The search patterns must never be empty: an empty Lua pattern matches any
-- string, which would flag every crawled page as a feed.
FEEDS = {
  RSS = { search = { '<rss(.-)>' }, version = 'version=["\'](.-)["\']' },
  Atom = { search = { '<feed(.-)>' }, version = 'version=["\'](.-)["\']' },
}
-- Lua patterns that capture the href of a feed reference embedded in a page:
-- a type attribute of application/rss+xml or application/atom+xml, optionally
-- followed by a title attribute, then the href attribute whose value is
-- captured. Attribute values may use single or double quotes.
-- The patterns are assembled from shared fragments inside a do-block so the
-- fragment names do not escape into the rest of the script.
do
  local QUOTE = "[\"']"
  local HREF = "%s*href=" .. QUOTE .. "(.-)" .. QUOTE
  local TITLE = "%s*title=" .. QUOTE .. ".-" .. QUOTE

  -- Builds the leading type="application/<kind>+xml" fragment.
  local function mime_type(kind)
    return "type=" .. QUOTE .. "application/" .. kind .. "%+xml" .. QUOTE
  end

  FEEDS_REFS = {
    mime_type("rss") .. HREF,
    mime_type("rss") .. TITLE .. HREF,
    mime_type("atom") .. HREF,
    mime_type("atom") .. TITLE .. HREF,
  }
end
-- Feeds discovered so far: maps the URL/path a feed was found at to its
-- label prefix, e.g. "RSS (version 2.0): ".
feedsfound = {}
-- Set of URLs/paths that have already been handed to findFeeds.
checked = {}

--- Searches a fetched resource for feed signatures.
-- Every search pattern of every entry in FEEDS is tried against the body. On
-- a match the feed type (plus its version, when one can be extracted) is
-- recorded in feedsfound under the given path. If several signatures match,
-- the last one visited wins. The path is always marked in checked, even when
-- body is nil.
-- @param body Response body to inspect (may be nil).
-- @param path URL or path the body was fetched from; used as the table key.
local findFeeds = function(body, path)
  if body then
    for feed_type, signature in pairs(FEEDS) do
      for _, search_pattern in pairs(signature.search) do
        local root = string.match(body, search_pattern)
        if root then
          -- Try to find the feed's version. The result is kept in locals so
          -- nothing leaks into the global environment.
          local version = string.match(root, signature.version)
          local suffix = version and (" (version " .. version .. ")") or ""
          feedsfound[path] = feed_type .. suffix .. ": "
        end
      end
    end
  end
  checked[path] = true
end
--- Script action: crawls the web server and reports any feeds found.
-- Each crawled page is checked for feed signatures via findFeeds, and every
-- feed reference on the page (FEEDS_REFS) is fetched once and checked as well.
-- @param host Host table passed in by the script engine.
-- @param port Port table passed in by the script engine.
-- @return Formatted output listing the feeds found, the string
--         "Couldn't find any feeds." when none were found, an "ERROR: ..."
--         message when the crawler reports an error, or nil when the crawler
--         could not be created.
action = function(host, port)
  -- NOTE(review): script-arg values are presumably strings as typed on the
  -- command line; convert so the page limit is numeric, and fall back to the
  -- default of 40 when the argument is absent or not a number.
  local maxpagecount_arg = stdnse.get_script_args("maxpagecount")
  local maxpagecount = tonumber(maxpagecount_arg) or 40

  local crawler = httpspider.Crawler:new(host, port, '/', {
    scriptname = SCRIPT_NAME,
    maxpagecount = maxpagecount,
    maxdepth = -1,
    withinhost = 1
  })

  -- Bail out before the crawler is dereferenced if it could not be created.
  if not crawler then
    return
  end

  -- Only scrape URLs on the same host, skipping JavaScript and CSS resources.
  crawler.options.doscraping = function(url)
    if crawler:iswithinhost(url)
      and not crawler:isresource(url, "js")
      and not crawler:isresource(url, "css") then
      return true
    end
  end

  crawler:set_timeout(10000)

  while true do
    local status, r = crawler:crawl()
    -- if the crawler fails it can be due to a number of different reasons
    -- most of them are "legitimate" and should not be reason to abort
    if not status then
      if r.err then
        return stdnse.format_output(true, ("ERROR: %s"):format(r.reason))
      else
        break
      end
    end

    local response = r.response
    local path = tostring(r.url)
    if response and response.body then
      findFeeds(response.body, path)
      for _, ref_pattern in ipairs(FEEDS_REFS) do
        for link in string.gmatch(response.body, ref_pattern) do
          if not checked[link] then
            -- Mark the link before fetching so that a reference yielding no
            -- body is not requested again from every page that links to it.
            checked[link] = true
            local resp
            -- If this is an absolute URL, use get_url.
            if string.match(link, "^http") then
              resp = http.get_url(link)
            else
              resp = http.get(host, port, link)
            end
            if resp and resp.body then
              findFeeds(resp.body, link)
            end
          end
        end
      end
    end
  end

  -- If the table is empty.
  if next(feedsfound) == nil then
    return "Couldn't find any feeds."
  end

  -- Create a nice output. pairs() order is unspecified, so the URLs are
  -- sorted to make the report deterministic.
  local urls = {}
  for url in pairs(feedsfound) do
    urls[#urls + 1] = url
  end
  table.sort(urls)

  local results = { "Found the following feeds: " }
  for _, url in ipairs(urls) do
    results[#results + 1] = { feedsfound[url] .. url }
  end
  results.name = crawler:getLimitations()
  return stdnse.format_output(true, results)
end