--- -- Library methods for handling punycode strings. -- -- Punycode is a simple and efficient transfer encoding syntax designed -- for use with Internationalized Domain Names in Applications (IDNA). -- It uniquely and reversibly transforms a Unicode string into an ASCII -- string. ASCII characters in the Unicode string are represented -- literally, and non-ASCII characters are represented by ASCII -- characters that are allowed in host name labels (letters, digits, and -- hyphens). This document defines a general algorithm called -- Bootstring that allows a string of basic code points to uniquely -- represent any string of code points drawn from a larger set. -- Punycode is an instance of Bootstring that uses particular parameter -- values specified by this document, appropriate for IDNA. -- -- Advantages of Bootstring algorithm are Completeness, Uniqueness, -- Reversibility, Efficient encoding, Simplicity and Readability. -- -- Portions of this library were adapted from punycode.js by Mathias Bynens -- under the MIT License. -- -- References: -- * http://ietf.org/rfc/rfc3492.txt -- * punycode.js: https://mths.be/punycode -- -- @author Rewanth Cool -- @copyright Same as Nmap--See https://nmap.org/book/man-legal.html local stdnse = require "stdnse" local string = require "string" local math = require "math" local table = require "table" local unicode = require "unicode" local unittest = require "unittest" _ENV = stdnse.module("punycode", stdnse.seeall) -- Localize few functions for a tiny speed boost, since these will be -- used frequently. local floor = math.floor local byte = string.byte local char = string.char local find = string.find local match = string.match local reverse = string.reverse local sub = string.sub -- Highest positive signed 32-bit float value local maxInt = 0x7FFFFFFF -- Bootstring parameters local base = 0x24 local tMin = 0x1 local tMax = 0x1A local skew = 0x26 local damp = 0x2BC local initialBias = 0x48 local initialN = 0x80 local delimiter = char("0x2D") -- Convenience shortcuts local baseMinusTMin = base - tMin -- Bias adaptation function as per section 3.4 of RFC 3492. -- https://tools.ietf.org/html/rfc3492#section-3.4 -- The following function is adapted from punycode.js by Mathias Bynens -- under the MIT License. local function adapt(delta, numPoints, firstTime) local k = 0; if firstTime then delta = floor(delta / damp) else delta = (delta >> 1) end delta = delta + floor(delta / numPoints) while delta > (baseMinusTMin * tMax >> 1) do delta = floor(delta / baseMinusTMin) k = k + base end return floor(k + (baseMinusTMin + 1) * delta / (delta + skew)) end -- The following function converts boolean value to integer. -- -- @param status boolean value is given as input. -- @return Returns 0/1 based on the given boolean input. local function boolToNum(status) if status == true then return 1 else return 0 end end -- This function converts a basic code point into a digit/integer. -- -- @param codePoint The basic numeric code point value. -- @return The numeric value of a basic code point (for use in -- representing integers) in the range `0` to `base - 1`, or `base` if -- the code point does not represent a value. -- The following function is adapted from punycode.js by Mathias Bynens -- under the MIT License. local function basicToDigit(codePoint) if (codePoint - 0x30 < 0x0A) then return codePoint - 0x16 end if (codePoint - 0x41 < 0x1A) then return codePoint - 0x41 end if (codePoint - 0x61 < 0x1A) then return codePoint - 0x61 end return base end -- This function converts a digit/integer into a basic code point. -- -- @param digit The numeric value of a basic code point. -- @return The basic code point whose value (when used for -- representing integers) is `digit`, which needs to be in the range -- `0` to `base - 1`. If `flag` is non-zero, the uppercase form is -- used; else, the lowercase form is used. The behavior is undefined -- if `flag` is non-zero and `digit` has no uppercase form. -- The following function is adapted from punycode.js by Mathias Bynens -- under the MIT License. local function digitToBasic(digit, flag) -- 0..25 map to ASCII a..z or A..Z -- 26..35 map to ASCII 0..9 return digit + 22 + 75 * boolToNum(digit < 26) - (boolToNum((flag ~= 0)) << 5) end -- Creates a string based on an array of numeric code points. -- -- @param input list-table of Unicode code points -- @param decoder Sets the decoding format to be used. -- @return The new encoded string -- The following function is adapted from punycode.js by Mathias Bynens -- under the MIT License. function encode_input(input) local output = {} -- Cache the length. local inputLength = #input -- Initialize the state. local n = initialN local delta = 0 local bias = initialBias -- Handle the basic code points. for _, v in ipairs(input) do if v < 0x80 then table.insert(output, char(v)) end end local basicLength = #output local handledCPCount = basicLength -- `handledCPCount` is the number of code points that have been handled -- `basicLength` is the number of basic code points. -- Finish the basic string with a delimiter unless it's empty. if (basicLength > 0) then table.insert(output, delimiter) end -- Main encoding loop: while (handledCPCount < inputLength) do -- All non-basic code points < n have been handled already. Find -- the next larger one: local m = maxInt for _, v in ipairs(input) do if v >= n and v < m then m = v end end -- Increase `delta` enough to advance the decoder's state to -- , but guard against overflow. local handledCPCountPlusOne = handledCPCount + 1 if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) then --error('overflow') return nil, "Overflow exception occurred." end delta = delta + (m - n) * handledCPCountPlusOne n = m for _, currentValue in ipairs(input) do if currentValue < n then delta = delta + 1 --Move this down incase of wrong answer if delta > maxInt then --error("overflow") return nil, "Overflow exception occurred." end end if (currentValue == n) then -- Represent delta as a generalized variable-length integer. local q = delta local k = base repeat local t if k <= bias then t = tMin else if k >= bias + tMax then t = tMax else t = k - bias end end if q < t then break end local qMinusT = q - t local baseMinusT = base - t local ans = digitToBasic(t + qMinusT % baseMinusT, 0) table.insert(output, char(ans)) q = floor(qMinusT / baseMinusT) k = k + base until false local ans = digitToBasic(q, 0) table.insert(output, char(ans)) bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength) delta = 0 handledCPCount = handledCPCount + 1 end end delta = delta + 1 n = n + 1 end return table.concat(output, '') end -- Converts a Punycode string of ASCII-only symbols to a -- list-table of Unicode code points. -- -- @param input The Punycode string of ASCII-only symbols. -- @return The resulting list-table of Unicode code points. -- The following function is adapted from punycode.js by Mathias Bynens -- under the MIT License. function decode_input(input) local output = {} local inputLength = #input local i = 0 local n = initialN local bias = initialBias local basic if find(reverse(input), delimiter) then basic = #input - find(reverse(input), delimiter) else basic = -1 end if basic < 0 then basic = 0 end for j = 1, basic do local c = sub(input, j, j) local value = byte(c) if value >= 0x80 then --error("Not basic") return nil, "Not basic exception occurred." end table.insert(output, value) end local index if basic > 0 then index = basic + 1 else index = 0 end while index < inputLength do local oldi = i local w = 1 local k = base repeat if index >= inputLength then --error("Invalid input") return nil, "Invalid input exception occurred." end local c = sub(input, index+1, index+1) local value = byte(c) local digit = basicToDigit(value) index = index + 1 if (digit >= base or digit > floor((maxInt - i) / w)) then --error('overflow'); return nil, "Overflow exception occurred." end i = i + digit * w; local t if k <= bias then t = tMin else if k >= bias + tMax then t = tMax else t = k - bias end end if digit < t then break end local baseMinusT = base - t; if (w > floor(maxInt / baseMinusT)) then --error('overflow'); return nil, "Overflow exception occurred." end w = w * baseMinusT; k = k + base until false local out = #output + 1; bias = adapt(i - oldi, out, oldi == 0) -- `i` was supposed to wrap around from `out` to `0`, -- incrementing `n` each time, so we'll fix that now: if (floor(i / out) > maxInt - n) then --error('overflow'); return nil, "Overflow exception occurred." end n = n + floor(i / out); i = i % out; for temp = #output, i, -1 do output[temp+1] = output[temp] end output[i+1] = n i = i + 1 end return output end -- Performs punycode encoding on a label -- -- If the label is already ASCII, it is returned as a string. If any encoding -- was required, the "xn--" prefix is added. -- -- @param u A list-table of Unicode code points representing a domain label -- @return A punycode-encoded ASCII string function encode_label(u) local flag = false -- Looks for non-ASCII character for _, val in pairs(u) do if not (val >=0 and val <= 127) then flag = true break end end if flag then local res, err = encode_input(u) if err then return nil, err end return 'xn--' .. res else return unicode.encode(u, unicode.utf8_enc) end end --- Decodes a punycode-encoded label to Unicode. -- -- If the label starts with "xn--", it will be punycode-decoded. Otherwise, it -- will be decoded as UTF-8 (ASCII). The return value is always a table of -- Unicode code points. -- -- @param s String of input. -- @return A table of Unicode code points. function decode_label(s) if match(s, "^xn%-%-") then local res, err = decode_input(sub(s, 5)) if err then return nil, err end return res else return unicode.decode(s, unicode.utf8_dec) end end --Ignore the rest if we are not testing. if not unittest.testing() then return _ENV end -- Table of punycode test cases. local testCases = { { "xn--0zwm56d", "\xe6\xb5\x8b\xe8\xaf\x95" }, { "xn--knigsgsschen-lcb0w", "k\xc3\xb6nigsg\xc3\xa4sschen" }, { "xn--ab-fsf", "a\xe0\xa5\x8db" }, { "xn--maana-pta", "ma\xc3\xb1ana" }, { "xn----dqo34k", "\xe2\x98\x83-\xe2\x8c\x98" } } test_suite = unittest.TestSuite:new() -- Running test cases against Encoding function. for i, v in ipairs(testCases) do test_suite:add_test(unittest.equal(unicode.encode(decode_label(v[1]), unicode.utf8_enc), v[2])) test_suite:add_test(unittest.equal(encode_label(unicode.decode(v[2], unicode.utf8_dec)), v[1])) end return _ENV