#!/usr/bin/env python import re # "^%s*([^'\"%s{},=][^{},=]-)%s*[},=]" unquoted_re = re.compile(r'\s*([^\'"\s{},=][^{},=]*?)\s*([},=]|$)') # "^%s*(['\"])(.-[^\\])%1%s*[},=]" quoted_re = re.compile(r'\s*(([\'"])(.*?[^\\])\2)\s*([},=]|$)') # "^%s*(['\"])%1%s*[},=]" empty_re = re.compile(r'\s*(([\'"])\2)\s*([},=]|$)') def parse_string(s, start): """ Parses Individual string that are quoted,unquoted or empty quotes. It returns the found string alongwith the next starting position """ for pattern in unquoted_re, quoted_re, empty_re: m = pattern.match(s, start) or quoted_re.match(s, start) if m: return m.group(1), m.end(1) raise ValueError("No string found at %s." % repr(s[start:])) def next_char(s, start): """ Returns the next character and position,when the string and starting position is supplied """ while start < len(s) and s[start].isspace(): start += 1 if start < len(s): return s[start], start else: return None, start def parse_value(s, start): """ If the string starting from "start" is key/value pair it returns key,value and next position or else it returns the string alone""" nc, j = next_char(s, start) if nc == "{": j = parse_table(s, j) return s[start:j], j else: tmp, j = parse_string(s, j) nc, j = next_char(s, j) if nc == "=": # Key/value? j += 1 begin = j nc, j = next_char(s, j) if nc == "{": j = parse_table(s, j) else: dummy, j = parse_string(s, j) return (tmp, s[begin:j]), j else: return s[start:j], j def parse_table(s, start): """ This function is responsible for parsing table i.e strings that starts with '{'. It returns the position where the balancing pair of braces get closed""" nc, j = next_char(s, start) if not nc or nc != "{": raise ValueError("No '{' found at %s." % repr(s[start:])) j += 1 while True: nc, j = next_char(s, j) if nc == "}": # End of table. return j + 1 else: # Replace this with a call to parse_value. v, j = parse_value(s, j) nc, j = next_char(s, j) if nc == ",": j += 1 def parse_script_args(s): """ Main function responsible for parsing the script args and storing the key/value pairs in list.if invalid argument is present it stores the value as None""" args = [] nc, j = next_char(s, 0) try: while nc is not None: val, j = parse_value(s, j) if type(val) == str: raise ValueError("Only name-value pairs expected in parse_script_args.") else: args.append(val) nc, j = next_char(s, j) if nc == ",": j += 1 nc, j = next_char(s, j) except ValueError: return None return args def parse_script_args_dict(raw_argument): """ Wrapper function that copies the list key value pairs in list in to key:value pairs in dictionary and returns it""" args_dict = {} args = parse_script_args(raw_argument) if args is None: return None for item in args: if(len(item) == 2): # only key/value pairs are stored args_dict[item[0]] = item[1] return args_dict if __name__ == '__main__': TESTS = ( ('', []), ('a=b,c=d', [('a', 'b'), ('c', 'd')]), ('a="b=c"', [('a', '"b=c"')]), ('a="def\\"ghi"', [('a', '"def\\"ghi"')]), ('a={one,{two,{three}}}', [('a', '{one,{two,{three}}}')]), ('a={"quoted}quoted"}', [('a', '{"quoted}quoted"}')]), ('a="unterminated', None), ('user=foo,pass=",{}=bar",whois={whodb=nofollow+ripe},userdb=C:\\Some\\Path\\To\\File', [('user', 'foo'), ('pass', '",{}=bar"'), ('whois', '{whodb=nofollow+ripe}'), ('userdb', 'C:\\Some\\Path\\To\\File')]), ) for test, expected in TESTS: args_dict = parse_script_args_dict(test) print args_dict args = parse_script_args(test) if args == expected: print "PASS" , test continue print "FAIL", test if args is None: print "Parsing error" else: print "%d args" % len(args) for a, v in args: print a, "=", v if expected is None: print "Expected parsing error" else: print "Expected %d args" % len(expected) for a, v in expected: print a, "=", v