/* ** $Id: lptypes.h,v 1.8 2013/04/12 16:26:38 roberto Exp $ ** LPeg - PEG pattern matching for Lua ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) ** written by Roberto Ierusalimschy */ #if !defined(lptypes_h) #define lptypes_h #if !defined(LPEG_DEBUG) #define NDEBUG #endif #include #include #include "lua.h" #define VERSION "0.12" #define PATTERN_T "lpeg-pattern" #define MAXSTACKIDX "lpeg-maxstack" /* ** compatibility with Lua 5.2 */ #if (LUA_VERSION_NUM == 502) #undef lua_equal #define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) #undef lua_getfenv #define lua_getfenv lua_getuservalue #undef lua_setfenv #define lua_setfenv lua_setuservalue #undef lua_objlen #define lua_objlen lua_rawlen #undef luaL_register #define luaL_register(L,n,f) \ { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); } #endif /* default maximum size for call/backtrack stack */ #if !defined(MAXBACK) #define MAXBACK 100 #endif /* maximum number of rules in a grammar */ #define MAXRULES 200 /* initial size for capture's list */ #define INITCAPSIZE 32 /* index, on Lua stack, for subject */ #define SUBJIDX 2 /* number of fixed arguments to 'match' (before capture arguments) */ #define FIXEDARGS 3 /* index, on Lua stack, for capture list */ #define caplistidx(ptop) ((ptop) + 2) /* index, on Lua stack, for pattern's ktable */ #define ktableidx(ptop) ((ptop) + 3) /* index, on Lua stack, for backtracking stack */ #define stackidx(ptop) ((ptop) + 4) typedef unsigned char byte; #define BITSPERCHAR 8 #define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1) typedef struct Charset { byte cs[CHARSETSIZE]; } Charset; #define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } /* access to charset */ #define treebuffer(t) ((byte *)((t) + 1)) /* number of slots needed for 'n' bytes */ #define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1) /* set 'b' bit in charset 'cs' */ #define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) /* ** in capture instructions, 'kind' of capture and its offset are ** packed in field 'aux', 4 bits for each */ #define getkind(op) ((op)->i.aux & 0xF) #define getoff(op) (((op)->i.aux >> 4) & 0xF) #define joinkindoff(k,o) ((k) | ((o) << 4)) #define MAXOFF 0xF #define MAXAUX 0xFF /* maximum number of bytes to look behind */ #define MAXBEHIND MAXAUX /* maximum size (in elements) for a pattern */ #define MAXPATTSIZE (SHRT_MAX - 10) /* size (in elements) for an instruction plus extra l bytes */ #define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1) /* size (in elements) for a ISet instruction */ #define CHARSETINSTSIZE instsize(CHARSETSIZE) /* size (in elements) for a IFunc instruction */ #define funcinstsize(p) ((p)->i.aux + 2) #define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7)))) #endif /* ** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ */ #if !defined(lptree_h) #define lptree_h /* ** types of trees */ typedef enum TTag { TChar = 0, TSet, TAny, /* standard PEG elements */ TTrue, TFalse, TRep, TSeq, TChoice, TNot, TAnd, TCall, TOpenCall, TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */ TGrammar, /* sib1 is initial (and first) rule */ TBehind, /* match behind */ TCapture, /* regular capture */ TRunTime /* run-time capture */ } TTag; /* number of siblings for each tree */ extern const byte numsiblings[]; /* ** Tree trees ** The first sibling of a tree (if there is one) is immediately after ** the tree. A reference to a second sibling (ps) is its position ** relative to the position of the tree itself. A key in ktable ** uses the (unique) address of the original tree that created that ** entry. NULL means no data. */ typedef struct TTree { byte tag; byte cap; /* kind of capture (if it is a capture) */ unsigned short key; /* key in ktable for Lua data (0 if no key) */ union { int ps; /* occasional second sibling */ int n; /* occasional counter */ } u; } TTree; /* ** A complete pattern has its tree plus, if already compiled, ** its corresponding code */ typedef struct Pattern { union Instruction *code; int codesize; TTree tree[1]; } Pattern; /* number of siblings for each tree */ extern const byte numsiblings[]; /* access to siblings */ #define sib1(t) ((t) + 1) #define sib2(t) ((t) + (t)->u.ps) #endif /* ** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $ */ #if !defined(lpcap_h) #define lpcap_h /* kinds of captures */ typedef enum CapKind { Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup } CapKind; typedef struct Capture { const char *s; /* subject position */ short idx; /* extra info about capture (group name, arg index, etc.) */ byte kind; /* kind of capture */ byte siz; /* size of full capture + 1 (0 = not a full capture) */ } Capture; typedef struct CapState { Capture *cap; /* current capture */ Capture *ocap; /* (original) capture list */ lua_State *L; int ptop; /* index of last argument to 'match' */ const char *s; /* original string */ int valuecached; /* value stored in cache slot */ } CapState; int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); int getcaptures (lua_State *L, const char *s, const char *r, int ptop); int finddyncap (Capture *cap, Capture *last); #endif /* ** $Id: lpvm.h,v 1.2 2013/04/03 20:37:18 roberto Exp $ */ #if !defined(lpvm_h) #define lpvm_h /* Virtual Machine's instructions */ typedef enum Opcode { IAny, /* if no char, fail */ IChar, /* if char != aux, fail */ ISet, /* if char not in buff, fail */ ITestAny, /* in no char, jump to 'offset' */ ITestChar, /* if char != aux, jump to 'offset' */ ITestSet, /* if char not in buff, jump to 'offset' */ ISpan, /* read a span of chars in buff */ IBehind, /* walk back 'aux' characters (fail if not possible) */ IRet, /* return from a rule */ IEnd, /* end of pattern */ IChoice, /* stack a choice; next fail will jump to 'offset' */ IJmp, /* jump to 'offset' */ ICall, /* call rule at 'offset' */ IOpenCall, /* call rule number 'key' (must be closed to a ICall) */ ICommit, /* pop choice and jump to 'offset' */ IPartialCommit, /* update top choice to current position and jump */ IBackCommit, /* "fails" but jump to its own 'offset' */ IFailTwice, /* pop one choice and then fail */ IFail, /* go back to saved state on choice and jump to saved offset */ IGiveup, /* internal use */ IFullCapture, /* complete capture of last 'off' chars */ IOpenCapture, /* start a capture */ ICloseCapture, ICloseRunTime } Opcode; typedef union Instruction { struct Inst { byte code; byte aux; short key; } i; int offset; byte buff[1]; } Instruction; int getposition (lua_State *L, int t, int i); void printpatt (Instruction *p, int n); const char *match (lua_State *L, const char *o, const char *s, const char *e, Instruction *op, Capture *capture, int ptop); int verify (lua_State *L, Instruction *op, const Instruction *p, Instruction *e, int postable, int rule); void checkrule (lua_State *L, Instruction *op, int from, int to, int postable, int rule); #endif /* ** $Id: lpcode.h,v 1.5 2013/04/04 21:24:45 roberto Exp $ */ #if !defined(lpcode_h) #define lpcode_h #include "lua.h" int tocharset (TTree *tree, Charset *cs); int checkaux (TTree *tree, int pred); int fixedlenx (TTree *tree, int count, int len); int hascaptures (TTree *tree); int lp_gc (lua_State *L); Instruction *compile (lua_State *L, Pattern *p); void reallocprog (lua_State *L, Pattern *p, int nsize); int sizei (const Instruction *i); #define PEnullable 0 #define PEnofail 1 #define nofail(t) checkaux(t, PEnofail) #define nullable(t) checkaux(t, PEnullable) #define fixedlen(t) fixedlenx(t, 0, 0) #endif /* ** $Id: lpprint.h,v 1.1 2013/03/21 20:25:12 roberto Exp $ */ #if !defined(lpprint_h) #define lpprint_h #if defined(LPEG_DEBUG) void printpatt (Instruction *p, int n); void printtree (TTree *tree, int ident); void printktable (lua_State *L, int idx); void printcharset (const byte *st); void printcaplist (Capture *cap, Capture *limit); #else #define printktable(L,idx) \ luaL_error(L, "function only implemented in debug mode") #define printtree(tree,i) \ luaL_error(L, "function only implemented in debug mode") #define printpatt(p,n) \ luaL_error(L, "function only implemented in debug mode") #endif #endif /* ** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ #include "lua.h" #include "lauxlib.h" #define captype(cap) ((cap)->kind) #define isclosecap(cap) (captype(cap) == Cclose) #define closeaddr(c) ((c)->s + (c)->siz - 1) #define isfullcap(cap) ((cap)->siz != 0) #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) /* ** Put at the cache for Lua values the value indexed by 'v' in ktable ** of the running pattern (if it is not there yet); returns its index. */ static int updatecache (CapState *cs, int v) { int idx = cs->ptop + 1; /* stack index of cache for Lua values */ if (v != cs->valuecached) { /* not there? */ getfromktable(cs, v); /* get value from 'ktable' */ lua_replace(cs->L, idx); /* put it at reserved stack position */ cs->valuecached = v; /* keep track of what is there */ } return idx; } static int pushcapture (CapState *cs); /* ** Goes back in a list of captures looking for an open capture ** corresponding to a close */ static Capture *findopen (Capture *cap) { int n = 0; /* number of closes waiting an open */ for (;;) { cap--; if (isclosecap(cap)) n++; /* one more open to skip */ else if (!isfullcap(cap)) if (n-- == 0) return cap; } } /* ** Go to the next capture */ static void nextcap (CapState *cs) { Capture *cap = cs->cap; if (!isfullcap(cap)) { /* not a single capture? */ int n = 0; /* number of opens waiting a close */ for (;;) { /* look for corresponding close */ cap++; if (isclosecap(cap)) { if (n-- == 0) break; } else if (!isfullcap(cap)) n++; } } cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */ } /* ** Push on the Lua stack all values generated by nested captures inside ** the current capture. Returns number of values pushed. 'addextra' ** makes it push the entire match after all captured values. The ** entire match is pushed also if there are no other nested values, ** so the function never returns zero. */ static int pushnestedvalues (CapState *cs, int addextra) { Capture *co = cs->cap; if (isfullcap(cs->cap++)) { /* no nested captures? */ lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ return 1; /* that is it */ } else { int n = 0; while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ n += pushcapture(cs); if (addextra || n == 0) { /* need extra? */ lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ n++; } cs->cap++; /* skip close entry */ return n; } } /* ** Push only the first value generated by nested captures */ static void pushonenestedvalue (CapState *cs) { int n = pushnestedvalues(cs, 0); if (n > 1) lua_pop(cs->L, n - 1); /* pop extra values */ } /* ** Try to find a named group capture with the name given at the top of ** the stack; goes backward from 'cap'. */ static Capture *findback (CapState *cs, Capture *cap) { lua_State *L = cs->L; while (cap-- > cs->ocap) { /* repeat until end of list */ if (isclosecap(cap)) cap = findopen(cap); /* skip nested captures */ else if (!isfullcap(cap)) continue; /* opening an enclosing capture: skip and get previous */ if (captype(cap) == Cgroup) { getfromktable(cs, cap->idx); /* get group name */ if (lua_equal(L, -2, -1)) { /* right group? */ lua_pop(L, 2); /* remove reference name and group name */ return cap; } else lua_pop(L, 1); /* remove group name */ } } luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1)); return NULL; /* to avoid warnings */ } /* ** Back-reference capture. Return number of values pushed. */ static int backrefcap (CapState *cs) { int n; Capture *curr = cs->cap; pushluaval(cs); /* reference name */ cs->cap = findback(cs, curr); /* find corresponding group */ n = pushnestedvalues(cs, 0); /* push group's values */ cs->cap = curr + 1; return n; } /* ** Table capture: creates a new table and populates it with nested ** captures. */ static int tablecap (CapState *cs) { lua_State *L = cs->L; int n = 0; lua_newtable(L); if (isfullcap(cs->cap++)) return 1; /* table is empty */ while (!isclosecap(cs->cap)) { if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ pushluaval(cs); /* push group name */ pushonenestedvalue(cs); lua_settable(L, -3); } else { /* not a named group */ int i; int k = pushcapture(cs); for (i = k; i > 0; i--) /* store all values into table */ lua_rawseti(L, -(i + 1), n + i); n += k; } } cs->cap++; /* skip close entry */ return 1; /* number of values pushed (only the table) */ } /* ** Table-query capture */ static int querycap (CapState *cs) { int idx = cs->cap->idx; pushonenestedvalue(cs); /* get nested capture */ lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */ if (!lua_isnil(cs->L, -1)) return 1; else { /* no value */ lua_pop(cs->L, 1); /* remove nil */ return 0; } } /* ** Fold capture */ static int foldcap (CapState *cs) { int n; lua_State *L = cs->L; int idx = cs->cap->idx; if (isfullcap(cs->cap++) || /* no nested captures? */ isclosecap(cs->cap) || /* no nested captures (large subject)? */ (n = pushcapture(cs)) == 0) /* nested captures with no values? */ return luaL_error(L, "no initial value for fold capture"); if (n > 1) lua_pop(L, n - 1); /* leave only one result for accumulator */ while (!isclosecap(cs->cap)) { lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ lua_insert(L, -2); /* put it before accumulator */ n = pushcapture(cs); /* get next capture's values */ lua_call(L, n + 1, 1); /* call folding function */ } cs->cap++; /* skip close entry */ return 1; /* only accumulator left on the stack */ } /* ** Function capture */ static int functioncap (CapState *cs) { int n; int top = lua_gettop(cs->L); pushluaval(cs); /* push function */ n = pushnestedvalues(cs, 0); /* push nested captures */ lua_call(cs->L, n, LUA_MULTRET); /* call function */ return lua_gettop(cs->L) - top; /* return function's results */ } /* ** Select capture */ static int numcap (CapState *cs) { int idx = cs->cap->idx; /* value to select */ if (idx == 0) { /* no values? */ nextcap(cs); /* skip entire capture */ return 0; /* no value produced */ } else { int n = pushnestedvalues(cs, 0); if (n < idx) /* invalid index? */ return luaL_error(cs->L, "no capture '%d'", idx); else { lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */ lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */ lua_pop(cs->L, n - 1); /* remove other captures */ return 1; } } } /* ** Return the stack index of the first runtime capture in the given ** list of captures (or zero if no runtime captures) */ int finddyncap (Capture *cap, Capture *last) { for (; cap < last; cap++) { if (cap->kind == Cruntime) return cap->idx; /* stack position of first capture */ } return 0; /* no dynamic captures in this segment */ } /* ** Calls a runtime capture. Returns number of captures removed by ** the call, including the initial Cgroup. (Captures to be added are ** on the Lua stack.) */ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { int n, id; lua_State *L = cs->L; int otop = lua_gettop(L); Capture *open = findopen(close); assert(captype(open) == Cgroup); id = finddyncap(open, close); /* get first dynamic capture argument */ close->kind = Cclose; /* closes the group */ close->s = s; cs->cap = open; cs->valuecached = 0; /* prepare capture state */ luaL_checkstack(L, 4, "too many runtime captures"); pushluaval(cs); /* push function to be called */ lua_pushvalue(L, SUBJIDX); /* push original subject */ lua_pushinteger(L, s - cs->s + 1); /* push current position */ n = pushnestedvalues(cs, 0); /* push nested captures */ lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */ if (id > 0) { /* are there old dynamic captures to be removed? */ int i; for (i = id; i <= otop; i++) lua_remove(L, id); /* remove old dynamic captures */ *rem = otop - id + 1; /* total number of dynamic captures removed */ } else *rem = 0; /* no dynamic captures removed */ return close - open; /* number of captures of all kinds removed */ } /* ** Auxiliary structure for substitution and string captures: keep ** information about nested captures for future use, avoiding to push ** string results into Lua */ typedef struct StrAux { int isstring; /* whether capture is a string */ union { Capture *cp; /* if not a string, respective capture */ struct { /* if it is a string... */ const char *s; /* ... starts here */ const char *e; /* ... ends here */ } s; } u; } StrAux; #define MAXSTRCAPS 10 /* ** Collect values from current capture into array 'cps'. Current ** capture must be Cstring (first call) or Csimple (recursive calls). ** (In first call, fills %0 with whole match for Cstring.) ** Returns number of elements in the array that were filled. */ static int getstrcaps (CapState *cs, StrAux *cps, int n) { int k = n++; cps[k].isstring = 1; /* get string value */ cps[k].u.s.s = cs->cap->s; /* starts here */ if (!isfullcap(cs->cap++)) { /* nested captures? */ while (!isclosecap(cs->cap)) { /* traverse them */ if (n >= MAXSTRCAPS) /* too many captures? */ nextcap(cs); /* skip extra captures (will not need them) */ else if (captype(cs->cap) == Csimple) /* string? */ n = getstrcaps(cs, cps, n); /* put info. into array */ else { cps[n].isstring = 0; /* not a string */ cps[n].u.cp = cs->cap; /* keep original capture */ nextcap(cs); n++; } } cs->cap++; /* skip close */ } cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ return n; } /* ** add next capture value (which should be a string) to buffer 'b' */ static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); /* ** String capture: add result to buffer 'b' (instead of pushing ** it into the stack) */ static void stringcap (luaL_Buffer *b, CapState *cs) { StrAux cps[MAXSTRCAPS]; int n; size_t len, i; const char *fmt; /* format string */ fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */ for (i = 0; i < len; i++) { /* traverse them */ if (fmt[i] != '%') /* not an escape? */ luaL_addchar(b, fmt[i]); /* add it to buffer */ else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */ luaL_addchar(b, fmt[i]); /* add to buffer */ else { int l = fmt[i] - '0'; /* capture index */ if (l > n) luaL_error(cs->L, "invalid capture index (%d)", l); else if (cps[l].isstring) luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); else { Capture *curr = cs->cap; cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ if (!addonestring(b, cs, "capture")) luaL_error(cs->L, "no values in capture index %d", l); cs->cap = curr; /* continue from where it stopped */ } } } } /* ** Substitution capture: add result to buffer 'b' */ static void substcap (luaL_Buffer *b, CapState *cs) { const char *curr = cs->cap->s; if (isfullcap(cs->cap)) /* no nested captures? */ luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ else { cs->cap++; /* skip open entry */ while (!isclosecap(cs->cap)) { /* traverse nested captures */ const char *next = cs->cap->s; luaL_addlstring(b, curr, next - curr); /* add text up to capture */ if (addonestring(b, cs, "replacement")) curr = closeaddr(cs->cap - 1); /* continue after match */ else /* no capture value */ curr = next; /* keep original text in final result */ } luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ } cs->cap++; /* go to next capture */ } /* ** Evaluates a capture and adds its first value to buffer 'b'; returns ** whether there was a value */ static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { switch (captype(cs->cap)) { case Cstring: stringcap(b, cs); /* add capture directly to buffer */ return 1; case Csubst: substcap(b, cs); /* add capture directly to buffer */ return 1; default: { lua_State *L = cs->L; int n = pushcapture(cs); if (n > 0) { if (n > 1) lua_pop(L, n - 1); /* only one result */ if (!lua_isstring(L, -1)) luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1)); luaL_addvalue(b); } return n; } } } /* ** Push all values of the current capture into the stack; returns ** number of values pushed */ static int pushcapture (CapState *cs) { lua_State *L = cs->L; luaL_checkstack(L, 4, "too many captures"); switch (captype(cs->cap)) { case Cposition: { lua_pushinteger(L, cs->cap->s - cs->s + 1); cs->cap++; return 1; } case Cconst: { pushluaval(cs); cs->cap++; return 1; } case Carg: { int arg = (cs->cap++)->idx; if (arg + FIXEDARGS > cs->ptop) return luaL_error(L, "reference to absent argument #%d", arg); lua_pushvalue(L, arg + FIXEDARGS); return 1; } case Csimple: { int k = pushnestedvalues(cs, 1); lua_insert(L, -k); /* make whole match be first result */ return k; } case Cruntime: { lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ return 1; } case Cstring: { luaL_Buffer b; luaL_buffinit(L, &b); stringcap(&b, cs); luaL_pushresult(&b); return 1; } case Csubst: { luaL_Buffer b; luaL_buffinit(L, &b); substcap(&b, cs); luaL_pushresult(&b); return 1; } case Cgroup: { if (cs->cap->idx == 0) /* anonymous group? */ return pushnestedvalues(cs, 0); /* add all nested values */ else { /* named group: add no values */ nextcap(cs); /* skip capture */ return 0; } } case Cbackref: return backrefcap(cs); case Ctable: return tablecap(cs); case Cfunction: return functioncap(cs); case Cnum: return numcap(cs); case Cquery: return querycap(cs); case Cfold: return foldcap(cs); default: assert(0); return 0; } } /* ** Prepare a CapState structure and traverse the entire list of ** captures in the stack pushing its results. 's' is the subject ** string, 'r' is the final position of the match, and 'ptop' ** the index in the stack where some useful values were pushed. ** Returns the number of results pushed. (If the list produces no ** results, push the final position of the match.) */ int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); int n = 0; if (!isclosecap(capture)) { /* is there any capture? */ CapState cs; cs.ocap = cs.cap = capture; cs.L = L; cs.s = s; cs.valuecached = 0; cs.ptop = ptop; do { /* collect their values */ n += pushcapture(&cs); } while (!isclosecap(cs.cap)); } if (n == 0) { /* no capture values? */ lua_pushinteger(L, r - s + 1); /* return only end position */ n = 1; } return n; } /* ** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ #include #include "lua.h" #include "lauxlib.h" /* signals a "no-instruction */ #define NOINST -1 static const Charset fullset_ = {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}; static const Charset *fullset = &fullset_; /* ** {====================================================== ** Analysis and some optimizations ** ======================================================= */ /* ** Check whether a charset is empty (IFail), singleton (IChar), ** full (IAny), or none of those (ISet). */ static Opcode charsettype (const byte *cs, int *c) { int count = 0; int i; int candidate = -1; /* candidate position for a char */ for (i = 0; i < CHARSETSIZE; i++) { int b = cs[i]; if (b == 0) { if (count > 1) return ISet; /* else set is still empty */ } else if (b == 0xFF) { if (count < (i * BITSPERCHAR)) return ISet; else count += BITSPERCHAR; /* set is still full */ } else if ((b & (b - 1)) == 0) { /* byte has only one bit? */ if (count > 0) return ISet; /* set is neither full nor empty */ else { /* set has only one char till now; track it */ count++; candidate = i; } } else return ISet; /* byte is neither empty, full, nor singleton */ } switch (count) { case 0: return IFail; /* empty set */ case 1: { /* singleton; find character bit inside byte */ int b = cs[candidate]; *c = candidate * BITSPERCHAR; if ((b & 0xF0) != 0) { *c += 4; b >>= 4; } if ((b & 0x0C) != 0) { *c += 2; b >>= 2; } if ((b & 0x02) != 0) { *c += 1; } return IChar; } default: { assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */ return IAny; } } } /* ** A few basic operations on Charsets */ static void cs_complement (Charset *cs) { loopset(i, cs->cs[i] = ~cs->cs[i]); } static int cs_equal (const byte *cs1, const byte *cs2) { loopset(i, if (cs1[i] != cs2[i]) return 0); return 1; } /* ** computes whether sets cs1 and cs2 are disjoint */ static int cs_disjoint (const Charset *cs1, const Charset *cs2) { loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) return 1; } /* ** Convert a 'char' pattern (TSet, TChar, TAny) to a charset */ int tocharset (TTree *tree, Charset *cs) { switch (tree->tag) { case TSet: { /* copy set */ loopset(i, cs->cs[i] = treebuffer(tree)[i]); return 1; } case TChar: { /* only one char */ assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); loopset(i, cs->cs[i] = 0); /* erase all chars */ setchar(cs->cs, tree->u.n); /* add that one */ return 1; } case TAny: { loopset(i, cs->cs[i] = 0xFF); /* add all to the set */ return 1; } default: return 0; } } /* ** Checks whether a pattern has captures */ int hascaptures (TTree *tree) { tailcall: switch (tree->tag) { case TCapture: case TRunTime: return 1; default: { switch (numsiblings[tree->tag]) { case 1: /* return hascaptures(sib1(tree)); */ tree = sib1(tree); goto tailcall; case 2: if (hascaptures(sib1(tree))) return 1; /* else return hascaptures(sib2(tree)); */ tree = sib2(tree); goto tailcall; default: assert(numsiblings[tree->tag] == 0); return 0; } } } } /* ** Checks how a pattern behaves regarding the empty string, ** in one of two different ways: ** A pattern is *nullable* if it can match without consuming any character; ** A pattern is *nofail* if it never fails for any string ** (including the empty string). ** The difference is only for predicates and run-time captures; ** for other patterns, the two properties are equivalent. ** (With predicates, &'a' is nullable but not nofail. Of course, ** nofail => nullable.) ** These functions are all convervative in the following way: ** p is nullable => nullable(p) ** nofail(p) => p cannot fail ** The function assumes that TOpenCall is not nullable; ** this will be checked again when the grammar is fixed.) ** Run-time captures can do whatever they want, so the result ** is conservative. */ int checkaux (TTree *tree, int pred) { tailcall: switch (tree->tag) { case TChar: case TSet: case TAny: case TFalse: case TOpenCall: return 0; /* not nullable */ case TRep: case TTrue: return 1; /* no fail */ case TNot: case TBehind: /* can match empty, but can fail */ if (pred == PEnofail) return 0; else return 1; /* PEnullable */ case TAnd: /* can match empty; fail iff body does */ if (pred == PEnullable) return 1; /* else return checkaux(sib1(tree), pred); */ tree = sib1(tree); goto tailcall; case TRunTime: /* can fail; match empty iff body does */ if (pred == PEnofail) return 0; /* else return checkaux(sib1(tree), pred); */ tree = sib1(tree); goto tailcall; case TSeq: if (!checkaux(sib1(tree), pred)) return 0; /* else return checkaux(sib2(tree), pred); */ tree = sib2(tree); goto tailcall; case TChoice: if (checkaux(sib2(tree), pred)) return 1; /* else return checkaux(sib1(tree), pred); */ tree = sib1(tree); goto tailcall; case TCapture: case TGrammar: case TRule: /* return checkaux(sib1(tree), pred); */ tree = sib1(tree); goto tailcall; case TCall: /* return checkaux(sib2(tree), pred); */ tree = sib2(tree); goto tailcall; default: assert(0); return 0; }; } /* ** number of characters to match a pattern (or -1 if variable) ** ('count' avoids infinite loops for grammars) */ int fixedlenx (TTree *tree, int count, int len) { tailcall: switch (tree->tag) { case TChar: case TSet: case TAny: return len + 1; case TFalse: case TTrue: case TNot: case TAnd: case TBehind: return len; case TRep: case TRunTime: case TOpenCall: return -1; case TCapture: case TRule: case TGrammar: /* return fixedlenx(sib1(tree), count); */ tree = sib1(tree); goto tailcall; case TCall: if (count++ >= MAXRULES) return -1; /* may be a loop */ /* else return fixedlenx(sib2(tree), count); */ tree = sib2(tree); goto tailcall; case TSeq: { len = fixedlenx(sib1(tree), count, len); if (len < 0) return -1; /* else return fixedlenx(sib2(tree), count, len); */ tree = sib2(tree); goto tailcall; } case TChoice: { int n1, n2; n1 = fixedlenx(sib1(tree), count, len); if (n1 < 0) return -1; n2 = fixedlenx(sib2(tree), count, len); if (n1 == n2) return n1; else return -1; } default: assert(0); return 0; }; } /* ** Computes the 'first set' of a pattern. ** The result is a conservative aproximation: ** match p ax -> x' for some x ==> a in first(p). ** The set 'follow' is the first set of what follows the ** pattern (full set if nothing follows it). ** The function returns 0 when this set can be used for ** tests that avoid the pattern altogether. ** A non-zero return can happen for two reasons: ** 1) match p '' -> '' ==> returns 1. ** (tests cannot be used because they always fail for an empty input) ** 2) there is a match-time capture ==> returns 2. ** (match-time captures should not be avoided by optimizations) */ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { tailcall: switch (tree->tag) { case TChar: case TSet: case TAny: { tocharset(tree, firstset); return 0; } case TTrue: { loopset(i, firstset->cs[i] = follow->cs[i]); return 1; } case TFalse: { loopset(i, firstset->cs[i] = 0); return 0; } case TChoice: { Charset csaux; int e1 = getfirst(sib1(tree), follow, firstset); int e2 = getfirst(sib2(tree), follow, &csaux); loopset(i, firstset->cs[i] |= csaux.cs[i]); return e1 | e2; } case TSeq: { if (!nullable(sib1(tree))) { /* return getfirst(sib1(tree), fullset, firstset); */ tree = sib1(tree); follow = fullset; goto tailcall; } else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */ Charset csaux; int e2 = getfirst(sib2(tree), follow, &csaux); int e1 = getfirst(sib1(tree), &csaux, firstset); if (e1 == 0) return 0; /* 'e1' ensures that first can be used */ else if ((e1 | e2) & 2) /* one of the children has a matchtime? */ return 2; /* pattern has a matchtime capture */ else return e2; /* else depends on 'e2' */ } } case TRep: { getfirst(sib1(tree), follow, firstset); loopset(i, firstset->cs[i] |= follow->cs[i]); return 1; /* accept the empty string */ } case TCapture: case TGrammar: case TRule: { /* return getfirst(sib1(tree), follow, firstset); */ tree = sib1(tree); goto tailcall; } case TRunTime: { /* function invalidates any follow info. */ int e = getfirst(sib1(tree), fullset, firstset); if (e) return 2; /* function is not "protected"? */ else return 0; /* pattern inside capture ensures first can be used */ } case TCall: { /* return getfirst(sib2(tree), follow, firstset); */ tree = sib2(tree); goto tailcall; } case TAnd: { int e = getfirst(sib1(tree), follow, firstset); loopset(i, firstset->cs[i] &= follow->cs[i]); return e; } case TNot: { if (tocharset(sib1(tree), firstset)) { cs_complement(firstset); return 1; } /* else go through */ } case TBehind: { /* instruction gives no new information */ /* call 'getfirst' to check for math-time captures */ int e = getfirst(sib1(tree), follow, firstset); loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */ return e | 1; /* always can accept the empty string */ } default: assert(0); return 0; } } /* ** If it returns true, then pattern can fail only depending on the next ** character of the subject */ static int headfail (TTree *tree) { tailcall: switch (tree->tag) { case TChar: case TSet: case TAny: case TFalse: return 1; case TTrue: case TRep: case TRunTime: case TNot: case TBehind: return 0; case TCapture: case TGrammar: case TRule: case TAnd: tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ case TCall: tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ case TSeq: if (!nofail(sib2(tree))) return 0; /* else return headfail(sib1(tree)); */ tree = sib1(tree); goto tailcall; case TChoice: if (!headfail(sib1(tree))) return 0; /* else return headfail(sib2(tree)); */ tree = sib2(tree); goto tailcall; default: assert(0); return 0; } } /* ** Check whether the code generation for the given tree can benefit ** from a follow set (to avoid computing the follow set when it is ** not needed) */ static int needfollow (TTree *tree) { tailcall: switch (tree->tag) { case TChar: case TSet: case TAny: case TFalse: case TTrue: case TAnd: case TNot: case TRunTime: case TGrammar: case TCall: case TBehind: return 0; case TChoice: case TRep: return 1; case TCapture: tree = sib1(tree); goto tailcall; case TSeq: tree = sib2(tree); goto tailcall; default: assert(0); return 0; } } /* }====================================================== */ /* ** {====================================================== ** Code generation ** ======================================================= */ /* ** size of an instruction */ int sizei (const Instruction *i) { switch((Opcode)i->i.code) { case ISet: case ISpan: return CHARSETINSTSIZE; case ITestSet: return CHARSETINSTSIZE + 1; case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: return 2; default: return 1; } } /* ** state for the compiler */ typedef struct CompileState { Pattern *p; /* pattern being compiled */ int ncode; /* next position in p->code to be filled */ lua_State *L; } CompileState; /* ** code generation is recursive; 'opt' indicates that the code is ** being generated under a 'IChoice' operator jumping to its end. ** 'tt' points to a previous test protecting this code. 'fl' is ** the follow set of the pattern. */ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, const Charset *fl); void reallocprog (lua_State *L, Pattern *p, int nsize) { void *ud; lua_Alloc f = lua_getallocf(L, &ud); void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), nsize * sizeof(Instruction)); if (newblock == NULL && nsize > 0) luaL_error(L, "not enough memory"); p->code = (Instruction *)newblock; p->codesize = nsize; } static int nextinstruction (CompileState *compst) { int size = compst->p->codesize; if (compst->ncode >= size) reallocprog(compst->L, compst->p, size * 2); return compst->ncode++; } #define getinstr(cs,i) ((cs)->p->code[i]) static int addinstruction (CompileState *compst, Opcode op, int aux) { int i = nextinstruction(compst); getinstr(compst, i).i.code = op; getinstr(compst, i).i.aux = aux; return i; } static int addoffsetinst (CompileState *compst, Opcode op) { int i = addinstruction(compst, op, 0); /* instruction */ addinstruction(compst, (Opcode)0, 0); /* open space for offset */ assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2); return i; } static void setoffset (CompileState *compst, int instruction, int offset) { getinstr(compst, instruction + 1).offset = offset; } /* ** Add a capture instruction: ** 'op' is the capture instruction; 'cap' the capture kind; ** 'key' the key into ktable; 'aux' is optional offset ** */ static int addinstcap (CompileState *compst, Opcode op, int cap, int key, int aux) { int i = addinstruction(compst, op, joinkindoff(cap, aux)); getinstr(compst, i).i.key = key; return i; } #define gethere(compst) ((compst)->ncode) #define target(code,i) ((i) + code[i + 1].offset) static void jumptothere (CompileState *compst, int instruction, int target) { if (instruction >= 0) setoffset(compst, instruction, target - instruction); } static void jumptohere (CompileState *compst, int instruction) { jumptothere(compst, instruction, gethere(compst)); } /* ** Code an IChar instruction, or IAny if there is an equivalent ** test dominating it */ static void codechar (CompileState *compst, int c, int tt) { if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar && getinstr(compst, tt).i.aux == c) addinstruction(compst, IAny, 0); else addinstruction(compst, IChar, c); } /* ** Add a charset posfix to an instruction */ static void addcharset (CompileState *compst, const byte *cs) { int p = gethere(compst); int i; for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++) nextinstruction(compst); /* space for buffer */ /* fill buffer with charset */ loopset(j, getinstr(compst, p).buff[j] = cs[j]); } /* ** code a char set, optimizing unit sets for IChar, "complete" ** sets for IAny, and empty sets for IFail; also use an IAny ** when instruction is dominated by an equivalent test. */ static void codecharset (CompileState *compst, const byte *cs, int tt) { int c = 0; /* (=) to avoid warnings */ Opcode op = charsettype(cs, &c); switch (op) { case IChar: codechar(compst, c, tt); break; case ISet: { /* non-trivial set? */ if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet && cs_equal(cs, getinstr(compst, tt + 2).buff)) addinstruction(compst, IAny, 0); else { addinstruction(compst, ISet, 0); addcharset(compst, cs); } break; } default: addinstruction(compst, op, c); break; } } /* ** code a test set, optimizing unit sets for ITestChar, "complete" ** sets for ITestAny, and empty sets for IJmp (always fails). ** 'e' is true iff test should accept the empty string. (Test ** instructions in the current VM never accept the empty string.) */ static int codetestset (CompileState *compst, Charset *cs, int e) { if (e) return NOINST; /* no test */ else { int c = 0; Opcode op = charsettype(cs->cs, &c); switch (op) { case IFail: return addoffsetinst(compst, IJmp); /* always jump */ case IAny: return addoffsetinst(compst, ITestAny); case IChar: { int i = addoffsetinst(compst, ITestChar); getinstr(compst, i).i.aux = c; return i; } case ISet: { int i = addoffsetinst(compst, ITestSet); addcharset(compst, cs->cs); return i; } default: assert(0); return 0; } } } /* ** Find the final destination of a sequence of jumps */ static int finaltarget (Instruction *code, int i) { while (code[i].i.code == IJmp) i = target(code, i); return i; } /* ** final label (after traversing any jumps) */ static int finallabel (Instruction *code, int i) { return finaltarget(code, target(code, i)); } /* ** == behind n;

(where n = fixedlen(p)) */ static void codebehind (CompileState *compst, TTree *tree) { if (tree->u.n > 0) addinstruction(compst, IBehind, tree->u.n); codegen(compst, sib1(tree), 0, NOINST, fullset); } /* ** Choice; optimizations: ** - when p1 is headfail ** - when first(p1) and first(p2) are disjoint; than ** a character not in first(p1) cannot go to p1, and a character ** in first(p1) cannot go to p2 (at it is not in first(p2)). ** (The optimization is not valid if p1 accepts the empty string, ** as then there is no character at all...) ** - when p2 is empty and opt is true; a IPartialCommit can resuse ** the Choice already active in the stack. */ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, const Charset *fl) { int emptyp2 = (p2->tag == TTrue); Charset cs1, cs2; int e1 = getfirst(p1, fullset, &cs1); if (headfail(p1) || (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) { /* == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */ int test = codetestset(compst, &cs1, 0); int jmp = NOINST; codegen(compst, p1, 0, test, fl); if (!emptyp2) jmp = addoffsetinst(compst, IJmp); jumptohere(compst, test); codegen(compst, p2, opt, NOINST, fl); jumptohere(compst, jmp); } else if (opt && emptyp2) { /* p1? == IPartialCommit; p1 */ jumptohere(compst, addoffsetinst(compst, IPartialCommit)); codegen(compst, p1, 1, NOINST, fullset); } else { /* == test(fail(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: */ int pcommit; int test = codetestset(compst, &cs1, e1); int pchoice = addoffsetinst(compst, IChoice); codegen(compst, p1, emptyp2, test, fullset); pcommit = addoffsetinst(compst, ICommit); jumptohere(compst, pchoice); jumptohere(compst, test); codegen(compst, p2, opt, NOINST, fl); jumptohere(compst, pcommit); } } /* ** And predicate ** optimization: fixedlen(p) = n ==> <&p> ==

; behind n ** (valid only when 'p' has no captures) */ static void codeand (CompileState *compst, TTree *tree, int tt) { int n = fixedlen(tree); if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) { codegen(compst, tree, 0, tt, fullset); if (n > 0) addinstruction(compst, IBehind, n); } else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */ int pcommit; int pchoice = addoffsetinst(compst, IChoice); codegen(compst, tree, 0, tt, fullset); pcommit = addoffsetinst(compst, IBackCommit); jumptohere(compst, pchoice); addinstruction(compst, IFail, 0); jumptohere(compst, pcommit); } } /* ** Captures: if pattern has fixed (and not too big) length, use ** a single IFullCapture instruction after the match; otherwise, ** enclose the pattern with OpenCapture - CloseCapture. */ static void codecapture (CompileState *compst, TTree *tree, int tt, const Charset *fl) { int len = fixedlen(sib1(tree)); if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) { codegen(compst, sib1(tree), 0, tt, fl); addinstcap(compst, IFullCapture, tree->cap, tree->key, len); } else { addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0); codegen(compst, sib1(tree), 0, tt, fl); addinstcap(compst, ICloseCapture, Cclose, 0, 0); } } static void coderuntime (CompileState *compst, TTree *tree, int tt) { addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0); codegen(compst, sib1(tree), 0, tt, fullset); addinstcap(compst, ICloseRunTime, Cclose, 0, 0); } /* ** Repetion; optimizations: ** When pattern is a charset, can use special instruction ISpan. ** When pattern is head fail, or if it starts with characters that ** are disjoint from what follows the repetions, a simple test ** is enough (a fail inside the repetition would backtrack to fail ** again in the following pattern, so there is no need for a choice). ** When 'opt' is true, the repetion can reuse the Choice already ** active in the stack. */ static void coderep (CompileState *compst, TTree *tree, int opt, const Charset *fl) { Charset st; if (tocharset(tree, &st)) { addinstruction(compst, ISpan, 0); addcharset(compst, st.cs); } else { int e1 = getfirst(tree, fullset, &st); if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) { /* L1: test (fail(p1)) -> L2;

; jmp L1; L2: */ int jmp; int test = codetestset(compst, &st, 0); codegen(compst, tree, opt, test, fullset); jmp = addoffsetinst(compst, IJmp); jumptohere(compst, test); jumptothere(compst, jmp, test); } else { /* test(fail(p1)) -> L2; choice L2; L1:

; partialcommit L1; L2: */ /* or (if 'opt'): partialcommit L1; L1:

; partialcommit L1; */ int commit, l2; int test = codetestset(compst, &st, e1); int pchoice = NOINST; if (opt) jumptohere(compst, addoffsetinst(compst, IPartialCommit)); else pchoice = addoffsetinst(compst, IChoice); l2 = gethere(compst); codegen(compst, tree, 0, NOINST, fullset); commit = addoffsetinst(compst, IPartialCommit); jumptothere(compst, commit, l2); jumptohere(compst, pchoice); jumptohere(compst, test); } } } /* ** Not predicate; optimizations: ** In any case, if first test fails, 'not' succeeds, so it can jump to ** the end. If pattern is headfail, that is all (it cannot fail ** in other parts); this case includes 'not' of simple sets. Otherwise, ** use the default code (a choice plus a failtwice). */ static void codenot (CompileState *compst, TTree *tree) { Charset st; int e = getfirst(tree, fullset, &st); int test = codetestset(compst, &st, e); if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */ addinstruction(compst, IFail, 0); else { /* test(fail(p))-> L1; choice L1;

; failtwice; L1: */ int pchoice = addoffsetinst(compst, IChoice); codegen(compst, tree, 0, NOINST, fullset); addinstruction(compst, IFailTwice, 0); jumptohere(compst, pchoice); } jumptohere(compst, test); } /* ** change open calls to calls, using list 'positions' to find ** correct offsets; also optimize tail calls */ static void correctcalls (CompileState *compst, int *positions, int from, int to) { int i; Instruction *code = compst->p->code; for (i = from; i < to; i += sizei(&code[i])) { if (code[i].i.code == IOpenCall) { int n = code[i].i.key; /* rule number */ int rule = positions[n]; /* rule position */ assert(rule == from || code[rule - 1].i.code == IRet); if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */ code[i].i.code = IJmp; /* tail call */ else code[i].i.code = ICall; jumptothere(compst, i, rule); /* call jumps to respective rule */ } } assert(i == to); } /* ** Code for a grammar: ** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2: */ static void codegrammar (CompileState *compst, TTree *grammar) { int positions[MAXRULES]; int rulenumber = 0; TTree *rule; int firstcall = addoffsetinst(compst, ICall); /* call initial rule */ int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */ int start = gethere(compst); /* here starts the initial rule */ jumptohere(compst, firstcall); for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { positions[rulenumber++] = gethere(compst); /* save rule position */ codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */ addinstruction(compst, IRet, 0); } assert(rule->tag == TTrue); jumptohere(compst, jumptoend); correctcalls(compst, positions, start, gethere(compst)); } static void codecall (CompileState *compst, TTree *call) { int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */ assert(sib2(call)->tag == TRule); } /* ** Code first child of a sequence ** (second child is called in-place to allow tail call) ** Return 'tt' for second child */ static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, int tt, const Charset *fl) { if (needfollow(p1)) { Charset fl1; getfirst(p2, fl, &fl1); /* p1 follow is p2 first */ codegen(compst, p1, 0, tt, &fl1); } else /* use 'fullset' as follow */ codegen(compst, p1, 0, tt, fullset); if (fixedlen(p1) != 0) /* can 'p1' consume anything? */ return NOINST; /* invalidate test */ else return tt; /* else 'tt' still protects sib2 */ } /* ** Main code-generation function: dispatch to auxiliar functions ** according to kind of tree */ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, const Charset *fl) { tailcall: switch (tree->tag) { case TChar: codechar(compst, tree->u.n, tt); break; case TAny: addinstruction(compst, IAny, 0); break; case TSet: codecharset(compst, treebuffer(tree), tt); break; case TTrue: break; case TFalse: addinstruction(compst, IFail, 0); break; case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break; case TRep: coderep(compst, sib1(tree), opt, fl); break; case TBehind: codebehind(compst, tree); break; case TNot: codenot(compst, sib1(tree)); break; case TAnd: codeand(compst, sib1(tree), tt); break; case TCapture: codecapture(compst, tree, tt, fl); break; case TRunTime: coderuntime(compst, tree, tt); break; case TGrammar: codegrammar(compst, tree); break; case TCall: codecall(compst, tree); break; case TSeq: { tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */ /* codegen(compst, p2, opt, tt, fl); */ tree = sib2(tree); goto tailcall; } default: assert(0); } } /* ** Optimize jumps and other jump-like instructions. ** * Update labels of instructions with labels to their final ** destinations (e.g., choice L1; ... L1: jmp L2: becomes ** choice L2) ** * Jumps to other instructions that do jumps become those ** instructions (e.g., jump to return becomes a return; jump ** to commit becomes a commit) */ static void peephole (CompileState *compst) { Instruction *code = compst->p->code; int i; for (i = 0; i < compst->ncode; i += sizei(&code[i])) { switch (code[i].i.code) { case IChoice: case ICall: case ICommit: case IPartialCommit: case IBackCommit: case ITestChar: case ITestSet: case ITestAny: { /* instructions with labels */ jumptothere(compst, i, finallabel(code, i)); /* optimize label */ break; } case IJmp: { int ft = finaltarget(code, i); switch (code[ft].i.code) { /* jumping to what? */ case IRet: case IFail: case IFailTwice: case IEnd: { /* instructions with unconditional implicit jumps */ code[i] = code[ft]; /* jump becomes that instruction */ code[i + 1].i.code = IAny; /* 'no-op' for target position */ break; } case ICommit: case IPartialCommit: case IBackCommit: { /* inst. with unconditional explicit jumps */ int fft = finallabel(code, ft); code[i] = code[ft]; /* jump becomes that instruction... */ jumptothere(compst, i, fft); /* but must correct its offset */ i--; /* reoptimize its label */ break; } default: { jumptothere(compst, i, ft); /* optimize label */ break; } } break; } default: break; } } assert(code[i - 1].i.code == IEnd); } /* ** Compile a pattern */ Instruction *compile (lua_State *L, Pattern *p) { CompileState compst; compst.p = p; compst.ncode = 0; compst.L = L; reallocprog(L, p, 2); /* minimum initial size */ codegen(&compst, p->tree, 0, NOINST, fullset); addinstruction(&compst, IEnd, 0); reallocprog(L, p, compst.ncode); /* set final size */ peephole(&compst); return p->code; } /* }====================================================== */ /* ** $Id: lpprint.c,v 1.7 2013/04/12 16:29:49 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ #include #include #include #if defined(LPEG_DEBUG) /* ** {====================================================== ** Printing patterns (for debugging) ** ======================================================= */ void printcharset (const byte *st) { int i; printf("["); for (i = 0; i <= UCHAR_MAX; i++) { int first = i; while (testchar(st, i) && i <= UCHAR_MAX) i++; if (i - 1 == first) /* unary range? */ printf("(%02x)", first); else if (i - 1 > first) /* non-empty range? */ printf("(%02x-%02x)", first, i - 1); } printf("]"); } static void printcapkind (int kind) { const char *const modes[] = { "close", "position", "constant", "backref", "argument", "simple", "table", "function", "query", "string", "num", "substitution", "fold", "runtime", "group"}; printf("%s", modes[kind]); } static void printjmp (const Instruction *op, const Instruction *p) { printf("-> %d", (int)(p + (p + 1)->offset - op)); } static void printinst (const Instruction *op, const Instruction *p) { const char *const names[] = { "any", "char", "set", "testany", "testchar", "testset", "span", "behind", "ret", "end", "choice", "jmp", "call", "open_call", "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", "fullcapture", "opencapture", "closecapture", "closeruntime" }; printf("%02ld: %s ", (long)(p - op), names[p->i.code]); switch ((Opcode)p->i.code) { case IChar: { printf("'%c'", p->i.aux); break; } case ITestChar: { printf("'%c'", p->i.aux); printjmp(op, p); break; } case IFullCapture: { printcapkind(getkind(p)); printf(" (size = %d) (idx = %d)", getoff(p), p->i.key); break; } case IOpenCapture: { printcapkind(getkind(p)); printf(" (idx = %d)", p->i.key); break; } case ISet: { printcharset((p+1)->buff); break; } case ITestSet: { printcharset((p+2)->buff); printjmp(op, p); break; } case ISpan: { printcharset((p+1)->buff); break; } case IOpenCall: { printf("-> %d", (p + 1)->offset); break; } case IBehind: { printf("%d", p->i.aux); break; } case IJmp: case ICall: case ICommit: case IChoice: case IPartialCommit: case IBackCommit: case ITestAny: { printjmp(op, p); break; } default: break; } printf("\n"); } void printpatt (Instruction *p, int n) { Instruction *op = p; while (p < op + n) { printinst(op, p); p += sizei(p); } } #if defined(LPEG_DEBUG) static void printcap (Capture *cap) { printcapkind(cap->kind); printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s); } void printcaplist (Capture *cap, Capture *limit) { printf(">======\n"); for (; cap->s && (limit == NULL || cap < limit); cap++) printcap(cap); printf("=======\n"); } #endif /* }====================================================== */ /* ** {====================================================== ** Printing trees (for debugging) ** ======================================================= */ static const char *tagnames[] = { "char", "set", "any", "true", "false", "rep", "seq", "choice", "not", "and", "call", "opencall", "rule", "grammar", "behind", "capture", "run-time" }; void printtree (TTree *tree, int ident) { int i; for (i = 0; i < ident; i++) printf(" "); printf("%s", tagnames[tree->tag]); switch (tree->tag) { case TChar: { int c = tree->u.n; if (isprint(c)) printf(" '%c'\n", c); else printf(" (%02X)\n", c); break; } case TSet: { printcharset(treebuffer(tree)); printf("\n"); break; } case TOpenCall: case TCall: { printf(" key: %d\n", tree->key); break; } case TBehind: { printf(" %d\n", tree->u.n); printtree(sib1(tree), ident + 2); break; } case TCapture: { printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n); printtree(sib1(tree), ident + 2); break; } case TRule: { printf(" n: %d key: %d\n", tree->cap, tree->key); printtree(sib1(tree), ident + 2); break; /* do not print next rule as a sibling */ } case TGrammar: { TTree *rule = sib1(tree); printf(" %d\n", tree->u.n); /* number of rules */ for (i = 0; i < tree->u.n; i++) { printtree(rule, ident + 2); rule = sib2(rule); } assert(rule->tag == TTrue); /* sentinel */ break; } default: { int sibs = numsiblings[tree->tag]; printf("\n"); if (sibs >= 1) { printtree(sib1(tree), ident + 2); if (sibs >= 2) printtree(sib2(tree), ident + 2); } break; } } } void printktable (lua_State *L, int idx) { int n, i; lua_getfenv(L, idx); if (lua_isnil(L, -1)) /* no ktable? */ return; n = lua_objlen(L, -1); printf("["); for (i = 1; i <= n; i++) { printf("%d = ", i); lua_rawgeti(L, -1, i); if (lua_isstring(L, -1)) printf("%s ", lua_tostring(L, -1)); else printf("%s ", lua_typename(L, lua_type(L, -1))); lua_pop(L, 1); } printf("]\n"); /* leave ktable at the stack */ } /* }====================================================== */ #endif /* ** $Id: lptree.c,v 1.10 2013/04/12 16:30:33 roberto Exp $ ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ #include #include #include #include "lua.h" #include "lauxlib.h" /* number of siblings for each tree */ const byte numsiblings[] = { 0, 0, 0, /* char, set, any */ 0, 0, /* true, false */ 1, /* rep */ 2, 2, /* seq, choice */ 1, 1, /* not, and */ 0, 0, 2, 1, /* call, opencall, rule, grammar */ 1, /* behind */ 1, 1 /* capture, runtime capture */ }; static TTree *newgrammar (lua_State *L, int arg); /* ** returns a reasonable name for value at index 'idx' on the stack */ static const char *val2str (lua_State *L, int idx) { const char *k = lua_tostring(L, idx); if (k != NULL) return lua_pushfstring(L, "%s", k); else return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx)); } /* ** Fix a TOpenCall into a TCall node, using table 'postable' to ** translate a key to its rule address in the tree. Raises an ** error if key does not exist. */ static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) { int n; lua_rawgeti(L, -1, t->key); /* get rule's name */ lua_gettable(L, postable); /* query name in position table */ n = lua_tonumber(L, -1); /* get (absolute) position */ lua_pop(L, 1); /* remove position */ if (n == 0) { /* no position? */ lua_rawgeti(L, -1, t->key); /* get rule's name again */ luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); } t->tag = TCall; t->u.ps = n - (t - g); /* position relative to node */ assert(sib2(t)->tag == TRule); sib2(t)->key = t->key; } /* ** Transform left associative constructions into right ** associative ones, for sequence and choice; that is: ** (t11 + t12) + t2 => t11 + (t12 + t2) ** (t11 * t12) * t2 => t11 * (t12 * t2) ** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) */ static void correctassociativity (TTree *tree) { TTree *t1 = sib1(tree); assert(tree->tag == TChoice || tree->tag == TSeq); while (t1->tag == tree->tag) { int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */ int n11size = t1->u.ps - 1; int n12size = n1size - n11size - 1; memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */ tree->u.ps = n11size + 1; sib2(tree)->tag = tree->tag; sib2(tree)->u.ps = n12size + 1; } } /* ** Make final adjustments in a tree. Fix open calls in tree 't', ** making them refer to their respective rules or raising appropriate ** errors (if not inside a grammar). Correct associativity of associative ** constructions (making them right associative). Assume that tree's ** ktable is at the top of the stack (for error messages). */ static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { tailcall: switch (t->tag) { case TGrammar: /* subgrammars were already fixed */ return; case TOpenCall: { if (g != NULL) /* inside a grammar? */ fixonecall(L, postable, g, t); else { /* open call outside grammar */ lua_rawgeti(L, -1, t->key); luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); } break; } case TSeq: case TChoice: correctassociativity(t); break; } switch (numsiblings[t->tag]) { case 1: /* finalfix(L, postable, g, sib1(t)); */ t = sib1(t); goto tailcall; case 2: finalfix(L, postable, g, sib1(t)); t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */ default: assert(numsiblings[t->tag] == 0); break; } } /* ** {====================================================== ** Tree generation ** ======================================================= */ /* ** In 5.2, could use 'luaL_testudata'... */ static int testpattern (lua_State *L, int idx) { if (lua_touserdata(L, idx)) { /* value is a userdata? */ if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ luaL_getmetatable(L, PATTERN_T); if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */ lua_pop(L, 2); /* remove both metatables */ return 1; } } } return 0; } static Pattern *getpattern (lua_State *L, int idx) { return (Pattern *)luaL_checkudata(L, idx, PATTERN_T); } static int getsize (lua_State *L, int idx) { return (lua_objlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; } static TTree *gettree (lua_State *L, int idx, int *len) { Pattern *p = getpattern(L, idx); if (len) *len = getsize(L, idx); return p->tree; } /* ** create a pattern */ static TTree *newtree (lua_State *L, int len) { size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); Pattern *p = (Pattern *)lua_newuserdata(L, size); luaL_getmetatable(L, PATTERN_T); lua_setmetatable(L, -2); p->code = NULL; p->codesize = 0; return p->tree; } static TTree *newleaf (lua_State *L, int tag) { TTree *tree = newtree(L, 1); tree->tag = tag; return tree; } static TTree *newcharset (lua_State *L) { TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); tree->tag = TSet; loopset(i, treebuffer(tree)[i] = 0); return tree; } /* ** add to tree a sequence where first sibling is 'sib' (with size ** 'sibsize'); returns position for second sibling */ static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { tree->tag = TSeq; tree->u.ps = sibsize + 1; memcpy(sib1(tree), sib, sibsize * sizeof(TTree)); return sib2(tree); } /* ** Add element 'idx' to 'ktable' of pattern at the top of the stack; ** create new 'ktable' if necessary. Return index of new element. */ static int addtoktable (lua_State *L, int idx) { if (idx == 0 || lua_isnil(L, idx)) /* no actual value to insert? */ return 0; else { int n; lua_getfenv(L, -1); /* get ktable from pattern */ n = lua_objlen(L, -1); if (n == 0) { /* is it empty/non-existent? */ lua_pop(L, 1); /* remove it */ lua_createtable(L, 1, 0); /* create a fresh table */ } lua_pushvalue(L, idx); /* element to be added */ lua_rawseti(L, -2, n + 1); lua_setfenv(L, -2); /* set it as ktable for pattern */ return n + 1; } } /* ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it ** must build a sequence of sequence of sequence...) */ static void fillseq (TTree *tree, int tag, int n, const char *s) { int i; for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */ tree->tag = TSeq; tree->u.ps = 2; sib1(tree)->tag = tag; sib1(tree)->u.n = s ? (byte)s[i] : 0; tree = sib2(tree); } tree->tag = tag; /* last one does not need TSeq */ tree->u.n = s ? (byte)s[i] : 0; } /* ** Numbers as patterns: ** 0 == true (always match); n == TAny repeated 'n' times; ** -n == not (TAny repeated 'n' times) */ static TTree *numtree (lua_State *L, int n) { if (n == 0) return newleaf(L, TTrue); else { TTree *tree, *nd; if (n > 0) tree = nd = newtree(L, 2 * n - 1); else { /* negative: code it as !(-n) */ n = -n; tree = newtree(L, 2 * n); tree->tag = TNot; nd = sib1(tree); } fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */ return tree; } } /* ** Convert value at index 'idx' to a pattern */ static TTree *getpatt (lua_State *L, int idx, int *len) { TTree *tree; switch (lua_type(L, idx)) { case LUA_TSTRING: { size_t slen; const char *s = lua_tolstring(L, idx, &slen); /* get string */ if (slen == 0) /* empty? */ tree = newleaf(L, TTrue); /* always match */ else { tree = newtree(L, 2 * (slen - 1) + 1); fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */ } break; } case LUA_TNUMBER: { int n = lua_tointeger(L, idx); tree = numtree(L, n); break; } case LUA_TBOOLEAN: { tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse)); break; } case LUA_TTABLE: { tree = newgrammar(L, idx); break; } case LUA_TFUNCTION: { tree = newtree(L, 2); tree->tag = TRunTime; tree->key = addtoktable(L, idx); sib1(tree)->tag = TTrue; break; } default: { return gettree(L, idx, len); } } lua_replace(L, idx); /* put new tree into 'idx' slot */ if (len) *len = getsize(L, idx); return tree; } /* ** Return the number of elements in the ktable of pattern at 'idx'. ** In Lua 5.2, default "environment" for patterns is nil, not ** a table. Treat it as an empty table. In Lua 5.1, assumes that ** the environment has no numeric indices (len == 0) */ static int ktablelen (lua_State *L, int idx) { if (!lua_istable(L, idx)) return 0; else return lua_objlen(L, idx); } /* ** Concatentate the contents of table 'idx1' into table 'idx2'. ** (Assume that both indices are negative.) ** Return the original length of table 'idx2' */ static int concattable (lua_State *L, int idx1, int idx2) { int i; int n1 = ktablelen(L, idx1); int n2 = ktablelen(L, idx2); if (n1 == 0) return 0; /* nothing to correct */ for (i = 1; i <= n1; i++) { lua_rawgeti(L, idx1, i); lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ } return n2; } /* ** Make a merge of ktables from p1 and p2 the ktable for the new ** pattern at the top of the stack. */ static int joinktables (lua_State *L, int p1, int p2) { int n1, n2; lua_getfenv(L, p1); /* get ktables */ lua_getfenv(L, p2); n1 = ktablelen(L, -2); n2 = ktablelen(L, -1); if (n1 == 0 && n2 == 0) { /* are both tables empty? */ lua_pop(L, 2); /* nothing to be done; pop tables */ return 0; /* nothing to correct */ } if (n2 == 0 || lua_equal(L, -2, -1)) { /* second table is empty or equal? */ lua_pop(L, 1); /* pop 2nd table */ lua_setfenv(L, -2); /* set 1st ktable into new pattern */ return 0; /* nothing to correct */ } if (n1 == 0) { /* first table is empty? */ lua_setfenv(L, -3); /* set 2nd table into new pattern */ lua_pop(L, 1); /* pop 1st table */ return 0; /* nothing to correct */ } else { lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ /* stack: new p; ktable p1; ktable p2; new ktable */ concattable(L, -3, -1); /* from p1 into new ktable */ concattable(L, -2, -1); /* from p2 into new ktable */ lua_setfenv(L, -4); /* new ktable becomes p env */ lua_pop(L, 2); /* pop other ktables */ return n1; /* correction for indices from p2 */ } } static void correctkeys (TTree *tree, int n) { if (n == 0) return; /* no correction? */ tailcall: switch (tree->tag) { case TOpenCall: case TCall: case TRunTime: case TRule: { if (tree->key > 0) tree->key += n; break; } case TCapture: { if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) tree->key += n; break; } default: break; } switch (numsiblings[tree->tag]) { case 1: /* correctkeys(sib1(tree), n); */ tree = sib1(tree); goto tailcall; case 2: correctkeys(sib1(tree), n); tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ default: assert(numsiblings[tree->tag] == 0); break; } } /* ** copy 'ktable' of element 'idx' to new tree (on top of stack) */ static void copyktable (lua_State *L, int idx) { lua_getfenv(L, idx); lua_setfenv(L, -2); } /* ** merge 'ktable' from rule at stack index 'idx' into 'ktable' ** from tree at the top of the stack, and correct corresponding ** tree. */ static void mergektable (lua_State *L, int idx, TTree *rule) { int n; lua_getfenv(L, -1); /* get ktables */ lua_getfenv(L, idx); n = concattable(L, -1, -2); lua_pop(L, 2); /* remove both ktables */ correctkeys(rule, n); } /* ** create a new tree, whith a new root and one sibling. ** Sibling must be on the Lua stack, at index 1. */ static TTree *newroot1sib (lua_State *L, int tag) { int s1; TTree *tree1 = getpatt(L, 1, &s1); TTree *tree = newtree(L, 1 + s1); /* create new tree */ tree->tag = tag; memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); copyktable(L, 1); return tree; } /* ** create a new tree, whith a new root and 2 siblings. ** Siblings must be on the Lua stack, first one at index 1. */ static TTree *newroot2sib (lua_State *L, int tag) { int s1, s2; TTree *tree1 = getpatt(L, 1, &s1); TTree *tree2 = getpatt(L, 2, &s2); TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */ tree->tag = tag; tree->u.ps = 1 + s1; memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); correctkeys(sib2(tree), joinktables(L, 1, 2)); return tree; } static int lp_P (lua_State *L) { luaL_checkany(L, 1); getpatt(L, 1, NULL); lua_settop(L, 1); return 1; } /* ** sequence operator; optimizations: ** false x => false, x true => x, true x => x ** (cannot do x . false => false because x may have runtime captures) */ static int lp_seq (lua_State *L) { TTree *tree1 = getpatt(L, 1, NULL); TTree *tree2 = getpatt(L, 2, NULL); if (tree1->tag == TFalse || tree2->tag == TTrue) lua_pushvalue(L, 1); /* false . x == false, x . true = x */ else if (tree1->tag == TTrue) lua_pushvalue(L, 2); /* true . x = x */ else newroot2sib(L, TSeq); return 1; } /* ** choice operator; optimizations: ** charset / charset => charset ** true / x => true, x / false => x, false / x => x ** (x / true is not equivalent to true) */ static int lp_choice (lua_State *L) { Charset st1, st2; TTree *t1 = getpatt(L, 1, NULL); TTree *t2 = getpatt(L, 2, NULL); if (tocharset(t1, &st1) && tocharset(t2, &st2)) { TTree *t = newcharset(L); loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); } else if (nofail(t1) || t2->tag == TFalse) lua_pushvalue(L, 1); /* true / x => true, x / false => x */ else if (t1->tag == TFalse) lua_pushvalue(L, 2); /* false / x => x */ else newroot2sib(L, TChoice); return 1; } /* ** p^n */ static int lp_star (lua_State *L) { int size1; int n = luaL_checkint(L, 2); TTree *tree1 = gettree(L, 1, &size1); if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ TTree *tree = newtree(L, (n + 1) * (size1 + 1)); if (nullable(tree1)) luaL_error(L, "loop body may accept empty string"); while (n--) /* repeat 'n' times */ tree = seqaux(tree, tree1, size1); tree->tag = TRep; memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); } else { /* choice (seq tree1 ... choice tree1 true ...) true */ TTree *tree; n = -n; /* size = (choice + seq + tree1 + true) * n, but the last has no seq */ tree = newtree(L, n * (size1 + 3) - 1); for (; n > 1; n--) { /* repeat (n - 1) times */ tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2; sib2(tree)->tag = TTrue; tree = sib1(tree); tree = seqaux(tree, tree1, size1); } tree->tag = TChoice; tree->u.ps = size1 + 1; sib2(tree)->tag = TTrue; memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); } copyktable(L, 1); return 1; } /* ** #p == &p */ static int lp_and (lua_State *L) { newroot1sib(L, TAnd); return 1; } /* ** -p == !p */ static int lp_not (lua_State *L) { newroot1sib(L, TNot); return 1; } /* ** [t1 - t2] == Seq (Not t2) t1 ** If t1 and t2 are charsets, make their difference. */ static int lp_sub (lua_State *L) { Charset st1, st2; int s1, s2; TTree *t1 = getpatt(L, 1, &s1); TTree *t2 = getpatt(L, 2, &s2); if (tocharset(t1, &st1) && tocharset(t2, &st2)) { TTree *t = newcharset(L); loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); } else { TTree *tree = newtree(L, 2 + s1 + s2); tree->tag = TSeq; /* sequence of... */ tree->u.ps = 2 + s2; sib1(tree)->tag = TNot; /* ...not... */ memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ correctkeys(sib1(tree), joinktables(L, 1, 2)); } return 1; } static int lp_set (lua_State *L) { size_t l; const char *s = luaL_checklstring(L, 1, &l); TTree *tree = newcharset(L); while (l--) { setchar(treebuffer(tree), (byte)(*s)); s++; } return 1; } static int lp_range (lua_State *L) { int arg; int top = lua_gettop(L); TTree *tree = newcharset(L); for (arg = 1; arg <= top; arg++) { int c; size_t l; const char *r = luaL_checklstring(L, arg, &l); luaL_argcheck(L, l == 2, arg, "range must have two characters"); for (c = (byte)r[0]; c <= (byte)r[1]; c++) setchar(treebuffer(tree), c); } return 1; } /* ** Look-behind predicate */ static int lp_behind (lua_State *L) { TTree *tree; TTree *tree1 = getpatt(L, 1, NULL); int n = fixedlen(tree1); luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length"); luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); tree = newroot1sib(L, TBehind); tree->u.n = n; return 1; } /* ** Create a non-terminal */ static int lp_V (lua_State *L) { TTree *tree = newleaf(L, TOpenCall); luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); tree->key = addtoktable(L, 1); return 1; } /* ** Create a tree for a non-empty capture, with a body and ** optionally with an associated Lua value (at index 'labelidx' in the ** stack) */ static int capture_aux (lua_State *L, int cap, int labelidx) { TTree *tree = newroot1sib(L, TCapture); tree->cap = cap; tree->key = addtoktable(L, labelidx); return 1; } /* ** Fill a tree with an empty capture, using an empty (TTrue) sibling. */ static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) { tree->tag = TCapture; tree->cap = cap; tree->key = addtoktable(L, idx); sib1(tree)->tag = TTrue; return tree; } /* ** Create a tree for an empty capture */ static TTree *newemptycap (lua_State *L, int cap, int idx) { return auxemptycap(L, newtree(L, 2), cap, idx); } /* ** Captures with syntax p / v ** (function capture, query capture, string capture, or number capture) */ static int lp_divcapture (lua_State *L) { switch (lua_type(L, 2)) { case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2); case LUA_TTABLE: return capture_aux(L, Cquery, 2); case LUA_TSTRING: return capture_aux(L, Cstring, 2); case LUA_TNUMBER: { int n = lua_tointeger(L, 2); TTree *tree = newroot1sib(L, TCapture); luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number"); tree->cap = Cnum; tree->key = n; return 1; } default: return luaL_argerror(L, 2, "invalid replacement value"); } } static int lp_substcapture (lua_State *L) { return capture_aux(L, Csubst, 0); } static int lp_tablecapture (lua_State *L) { return capture_aux(L, Ctable, 0); } static int lp_groupcapture (lua_State *L) { if (lua_isnoneornil(L, 2)) return capture_aux(L, Cgroup, 0); else { luaL_checkstring(L, 2); return capture_aux(L, Cgroup, 2); } } static int lp_foldcapture (lua_State *L) { luaL_checktype(L, 2, LUA_TFUNCTION); return capture_aux(L, Cfold, 2); } static int lp_simplecapture (lua_State *L) { return capture_aux(L, Csimple, 0); } static int lp_poscapture (lua_State *L) { newemptycap(L, Cposition, 0); return 1; } static int lp_argcapture (lua_State *L) { int n = luaL_checkint(L, 1); TTree *tree = newemptycap(L, Carg, 0); tree->key = n; luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); return 1; } static int lp_backref (lua_State *L) { luaL_checkstring(L, 1); newemptycap(L, Cbackref, 1); return 1; } /* ** Constant capture */ static int lp_constcapture (lua_State *L) { int i; int n = lua_gettop(L); /* number of values */ if (n == 0) /* no values? */ newleaf(L, TTrue); /* no capture */ else if (n == 1) newemptycap(L, Cconst, 1); /* single constant capture */ else { /* create a group capture with all values */ TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); tree->tag = TCapture; tree->cap = Cgroup; tree->key = 0; tree = sib1(tree); for (i = 1; i <= n - 1; i++) { tree->tag = TSeq; tree->u.ps = 3; /* skip TCapture and its sibling */ auxemptycap(L, sib1(tree), Cconst, i); tree = sib2(tree); } auxemptycap(L, tree, Cconst, i); } return 1; } static int lp_matchtime (lua_State *L) { TTree *tree; luaL_checktype(L, 2, LUA_TFUNCTION); tree = newroot1sib(L, TRunTime); tree->key = addtoktable(L, 2); return 1; } /* }====================================================== */ /* ** {====================================================== ** Grammar - Tree generation ** ======================================================= */ /* ** push on the stack the index and the pattern for the ** initial rule of grammar at index 'arg' in the stack; ** also add that index into position table. */ static void getfirstrule (lua_State *L, int arg, int postab) { lua_rawgeti(L, arg, 1); /* access first element */ if (lua_isstring(L, -1)) { /* is it the name of initial rule? */ lua_pushvalue(L, -1); /* duplicate it to use as key */ lua_gettable(L, arg); /* get associated rule */ } else { lua_pushinteger(L, 1); /* key for initial rule */ lua_insert(L, -2); /* put it before rule */ } if (!testpattern(L, -1)) { /* initial rule not a pattern? */ if (lua_isnil(L, -1)) luaL_error(L, "grammar has no initial rule"); else luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2)); } lua_pushvalue(L, -2); /* push key */ lua_pushinteger(L, 1); /* push rule position (after TGrammar) */ lua_settable(L, postab); /* insert pair at position table */ } /* ** traverse grammar at index 'arg', pushing all its keys and patterns ** into the stack. Create a new table (before all pairs key-pattern) to ** collect all keys and their associated positions in the final tree ** (the "position table"). ** Return the number of rules and (in 'totalsize') the total size ** for the new tree. */ static int collectrules (lua_State *L, int arg, int *totalsize) { int n = 1; /* to count number of rules */ int postab = lua_gettop(L) + 1; /* index of position table */ int size; /* accumulator for total size */ lua_newtable(L); /* create position table */ getfirstrule(L, arg, postab); size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */ lua_pushnil(L); /* prepare to traverse grammar table */ while (lua_next(L, arg) != 0) { if (lua_tonumber(L, -2) == 1 || lua_equal(L, -2, postab + 1)) { /* initial rule? */ lua_pop(L, 1); /* remove value (keep key for lua_next) */ continue; } if (!testpattern(L, -1)) /* value is not a pattern? */ luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2)); luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); lua_pushvalue(L, -2); /* push key (to insert into position table) */ lua_pushinteger(L, size); lua_settable(L, postab); size += 1 + getsize(L, -1); /* update size */ lua_pushvalue(L, -2); /* push key (for next lua_next) */ n++; } *totalsize = size + 1; /* TTrue to finish list of rules */ return n; } static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { int i; TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ for (i = 0; i < n; i++) { /* add each rule into new tree */ int ridx = frule + 2*i + 1; /* index of i-th rule */ int rulesize; TTree *rn = gettree(L, ridx, &rulesize); nd->tag = TRule; nd->key = 0; nd->cap = i; /* rule number */ nd->u.ps = rulesize + 1; /* point to next rule */ memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ nd = sib2(nd); /* move to next rule */ } nd->tag = TTrue; /* finish list of rules */ } /* ** Check whether a tree has potential infinite loops */ static int checkloops (TTree *tree) { tailcall: if (tree->tag == TRep && nullable(sib1(tree))) return 1; else if (tree->tag == TGrammar) return 0; /* sub-grammars already checked */ else { switch (numsiblings[tree->tag]) { case 1: /* return checkloops(sib1(tree)); */ tree = sib1(tree); goto tailcall; case 2: if (checkloops(sib1(tree))) return 1; /* else return checkloops(sib2(tree)); */ tree = sib2(tree); goto tailcall; default: assert(numsiblings[tree->tag] == 0); return 0; } } } static int verifyerror (lua_State *L, int *passed, int npassed) { int i, j; for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ for (j = i - 1; j >= 0; j--) { if (passed[i] == passed[j]) { lua_rawgeti(L, -1, passed[i]); /* get rule's key */ return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); } } } return luaL_error(L, "too many left calls in grammar"); } /* ** Check whether a rule can be left recursive; raise an error in that ** case; otherwise return 1 iff pattern is nullable. Assume ktable at ** the top of the stack. */ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, int nullable) { tailcall: switch (tree->tag) { case TChar: case TSet: case TAny: case TFalse: return nullable; /* cannot pass from here */ case TTrue: case TBehind: /* look-behind cannot have calls */ return 1; case TNot: case TAnd: case TRep: /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ tree = sib1(tree); nullable = 1; goto tailcall; case TCapture: case TRunTime: /* return verifyrule(L, sib1(tree), passed, npassed); */ tree = sib1(tree); goto tailcall; case TCall: /* return verifyrule(L, sib2(tree), passed, npassed); */ tree = sib2(tree); goto tailcall; case TSeq: /* only check 2nd child if first is nullable */ if (!verifyrule(L, sib1(tree), passed, npassed, 0)) return nullable; /* else return verifyrule(L, sib2(tree), passed, npassed); */ tree = sib2(tree); goto tailcall; case TChoice: /* must check both children */ nullable = verifyrule(L, sib1(tree), passed, npassed, nullable); /* return verifyrule(L, sib2(tree), passed, npassed, nullable); */ tree = sib2(tree); goto tailcall; case TRule: if (npassed >= MAXRULES) return verifyerror(L, passed, npassed); else { passed[npassed++] = tree->key; /* return verifyrule(L, sib1(tree), passed, npassed); */ tree = sib1(tree); goto tailcall; } case TGrammar: return nullable(tree); /* sub-grammar cannot be left recursive */ default: assert(0); return 0; } } static void verifygrammar (lua_State *L, TTree *grammar) { int passed[MAXRULES]; TTree *rule; /* check left-recursive rules */ for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { if (rule->key == 0) continue; /* unused rule */ verifyrule(L, sib1(rule), passed, 0, 0); } assert(rule->tag == TTrue); /* check infinite loops inside rules */ for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { if (rule->key == 0) continue; /* unused rule */ if (checkloops(sib1(rule))) { lua_rawgeti(L, -1, rule->key); /* get rule's key */ luaL_error(L, "empty loop in rule '%s'", val2str(L, -1)); } } assert(rule->tag == TTrue); } /* ** Give a name for the initial rule if it is not referenced */ static void initialrulename (lua_State *L, TTree *grammar, int frule) { if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */ int n = lua_objlen(L, -1) + 1; /* index for name */ lua_pushvalue(L, frule); /* rule's name */ lua_rawseti(L, -2, n); /* ktable was on the top of the stack */ sib1(grammar)->key = n; } } static TTree *newgrammar (lua_State *L, int arg) { int treesize; int frule = lua_gettop(L) + 2; /* position of first rule's key */ int n = collectrules(L, arg, &treesize); TTree *g = newtree(L, treesize); luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules"); g->tag = TGrammar; g->u.n = n; lua_newtable(L); /* create 'ktable' */ lua_setfenv(L, -2); buildgrammar(L, g, frule, n); lua_getfenv(L, -1); /* get 'ktable' for new tree */ finalfix(L, frule - 1, g, sib1(g)); initialrulename(L, g, frule); verifygrammar(L, g); lua_pop(L, 1); /* remove 'ktable' */ lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */ lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */ return g; /* new table at the top of the stack */ } /* }====================================================== */ static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { lua_getfenv(L, idx); /* push 'ktable' (may be used by 'finalfix') */ finalfix(L, 0, NULL, p->tree); lua_pop(L, 1); /* remove 'ktable' */ return compile(L, p); } static int lp_printtree (lua_State *L) { TTree *tree = getpatt(L, 1, NULL); int c = lua_toboolean(L, 2); if (c) { lua_getfenv(L, 1); /* push 'ktable' (may be used by 'finalfix') */ finalfix(L, 0, NULL, tree); lua_pop(L, 1); /* remove 'ktable' */ } printktable(L, 1); printtree(tree, 0); return 0; } static int lp_printcode (lua_State *L) { Pattern *p = getpattern(L, 1); printktable(L, 1); if (p->code == NULL) /* not compiled yet? */ prepcompile(L, p, 1); printpatt(p->code, p->codesize); return 0; } /* ** Get the initial position for the match, interpreting negative ** values from the end of the subject */ static size_t initposition (lua_State *L, size_t len) { lua_Integer ii = luaL_optinteger(L, 3, 1); if (ii > 0) { /* positive index? */ if ((size_t)ii <= len) /* inside the string? */ return (size_t)ii - 1; /* return it (corrected to 0-base) */ else return len; /* crop at the end */ } else { /* negative index */ if ((size_t)(-ii) <= len) /* inside the string? */ return len - ((size_t)(-ii)); /* return position from the end */ else return 0; /* crop at the beginning */ } } /* ** Main match function */ static int lp_match (lua_State *L) { Capture capture[INITCAPSIZE]; const char *r; size_t l; Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1)); Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1); const char *s = luaL_checklstring(L, SUBJIDX, &l); size_t i = initposition(L, l); int ptop = lua_gettop(L); lua_pushnil(L); /* initialize subscache */ lua_pushlightuserdata(L, capture); /* initialize caplistidx */ lua_getfenv(L, 1); /* initialize penvidx */ r = match(L, s, s + i, s + l, code, capture, ptop); if (r == NULL) { lua_pushnil(L); return 1; } return getcaptures(L, s, r, ptop); } /* ** {====================================================== ** Library creation and functions not related to matching ** ======================================================= */ static int lp_setmax (lua_State *L) { luaL_optinteger(L, 1, -1); lua_settop(L, 1); lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); return 0; } static int lp_version (lua_State *L) { lua_pushstring(L, VERSION); return 1; } static int lp_type (lua_State *L) { if (testpattern(L, 1)) lua_pushliteral(L, "pattern"); else lua_pushnil(L); return 1; } int lp_gc (lua_State *L) { Pattern *p = getpattern(L, 1); if (p->codesize > 0) reallocprog(L, p, 0); return 0; } static void createcat (lua_State *L, const char *catname, int (catf) (int)) { TTree *t = newcharset(L); int i; for (i = 0; i <= UCHAR_MAX; i++) if (catf(i)) setchar(treebuffer(t), i); lua_setfield(L, -2, catname); } static int lp_locale (lua_State *L) { if (lua_isnoneornil(L, 1)) { lua_settop(L, 0); lua_createtable(L, 0, 12); } else { luaL_checktype(L, 1, LUA_TTABLE); lua_settop(L, 1); } createcat(L, "alnum", isalnum); createcat(L, "alpha", isalpha); createcat(L, "cntrl", iscntrl); createcat(L, "digit", isdigit); createcat(L, "graph", isgraph); createcat(L, "lower", islower); createcat(L, "print", isprint); createcat(L, "punct", ispunct); createcat(L, "space", isspace); createcat(L, "upper", isupper); createcat(L, "xdigit", isxdigit); return 1; } static struct luaL_Reg pattreg[] = { {"ptree", lp_printtree}, {"pcode", lp_printcode}, {"match", lp_match}, {"B", lp_behind}, {"V", lp_V}, {"C", lp_simplecapture}, {"Cc", lp_constcapture}, {"Cmt", lp_matchtime}, {"Cb", lp_backref}, {"Carg", lp_argcapture}, {"Cp", lp_poscapture}, {"Cs", lp_substcapture}, {"Ct", lp_tablecapture}, {"Cf", lp_foldcapture}, {"Cg", lp_groupcapture}, {"P", lp_P}, {"S", lp_set}, {"R", lp_range}, {"locale", lp_locale}, {"version", lp_version}, {"setmaxstack", lp_setmax}, {"type", lp_type}, {NULL, NULL} }; static struct luaL_Reg metareg[] = { {"__mul", lp_seq}, {"__add", lp_choice}, {"__pow", lp_star}, {"__gc", lp_gc}, {"__len", lp_and}, {"__div", lp_divcapture}, {"__unm", lp_not}, {"__sub", lp_sub}, {NULL, NULL} }; LUALIB_API int luaopen_lpeg (lua_State *L); LUALIB_API int luaopen_lpeg (lua_State *L) { luaL_newmetatable(L, PATTERN_T); lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); luaL_register(L, NULL, metareg); luaL_register(L, "lpeg", pattreg); lua_pushvalue(L, -1); lua_setfield(L, -3, "__index"); return 1; } /* }====================================================== */ /* ** $Id: lpvm.c,v 1.5 2013/04/12 16:29:49 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ #include #include #include "lua.h" #include "lauxlib.h" /* initial size for call/backtrack stack */ #if !defined(INITBACK) #define INITBACK 100 #endif #define getoffset(p) (((p) + 1)->offset) static const Instruction giveup = {{IGiveup, 0, 0}}; /* ** {====================================================== ** Virtual Machine ** ======================================================= */ typedef struct Stack { const char *s; /* saved position (or NULL for calls) */ const Instruction *p; /* next instruction */ int caplevel; } Stack; #define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) /* ** Double the size of the array of captures */ static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { Capture *newc; if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) luaL_error(L, "too many captures"); newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); memcpy(newc, cap, captop * sizeof(Capture)); lua_replace(L, caplistidx(ptop)); return newc; } /* ** Double the size of the stack */ static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { Stack *stack = getstackbase(L, ptop); Stack *newstack; int n = *stacklimit - stack; /* current stack size */ int max, newn; lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); max = lua_tointeger(L, -1); /* maximum allowed size */ lua_pop(L, 1); if (n >= max) /* already at maximum size? */ luaL_error(L, "too many pending calls/choices"); newn = 2 * n; /* new size */ if (newn > max) newn = max; newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); memcpy(newstack, stack, n * sizeof(Stack)); lua_replace(L, stackidx(ptop)); *stacklimit = newstack + newn; return newstack + n; /* return next position */ } /* ** Interpret the result of a dynamic capture: false -> fail; ** true -> keep current position; number -> next position. ** Return new subject position. 'fr' is stack index where ** is the result; 'curr' is current subject position; 'limit' ** is subject's size. */ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { lua_Integer res; if (!lua_toboolean(L, fr)) { /* false value? */ lua_settop(L, fr - 1); /* remove results */ return -1; /* and fail */ } else if (lua_isboolean(L, fr)) /* true? */ res = curr; /* keep current position */ else { res = lua_tointeger(L, fr) - 1; /* new position */ if (res < curr || res > limit) luaL_error(L, "invalid position returned by match-time capture"); } lua_remove(L, fr); /* remove first result (offset) */ return res; } /* ** Add capture values returned by a dynamic capture to the capture list ** 'base', nested inside a group capture. 'fd' indexes the first capture ** value, 'n' is the number of values (at least 1). */ static void adddyncaptures (const char *s, Capture *base, int n, int fd) { int i; /* Cgroup capture is already there */ assert(base[0].kind == Cgroup && base[0].siz == 0); base[0].idx = 0; /* make it an anonymous group */ for (i = 1; i <= n; i++) { /* add runtime captures */ base[i].kind = Cruntime; base[i].siz = 1; /* mark it as closed */ base[i].idx = fd + i - 1; /* stack index of capture value */ base[i].s = s; } base[i].kind = Cclose; /* close group */ base[i].siz = 1; base[i].s = s; } /* ** Remove dynamic captures from the Lua stack (called in case of failure) */ static int removedyncap (lua_State *L, Capture *capture, int level, int last) { int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */ int top = lua_gettop(L); if (id == 0) return 0; /* no dynamic captures? */ lua_settop(L, id - 1); /* remove captures */ return top - id + 1; /* number of values removed */ } /* ** Opcode interpreter */ const char *match (lua_State *L, const char *o, const char *s, const char *e, Instruction *op, Capture *capture, int ptop) { Stack stackbase[INITBACK]; Stack *stacklimit = stackbase + INITBACK; Stack *stack = stackbase; /* point to first empty slot in stack */ int capsize = INITCAPSIZE; int captop = 0; /* point to first empty slot in captures */ int ndyncap = 0; /* number of dynamic captures (in Lua stack) */ const Instruction *p = op; /* current instruction */ stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; lua_pushlightuserdata(L, stackbase); for (;;) { #if defined(DEBUG) printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", s, stack - getstackbase(L, ptop), ndyncap, captop); printinst(op, p); printcaplist(capture, capture + captop); #endif assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); switch ((Opcode)p->i.code) { case IEnd: { assert(stack == getstackbase(L, ptop) + 1); capture[captop].kind = Cclose; capture[captop].s = NULL; return s; } case IGiveup: { assert(stack == getstackbase(L, ptop)); return NULL; } case IRet: { assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); p = (--stack)->p; continue; } case IAny: { if (s < e) { p++; s++; } else goto fail; continue; } case ITestAny: { if (s < e) p += 2; else p += getoffset(p); continue; } case IChar: { if ((byte)*s == p->i.aux && s < e) { p++; s++; } else goto fail; continue; } case ITestChar: { if ((byte)*s == p->i.aux && s < e) p += 2; else p += getoffset(p); continue; } case ISet: { int c = (byte)*s; if (testchar((p+1)->buff, c) && s < e) { p += CHARSETINSTSIZE; s++; } else goto fail; continue; } case ITestSet: { int c = (byte)*s; if (testchar((p + 2)->buff, c) && s < e) p += 1 + CHARSETINSTSIZE; else p += getoffset(p); continue; } case IBehind: { int n = p->i.aux; if (n > s - o) goto fail; s -= n; p++; continue; } case ISpan: { for (; s < e; s++) { int c = (byte)*s; if (!testchar((p+1)->buff, c)) break; } p += CHARSETINSTSIZE; continue; } case IJmp: { p += getoffset(p); continue; } case IChoice: { if (stack == stacklimit) stack = doublestack(L, &stacklimit, ptop); stack->p = p + getoffset(p); stack->s = s; stack->caplevel = captop; stack++; p += 2; continue; } case ICall: { if (stack == stacklimit) stack = doublestack(L, &stacklimit, ptop); stack->s = NULL; stack->p = p + 2; /* save return address */ stack++; p += getoffset(p); continue; } case ICommit: { assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); stack--; p += getoffset(p); continue; } case IPartialCommit: { assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); (stack - 1)->s = s; (stack - 1)->caplevel = captop; p += getoffset(p); continue; } case IBackCommit: { assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); s = (--stack)->s; captop = stack->caplevel; p += getoffset(p); continue; } case IFailTwice: assert(stack > getstackbase(L, ptop)); stack--; /* go through */ case IFail: fail: { /* pattern failed: try to backtrack */ do { /* remove pending calls */ assert(stack > getstackbase(L, ptop)); s = (--stack)->s; } while (s == NULL); if (ndyncap > 0) /* is there matchtime captures? */ ndyncap -= removedyncap(L, capture, stack->caplevel, captop); captop = stack->caplevel; p = stack->p; continue; } case ICloseRunTime: { CapState cs; int rem, res, n; int fr = lua_gettop(L) + 1; /* stack index of first result */ cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ captop -= n; /* remove nested captures */ fr -= rem; /* 'rem' items were popped from Lua stack */ res = resdyncaptures(L, fr, s - o, e - o); /* get result */ if (res == -1) /* fail? */ goto fail; s = o + res; /* else update current position */ n = lua_gettop(L) - fr + 1; /* number of new captures */ ndyncap += n - rem; /* update number of dynamic captures */ if (n > 0) { /* any new capture? */ if ((captop += n + 2) >= capsize) { capture = doublecap(L, capture, captop, ptop); capsize = 2 * captop; } /* add new captures to 'capture' list */ adddyncaptures(s, capture + captop - n - 2, n, fr); } p++; continue; } case ICloseCapture: { const char *s1 = s; assert(captop > 0); /* if possible, turn capture into a full capture */ if (capture[captop - 1].siz == 0 && s1 - capture[captop - 1].s < UCHAR_MAX) { capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; p++; continue; } else { capture[captop].siz = 1; /* mark entry as closed */ capture[captop].s = s; goto pushcapture; } } case IOpenCapture: capture[captop].siz = 0; /* mark entry as open */ capture[captop].s = s; goto pushcapture; case IFullCapture: capture[captop].siz = getoff(p) + 1; /* save capture size */ capture[captop].s = s - getoff(p); /* goto pushcapture; */ pushcapture: { capture[captop].idx = p->i.key; capture[captop].kind = getkind(p); if (++captop >= capsize) { capture = doublecap(L, capture, captop, ptop); capsize = 2 * captop; } p++; continue; } default: assert(0); return NULL; } } } /* }====================================================== */