The Squirrel interpreter. See http://www.squirrel-lang.org/
sqstdlib/sqstdrex.cpp@3:7268a3ceaffc, 2014-12-16 (annotated)
- Committer:
- jhnwkmn
- Date:
- Tue Dec 16 11:39:42 2014 +0000
- Revision:
- 3:7268a3ceaffc
- Parent:
- 0:97a4f8cc534c
Accepts \r as line terminator as well.
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
jhnwkmn | 0:97a4f8cc534c | 1 | /* see copyright notice in squirrel.h */ |
jhnwkmn | 0:97a4f8cc534c | 2 | #include <squirrel.h> |
jhnwkmn | 0:97a4f8cc534c | 3 | #include <string.h> |
jhnwkmn | 0:97a4f8cc534c | 4 | #include <ctype.h> |
jhnwkmn | 0:97a4f8cc534c | 5 | #include <setjmp.h> |
jhnwkmn | 0:97a4f8cc534c | 6 | #include <sqstdstring.h> |
jhnwkmn | 0:97a4f8cc534c | 7 | |
/*
 * scisprint: "is this character printable" test used by the pattern parser.
 * The original guard tested the misspelled macro _UINCODE, so the wide
 * character variant could never be selected. Wide-character builds now pick
 * iswprint; narrow builds keep isprint.
 */
#if defined(SQUNICODE) || defined(_UNICODE)
#include <wctype.h>
#define scisprint iswprint
#else
#define scisprint isprint
#endif
jhnwkmn | 0:97a4f8cc534c | 13 | |
#ifdef _DEBUG
#include <stdio.h>

/*
 * Printable names of the node opcodes, used only by the _DEBUG dump of the
 * compiled node array. The dump indexes this table with (type - MAX_CHAR),
 * so entry i names opcode MAX_CHAR+i and the order must follow the OP_*
 * defines; entry 0 ("NONE") is a placeholder.
 */
static const SQChar *g_nnames[] =
{
    _SC("NONE"),_SC("OP_GREEDY"), _SC("OP_OR"),
    _SC("OP_EXPR"),_SC("OP_NOCAPEXPR"),_SC("OP_DOT"), _SC("OP_CLASS"),
    _SC("OP_CCLASS"),_SC("OP_NCLASS"),_SC("OP_RANGE"),_SC("OP_CHAR"),
    _SC("OP_EOL"),_SC("OP_BOL"),_SC("OP_WB")
};

#endif
jhnwkmn | 0:97a4f8cc534c | 26 | |
/*
 * Node opcodes. They are numbered above MAX_CHAR so that they can share the
 * node 'type' field with literal characters: a node whose type is not one of
 * these values is matched as that literal character.
 */
#define OP_GREEDY (MAX_CHAR+1) // quantifiers: * + ? {n} {n,} {n,m}
#define OP_OR (MAX_CHAR+2) // alternation: a|b
#define OP_EXPR (MAX_CHAR+3) // capturing parentheses ()
#define OP_NOCAPEXPR (MAX_CHAR+4) // non-capturing parentheses (?:)
#define OP_DOT (MAX_CHAR+5) // any character: .
#define OP_CLASS (MAX_CHAR+6) // bracket class: [...]
#define OP_CCLASS (MAX_CHAR+7) // escaped character class: \w \d \s ...
#define OP_NCLASS (MAX_CHAR+8) // negated bracket class: [^...]
#define OP_RANGE (MAX_CHAR+9) // character range inside a bracket class: a-z
#define OP_CHAR (MAX_CHAR+10)
#define OP_EOL (MAX_CHAR+11) // end anchor: $
#define OP_BOL (MAX_CHAR+12) // begin anchor: ^
#define OP_WB (MAX_CHAR+13) // word boundary: \b and \B
jhnwkmn | 0:97a4f8cc534c | 40 | |
/* Pattern metacharacters recognised by the parser. */
#define SQREX_SYMBOL_ANY_CHAR ('.')
#define SQREX_SYMBOL_GREEDY_ONE_OR_MORE ('+')
#define SQREX_SYMBOL_GREEDY_ZERO_OR_MORE ('*')
#define SQREX_SYMBOL_GREEDY_ZERO_OR_ONE ('?')
#define SQREX_SYMBOL_BRANCH ('|')
#define SQREX_SYMBOL_END_OF_STRING ('$')
/* '^' also negates a bracket class when it is the first character after '[' */
#define SQREX_SYMBOL_BEGINNING_OF_STRING ('^')
#define SQREX_SYMBOL_ESCAPE_CHAR ('\\')
jhnwkmn | 0:97a4f8cc534c | 49 | |
jhnwkmn | 0:97a4f8cc534c | 50 | |
/* Either one of the OP_* opcodes or, for a literal, the character itself. */
typedef int SQRexNodeType;

/*
 * One node of the compiled pattern. Nodes live in the SQRex::_nodes array
 * and refer to each other by array index; -1 means "no node".
 */
typedef struct tagSQRexNode{
    SQRexNodeType type;
    /* OP_EXPR/OP_NOCAPEXPR/OP_CLASS/OP_NCLASS: first child node;
       OP_GREEDY: the repeated node; OP_OR: first alternative;
       OP_RANGE: lower bound; OP_CCLASS: class letter; OP_WB: 'b' or 'B' */
    SQInteger left;
    /* OP_EXPR: capture index; OP_GREEDY: (min << 16) | max;
       OP_OR: second alternative; OP_RANGE: upper bound */
    SQInteger right;
    /* index of the node that follows this one in its sequence */
    SQInteger next;
}SQRexNode;
jhnwkmn | 0:97a4f8cc534c | 59 | |
/* A compiled regular expression together with its parse and match state. */
struct SQRex{
    const SQChar *_eol; /* end of the subject text; OP_EOL matches only here */
    const SQChar *_bol; /* start of the subject text; OP_BOL matches only here */
    const SQChar *_p; /* parser cursor into the pattern being compiled */
    SQInteger _first; /* index of the root OP_EXPR node */
    SQInteger _op; /* NOTE(review): not referenced in the code visible here */
    SQRexNode *_nodes; /* node array, grown on demand by sqstd_rex_newnode */
    SQInteger _nallocated; /* capacity of _nodes, in nodes */
    SQInteger _nsize; /* number of nodes currently in use */
    SQInteger _nsubexpr; /* number of OP_EXPR (capturing) nodes created so far */
    SQRexMatch *_matches; /* one entry per capturing sub-expression */
    SQInteger _currsubexp; /* index of the next capture to record while matching */
    void *_jmpbuf; /* jmp_buf that sqstd_rex_error() jumps to */
    const SQChar **_error; /* optional out-slot for the error message */
};

/* Forward declaration: sqstd_rex_element and sqstd_rex_list are mutually recursive. */
static SQInteger sqstd_rex_list(SQRex *exp);
jhnwkmn | 0:97a4f8cc534c | 77 | |
jhnwkmn | 0:97a4f8cc534c | 78 | static SQInteger sqstd_rex_newnode(SQRex *exp, SQRexNodeType type) |
jhnwkmn | 0:97a4f8cc534c | 79 | { |
jhnwkmn | 0:97a4f8cc534c | 80 | SQRexNode n; |
jhnwkmn | 0:97a4f8cc534c | 81 | n.type = type; |
jhnwkmn | 0:97a4f8cc534c | 82 | n.next = n.right = n.left = -1; |
jhnwkmn | 0:97a4f8cc534c | 83 | if(type == OP_EXPR) |
jhnwkmn | 0:97a4f8cc534c | 84 | n.right = exp->_nsubexpr++; |
jhnwkmn | 0:97a4f8cc534c | 85 | if(exp->_nallocated < (exp->_nsize + 1)) { |
jhnwkmn | 0:97a4f8cc534c | 86 | SQInteger oldsize = exp->_nallocated; |
jhnwkmn | 0:97a4f8cc534c | 87 | exp->_nallocated *= 2; |
jhnwkmn | 0:97a4f8cc534c | 88 | exp->_nodes = (SQRexNode *)sq_realloc(exp->_nodes, oldsize * sizeof(SQRexNode) ,exp->_nallocated * sizeof(SQRexNode)); |
jhnwkmn | 0:97a4f8cc534c | 89 | } |
jhnwkmn | 0:97a4f8cc534c | 90 | exp->_nodes[exp->_nsize++] = n; |
jhnwkmn | 0:97a4f8cc534c | 91 | SQInteger newid = exp->_nsize - 1; |
jhnwkmn | 0:97a4f8cc534c | 92 | return (SQInteger)newid; |
jhnwkmn | 0:97a4f8cc534c | 93 | } |
jhnwkmn | 0:97a4f8cc534c | 94 | |
jhnwkmn | 0:97a4f8cc534c | 95 | static void sqstd_rex_error(SQRex *exp,const SQChar *error) |
jhnwkmn | 0:97a4f8cc534c | 96 | { |
jhnwkmn | 0:97a4f8cc534c | 97 | if(exp->_error) *exp->_error = error; |
jhnwkmn | 0:97a4f8cc534c | 98 | longjmp(*((jmp_buf*)exp->_jmpbuf),-1); |
jhnwkmn | 0:97a4f8cc534c | 99 | } |
jhnwkmn | 0:97a4f8cc534c | 100 | |
jhnwkmn | 0:97a4f8cc534c | 101 | static void sqstd_rex_expect(SQRex *exp, SQInteger n){ |
jhnwkmn | 0:97a4f8cc534c | 102 | if((*exp->_p) != n) |
jhnwkmn | 0:97a4f8cc534c | 103 | sqstd_rex_error(exp, _SC("expected paren")); |
jhnwkmn | 0:97a4f8cc534c | 104 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 105 | } |
jhnwkmn | 0:97a4f8cc534c | 106 | |
jhnwkmn | 0:97a4f8cc534c | 107 | static SQChar sqstd_rex_escapechar(SQRex *exp) |
jhnwkmn | 0:97a4f8cc534c | 108 | { |
jhnwkmn | 0:97a4f8cc534c | 109 | if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR){ |
jhnwkmn | 0:97a4f8cc534c | 110 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 111 | switch(*exp->_p) { |
jhnwkmn | 0:97a4f8cc534c | 112 | case 'v': exp->_p++; return '\v'; |
jhnwkmn | 0:97a4f8cc534c | 113 | case 'n': exp->_p++; return '\n'; |
jhnwkmn | 0:97a4f8cc534c | 114 | case 't': exp->_p++; return '\t'; |
jhnwkmn | 0:97a4f8cc534c | 115 | case 'r': exp->_p++; return '\r'; |
jhnwkmn | 0:97a4f8cc534c | 116 | case 'f': exp->_p++; return '\f'; |
jhnwkmn | 0:97a4f8cc534c | 117 | default: return (*exp->_p++); |
jhnwkmn | 0:97a4f8cc534c | 118 | } |
jhnwkmn | 0:97a4f8cc534c | 119 | } else if(!scisprint(*exp->_p)) sqstd_rex_error(exp,_SC("letter expected")); |
jhnwkmn | 0:97a4f8cc534c | 120 | return (*exp->_p++); |
jhnwkmn | 0:97a4f8cc534c | 121 | } |
jhnwkmn | 0:97a4f8cc534c | 122 | |
jhnwkmn | 0:97a4f8cc534c | 123 | static SQInteger sqstd_rex_charclass(SQRex *exp,SQInteger classid) |
jhnwkmn | 0:97a4f8cc534c | 124 | { |
jhnwkmn | 0:97a4f8cc534c | 125 | SQInteger n = sqstd_rex_newnode(exp,OP_CCLASS); |
jhnwkmn | 0:97a4f8cc534c | 126 | exp->_nodes[n].left = classid; |
jhnwkmn | 0:97a4f8cc534c | 127 | return n; |
jhnwkmn | 0:97a4f8cc534c | 128 | } |
jhnwkmn | 0:97a4f8cc534c | 129 | |
jhnwkmn | 0:97a4f8cc534c | 130 | static SQInteger sqstd_rex_charnode(SQRex *exp,SQBool isclass) |
jhnwkmn | 0:97a4f8cc534c | 131 | { |
jhnwkmn | 0:97a4f8cc534c | 132 | SQChar t; |
jhnwkmn | 0:97a4f8cc534c | 133 | if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR) { |
jhnwkmn | 0:97a4f8cc534c | 134 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 135 | switch(*exp->_p) { |
jhnwkmn | 0:97a4f8cc534c | 136 | case 'n': exp->_p++; return sqstd_rex_newnode(exp,'\n'); |
jhnwkmn | 0:97a4f8cc534c | 137 | case 't': exp->_p++; return sqstd_rex_newnode(exp,'\t'); |
jhnwkmn | 0:97a4f8cc534c | 138 | case 'r': exp->_p++; return sqstd_rex_newnode(exp,'\r'); |
jhnwkmn | 0:97a4f8cc534c | 139 | case 'f': exp->_p++; return sqstd_rex_newnode(exp,'\f'); |
jhnwkmn | 0:97a4f8cc534c | 140 | case 'v': exp->_p++; return sqstd_rex_newnode(exp,'\v'); |
jhnwkmn | 0:97a4f8cc534c | 141 | case 'a': case 'A': case 'w': case 'W': case 's': case 'S': |
jhnwkmn | 0:97a4f8cc534c | 142 | case 'd': case 'D': case 'x': case 'X': case 'c': case 'C': |
jhnwkmn | 0:97a4f8cc534c | 143 | case 'p': case 'P': case 'l': case 'u': |
jhnwkmn | 0:97a4f8cc534c | 144 | { |
jhnwkmn | 0:97a4f8cc534c | 145 | t = *exp->_p; exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 146 | return sqstd_rex_charclass(exp,t); |
jhnwkmn | 0:97a4f8cc534c | 147 | } |
jhnwkmn | 0:97a4f8cc534c | 148 | case 'b': |
jhnwkmn | 0:97a4f8cc534c | 149 | case 'B': |
jhnwkmn | 0:97a4f8cc534c | 150 | if(!isclass) { |
jhnwkmn | 0:97a4f8cc534c | 151 | SQInteger node = sqstd_rex_newnode(exp,OP_WB); |
jhnwkmn | 0:97a4f8cc534c | 152 | exp->_nodes[node].left = *exp->_p; |
jhnwkmn | 0:97a4f8cc534c | 153 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 154 | return node; |
jhnwkmn | 0:97a4f8cc534c | 155 | } //else default |
jhnwkmn | 0:97a4f8cc534c | 156 | default: |
jhnwkmn | 0:97a4f8cc534c | 157 | t = *exp->_p; exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 158 | return sqstd_rex_newnode(exp,t); |
jhnwkmn | 0:97a4f8cc534c | 159 | } |
jhnwkmn | 0:97a4f8cc534c | 160 | } |
jhnwkmn | 0:97a4f8cc534c | 161 | else if(!scisprint(*exp->_p)) { |
jhnwkmn | 0:97a4f8cc534c | 162 | |
jhnwkmn | 0:97a4f8cc534c | 163 | sqstd_rex_error(exp,_SC("letter expected")); |
jhnwkmn | 0:97a4f8cc534c | 164 | } |
jhnwkmn | 0:97a4f8cc534c | 165 | t = *exp->_p; exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 166 | return sqstd_rex_newnode(exp,t); |
jhnwkmn | 0:97a4f8cc534c | 167 | } |
jhnwkmn | 0:97a4f8cc534c | 168 | static SQInteger sqstd_rex_class(SQRex *exp) |
jhnwkmn | 0:97a4f8cc534c | 169 | { |
jhnwkmn | 0:97a4f8cc534c | 170 | SQInteger ret = -1; |
jhnwkmn | 0:97a4f8cc534c | 171 | SQInteger first = -1,chain; |
jhnwkmn | 0:97a4f8cc534c | 172 | if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING){ |
jhnwkmn | 0:97a4f8cc534c | 173 | ret = sqstd_rex_newnode(exp,OP_NCLASS); |
jhnwkmn | 0:97a4f8cc534c | 174 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 175 | }else ret = sqstd_rex_newnode(exp,OP_CLASS); |
jhnwkmn | 0:97a4f8cc534c | 176 | |
jhnwkmn | 0:97a4f8cc534c | 177 | if(*exp->_p == ']') sqstd_rex_error(exp,_SC("empty class")); |
jhnwkmn | 0:97a4f8cc534c | 178 | chain = ret; |
jhnwkmn | 0:97a4f8cc534c | 179 | while(*exp->_p != ']' && exp->_p != exp->_eol) { |
jhnwkmn | 0:97a4f8cc534c | 180 | if(*exp->_p == '-' && first != -1){ |
jhnwkmn | 0:97a4f8cc534c | 181 | SQInteger r; |
jhnwkmn | 0:97a4f8cc534c | 182 | if(*exp->_p++ == ']') sqstd_rex_error(exp,_SC("unfinished range")); |
jhnwkmn | 0:97a4f8cc534c | 183 | r = sqstd_rex_newnode(exp,OP_RANGE); |
jhnwkmn | 0:97a4f8cc534c | 184 | if(exp->_nodes[first].type>*exp->_p) sqstd_rex_error(exp,_SC("invalid range")); |
jhnwkmn | 0:97a4f8cc534c | 185 | if(exp->_nodes[first].type == OP_CCLASS) sqstd_rex_error(exp,_SC("cannot use character classes in ranges")); |
jhnwkmn | 0:97a4f8cc534c | 186 | exp->_nodes[r].left = exp->_nodes[first].type; |
jhnwkmn | 0:97a4f8cc534c | 187 | SQInteger t = sqstd_rex_escapechar(exp); |
jhnwkmn | 0:97a4f8cc534c | 188 | exp->_nodes[r].right = t; |
jhnwkmn | 0:97a4f8cc534c | 189 | exp->_nodes[chain].next = r; |
jhnwkmn | 0:97a4f8cc534c | 190 | chain = r; |
jhnwkmn | 0:97a4f8cc534c | 191 | first = -1; |
jhnwkmn | 0:97a4f8cc534c | 192 | } |
jhnwkmn | 0:97a4f8cc534c | 193 | else{ |
jhnwkmn | 0:97a4f8cc534c | 194 | if(first!=-1){ |
jhnwkmn | 0:97a4f8cc534c | 195 | SQInteger c = first; |
jhnwkmn | 0:97a4f8cc534c | 196 | exp->_nodes[chain].next = c; |
jhnwkmn | 0:97a4f8cc534c | 197 | chain = c; |
jhnwkmn | 0:97a4f8cc534c | 198 | first = sqstd_rex_charnode(exp,SQTrue); |
jhnwkmn | 0:97a4f8cc534c | 199 | } |
jhnwkmn | 0:97a4f8cc534c | 200 | else{ |
jhnwkmn | 0:97a4f8cc534c | 201 | first = sqstd_rex_charnode(exp,SQTrue); |
jhnwkmn | 0:97a4f8cc534c | 202 | } |
jhnwkmn | 0:97a4f8cc534c | 203 | } |
jhnwkmn | 0:97a4f8cc534c | 204 | } |
jhnwkmn | 0:97a4f8cc534c | 205 | if(first!=-1){ |
jhnwkmn | 0:97a4f8cc534c | 206 | SQInteger c = first; |
jhnwkmn | 0:97a4f8cc534c | 207 | exp->_nodes[chain].next = c; |
jhnwkmn | 0:97a4f8cc534c | 208 | chain = c; |
jhnwkmn | 0:97a4f8cc534c | 209 | first = -1; |
jhnwkmn | 0:97a4f8cc534c | 210 | } |
jhnwkmn | 0:97a4f8cc534c | 211 | /* hack? */ |
jhnwkmn | 0:97a4f8cc534c | 212 | exp->_nodes[ret].left = exp->_nodes[ret].next; |
jhnwkmn | 0:97a4f8cc534c | 213 | exp->_nodes[ret].next = -1; |
jhnwkmn | 0:97a4f8cc534c | 214 | return ret; |
jhnwkmn | 0:97a4f8cc534c | 215 | } |
jhnwkmn | 0:97a4f8cc534c | 216 | |
jhnwkmn | 0:97a4f8cc534c | 217 | static SQInteger sqstd_rex_parsenumber(SQRex *exp) |
jhnwkmn | 0:97a4f8cc534c | 218 | { |
jhnwkmn | 0:97a4f8cc534c | 219 | SQInteger ret = *exp->_p-'0'; |
jhnwkmn | 0:97a4f8cc534c | 220 | SQInteger positions = 10; |
jhnwkmn | 0:97a4f8cc534c | 221 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 222 | while(isdigit(*exp->_p)) { |
jhnwkmn | 0:97a4f8cc534c | 223 | ret = ret*10+(*exp->_p++-'0'); |
jhnwkmn | 0:97a4f8cc534c | 224 | if(positions==1000000000) sqstd_rex_error(exp,_SC("overflow in numeric constant")); |
jhnwkmn | 0:97a4f8cc534c | 225 | positions *= 10; |
jhnwkmn | 0:97a4f8cc534c | 226 | }; |
jhnwkmn | 0:97a4f8cc534c | 227 | return ret; |
jhnwkmn | 0:97a4f8cc534c | 228 | } |
jhnwkmn | 0:97a4f8cc534c | 229 | |
jhnwkmn | 0:97a4f8cc534c | 230 | static SQInteger sqstd_rex_element(SQRex *exp) |
jhnwkmn | 0:97a4f8cc534c | 231 | { |
jhnwkmn | 0:97a4f8cc534c | 232 | SQInteger ret = -1; |
jhnwkmn | 0:97a4f8cc534c | 233 | switch(*exp->_p) |
jhnwkmn | 0:97a4f8cc534c | 234 | { |
jhnwkmn | 0:97a4f8cc534c | 235 | case '(': { |
jhnwkmn | 0:97a4f8cc534c | 236 | SQInteger expr; |
jhnwkmn | 0:97a4f8cc534c | 237 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 238 | |
jhnwkmn | 0:97a4f8cc534c | 239 | |
jhnwkmn | 0:97a4f8cc534c | 240 | if(*exp->_p =='?') { |
jhnwkmn | 0:97a4f8cc534c | 241 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 242 | sqstd_rex_expect(exp,':'); |
jhnwkmn | 0:97a4f8cc534c | 243 | expr = sqstd_rex_newnode(exp,OP_NOCAPEXPR); |
jhnwkmn | 0:97a4f8cc534c | 244 | } |
jhnwkmn | 0:97a4f8cc534c | 245 | else |
jhnwkmn | 0:97a4f8cc534c | 246 | expr = sqstd_rex_newnode(exp,OP_EXPR); |
jhnwkmn | 0:97a4f8cc534c | 247 | SQInteger newn = sqstd_rex_list(exp); |
jhnwkmn | 0:97a4f8cc534c | 248 | exp->_nodes[expr].left = newn; |
jhnwkmn | 0:97a4f8cc534c | 249 | ret = expr; |
jhnwkmn | 0:97a4f8cc534c | 250 | sqstd_rex_expect(exp,')'); |
jhnwkmn | 0:97a4f8cc534c | 251 | } |
jhnwkmn | 0:97a4f8cc534c | 252 | break; |
jhnwkmn | 0:97a4f8cc534c | 253 | case '[': |
jhnwkmn | 0:97a4f8cc534c | 254 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 255 | ret = sqstd_rex_class(exp); |
jhnwkmn | 0:97a4f8cc534c | 256 | sqstd_rex_expect(exp,']'); |
jhnwkmn | 0:97a4f8cc534c | 257 | break; |
jhnwkmn | 0:97a4f8cc534c | 258 | case SQREX_SYMBOL_END_OF_STRING: exp->_p++; ret = sqstd_rex_newnode(exp,OP_EOL);break; |
jhnwkmn | 0:97a4f8cc534c | 259 | case SQREX_SYMBOL_ANY_CHAR: exp->_p++; ret = sqstd_rex_newnode(exp,OP_DOT);break; |
jhnwkmn | 0:97a4f8cc534c | 260 | default: |
jhnwkmn | 0:97a4f8cc534c | 261 | ret = sqstd_rex_charnode(exp,SQFalse); |
jhnwkmn | 0:97a4f8cc534c | 262 | break; |
jhnwkmn | 0:97a4f8cc534c | 263 | } |
jhnwkmn | 0:97a4f8cc534c | 264 | |
jhnwkmn | 0:97a4f8cc534c | 265 | |
jhnwkmn | 0:97a4f8cc534c | 266 | SQBool isgreedy = SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 267 | unsigned short p0 = 0, p1 = 0; |
jhnwkmn | 0:97a4f8cc534c | 268 | switch(*exp->_p){ |
jhnwkmn | 0:97a4f8cc534c | 269 | case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; isgreedy = SQTrue; break; |
jhnwkmn | 0:97a4f8cc534c | 270 | case SQREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; isgreedy = SQTrue; break; |
jhnwkmn | 0:97a4f8cc534c | 271 | case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; isgreedy = SQTrue; break; |
jhnwkmn | 0:97a4f8cc534c | 272 | case '{': |
jhnwkmn | 0:97a4f8cc534c | 273 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 274 | if(!isdigit(*exp->_p)) sqstd_rex_error(exp,_SC("number expected")); |
jhnwkmn | 0:97a4f8cc534c | 275 | p0 = (unsigned short)sqstd_rex_parsenumber(exp); |
jhnwkmn | 0:97a4f8cc534c | 276 | /*******************************/ |
jhnwkmn | 0:97a4f8cc534c | 277 | switch(*exp->_p) { |
jhnwkmn | 0:97a4f8cc534c | 278 | case '}': |
jhnwkmn | 0:97a4f8cc534c | 279 | p1 = p0; exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 280 | break; |
jhnwkmn | 0:97a4f8cc534c | 281 | case ',': |
jhnwkmn | 0:97a4f8cc534c | 282 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 283 | p1 = 0xFFFF; |
jhnwkmn | 0:97a4f8cc534c | 284 | if(isdigit(*exp->_p)){ |
jhnwkmn | 0:97a4f8cc534c | 285 | p1 = (unsigned short)sqstd_rex_parsenumber(exp); |
jhnwkmn | 0:97a4f8cc534c | 286 | } |
jhnwkmn | 0:97a4f8cc534c | 287 | sqstd_rex_expect(exp,'}'); |
jhnwkmn | 0:97a4f8cc534c | 288 | break; |
jhnwkmn | 0:97a4f8cc534c | 289 | default: |
jhnwkmn | 0:97a4f8cc534c | 290 | sqstd_rex_error(exp,_SC(", or } expected")); |
jhnwkmn | 0:97a4f8cc534c | 291 | } |
jhnwkmn | 0:97a4f8cc534c | 292 | /*******************************/ |
jhnwkmn | 0:97a4f8cc534c | 293 | isgreedy = SQTrue; |
jhnwkmn | 0:97a4f8cc534c | 294 | break; |
jhnwkmn | 0:97a4f8cc534c | 295 | |
jhnwkmn | 0:97a4f8cc534c | 296 | } |
jhnwkmn | 0:97a4f8cc534c | 297 | if(isgreedy) { |
jhnwkmn | 0:97a4f8cc534c | 298 | SQInteger nnode = sqstd_rex_newnode(exp,OP_GREEDY); |
jhnwkmn | 0:97a4f8cc534c | 299 | exp->_nodes[nnode].left = ret; |
jhnwkmn | 0:97a4f8cc534c | 300 | exp->_nodes[nnode].right = ((p0)<<16)|p1; |
jhnwkmn | 0:97a4f8cc534c | 301 | ret = nnode; |
jhnwkmn | 0:97a4f8cc534c | 302 | } |
jhnwkmn | 0:97a4f8cc534c | 303 | |
jhnwkmn | 0:97a4f8cc534c | 304 | if((*exp->_p != SQREX_SYMBOL_BRANCH) && (*exp->_p != ')') && (*exp->_p != SQREX_SYMBOL_GREEDY_ZERO_OR_MORE) && (*exp->_p != SQREX_SYMBOL_GREEDY_ONE_OR_MORE) && (*exp->_p != '\0')) { |
jhnwkmn | 0:97a4f8cc534c | 305 | SQInteger nnode = sqstd_rex_element(exp); |
jhnwkmn | 0:97a4f8cc534c | 306 | exp->_nodes[ret].next = nnode; |
jhnwkmn | 0:97a4f8cc534c | 307 | } |
jhnwkmn | 0:97a4f8cc534c | 308 | |
jhnwkmn | 0:97a4f8cc534c | 309 | return ret; |
jhnwkmn | 0:97a4f8cc534c | 310 | } |
jhnwkmn | 0:97a4f8cc534c | 311 | |
jhnwkmn | 0:97a4f8cc534c | 312 | static SQInteger sqstd_rex_list(SQRex *exp) |
jhnwkmn | 0:97a4f8cc534c | 313 | { |
jhnwkmn | 0:97a4f8cc534c | 314 | SQInteger ret=-1,e; |
jhnwkmn | 0:97a4f8cc534c | 315 | if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING) { |
jhnwkmn | 0:97a4f8cc534c | 316 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 317 | ret = sqstd_rex_newnode(exp,OP_BOL); |
jhnwkmn | 0:97a4f8cc534c | 318 | } |
jhnwkmn | 0:97a4f8cc534c | 319 | e = sqstd_rex_element(exp); |
jhnwkmn | 0:97a4f8cc534c | 320 | if(ret != -1) { |
jhnwkmn | 0:97a4f8cc534c | 321 | exp->_nodes[ret].next = e; |
jhnwkmn | 0:97a4f8cc534c | 322 | } |
jhnwkmn | 0:97a4f8cc534c | 323 | else ret = e; |
jhnwkmn | 0:97a4f8cc534c | 324 | |
jhnwkmn | 0:97a4f8cc534c | 325 | if(*exp->_p == SQREX_SYMBOL_BRANCH) { |
jhnwkmn | 0:97a4f8cc534c | 326 | SQInteger temp,tright; |
jhnwkmn | 0:97a4f8cc534c | 327 | exp->_p++; |
jhnwkmn | 0:97a4f8cc534c | 328 | temp = sqstd_rex_newnode(exp,OP_OR); |
jhnwkmn | 0:97a4f8cc534c | 329 | exp->_nodes[temp].left = ret; |
jhnwkmn | 0:97a4f8cc534c | 330 | tright = sqstd_rex_list(exp); |
jhnwkmn | 0:97a4f8cc534c | 331 | exp->_nodes[temp].right = tright; |
jhnwkmn | 0:97a4f8cc534c | 332 | ret = temp; |
jhnwkmn | 0:97a4f8cc534c | 333 | } |
jhnwkmn | 0:97a4f8cc534c | 334 | return ret; |
jhnwkmn | 0:97a4f8cc534c | 335 | } |
jhnwkmn | 0:97a4f8cc534c | 336 | |
jhnwkmn | 0:97a4f8cc534c | 337 | static SQBool sqstd_rex_matchcclass(SQInteger cclass,SQChar c) |
jhnwkmn | 0:97a4f8cc534c | 338 | { |
jhnwkmn | 0:97a4f8cc534c | 339 | switch(cclass) { |
jhnwkmn | 0:97a4f8cc534c | 340 | case 'a': return isalpha(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 341 | case 'A': return !isalpha(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 342 | case 'w': return (isalnum(c) || c == '_')?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 343 | case 'W': return (!isalnum(c) && c != '_')?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 344 | case 's': return isspace(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 345 | case 'S': return !isspace(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 346 | case 'd': return isdigit(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 347 | case 'D': return !isdigit(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 348 | case 'x': return isxdigit(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 349 | case 'X': return !isxdigit(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 350 | case 'c': return iscntrl(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 351 | case 'C': return !iscntrl(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 352 | case 'p': return ispunct(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 353 | case 'P': return !ispunct(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 354 | case 'l': return islower(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 355 | case 'u': return isupper(c)?SQTrue:SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 356 | } |
jhnwkmn | 0:97a4f8cc534c | 357 | return SQFalse; /*cannot happen*/ |
jhnwkmn | 0:97a4f8cc534c | 358 | } |
jhnwkmn | 0:97a4f8cc534c | 359 | |
jhnwkmn | 0:97a4f8cc534c | 360 | static SQBool sqstd_rex_matchclass(SQRex* exp,SQRexNode *node,SQChar c) |
jhnwkmn | 0:97a4f8cc534c | 361 | { |
jhnwkmn | 0:97a4f8cc534c | 362 | do { |
jhnwkmn | 0:97a4f8cc534c | 363 | switch(node->type) { |
jhnwkmn | 0:97a4f8cc534c | 364 | case OP_RANGE: |
jhnwkmn | 0:97a4f8cc534c | 365 | if(c >= node->left && c <= node->right) return SQTrue; |
jhnwkmn | 0:97a4f8cc534c | 366 | break; |
jhnwkmn | 0:97a4f8cc534c | 367 | case OP_CCLASS: |
jhnwkmn | 0:97a4f8cc534c | 368 | if(sqstd_rex_matchcclass(node->left,c)) return SQTrue; |
jhnwkmn | 0:97a4f8cc534c | 369 | break; |
jhnwkmn | 0:97a4f8cc534c | 370 | default: |
jhnwkmn | 0:97a4f8cc534c | 371 | if(c == node->type)return SQTrue; |
jhnwkmn | 0:97a4f8cc534c | 372 | } |
jhnwkmn | 0:97a4f8cc534c | 373 | } while((node->next != -1) && (node = &exp->_nodes[node->next])); |
jhnwkmn | 0:97a4f8cc534c | 374 | return SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 375 | } |
jhnwkmn | 0:97a4f8cc534c | 376 | |
jhnwkmn | 0:97a4f8cc534c | 377 | static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar *str,SQRexNode *next) |
jhnwkmn | 0:97a4f8cc534c | 378 | { |
jhnwkmn | 0:97a4f8cc534c | 379 | |
jhnwkmn | 0:97a4f8cc534c | 380 | SQRexNodeType type = node->type; |
jhnwkmn | 0:97a4f8cc534c | 381 | switch(type) { |
jhnwkmn | 0:97a4f8cc534c | 382 | case OP_GREEDY: { |
jhnwkmn | 0:97a4f8cc534c | 383 | //SQRexNode *greedystop = (node->next != -1) ? &exp->_nodes[node->next] : NULL; |
jhnwkmn | 0:97a4f8cc534c | 384 | SQRexNode *greedystop = NULL; |
jhnwkmn | 0:97a4f8cc534c | 385 | SQInteger p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0; |
jhnwkmn | 0:97a4f8cc534c | 386 | const SQChar *s=str, *good = str; |
jhnwkmn | 0:97a4f8cc534c | 387 | |
jhnwkmn | 0:97a4f8cc534c | 388 | if(node->next != -1) { |
jhnwkmn | 0:97a4f8cc534c | 389 | greedystop = &exp->_nodes[node->next]; |
jhnwkmn | 0:97a4f8cc534c | 390 | } |
jhnwkmn | 0:97a4f8cc534c | 391 | else { |
jhnwkmn | 0:97a4f8cc534c | 392 | greedystop = next; |
jhnwkmn | 0:97a4f8cc534c | 393 | } |
jhnwkmn | 0:97a4f8cc534c | 394 | |
jhnwkmn | 0:97a4f8cc534c | 395 | while((nmaches == 0xFFFF || nmaches < p1)) { |
jhnwkmn | 0:97a4f8cc534c | 396 | |
jhnwkmn | 0:97a4f8cc534c | 397 | const SQChar *stop; |
jhnwkmn | 0:97a4f8cc534c | 398 | if(!(s = sqstd_rex_matchnode(exp,&exp->_nodes[node->left],s,greedystop))) |
jhnwkmn | 0:97a4f8cc534c | 399 | break; |
jhnwkmn | 0:97a4f8cc534c | 400 | nmaches++; |
jhnwkmn | 0:97a4f8cc534c | 401 | good=s; |
jhnwkmn | 0:97a4f8cc534c | 402 | if(greedystop) { |
jhnwkmn | 0:97a4f8cc534c | 403 | //checks that 0 matches satisfy the expression(if so skips) |
jhnwkmn | 0:97a4f8cc534c | 404 | //if not would always stop(for instance if is a '?') |
jhnwkmn | 0:97a4f8cc534c | 405 | if(greedystop->type != OP_GREEDY || |
jhnwkmn | 0:97a4f8cc534c | 406 | (greedystop->type == OP_GREEDY && ((greedystop->right >> 16)&0x0000FFFF) != 0)) |
jhnwkmn | 0:97a4f8cc534c | 407 | { |
jhnwkmn | 0:97a4f8cc534c | 408 | SQRexNode *gnext = NULL; |
jhnwkmn | 0:97a4f8cc534c | 409 | if(greedystop->next != -1) { |
jhnwkmn | 0:97a4f8cc534c | 410 | gnext = &exp->_nodes[greedystop->next]; |
jhnwkmn | 0:97a4f8cc534c | 411 | }else if(next && next->next != -1){ |
jhnwkmn | 0:97a4f8cc534c | 412 | gnext = &exp->_nodes[next->next]; |
jhnwkmn | 0:97a4f8cc534c | 413 | } |
jhnwkmn | 0:97a4f8cc534c | 414 | stop = sqstd_rex_matchnode(exp,greedystop,s,gnext); |
jhnwkmn | 0:97a4f8cc534c | 415 | if(stop) { |
jhnwkmn | 0:97a4f8cc534c | 416 | //if satisfied stop it |
jhnwkmn | 0:97a4f8cc534c | 417 | if(p0 == p1 && p0 == nmaches) break; |
jhnwkmn | 0:97a4f8cc534c | 418 | else if(nmaches >= p0 && p1 == 0xFFFF) break; |
jhnwkmn | 0:97a4f8cc534c | 419 | else if(nmaches >= p0 && nmaches <= p1) break; |
jhnwkmn | 0:97a4f8cc534c | 420 | } |
jhnwkmn | 0:97a4f8cc534c | 421 | } |
jhnwkmn | 0:97a4f8cc534c | 422 | } |
jhnwkmn | 0:97a4f8cc534c | 423 | |
jhnwkmn | 0:97a4f8cc534c | 424 | if(s >= exp->_eol) |
jhnwkmn | 0:97a4f8cc534c | 425 | break; |
jhnwkmn | 0:97a4f8cc534c | 426 | } |
jhnwkmn | 0:97a4f8cc534c | 427 | if(p0 == p1 && p0 == nmaches) return good; |
jhnwkmn | 0:97a4f8cc534c | 428 | else if(nmaches >= p0 && p1 == 0xFFFF) return good; |
jhnwkmn | 0:97a4f8cc534c | 429 | else if(nmaches >= p0 && nmaches <= p1) return good; |
jhnwkmn | 0:97a4f8cc534c | 430 | return NULL; |
jhnwkmn | 0:97a4f8cc534c | 431 | } |
jhnwkmn | 0:97a4f8cc534c | 432 | case OP_OR: { |
jhnwkmn | 0:97a4f8cc534c | 433 | const SQChar *asd = str; |
jhnwkmn | 0:97a4f8cc534c | 434 | SQRexNode *temp=&exp->_nodes[node->left]; |
jhnwkmn | 0:97a4f8cc534c | 435 | while( (asd = sqstd_rex_matchnode(exp,temp,asd,NULL)) ) { |
jhnwkmn | 0:97a4f8cc534c | 436 | if(temp->next != -1) |
jhnwkmn | 0:97a4f8cc534c | 437 | temp = &exp->_nodes[temp->next]; |
jhnwkmn | 0:97a4f8cc534c | 438 | else |
jhnwkmn | 0:97a4f8cc534c | 439 | return asd; |
jhnwkmn | 0:97a4f8cc534c | 440 | } |
jhnwkmn | 0:97a4f8cc534c | 441 | asd = str; |
jhnwkmn | 0:97a4f8cc534c | 442 | temp = &exp->_nodes[node->right]; |
jhnwkmn | 0:97a4f8cc534c | 443 | while( (asd = sqstd_rex_matchnode(exp,temp,asd,NULL)) ) { |
jhnwkmn | 0:97a4f8cc534c | 444 | if(temp->next != -1) |
jhnwkmn | 0:97a4f8cc534c | 445 | temp = &exp->_nodes[temp->next]; |
jhnwkmn | 0:97a4f8cc534c | 446 | else |
jhnwkmn | 0:97a4f8cc534c | 447 | return asd; |
jhnwkmn | 0:97a4f8cc534c | 448 | } |
jhnwkmn | 0:97a4f8cc534c | 449 | return NULL; |
jhnwkmn | 0:97a4f8cc534c | 450 | break; |
jhnwkmn | 0:97a4f8cc534c | 451 | } |
jhnwkmn | 0:97a4f8cc534c | 452 | case OP_EXPR: |
jhnwkmn | 0:97a4f8cc534c | 453 | case OP_NOCAPEXPR:{ |
jhnwkmn | 0:97a4f8cc534c | 454 | SQRexNode *n = &exp->_nodes[node->left]; |
jhnwkmn | 0:97a4f8cc534c | 455 | const SQChar *cur = str; |
jhnwkmn | 0:97a4f8cc534c | 456 | SQInteger capture = -1; |
jhnwkmn | 0:97a4f8cc534c | 457 | if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) { |
jhnwkmn | 0:97a4f8cc534c | 458 | capture = exp->_currsubexp; |
jhnwkmn | 0:97a4f8cc534c | 459 | exp->_matches[capture].begin = cur; |
jhnwkmn | 0:97a4f8cc534c | 460 | exp->_currsubexp++; |
jhnwkmn | 0:97a4f8cc534c | 461 | } |
jhnwkmn | 0:97a4f8cc534c | 462 | int tempcap = exp->_currsubexp; |
jhnwkmn | 0:97a4f8cc534c | 463 | do { |
jhnwkmn | 0:97a4f8cc534c | 464 | SQRexNode *subnext = NULL; |
jhnwkmn | 0:97a4f8cc534c | 465 | if(n->next != -1) { |
jhnwkmn | 0:97a4f8cc534c | 466 | subnext = &exp->_nodes[n->next]; |
jhnwkmn | 0:97a4f8cc534c | 467 | }else { |
jhnwkmn | 0:97a4f8cc534c | 468 | subnext = next; |
jhnwkmn | 0:97a4f8cc534c | 469 | } |
jhnwkmn | 0:97a4f8cc534c | 470 | if(!(cur = sqstd_rex_matchnode(exp,n,cur,subnext))) { |
jhnwkmn | 0:97a4f8cc534c | 471 | if(capture != -1){ |
jhnwkmn | 0:97a4f8cc534c | 472 | exp->_matches[capture].begin = 0; |
jhnwkmn | 0:97a4f8cc534c | 473 | exp->_matches[capture].len = 0; |
jhnwkmn | 0:97a4f8cc534c | 474 | } |
jhnwkmn | 0:97a4f8cc534c | 475 | return NULL; |
jhnwkmn | 0:97a4f8cc534c | 476 | } |
jhnwkmn | 0:97a4f8cc534c | 477 | } while((n->next != -1) && (n = &exp->_nodes[n->next])); |
jhnwkmn | 0:97a4f8cc534c | 478 | |
jhnwkmn | 0:97a4f8cc534c | 479 | exp->_currsubexp = tempcap; |
jhnwkmn | 0:97a4f8cc534c | 480 | if(capture != -1) |
jhnwkmn | 0:97a4f8cc534c | 481 | exp->_matches[capture].len = cur - exp->_matches[capture].begin; |
jhnwkmn | 0:97a4f8cc534c | 482 | return cur; |
jhnwkmn | 0:97a4f8cc534c | 483 | } |
jhnwkmn | 0:97a4f8cc534c | 484 | case OP_WB: |
jhnwkmn | 0:97a4f8cc534c | 485 | if((str == exp->_bol && !isspace(*str)) |
jhnwkmn | 0:97a4f8cc534c | 486 | || (str == exp->_eol && !isspace(*(str-1))) |
jhnwkmn | 0:97a4f8cc534c | 487 | || (!isspace(*str) && isspace(*(str+1))) |
jhnwkmn | 0:97a4f8cc534c | 488 | || (isspace(*str) && !isspace(*(str+1))) ) { |
jhnwkmn | 0:97a4f8cc534c | 489 | return (node->left == 'b')?str:NULL; |
jhnwkmn | 0:97a4f8cc534c | 490 | } |
jhnwkmn | 0:97a4f8cc534c | 491 | return (node->left == 'b')?NULL:str; |
jhnwkmn | 0:97a4f8cc534c | 492 | case OP_BOL: |
jhnwkmn | 0:97a4f8cc534c | 493 | if(str == exp->_bol) return str; |
jhnwkmn | 0:97a4f8cc534c | 494 | return NULL; |
jhnwkmn | 0:97a4f8cc534c | 495 | case OP_EOL: |
jhnwkmn | 0:97a4f8cc534c | 496 | if(str == exp->_eol) return str; |
jhnwkmn | 0:97a4f8cc534c | 497 | return NULL; |
jhnwkmn | 0:97a4f8cc534c | 498 | case OP_DOT:{ |
jhnwkmn | 0:97a4f8cc534c | 499 | str++; |
jhnwkmn | 0:97a4f8cc534c | 500 | } |
jhnwkmn | 0:97a4f8cc534c | 501 | return str; |
jhnwkmn | 0:97a4f8cc534c | 502 | case OP_NCLASS: |
jhnwkmn | 0:97a4f8cc534c | 503 | case OP_CLASS: |
jhnwkmn | 0:97a4f8cc534c | 504 | if(sqstd_rex_matchclass(exp,&exp->_nodes[node->left],*str)?(type == OP_CLASS?SQTrue:SQFalse):(type == OP_NCLASS?SQTrue:SQFalse)) { |
jhnwkmn | 0:97a4f8cc534c | 505 | str++; |
jhnwkmn | 0:97a4f8cc534c | 506 | return str; |
jhnwkmn | 0:97a4f8cc534c | 507 | } |
jhnwkmn | 0:97a4f8cc534c | 508 | return NULL; |
jhnwkmn | 0:97a4f8cc534c | 509 | case OP_CCLASS: |
jhnwkmn | 0:97a4f8cc534c | 510 | if(sqstd_rex_matchcclass(node->left,*str)) { |
jhnwkmn | 0:97a4f8cc534c | 511 | str++; |
jhnwkmn | 0:97a4f8cc534c | 512 | return str; |
jhnwkmn | 0:97a4f8cc534c | 513 | } |
jhnwkmn | 0:97a4f8cc534c | 514 | return NULL; |
jhnwkmn | 0:97a4f8cc534c | 515 | default: /* char */ |
jhnwkmn | 0:97a4f8cc534c | 516 | if(*str != node->type) return NULL; |
jhnwkmn | 0:97a4f8cc534c | 517 | str++; |
jhnwkmn | 0:97a4f8cc534c | 518 | return str; |
jhnwkmn | 0:97a4f8cc534c | 519 | } |
jhnwkmn | 0:97a4f8cc534c | 520 | return NULL; |
jhnwkmn | 0:97a4f8cc534c | 521 | } |
jhnwkmn | 0:97a4f8cc534c | 522 | |
jhnwkmn | 0:97a4f8cc534c | 523 | /* public api */ |
jhnwkmn | 0:97a4f8cc534c | 524 | SQRex *sqstd_rex_compile(const SQChar *pattern,const SQChar **error) |
jhnwkmn | 0:97a4f8cc534c | 525 | { |
jhnwkmn | 0:97a4f8cc534c | 526 | SQRex *exp = (SQRex *)sq_malloc(sizeof(SQRex)); |
jhnwkmn | 0:97a4f8cc534c | 527 | exp->_eol = exp->_bol = NULL; |
jhnwkmn | 0:97a4f8cc534c | 528 | exp->_p = pattern; |
jhnwkmn | 0:97a4f8cc534c | 529 | exp->_nallocated = (SQInteger)scstrlen(pattern) * sizeof(SQChar); |
jhnwkmn | 0:97a4f8cc534c | 530 | exp->_nodes = (SQRexNode *)sq_malloc(exp->_nallocated * sizeof(SQRexNode)); |
jhnwkmn | 0:97a4f8cc534c | 531 | exp->_nsize = 0; |
jhnwkmn | 0:97a4f8cc534c | 532 | exp->_matches = 0; |
jhnwkmn | 0:97a4f8cc534c | 533 | exp->_nsubexpr = 0; |
jhnwkmn | 0:97a4f8cc534c | 534 | exp->_first = sqstd_rex_newnode(exp,OP_EXPR); |
jhnwkmn | 0:97a4f8cc534c | 535 | exp->_error = error; |
jhnwkmn | 0:97a4f8cc534c | 536 | exp->_jmpbuf = sq_malloc(sizeof(jmp_buf)); |
jhnwkmn | 0:97a4f8cc534c | 537 | if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) { |
jhnwkmn | 0:97a4f8cc534c | 538 | SQInteger res = sqstd_rex_list(exp); |
jhnwkmn | 0:97a4f8cc534c | 539 | exp->_nodes[exp->_first].left = res; |
jhnwkmn | 0:97a4f8cc534c | 540 | if(*exp->_p!='\0') |
jhnwkmn | 0:97a4f8cc534c | 541 | sqstd_rex_error(exp,_SC("unexpected character")); |
jhnwkmn | 0:97a4f8cc534c | 542 | #ifdef _DEBUG |
jhnwkmn | 0:97a4f8cc534c | 543 | { |
jhnwkmn | 0:97a4f8cc534c | 544 | SQInteger nsize,i; |
jhnwkmn | 0:97a4f8cc534c | 545 | SQRexNode *t; |
jhnwkmn | 0:97a4f8cc534c | 546 | nsize = exp->_nsize; |
jhnwkmn | 0:97a4f8cc534c | 547 | t = &exp->_nodes[0]; |
jhnwkmn | 0:97a4f8cc534c | 548 | scprintf(_SC("\n")); |
jhnwkmn | 0:97a4f8cc534c | 549 | for(i = 0;i < nsize; i++) { |
jhnwkmn | 0:97a4f8cc534c | 550 | if(exp->_nodes[i].type>MAX_CHAR) |
jhnwkmn | 0:97a4f8cc534c | 551 | scprintf(_SC("[%02d] %10s "),i,g_nnames[exp->_nodes[i].type-MAX_CHAR]); |
jhnwkmn | 0:97a4f8cc534c | 552 | else |
jhnwkmn | 0:97a4f8cc534c | 553 | scprintf(_SC("[%02d] %10c "),i,exp->_nodes[i].type); |
jhnwkmn | 0:97a4f8cc534c | 554 | scprintf(_SC("left %02d right %02d next %02d\n"),exp->_nodes[i].left,exp->_nodes[i].right,exp->_nodes[i].next); |
jhnwkmn | 0:97a4f8cc534c | 555 | } |
jhnwkmn | 0:97a4f8cc534c | 556 | scprintf(_SC("\n")); |
jhnwkmn | 0:97a4f8cc534c | 557 | } |
jhnwkmn | 0:97a4f8cc534c | 558 | #endif |
jhnwkmn | 0:97a4f8cc534c | 559 | exp->_matches = (SQRexMatch *) sq_malloc(exp->_nsubexpr * sizeof(SQRexMatch)); |
jhnwkmn | 0:97a4f8cc534c | 560 | memset(exp->_matches,0,exp->_nsubexpr * sizeof(SQRexMatch)); |
jhnwkmn | 0:97a4f8cc534c | 561 | } |
jhnwkmn | 0:97a4f8cc534c | 562 | else{ |
jhnwkmn | 0:97a4f8cc534c | 563 | sqstd_rex_free(exp); |
jhnwkmn | 0:97a4f8cc534c | 564 | return NULL; |
jhnwkmn | 0:97a4f8cc534c | 565 | } |
jhnwkmn | 0:97a4f8cc534c | 566 | return exp; |
jhnwkmn | 0:97a4f8cc534c | 567 | } |
jhnwkmn | 0:97a4f8cc534c | 568 | |
jhnwkmn | 0:97a4f8cc534c | 569 | void sqstd_rex_free(SQRex *exp) |
jhnwkmn | 0:97a4f8cc534c | 570 | { |
jhnwkmn | 0:97a4f8cc534c | 571 | if(exp) { |
jhnwkmn | 0:97a4f8cc534c | 572 | if(exp->_nodes) sq_free(exp->_nodes,exp->_nallocated * sizeof(SQRexNode)); |
jhnwkmn | 0:97a4f8cc534c | 573 | if(exp->_jmpbuf) sq_free(exp->_jmpbuf,sizeof(jmp_buf)); |
jhnwkmn | 0:97a4f8cc534c | 574 | if(exp->_matches) sq_free(exp->_matches,exp->_nsubexpr * sizeof(SQRexMatch)); |
jhnwkmn | 0:97a4f8cc534c | 575 | sq_free(exp,sizeof(SQRex)); |
jhnwkmn | 0:97a4f8cc534c | 576 | } |
jhnwkmn | 0:97a4f8cc534c | 577 | } |
jhnwkmn | 0:97a4f8cc534c | 578 | |
jhnwkmn | 0:97a4f8cc534c | 579 | SQBool sqstd_rex_match(SQRex* exp,const SQChar* text) |
jhnwkmn | 0:97a4f8cc534c | 580 | { |
jhnwkmn | 0:97a4f8cc534c | 581 | const SQChar* res = NULL; |
jhnwkmn | 0:97a4f8cc534c | 582 | exp->_bol = text; |
jhnwkmn | 0:97a4f8cc534c | 583 | exp->_eol = text + scstrlen(text); |
jhnwkmn | 0:97a4f8cc534c | 584 | exp->_currsubexp = 0; |
jhnwkmn | 0:97a4f8cc534c | 585 | res = sqstd_rex_matchnode(exp,exp->_nodes,text,NULL); |
jhnwkmn | 0:97a4f8cc534c | 586 | if(res == NULL || res != exp->_eol) |
jhnwkmn | 0:97a4f8cc534c | 587 | return SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 588 | return SQTrue; |
jhnwkmn | 0:97a4f8cc534c | 589 | } |
jhnwkmn | 0:97a4f8cc534c | 590 | |
jhnwkmn | 0:97a4f8cc534c | 591 | SQBool sqstd_rex_searchrange(SQRex* exp,const SQChar* text_begin,const SQChar* text_end,const SQChar** out_begin, const SQChar** out_end) |
jhnwkmn | 0:97a4f8cc534c | 592 | { |
jhnwkmn | 0:97a4f8cc534c | 593 | const SQChar *cur = NULL; |
jhnwkmn | 0:97a4f8cc534c | 594 | SQInteger node = exp->_first; |
jhnwkmn | 0:97a4f8cc534c | 595 | if(text_begin >= text_end) return SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 596 | exp->_bol = text_begin; |
jhnwkmn | 0:97a4f8cc534c | 597 | exp->_eol = text_end; |
jhnwkmn | 0:97a4f8cc534c | 598 | do { |
jhnwkmn | 0:97a4f8cc534c | 599 | cur = text_begin; |
jhnwkmn | 0:97a4f8cc534c | 600 | while(node != -1) { |
jhnwkmn | 0:97a4f8cc534c | 601 | exp->_currsubexp = 0; |
jhnwkmn | 0:97a4f8cc534c | 602 | cur = sqstd_rex_matchnode(exp,&exp->_nodes[node],cur,NULL); |
jhnwkmn | 0:97a4f8cc534c | 603 | if(!cur) |
jhnwkmn | 0:97a4f8cc534c | 604 | break; |
jhnwkmn | 0:97a4f8cc534c | 605 | node = exp->_nodes[node].next; |
jhnwkmn | 0:97a4f8cc534c | 606 | } |
jhnwkmn | 0:97a4f8cc534c | 607 | text_begin++; |
jhnwkmn | 0:97a4f8cc534c | 608 | } while(cur == NULL && text_begin != text_end); |
jhnwkmn | 0:97a4f8cc534c | 609 | |
jhnwkmn | 0:97a4f8cc534c | 610 | if(cur == NULL) |
jhnwkmn | 0:97a4f8cc534c | 611 | return SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 612 | |
jhnwkmn | 0:97a4f8cc534c | 613 | --text_begin; |
jhnwkmn | 0:97a4f8cc534c | 614 | |
jhnwkmn | 0:97a4f8cc534c | 615 | if(out_begin) *out_begin = text_begin; |
jhnwkmn | 0:97a4f8cc534c | 616 | if(out_end) *out_end = cur; |
jhnwkmn | 0:97a4f8cc534c | 617 | return SQTrue; |
jhnwkmn | 0:97a4f8cc534c | 618 | } |
jhnwkmn | 0:97a4f8cc534c | 619 | |
jhnwkmn | 0:97a4f8cc534c | 620 | SQBool sqstd_rex_search(SQRex* exp,const SQChar* text, const SQChar** out_begin, const SQChar** out_end) |
jhnwkmn | 0:97a4f8cc534c | 621 | { |
jhnwkmn | 0:97a4f8cc534c | 622 | return sqstd_rex_searchrange(exp,text,text + scstrlen(text),out_begin,out_end); |
jhnwkmn | 0:97a4f8cc534c | 623 | } |
jhnwkmn | 0:97a4f8cc534c | 624 | |
jhnwkmn | 0:97a4f8cc534c | 625 | SQInteger sqstd_rex_getsubexpcount(SQRex* exp) |
jhnwkmn | 0:97a4f8cc534c | 626 | { |
jhnwkmn | 0:97a4f8cc534c | 627 | return exp->_nsubexpr; |
jhnwkmn | 0:97a4f8cc534c | 628 | } |
jhnwkmn | 0:97a4f8cc534c | 629 | |
jhnwkmn | 0:97a4f8cc534c | 630 | SQBool sqstd_rex_getsubexp(SQRex* exp, SQInteger n, SQRexMatch *subexp) |
jhnwkmn | 0:97a4f8cc534c | 631 | { |
jhnwkmn | 0:97a4f8cc534c | 632 | if( n<0 || n >= exp->_nsubexpr) return SQFalse; |
jhnwkmn | 0:97a4f8cc534c | 633 | *subexp = exp->_matches[n]; |
jhnwkmn | 0:97a4f8cc534c | 634 | return SQTrue; |
jhnwkmn | 0:97a4f8cc534c | 635 | } |
jhnwkmn | 0:97a4f8cc534c | 636 |