Publishing SLRE regex - as a library rather than program as is the case for the existing publish
Dependents: EMBEDDED_CW2 EMBEDDED_CW2_Final USBSerialIORemotizer
slre.cpp@0:bc6b8c94c371, 2011-04-08 (annotated)
- Committer:
- andrewbonney
- Date:
- Fri Apr 08 14:41:11 2011 +0000
- Revision:
- 0:bc6b8c94c371
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
andrewbonney | 0:bc6b8c94c371 | 1 | /* |
andrewbonney | 0:bc6b8c94c371 | 2 | * Copyright (c) 2004-2005 Sergey Lyubka <valenok@gmail.com> |
andrewbonney | 0:bc6b8c94c371 | 3 | * All rights reserved |
andrewbonney | 0:bc6b8c94c371 | 4 | * |
andrewbonney | 0:bc6b8c94c371 | 5 | * "THE BEER-WARE LICENSE" (Revision 42): |
andrewbonney | 0:bc6b8c94c371 | 6 | * Sergey Lyubka wrote this file. As long as you retain this notice you |
andrewbonney | 0:bc6b8c94c371 | 7 | * can do whatever you want with this stuff. If we meet some day, and you think |
andrewbonney | 0:bc6b8c94c371 | 8 | * this stuff is worth it, you can buy me a beer in return. |
andrewbonney | 0:bc6b8c94c371 | 9 | */ |
andrewbonney | 0:bc6b8c94c371 | 10 | |
andrewbonney | 0:bc6b8c94c371 | 11 | #include <stdio.h> |
andrewbonney | 0:bc6b8c94c371 | 12 | #include <assert.h> |
andrewbonney | 0:bc6b8c94c371 | 13 | #include <ctype.h> |
andrewbonney | 0:bc6b8c94c371 | 14 | #include <stdlib.h> |
andrewbonney | 0:bc6b8c94c371 | 15 | #include <string.h> |
andrewbonney | 0:bc6b8c94c371 | 16 | #include <errno.h> |
andrewbonney | 0:bc6b8c94c371 | 17 | |
andrewbonney | 0:bc6b8c94c371 | 18 | #include "slre.h" |
andrewbonney | 0:bc6b8c94c371 | 19 | |
andrewbonney | 0:bc6b8c94c371 | 20 | enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL, |
andrewbonney | 0:bc6b8c94c371 | 21 | STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT}; |
andrewbonney | 0:bc6b8c94c371 | 22 | |
andrewbonney | 0:bc6b8c94c371 | 23 | static struct { |
andrewbonney | 0:bc6b8c94c371 | 24 | const char *name; |
andrewbonney | 0:bc6b8c94c371 | 25 | int narg; |
andrewbonney | 0:bc6b8c94c371 | 26 | const char *flags; |
andrewbonney | 0:bc6b8c94c371 | 27 | } opcodes[] = { |
andrewbonney | 0:bc6b8c94c371 | 28 | {"END", 0, ""}, /* End of code block or program */ |
andrewbonney | 0:bc6b8c94c371 | 29 | {"BRANCH", 2, "oo"}, /* Alternative operator, "|" */ |
andrewbonney | 0:bc6b8c94c371 | 30 | {"ANY", 0, ""}, /* Match any character, "." */ |
andrewbonney | 0:bc6b8c94c371 | 31 | {"EXACT", 2, "d"}, /* Match exact string */ |
andrewbonney | 0:bc6b8c94c371 | 32 | {"ANYOF", 2, "D"}, /* Match any from set, "[]" */ |
andrewbonney | 0:bc6b8c94c371 | 33 | {"ANYBUT", 2, "D"}, /* Match any but from set, "[^]"*/ |
andrewbonney | 0:bc6b8c94c371 | 34 | {"OPEN ", 1, "i"}, /* Capture start, "(" */ |
andrewbonney | 0:bc6b8c94c371 | 35 | {"CLOSE", 1, "i"}, /* Capture end, ")" */ |
andrewbonney | 0:bc6b8c94c371 | 36 | {"BOL", 0, ""}, /* Beginning of string, "^" */ |
andrewbonney | 0:bc6b8c94c371 | 37 | {"EOL", 0, ""}, /* End of string, "$" */ |
andrewbonney | 0:bc6b8c94c371 | 38 | {"STAR", 1, "o"}, /* Match zero or more times "*" */ |
andrewbonney | 0:bc6b8c94c371 | 39 | {"PLUS", 1, "o"}, /* Match one or more times, "+" */ |
andrewbonney | 0:bc6b8c94c371 | 40 | {"STARQ", 1, "o"}, /* Non-greedy STAR, "*?" */ |
andrewbonney | 0:bc6b8c94c371 | 41 | {"PLUSQ", 1, "o"}, /* Non-greedy PLUS, "+?" */ |
andrewbonney | 0:bc6b8c94c371 | 42 | {"QUEST", 1, "o"}, /* Match zero or one time, "?" */ |
andrewbonney | 0:bc6b8c94c371 | 43 | {"SPACE", 0, ""}, /* Match whitespace, "\s" */ |
andrewbonney | 0:bc6b8c94c371 | 44 | {"NONSPACE", 0, ""}, /* Match non-space, "\S" */ |
andrewbonney | 0:bc6b8c94c371 | 45 | {"DIGIT", 0, ""} /* Match digit, "\d" */ |
andrewbonney | 0:bc6b8c94c371 | 46 | }; |
andrewbonney | 0:bc6b8c94c371 | 47 | |
andrewbonney | 0:bc6b8c94c371 | 48 | /* |
andrewbonney | 0:bc6b8c94c371 | 49 | * Commands and operands are all unsigned char (1 byte long). All code offsets |
andrewbonney | 0:bc6b8c94c371 | 50 | * are relative to current address, and positive (always point forward). Data |
andrewbonney | 0:bc6b8c94c371 | 51 | * offsets are absolute. Commands with operands: |
andrewbonney | 0:bc6b8c94c371 | 52 | * |
andrewbonney | 0:bc6b8c94c371 | 53 | * BRANCH offset1 offset2 |
andrewbonney | 0:bc6b8c94c371 | 54 | * Try to match the code block that follows the BRANCH instruction |
andrewbonney | 0:bc6b8c94c371 | 55 | * (code block ends with END). If no match, try to match code block that |
andrewbonney | 0:bc6b8c94c371 | 56 | * starts at offset1. If either of these match, jump to offset2. |
andrewbonney | 0:bc6b8c94c371 | 57 | * |
andrewbonney | 0:bc6b8c94c371 | 58 | * EXACT data_offset data_length |
andrewbonney | 0:bc6b8c94c371 | 59 | * Try to match exact string. String is recorded in data section from |
andrewbonney | 0:bc6b8c94c371 | 60 | * data_offset, and has length data_length. |
andrewbonney | 0:bc6b8c94c371 | 61 | * |
andrewbonney | 0:bc6b8c94c371 | 62 | * OPEN capture_number, CLOSE capture_number |
andrewbonney | 0:bc6b8c94c371 | 63 | * If the user have passed 'struct cap' array for captures, OPEN |
andrewbonney | 0:bc6b8c94c371 | 64 | * records the beginning of the matched substring (cap->ptr), CLOSE |
andrewbonney | 0:bc6b8c94c371 | 65 | * sets the length (cap->len) for respective capture_number. |
andrewbonney | 0:bc6b8c94c371 | 66 | * |
andrewbonney | 0:bc6b8c94c371 | 67 | * STAR code_offset, PLUS code_offset, QUEST code_offset |
andrewbonney | 0:bc6b8c94c371 | 68 | * *, +, ?, respectively. Try to gobble as much as possible from the |
andrewbonney | 0:bc6b8c94c371 | 69 | * matched buffer, until code block that follows these instructions |
andrewbonney | 0:bc6b8c94c371 | 70 | * matches. When the longest possible string is matched, |
andrewbonney | 0:bc6b8c94c371 | 71 | * jump to code_offset |
andrewbonney | 0:bc6b8c94c371 | 72 | * |
andrewbonney | 0:bc6b8c94c371 | 73 | * STARQ, PLUSQ are non-greedy versions of STAR and PLUS. |
andrewbonney | 0:bc6b8c94c371 | 74 | */ |
andrewbonney | 0:bc6b8c94c371 | 75 | |
andrewbonney | 0:bc6b8c94c371 | 76 | static const char *meta_chars = "|.^$*+?()[\\"; |
andrewbonney | 0:bc6b8c94c371 | 77 | |
andrewbonney | 0:bc6b8c94c371 | 78 | static void |
andrewbonney | 0:bc6b8c94c371 | 79 | print_character_set(FILE *fp, const unsigned char *p, int len) |
andrewbonney | 0:bc6b8c94c371 | 80 | { |
andrewbonney | 0:bc6b8c94c371 | 81 | int i; |
andrewbonney | 0:bc6b8c94c371 | 82 | |
andrewbonney | 0:bc6b8c94c371 | 83 | for (i = 0; i < len; i++) { |
andrewbonney | 0:bc6b8c94c371 | 84 | if (i > 0) |
andrewbonney | 0:bc6b8c94c371 | 85 | (void) fputc(',', fp); |
andrewbonney | 0:bc6b8c94c371 | 86 | if (p[i] == 0) { |
andrewbonney | 0:bc6b8c94c371 | 87 | i++; |
andrewbonney | 0:bc6b8c94c371 | 88 | if (p[i] == 0) |
andrewbonney | 0:bc6b8c94c371 | 89 | (void) fprintf(fp, "\\x%02x", p[i]); |
andrewbonney | 0:bc6b8c94c371 | 90 | else |
andrewbonney | 0:bc6b8c94c371 | 91 | (void) fprintf(fp, "%s", opcodes[p[i]].name); |
andrewbonney | 0:bc6b8c94c371 | 92 | } else if (isprint(p[i])) { |
andrewbonney | 0:bc6b8c94c371 | 93 | (void) fputc(p[i], fp); |
andrewbonney | 0:bc6b8c94c371 | 94 | } else { |
andrewbonney | 0:bc6b8c94c371 | 95 | (void) fprintf(fp,"\\x%02x", p[i]); |
andrewbonney | 0:bc6b8c94c371 | 96 | } |
andrewbonney | 0:bc6b8c94c371 | 97 | } |
andrewbonney | 0:bc6b8c94c371 | 98 | } |
andrewbonney | 0:bc6b8c94c371 | 99 | |
andrewbonney | 0:bc6b8c94c371 | 100 | void |
andrewbonney | 0:bc6b8c94c371 | 101 | slre_dump(const struct slre *r, FILE *fp) |
andrewbonney | 0:bc6b8c94c371 | 102 | { |
andrewbonney | 0:bc6b8c94c371 | 103 | int i, j, ch, op, pc; |
andrewbonney | 0:bc6b8c94c371 | 104 | |
andrewbonney | 0:bc6b8c94c371 | 105 | for (pc = 0; pc < r->code_size; pc++) { |
andrewbonney | 0:bc6b8c94c371 | 106 | |
andrewbonney | 0:bc6b8c94c371 | 107 | op = r->code[pc]; |
andrewbonney | 0:bc6b8c94c371 | 108 | (void) fprintf(fp, "%3d %s ", pc, opcodes[op].name); |
andrewbonney | 0:bc6b8c94c371 | 109 | |
andrewbonney | 0:bc6b8c94c371 | 110 | for (i = 0; opcodes[op].flags[i] != '\0'; i++) |
andrewbonney | 0:bc6b8c94c371 | 111 | switch (opcodes[op].flags[i]) { |
andrewbonney | 0:bc6b8c94c371 | 112 | case 'i': |
andrewbonney | 0:bc6b8c94c371 | 113 | (void) fprintf(fp, "%d ", r->code[pc + 1]); |
andrewbonney | 0:bc6b8c94c371 | 114 | pc++; |
andrewbonney | 0:bc6b8c94c371 | 115 | break; |
andrewbonney | 0:bc6b8c94c371 | 116 | case 'o': |
andrewbonney | 0:bc6b8c94c371 | 117 | (void) fprintf(fp, "%d ", |
andrewbonney | 0:bc6b8c94c371 | 118 | pc + r->code[pc + 1] - i); |
andrewbonney | 0:bc6b8c94c371 | 119 | pc++; |
andrewbonney | 0:bc6b8c94c371 | 120 | break; |
andrewbonney | 0:bc6b8c94c371 | 121 | case 'D': |
andrewbonney | 0:bc6b8c94c371 | 122 | print_character_set(fp, r->data + |
andrewbonney | 0:bc6b8c94c371 | 123 | r->code[pc + 1], r->code[pc + 2]); |
andrewbonney | 0:bc6b8c94c371 | 124 | pc += 2; |
andrewbonney | 0:bc6b8c94c371 | 125 | break; |
andrewbonney | 0:bc6b8c94c371 | 126 | case 'd': |
andrewbonney | 0:bc6b8c94c371 | 127 | (void) fputc('"', fp); |
andrewbonney | 0:bc6b8c94c371 | 128 | for (j = 0; j < r->code[pc + 2]; j++) { |
andrewbonney | 0:bc6b8c94c371 | 129 | ch = r->data[r->code[pc + 1] + j]; |
andrewbonney | 0:bc6b8c94c371 | 130 | if (isprint(ch)) |
andrewbonney | 0:bc6b8c94c371 | 131 | (void) fputc(ch, fp); |
andrewbonney | 0:bc6b8c94c371 | 132 | else |
andrewbonney | 0:bc6b8c94c371 | 133 | (void) fprintf(fp,"\\x%02x",ch); |
andrewbonney | 0:bc6b8c94c371 | 134 | } |
andrewbonney | 0:bc6b8c94c371 | 135 | (void) fputc('"', fp); |
andrewbonney | 0:bc6b8c94c371 | 136 | pc += 2; |
andrewbonney | 0:bc6b8c94c371 | 137 | break; |
andrewbonney | 0:bc6b8c94c371 | 138 | } |
andrewbonney | 0:bc6b8c94c371 | 139 | |
andrewbonney | 0:bc6b8c94c371 | 140 | (void) fputc('\n', fp); |
andrewbonney | 0:bc6b8c94c371 | 141 | } |
andrewbonney | 0:bc6b8c94c371 | 142 | } |
andrewbonney | 0:bc6b8c94c371 | 143 | |
andrewbonney | 0:bc6b8c94c371 | 144 | static void |
andrewbonney | 0:bc6b8c94c371 | 145 | set_jump_offset(struct slre *r, int pc, int offset) |
andrewbonney | 0:bc6b8c94c371 | 146 | { |
andrewbonney | 0:bc6b8c94c371 | 147 | assert(offset < r->code_size); |
andrewbonney | 0:bc6b8c94c371 | 148 | |
andrewbonney | 0:bc6b8c94c371 | 149 | if (r->code_size - offset > 0xff) { |
andrewbonney | 0:bc6b8c94c371 | 150 | r->err_str = "Jump offset is too big"; |
andrewbonney | 0:bc6b8c94c371 | 151 | } else { |
andrewbonney | 0:bc6b8c94c371 | 152 | r->code[pc] = (unsigned char) (r->code_size - offset); |
andrewbonney | 0:bc6b8c94c371 | 153 | } |
andrewbonney | 0:bc6b8c94c371 | 154 | } |
andrewbonney | 0:bc6b8c94c371 | 155 | |
andrewbonney | 0:bc6b8c94c371 | 156 | static void |
andrewbonney | 0:bc6b8c94c371 | 157 | emit(struct slre *r, int code) |
andrewbonney | 0:bc6b8c94c371 | 158 | { |
andrewbonney | 0:bc6b8c94c371 | 159 | if (r->code_size >= (int) (sizeof(r->code) / sizeof(r->code[0]))) |
andrewbonney | 0:bc6b8c94c371 | 160 | r->err_str = "RE is too long (code overflow)"; |
andrewbonney | 0:bc6b8c94c371 | 161 | else |
andrewbonney | 0:bc6b8c94c371 | 162 | r->code[r->code_size++] = (unsigned char) code; |
andrewbonney | 0:bc6b8c94c371 | 163 | } |
andrewbonney | 0:bc6b8c94c371 | 164 | |
andrewbonney | 0:bc6b8c94c371 | 165 | static void |
andrewbonney | 0:bc6b8c94c371 | 166 | store_char_in_data(struct slre *r, int ch) |
andrewbonney | 0:bc6b8c94c371 | 167 | { |
andrewbonney | 0:bc6b8c94c371 | 168 | if (r->data_size >= (int) sizeof(r->data)) |
andrewbonney | 0:bc6b8c94c371 | 169 | r->err_str = "RE is too long (data overflow)"; |
andrewbonney | 0:bc6b8c94c371 | 170 | else |
andrewbonney | 0:bc6b8c94c371 | 171 | r->data[r->data_size++] = ch; |
andrewbonney | 0:bc6b8c94c371 | 172 | } |
andrewbonney | 0:bc6b8c94c371 | 173 | |
andrewbonney | 0:bc6b8c94c371 | 174 | static void |
andrewbonney | 0:bc6b8c94c371 | 175 | exact(struct slre *r, const char **re) |
andrewbonney | 0:bc6b8c94c371 | 176 | { |
andrewbonney | 0:bc6b8c94c371 | 177 | int old_data_size = r->data_size; |
andrewbonney | 0:bc6b8c94c371 | 178 | |
andrewbonney | 0:bc6b8c94c371 | 179 | while (**re != '\0' && (strchr(meta_chars, **re)) == NULL) |
andrewbonney | 0:bc6b8c94c371 | 180 | store_char_in_data(r, *(*re)++); |
andrewbonney | 0:bc6b8c94c371 | 181 | |
andrewbonney | 0:bc6b8c94c371 | 182 | emit(r, EXACT); |
andrewbonney | 0:bc6b8c94c371 | 183 | emit(r, old_data_size); |
andrewbonney | 0:bc6b8c94c371 | 184 | emit(r, r->data_size - old_data_size); |
andrewbonney | 0:bc6b8c94c371 | 185 | } |
andrewbonney | 0:bc6b8c94c371 | 186 | |
andrewbonney | 0:bc6b8c94c371 | 187 | static int |
andrewbonney | 0:bc6b8c94c371 | 188 | get_escape_char(const char **re) |
andrewbonney | 0:bc6b8c94c371 | 189 | { |
andrewbonney | 0:bc6b8c94c371 | 190 | int res; |
andrewbonney | 0:bc6b8c94c371 | 191 | |
andrewbonney | 0:bc6b8c94c371 | 192 | switch (*(*re)++) { |
andrewbonney | 0:bc6b8c94c371 | 193 | case 'n': res = '\n'; break; |
andrewbonney | 0:bc6b8c94c371 | 194 | case 'r': res = '\r'; break; |
andrewbonney | 0:bc6b8c94c371 | 195 | case 't': res = '\t'; break; |
andrewbonney | 0:bc6b8c94c371 | 196 | case '0': res = 0; break; |
andrewbonney | 0:bc6b8c94c371 | 197 | case 'S': res = NONSPACE << 8; break; |
andrewbonney | 0:bc6b8c94c371 | 198 | case 's': res = SPACE << 8; break; |
andrewbonney | 0:bc6b8c94c371 | 199 | case 'd': res = DIGIT << 8; break; |
andrewbonney | 0:bc6b8c94c371 | 200 | default: res = (*re)[-1]; break; |
andrewbonney | 0:bc6b8c94c371 | 201 | } |
andrewbonney | 0:bc6b8c94c371 | 202 | |
andrewbonney | 0:bc6b8c94c371 | 203 | return (res); |
andrewbonney | 0:bc6b8c94c371 | 204 | } |
andrewbonney | 0:bc6b8c94c371 | 205 | |
andrewbonney | 0:bc6b8c94c371 | 206 | static void |
andrewbonney | 0:bc6b8c94c371 | 207 | anyof(struct slre *r, const char **re) |
andrewbonney | 0:bc6b8c94c371 | 208 | { |
andrewbonney | 0:bc6b8c94c371 | 209 | int esc, old_data_size = r->data_size, op = ANYOF; |
andrewbonney | 0:bc6b8c94c371 | 210 | |
andrewbonney | 0:bc6b8c94c371 | 211 | if (**re == '^') { |
andrewbonney | 0:bc6b8c94c371 | 212 | op = ANYBUT; |
andrewbonney | 0:bc6b8c94c371 | 213 | (*re)++; |
andrewbonney | 0:bc6b8c94c371 | 214 | } |
andrewbonney | 0:bc6b8c94c371 | 215 | |
andrewbonney | 0:bc6b8c94c371 | 216 | while (**re != '\0') |
andrewbonney | 0:bc6b8c94c371 | 217 | |
andrewbonney | 0:bc6b8c94c371 | 218 | switch (*(*re)++) { |
andrewbonney | 0:bc6b8c94c371 | 219 | case ']': |
andrewbonney | 0:bc6b8c94c371 | 220 | emit(r, op); |
andrewbonney | 0:bc6b8c94c371 | 221 | emit(r, old_data_size); |
andrewbonney | 0:bc6b8c94c371 | 222 | emit(r, r->data_size - old_data_size); |
andrewbonney | 0:bc6b8c94c371 | 223 | return; |
andrewbonney | 0:bc6b8c94c371 | 224 | /* NOTREACHED */ |
andrewbonney | 0:bc6b8c94c371 | 225 | break; |
andrewbonney | 0:bc6b8c94c371 | 226 | case '\\': |
andrewbonney | 0:bc6b8c94c371 | 227 | esc = get_escape_char(re); |
andrewbonney | 0:bc6b8c94c371 | 228 | if ((esc & 0xff) == 0) { |
andrewbonney | 0:bc6b8c94c371 | 229 | store_char_in_data(r, 0); |
andrewbonney | 0:bc6b8c94c371 | 230 | store_char_in_data(r, esc >> 8); |
andrewbonney | 0:bc6b8c94c371 | 231 | } else { |
andrewbonney | 0:bc6b8c94c371 | 232 | store_char_in_data(r, esc); |
andrewbonney | 0:bc6b8c94c371 | 233 | } |
andrewbonney | 0:bc6b8c94c371 | 234 | break; |
andrewbonney | 0:bc6b8c94c371 | 235 | default: |
andrewbonney | 0:bc6b8c94c371 | 236 | store_char_in_data(r, (*re)[-1]); |
andrewbonney | 0:bc6b8c94c371 | 237 | break; |
andrewbonney | 0:bc6b8c94c371 | 238 | } |
andrewbonney | 0:bc6b8c94c371 | 239 | |
andrewbonney | 0:bc6b8c94c371 | 240 | r->err_str = "No closing ']' bracket"; |
andrewbonney | 0:bc6b8c94c371 | 241 | } |
andrewbonney | 0:bc6b8c94c371 | 242 | |
andrewbonney | 0:bc6b8c94c371 | 243 | static void |
andrewbonney | 0:bc6b8c94c371 | 244 | relocate(struct slre *r, int begin, int shift) |
andrewbonney | 0:bc6b8c94c371 | 245 | { |
andrewbonney | 0:bc6b8c94c371 | 246 | emit(r, END); |
andrewbonney | 0:bc6b8c94c371 | 247 | memmove(r->code + begin + shift, r->code + begin, r->code_size - begin); |
andrewbonney | 0:bc6b8c94c371 | 248 | r->code_size += shift; |
andrewbonney | 0:bc6b8c94c371 | 249 | } |
andrewbonney | 0:bc6b8c94c371 | 250 | |
andrewbonney | 0:bc6b8c94c371 | 251 | static void |
andrewbonney | 0:bc6b8c94c371 | 252 | quantifier(struct slre *r, int prev, int op) |
andrewbonney | 0:bc6b8c94c371 | 253 | { |
andrewbonney | 0:bc6b8c94c371 | 254 | if (r->code[prev] == EXACT && r->code[prev + 2] > 1) { |
andrewbonney | 0:bc6b8c94c371 | 255 | r->code[prev + 2]--; |
andrewbonney | 0:bc6b8c94c371 | 256 | emit(r, EXACT); |
andrewbonney | 0:bc6b8c94c371 | 257 | emit(r, r->code[prev + 1] + r->code[prev + 2]); |
andrewbonney | 0:bc6b8c94c371 | 258 | emit(r, 1); |
andrewbonney | 0:bc6b8c94c371 | 259 | prev = r->code_size - 3; |
andrewbonney | 0:bc6b8c94c371 | 260 | } |
andrewbonney | 0:bc6b8c94c371 | 261 | relocate(r, prev, 2); |
andrewbonney | 0:bc6b8c94c371 | 262 | r->code[prev] = op; |
andrewbonney | 0:bc6b8c94c371 | 263 | set_jump_offset(r, prev + 1, prev); |
andrewbonney | 0:bc6b8c94c371 | 264 | } |
andrewbonney | 0:bc6b8c94c371 | 265 | |
andrewbonney | 0:bc6b8c94c371 | 266 | static void |
andrewbonney | 0:bc6b8c94c371 | 267 | exact_one_char(struct slre *r, int ch) |
andrewbonney | 0:bc6b8c94c371 | 268 | { |
andrewbonney | 0:bc6b8c94c371 | 269 | emit(r, EXACT); |
andrewbonney | 0:bc6b8c94c371 | 270 | emit(r, r->data_size); |
andrewbonney | 0:bc6b8c94c371 | 271 | emit(r, 1); |
andrewbonney | 0:bc6b8c94c371 | 272 | store_char_in_data(r, ch); |
andrewbonney | 0:bc6b8c94c371 | 273 | } |
andrewbonney | 0:bc6b8c94c371 | 274 | |
andrewbonney | 0:bc6b8c94c371 | 275 | static void |
andrewbonney | 0:bc6b8c94c371 | 276 | fixup_branch(struct slre *r, int fixup) |
andrewbonney | 0:bc6b8c94c371 | 277 | { |
andrewbonney | 0:bc6b8c94c371 | 278 | if (fixup > 0) { |
andrewbonney | 0:bc6b8c94c371 | 279 | emit(r, END); |
andrewbonney | 0:bc6b8c94c371 | 280 | set_jump_offset(r, fixup, fixup - 2); |
andrewbonney | 0:bc6b8c94c371 | 281 | } |
andrewbonney | 0:bc6b8c94c371 | 282 | } |
andrewbonney | 0:bc6b8c94c371 | 283 | |
andrewbonney | 0:bc6b8c94c371 | 284 | static void |
andrewbonney | 0:bc6b8c94c371 | 285 | compile(struct slre *r, const char **re) |
andrewbonney | 0:bc6b8c94c371 | 286 | { |
andrewbonney | 0:bc6b8c94c371 | 287 | int op, esc, branch_start, last_op, fixup, cap_no, level; |
andrewbonney | 0:bc6b8c94c371 | 288 | |
andrewbonney | 0:bc6b8c94c371 | 289 | fixup = 0; |
andrewbonney | 0:bc6b8c94c371 | 290 | level = r->num_caps; |
andrewbonney | 0:bc6b8c94c371 | 291 | branch_start = last_op = r->code_size; |
andrewbonney | 0:bc6b8c94c371 | 292 | |
andrewbonney | 0:bc6b8c94c371 | 293 | for (;;) |
andrewbonney | 0:bc6b8c94c371 | 294 | switch (*(*re)++) { |
andrewbonney | 0:bc6b8c94c371 | 295 | case '\0': |
andrewbonney | 0:bc6b8c94c371 | 296 | (*re)--; |
andrewbonney | 0:bc6b8c94c371 | 297 | return; |
andrewbonney | 0:bc6b8c94c371 | 298 | /* NOTREACHED */ |
andrewbonney | 0:bc6b8c94c371 | 299 | break; |
andrewbonney | 0:bc6b8c94c371 | 300 | case '^': |
andrewbonney | 0:bc6b8c94c371 | 301 | emit(r, BOL); |
andrewbonney | 0:bc6b8c94c371 | 302 | break; |
andrewbonney | 0:bc6b8c94c371 | 303 | case '$': |
andrewbonney | 0:bc6b8c94c371 | 304 | emit(r, EOL); |
andrewbonney | 0:bc6b8c94c371 | 305 | break; |
andrewbonney | 0:bc6b8c94c371 | 306 | case '.': |
andrewbonney | 0:bc6b8c94c371 | 307 | last_op = r->code_size; |
andrewbonney | 0:bc6b8c94c371 | 308 | emit(r, ANY); |
andrewbonney | 0:bc6b8c94c371 | 309 | break; |
andrewbonney | 0:bc6b8c94c371 | 310 | case '[': |
andrewbonney | 0:bc6b8c94c371 | 311 | anyof(r, re); |
andrewbonney | 0:bc6b8c94c371 | 312 | break; |
andrewbonney | 0:bc6b8c94c371 | 313 | case '\\': |
andrewbonney | 0:bc6b8c94c371 | 314 | last_op = r->code_size; |
andrewbonney | 0:bc6b8c94c371 | 315 | esc = get_escape_char(re); |
andrewbonney | 0:bc6b8c94c371 | 316 | if (esc & 0xff00) { |
andrewbonney | 0:bc6b8c94c371 | 317 | emit(r, esc >> 8); |
andrewbonney | 0:bc6b8c94c371 | 318 | } else { |
andrewbonney | 0:bc6b8c94c371 | 319 | exact_one_char(r, esc); |
andrewbonney | 0:bc6b8c94c371 | 320 | } |
andrewbonney | 0:bc6b8c94c371 | 321 | break; |
andrewbonney | 0:bc6b8c94c371 | 322 | case '(': |
andrewbonney | 0:bc6b8c94c371 | 323 | last_op = r->code_size; |
andrewbonney | 0:bc6b8c94c371 | 324 | cap_no = ++r->num_caps; |
andrewbonney | 0:bc6b8c94c371 | 325 | emit(r, OPEN); |
andrewbonney | 0:bc6b8c94c371 | 326 | emit(r, cap_no); |
andrewbonney | 0:bc6b8c94c371 | 327 | |
andrewbonney | 0:bc6b8c94c371 | 328 | compile(r, re); |
andrewbonney | 0:bc6b8c94c371 | 329 | if (*(*re)++ != ')') { |
andrewbonney | 0:bc6b8c94c371 | 330 | r->err_str = "No closing bracket"; |
andrewbonney | 0:bc6b8c94c371 | 331 | return; |
andrewbonney | 0:bc6b8c94c371 | 332 | } |
andrewbonney | 0:bc6b8c94c371 | 333 | |
andrewbonney | 0:bc6b8c94c371 | 334 | emit(r, CLOSE); |
andrewbonney | 0:bc6b8c94c371 | 335 | emit(r, cap_no); |
andrewbonney | 0:bc6b8c94c371 | 336 | break; |
andrewbonney | 0:bc6b8c94c371 | 337 | case ')': |
andrewbonney | 0:bc6b8c94c371 | 338 | (*re)--; |
andrewbonney | 0:bc6b8c94c371 | 339 | fixup_branch(r, fixup); |
andrewbonney | 0:bc6b8c94c371 | 340 | if (level == 0) { |
andrewbonney | 0:bc6b8c94c371 | 341 | r->err_str = "Unbalanced brackets"; |
andrewbonney | 0:bc6b8c94c371 | 342 | return; |
andrewbonney | 0:bc6b8c94c371 | 343 | } |
andrewbonney | 0:bc6b8c94c371 | 344 | return; |
andrewbonney | 0:bc6b8c94c371 | 345 | /* NOTREACHED */ |
andrewbonney | 0:bc6b8c94c371 | 346 | break; |
andrewbonney | 0:bc6b8c94c371 | 347 | case '+': |
andrewbonney | 0:bc6b8c94c371 | 348 | case '*': |
andrewbonney | 0:bc6b8c94c371 | 349 | op = (*re)[-1] == '*' ? STAR: PLUS; |
andrewbonney | 0:bc6b8c94c371 | 350 | if (**re == '?') { |
andrewbonney | 0:bc6b8c94c371 | 351 | (*re)++; |
andrewbonney | 0:bc6b8c94c371 | 352 | op = op == STAR ? STARQ : PLUSQ; |
andrewbonney | 0:bc6b8c94c371 | 353 | } |
andrewbonney | 0:bc6b8c94c371 | 354 | quantifier(r, last_op, op); |
andrewbonney | 0:bc6b8c94c371 | 355 | break; |
andrewbonney | 0:bc6b8c94c371 | 356 | case '?': |
andrewbonney | 0:bc6b8c94c371 | 357 | quantifier(r, last_op, QUEST); |
andrewbonney | 0:bc6b8c94c371 | 358 | break; |
andrewbonney | 0:bc6b8c94c371 | 359 | case '|': |
andrewbonney | 0:bc6b8c94c371 | 360 | fixup_branch(r, fixup); |
andrewbonney | 0:bc6b8c94c371 | 361 | relocate(r, branch_start, 3); |
andrewbonney | 0:bc6b8c94c371 | 362 | r->code[branch_start] = BRANCH; |
andrewbonney | 0:bc6b8c94c371 | 363 | set_jump_offset(r, branch_start + 1, branch_start); |
andrewbonney | 0:bc6b8c94c371 | 364 | fixup = branch_start + 2; |
andrewbonney | 0:bc6b8c94c371 | 365 | r->code[fixup] = 0xff; |
andrewbonney | 0:bc6b8c94c371 | 366 | break; |
andrewbonney | 0:bc6b8c94c371 | 367 | default: |
andrewbonney | 0:bc6b8c94c371 | 368 | (*re)--; |
andrewbonney | 0:bc6b8c94c371 | 369 | last_op = r->code_size; |
andrewbonney | 0:bc6b8c94c371 | 370 | exact(r, re); |
andrewbonney | 0:bc6b8c94c371 | 371 | break; |
andrewbonney | 0:bc6b8c94c371 | 372 | } |
andrewbonney | 0:bc6b8c94c371 | 373 | } |
andrewbonney | 0:bc6b8c94c371 | 374 | |
andrewbonney | 0:bc6b8c94c371 | 375 | int |
andrewbonney | 0:bc6b8c94c371 | 376 | slre_compile(struct slre *r, const char *re) |
andrewbonney | 0:bc6b8c94c371 | 377 | { |
andrewbonney | 0:bc6b8c94c371 | 378 | r->err_str = NULL; |
andrewbonney | 0:bc6b8c94c371 | 379 | r->code_size = r->data_size = r->num_caps = r->anchored = 0; |
andrewbonney | 0:bc6b8c94c371 | 380 | |
andrewbonney | 0:bc6b8c94c371 | 381 | if (*re == '^') |
andrewbonney | 0:bc6b8c94c371 | 382 | r->anchored++; |
andrewbonney | 0:bc6b8c94c371 | 383 | |
andrewbonney | 0:bc6b8c94c371 | 384 | emit(r, OPEN); /* This will capture what matches full RE */ |
andrewbonney | 0:bc6b8c94c371 | 385 | emit(r, 0); |
andrewbonney | 0:bc6b8c94c371 | 386 | |
andrewbonney | 0:bc6b8c94c371 | 387 | while (*re != '\0') |
andrewbonney | 0:bc6b8c94c371 | 388 | compile(r, &re); |
andrewbonney | 0:bc6b8c94c371 | 389 | |
andrewbonney | 0:bc6b8c94c371 | 390 | if (r->code[2] == BRANCH) |
andrewbonney | 0:bc6b8c94c371 | 391 | fixup_branch(r, 4); |
andrewbonney | 0:bc6b8c94c371 | 392 | |
andrewbonney | 0:bc6b8c94c371 | 393 | emit(r, CLOSE); |
andrewbonney | 0:bc6b8c94c371 | 394 | emit(r, 0); |
andrewbonney | 0:bc6b8c94c371 | 395 | emit(r, END); |
andrewbonney | 0:bc6b8c94c371 | 396 | |
andrewbonney | 0:bc6b8c94c371 | 397 | return (r->err_str == NULL ? 1 : 0); |
andrewbonney | 0:bc6b8c94c371 | 398 | } |
andrewbonney | 0:bc6b8c94c371 | 399 | |
andrewbonney | 0:bc6b8c94c371 | 400 | static int match(const struct slre *, int, |
andrewbonney | 0:bc6b8c94c371 | 401 | const char *, int, int *, struct cap *); |
andrewbonney | 0:bc6b8c94c371 | 402 | |
andrewbonney | 0:bc6b8c94c371 | 403 | static void |
andrewbonney | 0:bc6b8c94c371 | 404 | loop_greedy(const struct slre *r, int pc, const char *s, int len, int *ofs) |
andrewbonney | 0:bc6b8c94c371 | 405 | { |
andrewbonney | 0:bc6b8c94c371 | 406 | int saved_offset, matched_offset; |
andrewbonney | 0:bc6b8c94c371 | 407 | |
andrewbonney | 0:bc6b8c94c371 | 408 | saved_offset = matched_offset = *ofs; |
andrewbonney | 0:bc6b8c94c371 | 409 | |
andrewbonney | 0:bc6b8c94c371 | 410 | while (match(r, pc + 2, s, len, ofs, NULL)) { |
andrewbonney | 0:bc6b8c94c371 | 411 | saved_offset = *ofs; |
andrewbonney | 0:bc6b8c94c371 | 412 | if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL)) |
andrewbonney | 0:bc6b8c94c371 | 413 | matched_offset = saved_offset; |
andrewbonney | 0:bc6b8c94c371 | 414 | *ofs = saved_offset; |
andrewbonney | 0:bc6b8c94c371 | 415 | } |
andrewbonney | 0:bc6b8c94c371 | 416 | |
andrewbonney | 0:bc6b8c94c371 | 417 | *ofs = matched_offset; |
andrewbonney | 0:bc6b8c94c371 | 418 | } |
andrewbonney | 0:bc6b8c94c371 | 419 | |
andrewbonney | 0:bc6b8c94c371 | 420 | static void |
andrewbonney | 0:bc6b8c94c371 | 421 | loop_non_greedy(const struct slre *r, int pc, const char *s,int len, int *ofs) |
andrewbonney | 0:bc6b8c94c371 | 422 | { |
andrewbonney | 0:bc6b8c94c371 | 423 | int saved_offset = *ofs; |
andrewbonney | 0:bc6b8c94c371 | 424 | |
andrewbonney | 0:bc6b8c94c371 | 425 | while (match(r, pc + 2, s, len, ofs, NULL)) { |
andrewbonney | 0:bc6b8c94c371 | 426 | saved_offset = *ofs; |
andrewbonney | 0:bc6b8c94c371 | 427 | if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL)) |
andrewbonney | 0:bc6b8c94c371 | 428 | break; |
andrewbonney | 0:bc6b8c94c371 | 429 | } |
andrewbonney | 0:bc6b8c94c371 | 430 | |
andrewbonney | 0:bc6b8c94c371 | 431 | *ofs = saved_offset; |
andrewbonney | 0:bc6b8c94c371 | 432 | } |
andrewbonney | 0:bc6b8c94c371 | 433 | |
andrewbonney | 0:bc6b8c94c371 | 434 | static int |
andrewbonney | 0:bc6b8c94c371 | 435 | is_any_of(const unsigned char *p, int len, const char *s, int *ofs) |
andrewbonney | 0:bc6b8c94c371 | 436 | { |
andrewbonney | 0:bc6b8c94c371 | 437 | int i, ch; |
andrewbonney | 0:bc6b8c94c371 | 438 | |
andrewbonney | 0:bc6b8c94c371 | 439 | ch = s[*ofs]; |
andrewbonney | 0:bc6b8c94c371 | 440 | |
andrewbonney | 0:bc6b8c94c371 | 441 | for (i = 0; i < len; i++) |
andrewbonney | 0:bc6b8c94c371 | 442 | if (p[i] == ch) { |
andrewbonney | 0:bc6b8c94c371 | 443 | (*ofs)++; |
andrewbonney | 0:bc6b8c94c371 | 444 | return (1); |
andrewbonney | 0:bc6b8c94c371 | 445 | } |
andrewbonney | 0:bc6b8c94c371 | 446 | |
andrewbonney | 0:bc6b8c94c371 | 447 | return (0); |
andrewbonney | 0:bc6b8c94c371 | 448 | } |
andrewbonney | 0:bc6b8c94c371 | 449 | |
andrewbonney | 0:bc6b8c94c371 | 450 | static int |
andrewbonney | 0:bc6b8c94c371 | 451 | is_any_but(const unsigned char *p, int len, const char *s, int *ofs) |
andrewbonney | 0:bc6b8c94c371 | 452 | { |
andrewbonney | 0:bc6b8c94c371 | 453 | int i, ch; |
andrewbonney | 0:bc6b8c94c371 | 454 | |
andrewbonney | 0:bc6b8c94c371 | 455 | ch = s[*ofs]; |
andrewbonney | 0:bc6b8c94c371 | 456 | |
andrewbonney | 0:bc6b8c94c371 | 457 | for (i = 0; i < len; i++) |
andrewbonney | 0:bc6b8c94c371 | 458 | if (p[i] == ch) |
andrewbonney | 0:bc6b8c94c371 | 459 | return (0); |
andrewbonney | 0:bc6b8c94c371 | 460 | |
andrewbonney | 0:bc6b8c94c371 | 461 | (*ofs)++; |
andrewbonney | 0:bc6b8c94c371 | 462 | return (1); |
andrewbonney | 0:bc6b8c94c371 | 463 | } |
andrewbonney | 0:bc6b8c94c371 | 464 | |
andrewbonney | 0:bc6b8c94c371 | 465 | static int |
andrewbonney | 0:bc6b8c94c371 | 466 | match(const struct slre *r, int pc, const char *s, int len, |
andrewbonney | 0:bc6b8c94c371 | 467 | int *ofs, struct cap *caps) |
andrewbonney | 0:bc6b8c94c371 | 468 | { |
andrewbonney | 0:bc6b8c94c371 | 469 | int n, saved_offset, res = 1; |
andrewbonney | 0:bc6b8c94c371 | 470 | |
andrewbonney | 0:bc6b8c94c371 | 471 | while (res && r->code[pc] != END) { |
andrewbonney | 0:bc6b8c94c371 | 472 | |
andrewbonney | 0:bc6b8c94c371 | 473 | assert(pc < r->code_size); |
andrewbonney | 0:bc6b8c94c371 | 474 | assert(pc < (int) (sizeof(r->code) / sizeof(r->code[0]))); |
andrewbonney | 0:bc6b8c94c371 | 475 | |
andrewbonney | 0:bc6b8c94c371 | 476 | switch (r->code[pc]) { |
andrewbonney | 0:bc6b8c94c371 | 477 | case BRANCH: |
andrewbonney | 0:bc6b8c94c371 | 478 | saved_offset = *ofs; |
andrewbonney | 0:bc6b8c94c371 | 479 | res = match(r, pc + 3, s, len, ofs, caps); |
andrewbonney | 0:bc6b8c94c371 | 480 | if (res == 0) { |
andrewbonney | 0:bc6b8c94c371 | 481 | *ofs = saved_offset; |
andrewbonney | 0:bc6b8c94c371 | 482 | res = match(r, pc + r->code[pc + 1], |
andrewbonney | 0:bc6b8c94c371 | 483 | s, len, ofs, caps); |
andrewbonney | 0:bc6b8c94c371 | 484 | } |
andrewbonney | 0:bc6b8c94c371 | 485 | pc += r->code[pc + 2]; |
andrewbonney | 0:bc6b8c94c371 | 486 | break; |
andrewbonney | 0:bc6b8c94c371 | 487 | case EXACT: |
andrewbonney | 0:bc6b8c94c371 | 488 | res = 0; |
andrewbonney | 0:bc6b8c94c371 | 489 | n = r->code[pc + 2]; /* String length */ |
andrewbonney | 0:bc6b8c94c371 | 490 | if (n <= len - *ofs && !memcmp(s + *ofs, r->data + |
andrewbonney | 0:bc6b8c94c371 | 491 | r->code[pc + 1], n)) { |
andrewbonney | 0:bc6b8c94c371 | 492 | (*ofs) += n; |
andrewbonney | 0:bc6b8c94c371 | 493 | res = 1; |
andrewbonney | 0:bc6b8c94c371 | 494 | } |
andrewbonney | 0:bc6b8c94c371 | 495 | pc += 3; |
andrewbonney | 0:bc6b8c94c371 | 496 | break; |
andrewbonney | 0:bc6b8c94c371 | 497 | case QUEST: |
andrewbonney | 0:bc6b8c94c371 | 498 | res = 1; |
andrewbonney | 0:bc6b8c94c371 | 499 | saved_offset = *ofs; |
andrewbonney | 0:bc6b8c94c371 | 500 | if (!match(r, pc + 2, s, len, ofs, caps)) |
andrewbonney | 0:bc6b8c94c371 | 501 | *ofs = saved_offset; |
andrewbonney | 0:bc6b8c94c371 | 502 | pc += r->code[pc + 1]; |
andrewbonney | 0:bc6b8c94c371 | 503 | break; |
andrewbonney | 0:bc6b8c94c371 | 504 | case STAR: |
andrewbonney | 0:bc6b8c94c371 | 505 | res = 1; |
andrewbonney | 0:bc6b8c94c371 | 506 | loop_greedy(r, pc, s, len, ofs); |
andrewbonney | 0:bc6b8c94c371 | 507 | pc += r->code[pc + 1]; |
andrewbonney | 0:bc6b8c94c371 | 508 | break; |
andrewbonney | 0:bc6b8c94c371 | 509 | case STARQ: |
andrewbonney | 0:bc6b8c94c371 | 510 | res = 1; |
andrewbonney | 0:bc6b8c94c371 | 511 | loop_non_greedy(r, pc, s, len, ofs); |
andrewbonney | 0:bc6b8c94c371 | 512 | pc += r->code[pc + 1]; |
andrewbonney | 0:bc6b8c94c371 | 513 | break; |
andrewbonney | 0:bc6b8c94c371 | 514 | case PLUS: |
andrewbonney | 0:bc6b8c94c371 | 515 | if ((res = match(r, pc + 2, s, len, ofs, caps)) == 0) |
andrewbonney | 0:bc6b8c94c371 | 516 | break; |
andrewbonney | 0:bc6b8c94c371 | 517 | |
andrewbonney | 0:bc6b8c94c371 | 518 | loop_greedy(r, pc, s, len, ofs); |
andrewbonney | 0:bc6b8c94c371 | 519 | pc += r->code[pc + 1]; |
andrewbonney | 0:bc6b8c94c371 | 520 | break; |
andrewbonney | 0:bc6b8c94c371 | 521 | case PLUSQ: |
andrewbonney | 0:bc6b8c94c371 | 522 | if ((res = match(r, pc + 2, s, len, ofs, caps)) == 0) |
andrewbonney | 0:bc6b8c94c371 | 523 | break; |
andrewbonney | 0:bc6b8c94c371 | 524 | |
andrewbonney | 0:bc6b8c94c371 | 525 | loop_non_greedy(r, pc, s, len, ofs); |
andrewbonney | 0:bc6b8c94c371 | 526 | pc += r->code[pc + 1]; |
andrewbonney | 0:bc6b8c94c371 | 527 | break; |
andrewbonney | 0:bc6b8c94c371 | 528 | case SPACE: |
andrewbonney | 0:bc6b8c94c371 | 529 | res = 0; |
andrewbonney | 0:bc6b8c94c371 | 530 | if (*ofs < len && isspace(((unsigned char *)s)[*ofs])) { |
andrewbonney | 0:bc6b8c94c371 | 531 | (*ofs)++; |
andrewbonney | 0:bc6b8c94c371 | 532 | res = 1; |
andrewbonney | 0:bc6b8c94c371 | 533 | } |
andrewbonney | 0:bc6b8c94c371 | 534 | pc++; |
andrewbonney | 0:bc6b8c94c371 | 535 | break; |
andrewbonney | 0:bc6b8c94c371 | 536 | case NONSPACE: |
andrewbonney | 0:bc6b8c94c371 | 537 | res = 0; |
andrewbonney | 0:bc6b8c94c371 | 538 | if (*ofs <len && !isspace(((unsigned char *)s)[*ofs])) { |
andrewbonney | 0:bc6b8c94c371 | 539 | (*ofs)++; |
andrewbonney | 0:bc6b8c94c371 | 540 | res = 1; |
andrewbonney | 0:bc6b8c94c371 | 541 | } |
andrewbonney | 0:bc6b8c94c371 | 542 | pc++; |
andrewbonney | 0:bc6b8c94c371 | 543 | break; |
andrewbonney | 0:bc6b8c94c371 | 544 | case DIGIT: |
andrewbonney | 0:bc6b8c94c371 | 545 | res = 0; |
andrewbonney | 0:bc6b8c94c371 | 546 | if (*ofs < len && isdigit(((unsigned char *)s)[*ofs])) { |
andrewbonney | 0:bc6b8c94c371 | 547 | (*ofs)++; |
andrewbonney | 0:bc6b8c94c371 | 548 | res = 1; |
andrewbonney | 0:bc6b8c94c371 | 549 | } |
andrewbonney | 0:bc6b8c94c371 | 550 | pc++; |
andrewbonney | 0:bc6b8c94c371 | 551 | break; |
andrewbonney | 0:bc6b8c94c371 | 552 | case ANY: |
andrewbonney | 0:bc6b8c94c371 | 553 | res = 0; |
andrewbonney | 0:bc6b8c94c371 | 554 | if (*ofs < len) { |
andrewbonney | 0:bc6b8c94c371 | 555 | (*ofs)++; |
andrewbonney | 0:bc6b8c94c371 | 556 | res = 1; |
andrewbonney | 0:bc6b8c94c371 | 557 | } |
andrewbonney | 0:bc6b8c94c371 | 558 | pc++; |
andrewbonney | 0:bc6b8c94c371 | 559 | break; |
andrewbonney | 0:bc6b8c94c371 | 560 | case ANYOF: |
andrewbonney | 0:bc6b8c94c371 | 561 | res = 0; |
andrewbonney | 0:bc6b8c94c371 | 562 | if (*ofs < len) |
andrewbonney | 0:bc6b8c94c371 | 563 | res = is_any_of(r->data + r->code[pc + 1], |
andrewbonney | 0:bc6b8c94c371 | 564 | r->code[pc + 2], s, ofs); |
andrewbonney | 0:bc6b8c94c371 | 565 | pc += 3; |
andrewbonney | 0:bc6b8c94c371 | 566 | break; |
andrewbonney | 0:bc6b8c94c371 | 567 | case ANYBUT: |
andrewbonney | 0:bc6b8c94c371 | 568 | res = 0; |
andrewbonney | 0:bc6b8c94c371 | 569 | if (*ofs < len) |
andrewbonney | 0:bc6b8c94c371 | 570 | res = is_any_but(r->data + r->code[pc + 1], |
andrewbonney | 0:bc6b8c94c371 | 571 | r->code[pc + 2], s, ofs); |
andrewbonney | 0:bc6b8c94c371 | 572 | pc += 3; |
andrewbonney | 0:bc6b8c94c371 | 573 | break; |
andrewbonney | 0:bc6b8c94c371 | 574 | case BOL: |
andrewbonney | 0:bc6b8c94c371 | 575 | res = *ofs == 0 ? 1 : 0; |
andrewbonney | 0:bc6b8c94c371 | 576 | pc++; |
andrewbonney | 0:bc6b8c94c371 | 577 | break; |
andrewbonney | 0:bc6b8c94c371 | 578 | case EOL: |
andrewbonney | 0:bc6b8c94c371 | 579 | res = *ofs == len ? 1 : 0; |
andrewbonney | 0:bc6b8c94c371 | 580 | pc++; |
andrewbonney | 0:bc6b8c94c371 | 581 | break; |
andrewbonney | 0:bc6b8c94c371 | 582 | case OPEN: |
andrewbonney | 0:bc6b8c94c371 | 583 | if (caps != NULL) |
andrewbonney | 0:bc6b8c94c371 | 584 | caps[r->code[pc + 1]].ptr = s + *ofs; |
andrewbonney | 0:bc6b8c94c371 | 585 | pc += 2; |
andrewbonney | 0:bc6b8c94c371 | 586 | break; |
andrewbonney | 0:bc6b8c94c371 | 587 | case CLOSE: |
andrewbonney | 0:bc6b8c94c371 | 588 | if (caps != NULL) |
andrewbonney | 0:bc6b8c94c371 | 589 | caps[r->code[pc + 1]].len = (s + *ofs) - |
andrewbonney | 0:bc6b8c94c371 | 590 | caps[r->code[pc + 1]].ptr; |
andrewbonney | 0:bc6b8c94c371 | 591 | pc += 2; |
andrewbonney | 0:bc6b8c94c371 | 592 | break; |
andrewbonney | 0:bc6b8c94c371 | 593 | case END: |
andrewbonney | 0:bc6b8c94c371 | 594 | pc++; |
andrewbonney | 0:bc6b8c94c371 | 595 | break; |
andrewbonney | 0:bc6b8c94c371 | 596 | default: |
andrewbonney | 0:bc6b8c94c371 | 597 | printf("unknown cmd (%d) at %d\n", r->code[pc], pc); |
andrewbonney | 0:bc6b8c94c371 | 598 | assert(0); |
andrewbonney | 0:bc6b8c94c371 | 599 | break; |
andrewbonney | 0:bc6b8c94c371 | 600 | } |
andrewbonney | 0:bc6b8c94c371 | 601 | } |
andrewbonney | 0:bc6b8c94c371 | 602 | |
andrewbonney | 0:bc6b8c94c371 | 603 | return (res); |
andrewbonney | 0:bc6b8c94c371 | 604 | } |
andrewbonney | 0:bc6b8c94c371 | 605 | |
andrewbonney | 0:bc6b8c94c371 | 606 | int |
andrewbonney | 0:bc6b8c94c371 | 607 | slre_match(const struct slre *r, const char *buf, int len, |
andrewbonney | 0:bc6b8c94c371 | 608 | struct cap *caps) |
andrewbonney | 0:bc6b8c94c371 | 609 | { |
andrewbonney | 0:bc6b8c94c371 | 610 | int i, ofs = 0, res = 0; |
andrewbonney | 0:bc6b8c94c371 | 611 | |
andrewbonney | 0:bc6b8c94c371 | 612 | if (r->anchored) { |
andrewbonney | 0:bc6b8c94c371 | 613 | res = match(r, 0, buf, len, &ofs, caps); |
andrewbonney | 0:bc6b8c94c371 | 614 | } else { |
andrewbonney | 0:bc6b8c94c371 | 615 | for (i = 0; i < len && res == 0; i++) { |
andrewbonney | 0:bc6b8c94c371 | 616 | ofs = i; |
andrewbonney | 0:bc6b8c94c371 | 617 | res = match(r, 0, buf, len, &ofs, caps); |
andrewbonney | 0:bc6b8c94c371 | 618 | } |
andrewbonney | 0:bc6b8c94c371 | 619 | } |
andrewbonney | 0:bc6b8c94c371 | 620 | |
andrewbonney | 0:bc6b8c94c371 | 621 | return (res); |
andrewbonney | 0:bc6b8c94c371 | 622 | } |
andrewbonney | 0:bc6b8c94c371 | 623 |