1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=80:
3 *
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 *
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
16 *
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
19 *
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
24 *
25 * Contributor(s):
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
41 /*
42 * JS regular expressions, after Perl.
43 */
44 #include "jsstddef.h"
45 #include <stdlib.h>
46 #include <string.h>
47 #include "jstypes.h"
48 #include "jsarena.h" /* Added by JSIFY */
49 #include "jsutil.h" /* Added by JSIFY */
50 #include "jsapi.h"
51 #include "jsarray.h"
52 #include "jsatom.h"
53 #include "jscntxt.h"
54 #include "jsconfig.h"
55 #include "jsfun.h"
56 #include "jsgc.h"
57 #include "jsinterp.h"
58 #include "jslock.h"
59 #include "jsnum.h"
60 #include "jsobj.h"
61 #include "jsopcode.h"
62 #include "jsregexp.h"
63 #include "jsscan.h"
64 #include "jsstr.h"
65
66 #if JS_HAS_REGEXPS
67
68 /* Note : contiguity of 'simple opcodes' is important for SimpleMatch() */
69 typedef enum REOp {
70 REOP_EMPTY = 0, /* match rest of input against rest of r.e. */
71 REOP_ALT = 1, /* alternative subexpressions in kid and next */
72 REOP_SIMPLE_START = 2, /* start of 'simple opcodes' */
73 REOP_BOL = 2, /* beginning of input (or line if multiline) */
74 REOP_EOL = 3, /* end of input (or line if multiline) */
75 REOP_WBDRY = 4, /* match "" at word boundary */
76 REOP_WNONBDRY = 5, /* match "" at word non-boundary */
77 REOP_DOT = 6, /* stands for any character */
78 REOP_DIGIT = 7, /* match a digit char: [0-9] */
79 REOP_NONDIGIT = 8, /* match a non-digit char: [^0-9] */
80 REOP_ALNUM = 9, /* match an alphanumeric char: [0-9a-z_A-Z] */
81 REOP_NONALNUM = 10, /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
82 REOP_SPACE = 11, /* match a whitespace char */
83 REOP_NONSPACE = 12, /* match a non-whitespace char */
84 REOP_BACKREF = 13, /* back-reference (e.g., \1) to a parenthetical */
85 REOP_FLAT = 14, /* match a flat string */
86 REOP_FLAT1 = 15, /* match a single char */
87 REOP_FLATi = 16, /* case-independent REOP_FLAT */
88 REOP_FLAT1i = 17, /* case-independent REOP_FLAT1 */
89 REOP_UCFLAT1 = 18, /* single Unicode char */
90 REOP_UCFLAT1i = 19, /* case-independent REOP_UCFLAT1 */
91 REOP_UCFLAT = 20, /* flat Unicode string; len immediate counts chars */
92 REOP_UCFLATi = 21, /* case-independent REOP_UCFLAT */
93 REOP_CLASS = 22, /* character class with index */
94 REOP_NCLASS = 23, /* negated character class with index */
95 REOP_SIMPLE_END = 23, /* end of 'simple opcodes' */
96 REOP_QUANT = 25, /* quantified atom: atom{1,2} */
97 REOP_STAR = 26, /* zero or more occurrences of kid */
98 REOP_PLUS = 27, /* one or more occurrences of kid */
99 REOP_OPT = 28, /* optional subexpression in kid */
100 REOP_LPAREN = 29, /* left paren bytecode: kid is u.num'th sub-regexp */
101 REOP_RPAREN = 30, /* right paren bytecode */
102 REOP_JUMP = 31, /* for deoptimized closure loops */
103 REOP_DOTSTAR = 32, /* optimize .* to use a single opcode */
104 REOP_ANCHOR = 33, /* like .* but skips left context to unanchored r.e. */
105 REOP_EOLONLY = 34, /* $ not preceded by any pattern */
106 REOP_BACKREFi = 37, /* case-independent REOP_BACKREF */
107 REOP_LPARENNON = 41, /* non-capturing version of REOP_LPAREN */
108 REOP_ASSERT = 43, /* zero width positive lookahead assertion */
109 REOP_ASSERT_NOT = 44, /* zero width negative lookahead assertion */
110 REOP_ASSERTTEST = 45, /* sentinel at end of assertion child */
111 REOP_ASSERTNOTTEST = 46, /* sentinel at end of !assertion child */
112 REOP_MINIMALSTAR = 47, /* non-greedy version of * */
113 REOP_MINIMALPLUS = 48, /* non-greedy version of + */
114 REOP_MINIMALOPT = 49, /* non-greedy version of ? */
115 REOP_MINIMALQUANT = 50, /* non-greedy version of {} */
116 REOP_ENDCHILD = 51, /* sentinel at end of quantifier child */
117 REOP_REPEAT = 52, /* directs execution of greedy quantifier */
118 REOP_MINIMALREPEAT = 53, /* directs execution of non-greedy quantifier */
119 REOP_ALTPREREQ = 54, /* prerequisite for ALT, either of two chars */
120 REOP_ALTPREREQ2 = 55, /* prerequisite for ALT, a char or a class */
121 REOP_ENDALT = 56, /* end of final alternate */
122 REOP_CONCAT = 57, /* concatenation of terms (parse time only) */
123
124 REOP_END
125 } REOp;
126
127 #define REOP_IS_SIMPLE(op) ((unsigned)((op) - REOP_SIMPLE_START) < \
128 (unsigned)REOP_SIMPLE_END)
129
130 struct RENode {
131 REOp op; /* r.e. op bytecode */
132 RENode *next; /* next in concatenation order */
133 void *kid; /* first operand */
134 union {
135 void *kid2; /* second operand */
136 jsint num; /* could be a number */
137 size_t parenIndex; /* or a parenthesis index */
138 struct { /* or a quantifier range */
139 uintN min;
140 uintN max;
141 JSPackedBool greedy;
142 } range;
143 struct { /* or a character class */
144 size_t startIndex;
145 size_t kidlen; /* length of string at kid, in jschars */
146 size_t index; /* index into class list */
147 uint16 bmsize; /* bitmap size, based on max char code */
148 JSPackedBool sense;
149 } ucclass;
150 struct { /* or a literal sequence */
151 jschar chr; /* of one character */
152 size_t length; /* or many (via the kid) */
153 } flat;
154 struct {
155 RENode *kid2; /* second operand from ALT */
156 jschar ch1; /* match char for ALTPREREQ */
157 jschar ch2; /* ditto, or class index for ALTPREREQ2 */
158 } altprereq;
159 } u;
160 };
161
162 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
163 ((c >= 'a') && (c <= 'z')) )
164 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
165 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
166
167 #define CLASS_CACHE_SIZE 4
168
169 typedef struct CompilerState {
170 JSContext *context;
171 JSTokenStream *tokenStream; /* For reporting errors */
172 const jschar *cpbegin;
173 const jschar *cpend;
174 const jschar *cp;
175 size_t parenCount;
176 size_t classCount; /* number of [] encountered */
177 size_t treeDepth; /* maximum depth of parse tree */
178 size_t progLength; /* estimated bytecode length */
179 RENode *result;
180 size_t classBitmapsMem; /* memory to hold all class bitmaps */
181 struct {
182 const jschar *start; /* small cache of class strings */
183 size_t length; /* since they're often the same */
184 size_t index;
185 } classCache[CLASS_CACHE_SIZE];
186 uint16 flags;
187 } CompilerState;
188
189 typedef struct EmitStateStackEntry {
190 jsbytecode *altHead; /* start of REOP_ALT* opcode */
191 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
192 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
193 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
194 RENode *continueNode; /* original REOP_ALT* node being stacked */
195 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
196 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
197 avoid 16-bit unsigned offset overflow */
198 } EmitStateStackEntry;
199
200 /*
201 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
202 * the getters and setters take the pc of the offset, not of the opcode before
203 * the offset.
204 */
205 #define ARG_LEN 2
206 #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
207 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
208 (pc)[1] = (jsbytecode) (arg))
209
210 #define OFFSET_LEN ARG_LEN
211 #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
212 #define GET_OFFSET(pc) GET_ARG(pc)
213
214 /*
215 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
216 * For sanity, we limit it to 2^24 bytes.
217 */
218 #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
219
220 /*
221 * The maximum memory that can be allocated for class bitmaps.
222 * For sanity, we limit it to 2^24 bytes.
223 */
224 #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
225
226 /*
227 * Functions to get size and write/read bytecode that represent small indexes
228 * compactly.
229 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
230 * indicates that the following byte brings more bits to the index. Otherwise
231 * this is the last byte in the index bytecode representing highest index bits.
232 */
233 static size_t
234 GetCompactIndexWidth(size_t index)
235 749930 {
236 size_t width;
237
238 749930 for (width = 1; (index >>= 7) != 0; ++width) { }
239 749930 return width;
240 }
241
242 static jsbytecode *
243 WriteCompactIndex(jsbytecode *pc, size_t index)
244 1499724 {
245 size_t next;
246
247 3405126 while ((next = index >> 7) != 0) {
248 405678 *pc++ = (jsbytecode)(index | 0x80);
249 405678 index = next;
250 }
251 1499724 *pc++ = (jsbytecode)index;
252 1499724 return pc;
253 }
254
255 static jsbytecode *
256 ReadCompactIndex(jsbytecode *pc, size_t *result)
257 2464895 {
258 size_t nextByte;
259
260 2464895 nextByte = *pc++;
261 2464895 if ((nextByte & 0x80) == 0) {
262 /*
263 * Short-circuit the most common case when compact index <= 127.
264 */
265 2090351 *result = nextByte;
266 } else {
267 374544 size_t shift = 7;
268 374544 *result = 0x7F & nextByte;
269 do {
270 374544 nextByte = *pc++;
271 374544 *result |= (nextByte & 0x7F) << shift;
272 374544 shift += 7;
273 374544 } while ((nextByte & 0x80) != 0);
274 }
275 2464895 return pc;
276 }
277
278 typedef struct RECapture {
279 ptrdiff_t index; /* start of contents, -1 for empty */
280 size_t length; /* length of capture */
281 } RECapture;
282
283 typedef struct REMatchState {
284 const jschar *cp;
285 RECapture parens[1]; /* first of 're->parenCount' captures,
286 allocated at end of this struct */
287 } REMatchState;
288
289 struct REBackTrackData;
290
291 typedef struct REProgState {
292 jsbytecode *continue_pc; /* current continuation data */
293 jsbytecode continue_op;
294 ptrdiff_t index; /* progress in text */
295 size_t parenSoFar; /* highest indexed paren started */
296 union {
297 struct {
298 uintN min; /* current quantifier limits */
299 uintN max;
300 } quantifier;
301 struct {
302 size_t top; /* backtrack stack state */
303 size_t sz;
304 } assertion;
305 } u;
306 } REProgState;
307
308 typedef struct REBackTrackData {
309 size_t sz; /* size of previous stack entry */
310 jsbytecode *backtrack_pc; /* where to backtrack to */
311 jsbytecode backtrack_op;
312 const jschar *cp; /* index in text of match at backtrack */
313 size_t parenIndex; /* start index of saved paren contents */
314 size_t parenCount; /* # of saved paren contents */
315 size_t saveStateStackTop; /* number of parent states */
316 /* saved parent states follow */
317 /* saved paren contents follow */
318 } REBackTrackData;
319
320 #define INITIAL_STATESTACK 100
321 #define INITIAL_BACKTRACK 8000
322
323 typedef struct REGlobalData {
324 JSContext *cx;
325 JSRegExp *regexp; /* the RE in execution */
326 JSBool ok; /* runtime error (out_of_memory only?) */
327 size_t start; /* offset to start at */
328 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
329 const jschar *cpbegin; /* text base address */
330 const jschar *cpend; /* text limit address */
331
332 REProgState *stateStack; /* stack of state of current parents */
333 size_t stateStackTop;
334 size_t stateStackLimit;
335
336 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
337 REBackTrackData *backTrackSP;
338 size_t backTrackStackSize;
339 size_t cursz; /* size of current stack entry */
340
341 JSArenaPool pool; /* It's faster to use one malloc'd pool
342 than to malloc/free the three items
343 that are allocated from this pool */
344 } REGlobalData;
345
346 /*
347 * 1. If IgnoreCase is false, return ch.
348 * 2. Let u be ch converted to upper case as if by calling
349 * String.prototype.toUpperCase on the one-character string ch.
350 * 3. If u does not consist of a single character, return ch.
351 * 4. Let cu be u's character.
352 * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
353 * code point value is less than decimal 128, then return ch.
354 * 6. Return cu.
355 */
356 static jschar
357 upcase(jschar ch)
358 0 {
359 0 jschar cu = JS_TOUPPER(ch);
360 0 if (ch >= 128 && cu < 128)
361 0 return ch;
362 0 return cu;
363 }
364
365 static jschar
366 downcase(jschar ch)
367 0 {
368 0 jschar cl = JS_TOLOWER(ch);
369 0 if (cl >= 128 && ch < 128)
370 0 return ch;
371 0 return cl;
372 }
373
374 /* Construct and initialize an RENode, returning NULL for out-of-memory */
375 static RENode *
376 NewRENode(CompilerState *state, REOp op)
377 4842115 {
378 JSContext *cx;
379 RENode *ren;
380
381 4842115 cx = state->context;
382 4842115 JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
383 4842115 if (!ren) {
384 0 JS_ReportOutOfMemory(cx);
385 0 return NULL;
386 }
387 4842115 ren->op = op;
388 4842115 ren->next = NULL;
389 4842115 ren->kid = NULL;
390 4842115 return ren;
391 }
392
393 /*
394 * Validates and converts hex ascii value.
395 */
396 static JSBool
397 isASCIIHexDigit(jschar c, uintN *digit)
398 0 {
399 0 uintN cv = c;
400
401 0 if (cv < '0')
402 0 return JS_FALSE;
403 0 if (cv <= '9') {
404 0 *digit = cv - '0';
405 0 return JS_TRUE;
406 }
407 0 cv |= 0x20;
408 0 if (cv >= 'a' && cv <= 'f') {
409 0 *digit = cv - 'a' + 10;
410 0 return JS_TRUE;
411 }
412 0 return JS_FALSE;
413 }
414
415
416 typedef struct {
417 REOp op;
418 const jschar *errPos;
419 size_t parenIndex;
420 } REOpData;
421
422
423 /*
424 * Process the op against the two top operands, reducing them to a single
425 * operand in the penultimate slot. Update progLength and treeDepth.
426 */
427 static JSBool
428 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
429 intN operandSP)
430 4092644 {
431 RENode *result;
432
433 4092644 switch (opData->op) {
434 case REOP_ALT:
435 717806 result = NewRENode(state, REOP_ALT);
436 717806 if (!result)
437 0 return JS_FALSE;
438 717806 result->kid = operandStack[operandSP - 2];
439 717806 result->u.kid2 = operandStack[operandSP - 1];
440 717806 operandStack[operandSP - 2] = result;
441
442 717806 if (state->treeDepth == TREE_DEPTH_MAX) {
443 0 js_ReportCompileErrorNumber(state->context, state->tokenStream,
444 JSREPORT_TS | JSREPORT_ERROR,
445 JSMSG_REGEXP_TOO_COMPLEX);
446 0 return JS_FALSE;
447 }
448 717806 ++state->treeDepth;
449
450 /*
451 * Look at both alternates to see if there's a FLAT or a CLASS at
452 * the start of each. If so, use a prerequisite match.
453 */
454 733443 if (((RENode *) result->kid)->op == REOP_FLAT &&
455 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
456 (state->flags & JSREG_FOLD) == 0) {
457 15637 result->op = REOP_ALTPREREQ;
458 15637 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
459 15637 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
460 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
461 JUMP, <end> ... ENDALT */
462 15637 state->progLength += 13;
463 }
464 else
465 702169 if (((RENode *) result->kid)->op == REOP_CLASS &&
466 ((RENode *) result->kid)->u.ucclass.index < 256 &&
467 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
468 (state->flags & JSREG_FOLD) == 0) {
469 0 result->op = REOP_ALTPREREQ2;
470 0 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
471 0 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
472 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
473 JUMP, <end> ... ENDALT */
474 0 state->progLength += 13;
475 }
476 else
477 702169 if (((RENode *) result->kid)->op == REOP_FLAT &&
478 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
479 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
480 (state->flags & JSREG_FOLD) == 0) {
481 0 result->op = REOP_ALTPREREQ2;
482 0 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
483 0 result->u.altprereq.ch2 =
484 ((RENode *) result->u.kid2)->u.ucclass.index;
485 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
486 JUMP, <end> ... ENDALT */
487 0 state->progLength += 13;
488 }
489 else {
490 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
491 702169 state->progLength += 7;
492 }
493 break;
494
495 case REOP_CONCAT:
496 3374838 result = operandStack[operandSP - 2];
497 6749676 while (result->next)
498 0 result = result->next;
499 3374838 result->next = operandStack[operandSP - 1];
500 3374838 break;
501
502 case REOP_ASSERT:
503 case REOP_ASSERT_NOT:
504 case REOP_LPARENNON:
505 case REOP_LPAREN:
506 /* These should have been processed by a close paren. */
507 0 js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
508 JSREPORT_TS | JSREPORT_ERROR,
509 JSMSG_MISSING_PAREN, opData->errPos);
510 0 return JS_FALSE;
511
512 default:;
513 }
514 4092644 return JS_TRUE;
515 }
516
517 /*
518 * Parser forward declarations.
519 */
520 static JSBool ParseTerm(CompilerState *state);
521 static JSBool ParseQuantifier(CompilerState *state);
522 static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
523
524 /*
525 * Top-down regular expression grammar, based closely on Perl4.
526 *
527 * regexp: altern A regular expression is one or more
528 * altern '|' regexp alternatives separated by vertical bar.
529 */
530 #define INITIAL_STACK_SIZE 128
531
532 static JSBool
533 ParseRegExp(CompilerState *state)
534 16028 {
535 size_t parenIndex;
536 RENode *operand;
537 REOpData *operatorStack;
538 RENode **operandStack;
539 REOp op;
540 intN i;
541 16028 JSBool result = JS_FALSE;
542
543 16028 intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
544 16028 intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
545
546 /* Watch out for empty regexp */
547 16028 if (state->cp == state->cpend) {
548 17 state->result = NewRENode(state, REOP_EMPTY);
549 17 return (state->result != NULL);
550 }
551
552 16011 operatorStack = (REOpData *)
553 JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
554 16011 if (!operatorStack)
555 0 return JS_FALSE;
556
557 16011 operandStack = (RENode **)
558 JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
559 16011 if (!operandStack)
560 4124292 goto out;
561
562 for (;;) {
563 4124292 parenIndex = state->parenCount;
564 4124292 if (state->cp == state->cpend) {
565 /*
566 * If we are at the end of the regexp and we're short one or more
567 * operands, the regexp must have the form /x|/ or some such, with
568 * left parentheses making us short more than one operand.
569 */
570 0 if (operatorSP >= operandSP) {
571 0 operand = NewRENode(state, REOP_EMPTY);
572 0 if (!operand)
573 goto out;
574 goto pushOperand;
575 }
576 } else {
577 4124292 switch (*state->cp) {
578 case '(':
579 15637 ++state->cp;
580 15637 if (state->cp + 1 < state->cpend &&
581 *state->cp == '?' &&
582 (state->cp[1] == '=' ||
583 state->cp[1] == '!' ||
584 state->cp[1] == ':')) {
585 0 switch (state->cp[1]) {
586 case '=':
587 0 op = REOP_ASSERT;
588 /* ASSERT, <next>, ... ASSERTTEST */
589 0 state->progLength += 4;
590 0 break;
591 case '!':
592 0 op = REOP_ASSERT_NOT;
593 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
594 0 state->progLength += 4;
595 0 break;
596 default:
597 0 op = REOP_LPARENNON;
598 break;
599 }
600 0 state->cp += 2;
601 } else {
602 15637 op = REOP_LPAREN;
603 /* LPAREN, <index>, ... RPAREN, <index> */
604 15637 state->progLength
605 += 2 * (1 + GetCompactIndexWidth(parenIndex));
606 15637 state->parenCount++;
607 15637 if (state->parenCount == 65535) {
608 0 js_ReportCompileErrorNumber(state->context,
609 state->tokenStream,
610 JSREPORT_TS |
611 JSREPORT_ERROR,
612 JSMSG_TOO_MANY_PARENS);
613 0 goto out;
614 }
615 }
616 goto pushOperator;
617
618 case ')':
619 /*
620 * If there's no stacked open parenthesis, throw syntax error.
621 */
622 0 for (i = operatorSP - 1; ; i--) {
623 0 if (i < 0) {
624 0 js_ReportCompileErrorNumber(state->context,
625 state->tokenStream,
626 JSREPORT_TS |
627 JSREPORT_ERROR,
628 JSMSG_UNMATCHED_RIGHT_PAREN);
629 0 goto out;
630 }
631 0 if (operatorStack[i].op == REOP_ASSERT ||
632 operatorStack[i].op == REOP_ASSERT_NOT ||
633 operatorStack[i].op == REOP_LPARENNON ||
634 operatorStack[i].op == REOP_LPAREN) {
635 break;
636 }
637 0 }
638 /* FALL THROUGH */
639
640 case '|':
641 /* Expected an operand before these, so make an empty one */
642 0 operand = NewRENode(state, REOP_EMPTY);
643 0 if (!operand)
644 goto out;
645 goto pushOperand;
646
647 default:
648 4108655 if (!ParseTerm(state))
649 4108655 goto out;
650 4108655 operand = state->result;
651 4108655 pushOperand:
652 4108655 if (operandSP == operandStackSize) {
653 0 operandStackSize += operandStackSize;
654 0 operandStack = (RENode **)
655 JS_realloc(state->context, operandStack,
656 sizeof(RENode *) * operandStackSize);
657 0 if (!operandStack)
658 4108655 goto out;
659 }
660 4108655 operandStack[operandSP++] = operand;
661 break;
662 }
663 }
664
665 /* At the end; process remaining operators. */
666 4124292 restartOperator:
667 4124292 if (state->cp == state->cpend) {
668 51467 while (operatorSP) {
669 35456 --operatorSP;
670 35456 if (!ProcessOp(state, &operatorStack[operatorSP],
671 operandStack, operandSP))
672 35456 goto out;
673 35456 --operandSP;
674 }
675 JS_ASSERT(operandSP == 1);
676 16011 state->result = operandStack[0];
677 16011 result = JS_TRUE;
678 16011 goto out;
679 }
680
681 4108281 switch (*state->cp) {
682 case '|':
683 /* Process any stacked 'concat' operators */
684 717806 ++state->cp;
685 4696843 while (operatorSP &&
686 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
687 3261231 --operatorSP;
688 3261231 if (!ProcessOp(state, &operatorStack[operatorSP],
689 operandStack, operandSP)) {
690 3261231 goto out;
691 }
692 3261231 --operandSP;
693 }
694 717806 op = REOP_ALT;
695 717806 goto pushOperator;
696
697 case ')':
698 /*
699 * If there's no stacked open parenthesis, throw syntax error.
700 */
701 811594 for (i = operatorSP - 1; ; i--) {
702 811594 if (i < 0) {
703 0 js_ReportCompileErrorNumber(state->context,
704 state->tokenStream,
705 JSREPORT_TS | JSREPORT_ERROR,
706 JSMSG_UNMATCHED_RIGHT_PAREN);
707 0 goto out;
708 }
709 811594 if (operatorStack[i].op == REOP_ASSERT ||
710 operatorStack[i].op == REOP_ASSERT_NOT ||
711 operatorStack[i].op == REOP_LPARENNON ||
712 operatorStack[i].op == REOP_LPAREN) {
713 break;
714 }
715 795957 }
716 15637 ++state->cp;
717
718 /* Process everything on the stack until the open parenthesis. */
719 for (;;) {
720 JS_ASSERT(operatorSP);
721 811594 --operatorSP;
722 811594 switch (operatorStack[operatorSP].op) {
723 case REOP_ASSERT:
724 case REOP_ASSERT_NOT:
725 case REOP_LPAREN:
726 15637 operand = NewRENode(state, operatorStack[operatorSP].op);
727 15637 if (!operand)
728 15637 goto out;
729 15637 operand->u.parenIndex =
730 operatorStack[operatorSP].parenIndex;
731 JS_ASSERT(operandSP);
732 15637 operand->kid = operandStack[operandSP - 1];
733 15637 operandStack[operandSP - 1] = operand;
734 15637 if (state->treeDepth == TREE_DEPTH_MAX) {
735 0 js_ReportCompileErrorNumber(state->context,
736 state->tokenStream,
737 JSREPORT_TS |
738 JSREPORT_ERROR,
739 JSMSG_REGEXP_TOO_COMPLEX);
740 0 goto out;
741 }
742 15637 ++state->treeDepth;
743 /* FALL THROUGH */
744
745 case REOP_LPARENNON:
746 15637 state->result = operandStack[operandSP - 1];
747 15637 if (!ParseQuantifier(state))
748 15637 goto out;
749 15637 operandStack[operandSP - 1] = state->result;
750 15637 goto restartOperator;
751 default:
752 795957 if (!ProcessOp(state, &operatorStack[operatorSP],
753 operandStack, operandSP))
754 795957 goto out;
755 795957 --operandSP;
756 break;
757 }
758 }
759 break;
760
761 case '{':
762 {
763 0 const jschar *errp = state->cp;
764
765 0 if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
766 /*
767 * This didn't even scan correctly as a quantifier, so we should
768 * treat it as flat.
769 */
770 0 op = REOP_CONCAT;
771 0 goto pushOperator;
772 }
773
774 0 state->cp = errp;
775 /* FALL THROUGH */
776 }
777
778 case '+':
779 case '*':
780 case '?':
781 0 js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
782 JSREPORT_TS | JSREPORT_ERROR,
783 JSMSG_BAD_QUANTIFIER, state->cp);
784 0 result = JS_FALSE;
785 0 goto out;
786
787 default:
788 /* Anything else is the start of the next term. */
789 3374838 op = REOP_CONCAT;
790 4108281 pushOperator:
791 4108281 if (operatorSP == operatorStackSize) {
792 0 operatorStackSize += operatorStackSize;
793 0 operatorStack = (REOpData *)
794 JS_realloc(state->context, operatorStack,
795 sizeof(REOpData) * operatorStackSize);
796 0 if (!operatorStack)
797 4108281 goto out;
798 }
799 4108281 operatorStack[operatorSP].op = op;
800 4108281 operatorStack[operatorSP].errPos = state->cp;
801 4108281 operatorStack[operatorSP++].parenIndex = parenIndex;
802 break;
803 }
804 }
805 16011 out:
806 16011 if (operatorStack)
807 16011 JS_free(state->context, operatorStack);
808 16011 if (operandStack)
809 16011 JS_free(state->context, operandStack);
810 16011 return result;
811 }
812
813 /*
814 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
815 * its being on the stack, and to propagate errors to its callers.
816 */
817 #define JSREG_FIND_PAREN_COUNT 0x8000
818 #define JSREG_FIND_PAREN_ERROR 0x4000
819
820 /*
821 * Magic return value from FindParenCount and GetDecimalValue, to indicate
822 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
823 * its findMax parameter is non-null.
824 */
825 #define OVERFLOW_VALUE ((uintN)-1)
826
827 static uintN
828 FindParenCount(CompilerState *state)
829 0 {
830 CompilerState temp;
831 int i;
832
833 0 if (state->flags & JSREG_FIND_PAREN_COUNT)
834 0 return OVERFLOW_VALUE;
835
836 /*
837 * Copy state into temp, flag it so we never report an invalid backref,
838 * and reset its members to parse the entire regexp. This is obviously
839 * suboptimal, but GetDecimalValue calls us only if a backref appears to
840 * refer to a forward parenthetical, which is rare.
841 */
842 0 temp = *state;
843 0 temp.flags |= JSREG_FIND_PAREN_COUNT;
844 0 temp.cp = temp.cpbegin;
845 0 temp.parenCount = 0;
846 0 temp.classCount = 0;
847 0 temp.progLength = 0;
848 0 temp.treeDepth = 0;
849 0 temp.classBitmapsMem = 0;
850 0 for (i = 0; i < CLASS_CACHE_SIZE; i++)
851 0 temp.classCache[i].start = NULL;
852
853 0 if (!ParseRegExp(&temp)) {
854 0 state->flags |= JSREG_FIND_PAREN_ERROR;
855 0 return OVERFLOW_VALUE;
856 }
857 0 return temp.parenCount;
858 }
859
860 /*
861 * Extract and return a decimal value at state->cp. The initial character c
862 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
863 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
864 * state->flags to discover whether an error occurred under findMax.
865 */
866 static uintN
867 GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
868 CompilerState *state)
869 0 {
870 0 uintN value = JS7_UNDEC(c);
871 0 JSBool overflow = (value > max && (!findMax || value > findMax(state)));
872
873 /* The following restriction allows simpler overflow checks. */
874 JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
875 0 while (state->cp < state->cpend) {
876 0 c = *state->cp;
877 0 if (!JS7_ISDEC(c))
878 break;
879 0 value = 10 * value + JS7_UNDEC(c);
880 0 if (!overflow && value > max && (!findMax || value > findMax(state)))
881 0 overflow = JS_TRUE;
882 0 ++state->cp;
883 }
884 0 return overflow ? OVERFLOW_VALUE : value;
885 }
886
887 /*
888 * Calculate the total size of the bitmap required for a class expression.
889 */
890 static JSBool
891 CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
892 const jschar *end)
893 0 {
894 0 uintN max = 0;
895 0 JSBool inRange = JS_FALSE;
896 0 jschar c, rangeStart = 0;
897 uintN n, digit, nDigits, i;
898
899 0 target->u.ucclass.bmsize = 0;
900 0 target->u.ucclass.sense = JS_TRUE;
901
902 0 if (src == end)
903 0 return JS_TRUE;
904
905 0 if (*src == '^') {
906 0 ++src;
907 0 target->u.ucclass.sense = JS_FALSE;
908 }
909
910 0 while (src != end) {
911 0 uintN localMax = 0;
912 0 switch (*src) {
913 case '\\':
914 0 ++src;
915 0 c = *src++;
916 0 switch (c) {
917 case 'b':
918 0 localMax = 0x8;
919 0 break;
920 case 'f':
921 0 localMax = 0xC;
922 0 break;
923 case 'n':
924 0 localMax = 0xA;
925 0 break;
926 case 'r':
927 0 localMax = 0xD;
928 0 break;
929 case 't':
930 0 localMax = 0x9;
931 0 break;
932 case 'v':
933 0 localMax = 0xB;
934 0 break;
935 case 'c':
936 0 if (src + 1 < end && RE_IS_LETTER(src[1]))
937 0 localMax = (jschar) (*src++ & 0x1F);
938 else
939 0 localMax = '\\';
940 break;
941 case 'x':
942 0 nDigits = 2;
943 0 goto lexHex;
944 case 'u':
945 0 nDigits = 4;
946 0 lexHex:
947 0 n = 0;
948 0 for (i = 0; (i < nDigits) && (src < end); i++) {
949 0 c = *src++;
950 0 if (!isASCIIHexDigit(c, &digit)) {
951 /*
952 * Back off to accepting the original
953 *'\' as a literal.
954 */
955 0 src -= i + 1;
956 0 n = '\\';
957 0 break;
958 }
959 0 n = (n << 4) | digit;
960 }
961 0 localMax = n;
962 0 break;
963 case 'd':
964 0 if (inRange) {
965 0 JS_ReportErrorNumber(state->context,
966 js_GetErrorMessage, NULL,
967 JSMSG_BAD_CLASS_RANGE);
968 0 return JS_FALSE;
969 }
970 0 localMax = '9';
971 0 break;
972 case 'D':
973 case 's':
974 case 'S':
975 case 'w':
976 case 'W':
977 0 if (inRange) {
978 0 JS_ReportErrorNumber(state->context,
979 js_GetErrorMessage, NULL,
980 JSMSG_BAD_CLASS_RANGE);
981 0 return JS_FALSE;
982 }
983 0 target->u.ucclass.bmsize = 65535;
984 0 return JS_TRUE;
985 case '0':
986 case '1':
987 case '2':
988 case '3':
989 case '4':
990 case '5':
991 case '6':
992 case '7':
993 /*
994 * This is a non-ECMA extension - decimal escapes (in this
995 * case, octal!) are supposed to be an error inside class
996 * ranges, but supported here for backwards compatibility.
997 *
998 */
999 0 n = JS7_UNDEC(c);
1000 0 c = *src;
1001 0 if ('0' <= c && c <= '7') {
1002 0 src++;
1003 0 n = 8 * n + JS7_UNDEC(c);
1004 0 c = *src;
1005 0 if ('0' <= c && c <= '7') {
1006 0 src++;
1007 0 i = 8 * n + JS7_UNDEC(c);
1008 0 if (i <= 0377)
1009 0 n = i;
1010 else
1011 0 src--;
1012 }
1013 }
1014 0 localMax = n;
1015 0 break;
1016
1017 default:
1018 0 localMax = c;
1019 break;
1020 }
1021 break;
1022 default:
1023 0 localMax = *src++;
1024 break;
1025 }
1026 0 if (inRange) {
1027 0 if (rangeStart > localMax) {
1028 0 JS_ReportErrorNumber(state->context,
1029 js_GetErrorMessage, NULL,
1030 JSMSG_BAD_CLASS_RANGE);
1031 0 return JS_FALSE;
1032 }
1033 0 inRange = JS_FALSE;
1034 } else {
1035 0 if (src < end - 1) {
1036 0 if (*src == '-') {
1037 0 ++src;
1038 0 inRange = JS_TRUE;
1039 0 rangeStart = (jschar)localMax;
1040 0 continue;
1041 }
1042 }
1043 }
1044 0 if (state->flags & JSREG_FOLD) {
1045 0 c = JS_MAX(upcase((jschar)localMax), downcase((jschar)localMax));
1046 0 if (c > localMax)
1047 0 localMax = c;
1048 }
1049 0 if (localMax > max)
1050 0 max = localMax;
1051 }
1052 0 target->u.ucclass.bmsize = max;
1053 0 return JS_TRUE;
1054 }
1055
1056 /*
1057 * item: assertion An item is either an assertion or
1058 * quantatom a quantified atom.
1059 *
1060 * assertion: '^' Assertions match beginning of string
1061 * (or line if the class static property
1062 * RegExp.multiline is true).
1063 * '$' End of string (or line if the class
1064 * static property RegExp.multiline is
1065 * true).
1066 * '\b' Word boundary (between \w and \W).
1067 * '\B' Word non-boundary.
1068 *
1069 * quantatom: atom An unquantified atom.
1070 * quantatom '{' n ',' m '}'
1071 * Atom must occur between n and m times.
1072 * quantatom '{' n ',' '}' Atom must occur at least n times.
1073 * quantatom '{' n '}' Atom must occur exactly n times.
1074 * quantatom '*' Zero or more times (same as {0,}).
1075 * quantatom '+' One or more times (same as {1,}).
1076 * quantatom '?' Zero or one time (same as {0,1}).
1077 *
1078 * any of which can be optionally followed by '?' for ungreedy
1079 *
1080 * atom: '(' regexp ')' A parenthesized regexp (what matched
1081 * can be addressed using a backreference,
1082 * see '\' n below).
1083 * '.' Matches any char except '\n'.
1084 * '[' classlist ']' A character class.
1085 * '[' '^' classlist ']' A negated character class.
1086 * '\f' Form Feed.
1087 * '\n' Newline (Line Feed).
1088 * '\r' Carriage Return.
1089 * '\t' Horizontal Tab.
1090 * '\v' Vertical Tab.
1091 * '\d' A digit (same as [0-9]).
1092 * '\D' A non-digit.
1093 * '\w' A word character, [0-9a-z_A-Z].
1094 * '\W' A non-word character.
1095 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1096 * '\S' A non-whitespace character.
1097 * '\' n A backreference to the nth (n decimal
1098 * and positive) parenthesized expression.
1099 * '\' octal An octal escape sequence (octal must be
1100 * two or three digits long, unless it is
1101 * 0 for the null character).
1102 * '\x' hex A hex escape (hex must be two digits).
1103 * '\u' unicode A unicode escape (must be four digits).
1104 * '\c' ctrl A control character, ctrl is a letter.
1105 * '\' literalatomchar Any character except one of the above
1106 * that follow '\' in an atom.
1107 * otheratomchar Any character not first among the other
1108 * atom right-hand sides.
1109 */
1110 static JSBool
1111 ParseTerm(CompilerState *state)
1112 4108655 {
1113 4108655 jschar c = *state->cp++;
1114 uintN nDigits;
1115 uintN num, tmp, n, i;
1116 const jschar *termStart;
1117
1118 4108655 switch (c) {
1119 /* assertions and atoms */
1120 case '^':
1121 15671 state->result = NewRENode(state, REOP_BOL);
1122 15671 if (!state->result)
1123 0 return JS_FALSE;
1124 15671 state->progLength++;
1125 15671 return JS_TRUE;
1126 case '$':
1127 15603 state->result = NewRENode(state, REOP_EOL);
1128 15603 if (!state->result)
1129 0 return JS_FALSE;
1130 15603 state->progLength++;
1131 15603 return JS_TRUE;
1132 case '\\':
1133 374 if (state->cp >= state->cpend) {
1134 /* a trailing '\' is an error */
1135 0 js_ReportCompileErrorNumber(state->context, state->tokenStream,
1136 JSREPORT_TS | JSREPORT_ERROR,
1137 JSMSG_TRAILING_SLASH);
1138 0 return JS_FALSE;
1139 }
1140 374 c = *state->cp++;
1141 374 switch (c) {
1142 /* assertion escapes */
1143 case 'b' :
1144 0 state->result = NewRENode(state, REOP_WBDRY);
1145 0 if (!state->result)
1146 0 return JS_FALSE;
1147 0 state->progLength++;
1148 0 return JS_TRUE;
1149 case 'B':
1150 0 state->result = NewRENode(state, REOP_WNONBDRY);
1151 0 if (!state->result)
1152 0 return JS_FALSE;
1153 0 state->progLength++;
1154 0 return JS_TRUE;
1155 /* Decimal escape */
1156 case '0':
1157 /* Give a strict warning. See also the note below. */
1158 0 if (!js_ReportCompileErrorNumber(state->context,
1159 state->tokenStream,
1160 JSREPORT_TS |
1161 JSREPORT_WARNING |
1162 JSREPORT_STRICT,
1163 JSMSG_INVALID_BACKREF)) {
1164 0 return JS_FALSE;
1165 }
1166 0 doOctal:
1167 0 num = 0;
1168 0 while (state->cp < state->cpend) {
1169 0 c = *state->cp;
1170 0 if (c < '0' || '7' < c)
1171 break;
1172 0 state->cp++;
1173 0 tmp = 8 * num + (uintN)JS7_UNDEC(c);
1174 0 if (tmp > 0377)
1175 0 break;
1176 0 num = tmp;
1177 }
1178 0 c = (jschar)num;
1179 0 doFlat:
1180 0 state->result = NewRENode(state, REOP_FLAT);
1181 0 if (!state->result)
1182 0 return JS_FALSE;
1183 0 state->result->u.flat.chr = c;
1184 0 state->result->u.flat.length = 1;
1185 0 state->progLength += 3;
1186 0 break;
1187 case '1':
1188 case '2':
1189 case '3':
1190 case '4':
1191 case '5':
1192 case '6':
1193 case '7':
1194 case '8':
1195 case '9':
1196 0 termStart = state->cp - 1;
1197 0 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1198 0 if (state->flags & JSREG_FIND_PAREN_ERROR)
1199 0 return JS_FALSE;
1200 0 if (num == OVERFLOW_VALUE) {
1201 /* Give a strict mode warning. */
1202 0 if (!js_ReportCompileErrorNumber(state->context,
1203 state->tokenStream,
1204 JSREPORT_TS |
1205 JSREPORT_WARNING |
1206 JSREPORT_STRICT,
1207 (c >= '8')
1208 ? JSMSG_INVALID_BACKREF
1209 : JSMSG_BAD_BACKREF)) {
1210 0 return JS_FALSE;
1211 }
1212
1213 /*
1214 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1215 * error here. However, for compatibility with IE, we treat the
1216 * whole backref as flat if the first character in it is not a
1217 * valid octal character, and as an octal escape otherwise.
1218 */
1219 0 state->cp = termStart;
1220 0 if (c >= '8') {
1221 /* Treat this as flat. termStart - 1 is the \. */
1222 0 c = '\\';
1223 0 goto asFlat;
1224 }
1225
1226 /* Treat this as an octal escape. */
1227 goto doOctal;
1228 }
1229 JS_ASSERT(1 <= num && num <= 0x10000);
1230 0 state->result = NewRENode(state, REOP_BACKREF);
1231 0 if (!state->result)
1232 0 return JS_FALSE;
1233 0 state->result->u.parenIndex = num - 1;
1234 0 state->progLength
1235 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1236 0 break;
1237 /* Control escape */
1238 case 'f':
1239 0 c = 0xC;
1240 0 goto doFlat;
1241 case 'n':
1242 0 c = 0xA;
1243 0 goto doFlat;
1244 case 'r':
1245 0 c = 0xD;
1246 0 goto doFlat;
1247 case 't':
1248 0 c = 0x9;
1249 0 goto doFlat;
1250 case 'v':
1251 0 c = 0xB;
1252 0 goto doFlat;
1253 /* Control letter */
1254 case 'c':
1255 0 if (state->cp + 1 < state->cpend && RE_IS_LETTER(state->cp[1])) {
1256 0 c = (jschar) (*state->cp++ & 0x1F);
1257 } else {
1258 /* back off to accepting the original '\' as a literal */
1259 0 --state->cp;
1260 0 c = '\\';
1261 }
1262 goto doFlat;
1263 /* HexEscapeSequence */
1264 case 'x':
1265 0 nDigits = 2;
1266 0 goto lexHex;
1267 /* UnicodeEscapeSequence */
1268 case 'u':
1269 0 nDigits = 4;
1270 0 lexHex:
1271 0 n = 0;
1272 0 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1273 uintN digit;
1274 0 c = *state->cp++;
1275 0 if (!isASCIIHexDigit(c, &digit)) {
1276 /*
1277 * Back off to accepting the original 'u' or 'x' as a
1278 * literal.
1279 */
1280 0 state->cp -= i + 2;
1281 0 n = *state->cp++;
1282 0 break;
1283 }
1284 0 n = (n << 4) | digit;
1285 }
1286 0 c = (jschar) n;
1287 0 goto doFlat;
1288 /* Character class escapes */
1289 case 'd':
1290 0 state->result = NewRENode(state, REOP_DIGIT);
1291 68 doSimple:
1292 68 if (!state->result)
1293 0 return JS_FALSE;
1294 68 state->progLength++;
1295 68 break;
1296 case 'D':
1297 0 state->result = NewRENode(state, REOP_NONDIGIT);
1298 0 goto doSimple;
1299 case 's':
1300 0 state->result = NewRENode(state, REOP_SPACE);
1301 0 goto doSimple;
1302 case 'S':
1303 0 state->result = NewRENode(state, REOP_NONSPACE);
1304 0 goto doSimple;
1305 case 'w':
1306 0 state->result = NewRENode(state, REOP_ALNUM);
1307 0 goto doSimple;
1308 case 'W':
1309 0 state->result = NewRENode(state, REOP_NONALNUM);
1310 0 goto doSimple;
1311 /* IdentityEscape */
1312 default:
1313 374 state->result = NewRENode(state, REOP_FLAT);
1314 374 if (!state->result)
1315 0 return JS_FALSE;
1316 374 state->result->u.flat.chr = c;
1317 374 state->result->u.flat.length = 1;
1318 374 state->result->kid = (void *) (state->cp - 1);
1319 374 state->progLength += 3;
1320 break;
1321 }
1322 break;
1323 case '[':
1324 0 state->result = NewRENode(state, REOP_CLASS);
1325 0 if (!state->result)
1326 0 return JS_FALSE;
1327 0 termStart = state->cp;
1328 0 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1329 for (;;) {
1330 0 if (state->cp == state->cpend) {
1331 0 js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
1332 JSREPORT_TS | JSREPORT_ERROR,
1333 JSMSG_UNTERM_CLASS, termStart);
1334
1335 0 return JS_FALSE;
1336 }
1337 0 if (*state->cp == '\\') {
1338 0 state->cp++;
1339 0 if (state->cp != state->cpend)
1340 0 state->cp++;
1341 continue;
1342 }
1343 0 if (*state->cp == ']') {
1344 0 state->result->u.ucclass.kidlen = state->cp - termStart;
1345 break;
1346 }
1347 0 state->cp++;
1348 }
1349 0 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1350 0 if (!state->classCache[i].start) {
1351 0 state->classCache[i].start = termStart;
1352 0 state->classCache[i].length = state->result->u.ucclass.kidlen;
1353 0 state->classCache[i].index = state->classCount;
1354 0 break;
1355 }
1356 0 if (state->classCache[i].length ==
1357 state->result->u.ucclass.kidlen) {
1358 0 for (n = 0; ; n++) {
1359 0 if (n == state->classCache[i].length) {
1360 0 state->result->u.ucclass.index
1361 = state->classCache[i].index;
1362 0 goto claim;
1363 }
1364 0 if (state->classCache[i].start[n] != termStart[n])
1365 0 break;
1366 0 }
1367 }
1368 }
1369 0 state->result->u.ucclass.index = state->classCount++;
1370
1371 0 claim:
1372 /*
1373 * Call CalculateBitmapSize now as we want any errors it finds
1374 * to be reported during the parse phase, not at execution.
1375 */
1376 0 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1377 0 return JS_FALSE;
1378 /*
1379 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1380 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1381 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1382 */
1383 0 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1384 0 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1385 0 js_ReportCompileErrorNumber(state->context, state->tokenStream,
1386 JSREPORT_TS | JSREPORT_ERROR,
1387 JSMSG_REGEXP_TOO_COMPLEX);
1388 0 return JS_FALSE;
1389 }
1390 0 state->classBitmapsMem += n;
1391 /* CLASS, <index> */
1392 0 state->progLength
1393 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1394 0 break;
1395
1396 case '.':
1397 68 state->result = NewRENode(state, REOP_DOT);
1398 68 goto doSimple;
1399
1400 case '{':
1401 {
1402 0 const jschar *errp = state->cp--;
1403 intN err;
1404
1405 0 err = ParseMinMaxQuantifier(state, JS_TRUE);
1406 0 state->cp = errp;
1407
1408 0 if (err < 0)
1409 0 goto asFlat;
1410
1411 /* FALL THROUGH */
1412 }
1413 case '*':
1414 case '+':
1415 case '?':
1416 0 js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
1417 JSREPORT_TS | JSREPORT_ERROR,
1418 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1419 0 return JS_FALSE;
1420 default:
1421 4076939 asFlat:
1422 4076939 state->result = NewRENode(state, REOP_FLAT);
1423 4076939 if (!state->result)
1424 0 return JS_FALSE;
1425 4076939 state->result->u.flat.chr = c;
1426 4076939 state->result->u.flat.length = 1;
1427 4076939 state->result->kid = (void *) (state->cp - 1);
1428 4076939 state->progLength += 3;
1429 break;
1430 }
1431 4077381 return ParseQuantifier(state);
1432 }
1433
1434 static JSBool
1435 ParseQuantifier(CompilerState *state)
1436 4093018 {
1437 RENode *term;
1438 4093018 term = state->result;
1439 4093018 if (state->cp < state->cpend) {
1440 4092610 switch (*state->cp) {
1441 case '+':
1442 0 state->result = NewRENode(state, REOP_QUANT);
1443 0 if (!state->result)
1444 0 return JS_FALSE;
1445 0 state->result->u.range.min = 1;
1446 0 state->result->u.range.max = (uintN)-1;
1447 /* <PLUS>, <next> ... <ENDCHILD> */
1448 0 state->progLength += 4;
1449 0 goto quantifier;
1450 case '*':
1451 0 state->result = NewRENode(state, REOP_QUANT);
1452 0 if (!state->result)
1453 0 return JS_FALSE;
1454 0 state->result->u.range.min = 0;
1455 0 state->result->u.range.max = (uintN)-1;
1456 /* <STAR>, <next> ... <ENDCHILD> */
1457 0 state->progLength += 4;
1458 0 goto quantifier;
1459 case '?':
1460 0 state->result = NewRENode(state, REOP_QUANT);
1461 0 if (!state->result)
1462 0 return JS_FALSE;
1463 0 state->result->u.range.min = 0;
1464 0 state->result->u.range.max = 1;
1465 /* <OPT>, <next> ... <ENDCHILD> */
1466 0 state->progLength += 4;
1467 0 goto quantifier;
1468 case '{': /* balance '}' */
1469 {
1470 intN err;
1471 0 const jschar *errp = state->cp;
1472
1473 0 err = ParseMinMaxQuantifier(state, JS_FALSE);
1474 0 if (err == 0)
1475 0 goto quantifier;
1476 0 if (err == -1)
1477 0 return JS_TRUE;
1478
1479 0 js_ReportCompileErrorNumberUC(state->context,
1480 state->tokenStream,
1481 JSREPORT_TS | JSREPORT_ERROR,
1482 err, errp);
1483 0 return JS_FALSE;
1484 }
1485 default:;
1486 }
1487 }
1488 4093018 return JS_TRUE;
1489
1490 0 quantifier:
1491 0 if (state->treeDepth == TREE_DEPTH_MAX) {
1492 0 js_ReportCompileErrorNumber(state->context, state->tokenStream,
1493 JSREPORT_TS | JSREPORT_ERROR,
1494 JSMSG_REGEXP_TOO_COMPLEX);
1495 0 return JS_FALSE;
1496 }
1497
1498 0 ++state->treeDepth;
1499 0 ++state->cp;
1500 0 state->result->kid = term;
1501 0 if (state->cp < state->cpend && *state->cp == '?') {
1502 0 ++state->cp;
1503 0 state->result->u.range.greedy = JS_FALSE;
1504 } else {
1505 0 state->result->u.range.greedy = JS_TRUE;
1506 }
1507 0 return JS_TRUE;
1508 }
1509
1510 static intN
1511 ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
1512 0 {
1513 uintN min, max;
1514 jschar c;
1515 0 const jschar *errp = state->cp++;
1516
1517 0 c = *state->cp;
1518 0 if (JS7_ISDEC(c)) {
1519 0 ++state->cp;
1520 0 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1521 0 c = *state->cp;
1522
1523 0 if (!ignoreValues && min == OVERFLOW_VALUE)
1524 0 return JSMSG_MIN_TOO_BIG;
1525
1526 0 if (c == ',') {
1527 0 c = *++state->cp;
1528 0 if (JS7_ISDEC(c)) {
1529 0 ++state->cp;
1530 0 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1531 0 c = *state->cp;
1532 0 if (!ignoreValues && max == OVERFLOW_VALUE)
1533 0 return JSMSG_MAX_TOO_BIG;
1534 0 if (!ignoreValues && min > max)
1535 0 return JSMSG_OUT_OF_ORDER;
1536 } else {
1537 0 max = (uintN)-1;
1538 }
1539 } else {
1540 0 max = min;
1541 }
1542 0 if (c == '}') {
1543 0 state->result = NewRENode(state, REOP_QUANT);
1544 0 if (!state->result)
1545 0 return JS_FALSE;
1546 0 state->result->u.range.min = min;
1547 0 state->result->u.range.max = max;
1548 /*
1549 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1550 * where <max> is written as compact(max+1) to make
1551 * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1552 */
1553 0 state->progLength += (1 + GetCompactIndexWidth(min)
1554 + GetCompactIndexWidth(max + 1)
1555 +3);
1556 0 return 0;
1557 }
1558 }
1559
1560 0 state->cp = errp;
1561 0 return -1;
1562 }
1563
1564 static JSBool
1565 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
1566 1451249 {
1567 1451249 ptrdiff_t offset = target - jump;
1568
1569 /* Check that target really points forward. */
1570 JS_ASSERT(offset >= 2);
1571 1451249 if ((size_t)offset > OFFSET_MAX)
1572 0 return JS_FALSE;
1573
1574 1451249 jump[0] = JUMP_OFFSET_HI(offset);
1575 1451249 jump[1] = JUMP_OFFSET_LO(offset);
1576 1451249 return JS_TRUE;
1577 }
1578
1579 /*
1580 * Generate bytecode for the tree rooted at t using an explicit stack instead
1581 * of recursion.
1582 */
1583 static jsbytecode *
1584 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
1585 jsbytecode *pc, RENode *t)
1586 16028 {
1587 EmitStateStackEntry *emitStateSP, *emitStateStack;
1588 RECharSet *charSet;
1589 REOp op;
1590
1591 16028 if (treeDepth == 0) {
1592 391 emitStateStack = NULL;
1593 } else {
1594 15637 emitStateStack =
1595 (EmitStateStackEntry *)JS_malloc(state->context,
1596 sizeof(EmitStateStackEntry) *
1597 treeDepth);
1598 15637 if (!emitStateStack)
1599 0 return NULL;
1600 }
1601 16028 emitStateSP = emitStateStack;
1602 16028 op = t->op;
1603
1604 for (;;) {
1605 2950344 *pc++ = op;
1606 2950344 switch (op) {
1607 case REOP_EMPTY:
1608 17 --pc;
1609 17 break;
1610
1611 case REOP_ALTPREREQ2:
1612 case REOP_ALTPREREQ:
1613 JS_ASSERT(emitStateSP);
1614 15637 emitStateSP->altHead = pc - 1;
1615 15637 emitStateSP->endTermFixup = pc;
1616 15637 pc += OFFSET_LEN;
1617 15637 SET_ARG(pc, t->u.altprereq.ch1);
1618 15637 pc += ARG_LEN;
1619 15637 SET_ARG(pc, t->u.altprereq.ch2);
1620 15637 pc += ARG_LEN;
1621
1622 15637 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1623 15637 pc += OFFSET_LEN;
1624
1625 15637 emitStateSP->continueNode = t;
1626 15637 emitStateSP->continueOp = REOP_JUMP;
1627 15637 emitStateSP->jumpToJumpFlag = JS_FALSE;
1628 15637 ++emitStateSP;
1629 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1630 15637 t = (RENode *) t->kid;
1631 15637 op = t->op;
1632 15637 continue;
1633
1634 case REOP_JUMP:
1635 717806 emitStateSP->nextTermFixup = pc; /* offset to following term */
1636 717806 pc += OFFSET_LEN;
1637 717806 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
1638 717806 goto jump_too_big;
1639 717806 emitStateSP->continueOp = REOP_ENDALT;
1640 717806 ++emitStateSP;
1641 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1642 717806 t = t->u.kid2;
1643 717806 op = t->op;
1644 717806 continue;
1645
1646 case REOP_ENDALT:
1647 /*
1648 * If we already patched emitStateSP->nextTermFixup to jump to
1649 * a nearer jump, to avoid 16-bit immediate offset overflow, we
1650 * are done here.
1651 */
1652 717806 if (emitStateSP->jumpToJumpFlag)
1653 717806 break;
1654
1655 /*
1656 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
1657 * REOP_ENDALT is executed only on successful match of the last
1658 * alternate in a group.
1659 */
1660 717806 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1661 717806 goto jump_too_big;
1662 717806 if (t->op != REOP_ALT) {
1663 15637 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
1664 717806 goto jump_too_big;
1665 }
1666
1667 /*
1668 * If the program is bigger than the REOP_JUMP offset range, then
1669 * we must check for alternates before this one that are part of
1670 * the same group, and fix up their jump offsets to target jumps
1671 * close enough to fit in a 16-bit unsigned offset immediate.
1672 */
1673 717806 if ((size_t)(pc - re->program) > OFFSET_MAX &&
1674 emitStateSP > emitStateStack) {
1675 EmitStateStackEntry *esp, *esp2;
1676 jsbytecode *alt, *jump;
1677 ptrdiff_t span, header;
1678
1679 0 esp2 = emitStateSP;
1680 0 alt = esp2->altHead;
1681 0 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
1682 0 if (esp->continueOp == REOP_ENDALT &&
1683 !esp->jumpToJumpFlag &&
1684 esp->nextTermFixup + OFFSET_LEN == alt &&
1685 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
1686 ? esp->endTermFixup
1687 : esp->nextTermFixup)) > OFFSET_MAX) {
1688 0 alt = esp->altHead;
1689 0 jump = esp->nextTermFixup;
1690
1691 /*
1692 * The span must be 1 less than the distance from
1693 * jump offset to jump offset, so we actually jump
1694 * to a REOP_JUMP bytecode, not to its offset!
1695 */
1696 for (;;) {
1697 JS_ASSERT(jump < esp2->nextTermFixup);
1698 0 span = esp2->nextTermFixup - jump - 1;
1699 0 if ((size_t)span <= OFFSET_MAX)
1700 0 break;
1701 do {
1702 0 if (--esp2 == esp)
1703 0 goto jump_too_big;
1704 0 } while (esp2->continueOp != REOP_ENDALT);
1705 }
1706
1707 0 jump[0] = JUMP_OFFSET_HI(span);
1708 0 jump[1] = JUMP_OFFSET_LO(span);
1709
1710 0 if (esp->continueNode->op != REOP_ALT) {
1711 /*
1712 * We must patch the offset at esp->endTermFixup
1713 * as well, for the REOP_ALTPREREQ{,2} opcodes.
1714 * If we're unlucky and endTermFixup is more than
1715 * OFFSET_MAX bytes from its target, we cheat by
1716 * jumping 6 bytes to the jump whose offset is at
1717 * esp->nextTermFixup, which has the same target.
1718 */
1719 0 jump = esp->endTermFixup;
1720 0 header = esp->nextTermFixup - jump;
1721 0 span += header;
1722 0 if ((size_t)span > OFFSET_MAX)
1723 0 span = header;
1724
1725 0 jump[0] = JUMP_OFFSET_HI(span);
1726 0 jump[1] = JUMP_OFFSET_LO(span);
1727 }
1728
1729 0 esp->jumpToJumpFlag = JS_TRUE;
1730 }
1731 }
1732 }
1733 break;
1734
1735 case REOP_ALT:
1736 JS_ASSERT(emitStateSP);
1737 702169 emitStateSP->altHead = pc - 1;
1738 702169 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1739 702169 pc += OFFSET_LEN;
1740 702169 emitStateSP->continueNode = t;
1741 702169 emitStateSP->continueOp = REOP_JUMP;
1742 702169 emitStateSP->jumpToJumpFlag = JS_FALSE;
1743 702169 ++emitStateSP;
1744 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1745 702169 t = t->kid;
1746 702169 op = t->op;
1747 702169 continue;
1748
1749 case REOP_FLAT:
1750 /*
1751 * Coalesce FLATs if possible and if it would not increase bytecode
1752 * beyond preallocated limit. The latter happens only when bytecode
1753 * size for coalesced string with offset p and length 2 exceeds 6
1754 * bytes preallocated for 2 single char nodes, i.e. when
1755 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
1756 * GetCompactIndexWidth(p) > 4.
1757 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
1758 * nodes strictly decreases bytecode size, the check has to be
1759 * done only for the first coalescing.
1760 */
1761 734293 if (t->kid &&
1762 GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
1763 {
1764 4077313 while (t->next &&
1765 t->next->op == REOP_FLAT &&
1766 (jschar*)t->kid + t->u.flat.length ==
1767 (jschar*)t->next->kid) {
1768 3343020 t->u.flat.length += t->next->u.flat.length;
1769 3343020 t->next = t->next->next;
1770 }
1771 }
1772 1468518 if (t->kid && t->u.flat.length > 1) {
1773 734225 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
1774 734225 pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
1775 734225 pc = WriteCompactIndex(pc, t->u.flat.length);
1776 68 } else if (t->u.flat.chr < 256) {
1777 68 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
1778 68 *pc++ = (jsbytecode) t->u.flat.chr;
1779 } else {
1780 0 pc[-1] = (state->flags & JSREG_FOLD)
1781 ? REOP_UCFLAT1i
1782 : REOP_UCFLAT1;
1783 0 SET_ARG(pc, t->u.flat.chr);
1784 0 pc += ARG_LEN;
1785 }
1786 break;
1787
1788 case REOP_LPAREN:
1789 JS_ASSERT(emitStateSP);
1790 15637 pc = WriteCompactIndex(pc, t->u.parenIndex);
1791 15637 emitStateSP->continueNode = t;
1792 15637 emitStateSP->continueOp = REOP_RPAREN;
1793 15637 ++emitStateSP;
1794 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1795 15637 t = (RENode *) t->kid;
1796 15637 op = t->op;
1797 15637 continue;
1798
1799 case REOP_RPAREN:
1800 15637 pc = WriteCompactIndex(pc, t->u.parenIndex);
1801 15637 break;
1802
1803 case REOP_BACKREF:
1804 0 pc = WriteCompactIndex(pc, t->u.parenIndex);
1805 0 break;
1806
1807 case REOP_ASSERT:
1808 JS_ASSERT(emitStateSP);
1809 0 emitStateSP->nextTermFixup = pc;
1810 0 pc += OFFSET_LEN;
1811 0 emitStateSP->continueNode = t;
1812 0 emitStateSP->continueOp = REOP_ASSERTTEST;
1813 0 ++emitStateSP;
1814 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1815 0 t = (RENode *) t->kid;
1816 0 op = t->op;
1817 0 continue;
1818
1819 case REOP_ASSERTTEST:
1820 case REOP_ASSERTNOTTEST:
1821 0 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1822 goto jump_too_big;
1823 break;
1824
1825 case REOP_ASSERT_NOT:
1826 JS_ASSERT(emitStateSP);
1827 0 emitStateSP->nextTermFixup = pc;
1828 0 pc += OFFSET_LEN;
1829 0 emitStateSP->continueNode = t;
1830 0 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
1831 0 ++emitStateSP;
1832 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1833 0 t = (RENode *) t->kid;
1834 0 op = t->op;
1835 0 continue;
1836
1837 case REOP_QUANT:
1838 JS_ASSERT(emitStateSP);
1839 0 if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
1840 0 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
1841 0 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
1842 0 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
1843 0 } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
1844 0 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
1845 } else {
1846 0 if (!t->u.range.greedy)
1847 0 pc[-1] = REOP_MINIMALQUANT;
1848 0 pc = WriteCompactIndex(pc, t->u.range.min);
1849 /*
1850 * Write max + 1 to avoid using size_t(max) + 1 bytes
1851 * for (uintN)-1 sentinel.
1852 */
1853 0 pc = WriteCompactIndex(pc, t->u.range.max + 1);
1854 }
1855 0 emitStateSP->nextTermFixup = pc;
1856 0 pc += OFFSET_LEN;
1857 0 emitStateSP->continueNode = t;
1858 0 emitStateSP->continueOp = REOP_ENDCHILD;
1859 0 ++emitStateSP;
1860 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1861 0 t = (RENode *) t->kid;
1862 0 op = t->op;
1863 0 continue;
1864
1865 case REOP_ENDCHILD:
1866 0 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1867 goto jump_too_big;
1868 break;
1869
1870 case REOP_CLASS:
1871 0 if (!t->u.ucclass.sense)
1872 0 pc[-1] = REOP_NCLASS;
1873 0 pc = WriteCompactIndex(pc, t->u.ucclass.index);
1874 0 charSet = &re->classList[t->u.ucclass.index];
1875 0 charSet->converted = JS_FALSE;
1876 0 charSet->length = t->u.ucclass.bmsize;
1877 0 charSet->u.src.startIndex = t->u.ucclass.startIndex;
1878 0 charSet->u.src.length = t->u.ucclass.kidlen;
1879 0 charSet->sense = t->u.ucclass.sense;
1880 break;
1881
1882 default:
1883 break;
1884 }
1885
1886 1499095 t = t->next;
1887 1499095 if (t) {
1888 31818 op = t->op;
1889 } else {
1890 1467277 if (emitStateSP == emitStateStack)
1891 1451249 break;
1892 1451249 --emitStateSP;
1893 1451249 t = emitStateSP->continueNode;
1894 1451249 op = emitStateSP->continueOp;
1895 }
1896 }
1897
1898 16028 cleanup:
1899 16028 if (emitStateStack)
1900 15637 JS_free(state->context, emitStateStack);
1901 16028 return pc;
1902
1903 0 jump_too_big:
1904 0 js_ReportCompileErrorNumber(state->context, state->tokenStream,
1905 JSREPORT_TS | JSREPORT_ERROR,
1906 JSMSG_REGEXP_TOO_COMPLEX);
1907 0 pc = NULL;
1908 0 goto cleanup;
1909 }
1910
1911
1912 JSRegExp *
1913 js_NewRegExp(JSContext *cx, JSTokenStream *ts,
1914 JSString *str, uintN flags, JSBool flat)
1915 16028 {
1916 JSRegExp *re;
1917 void *mark;
1918 CompilerState state;
1919 size_t resize;
1920 jsbytecode *endPC;
1921 uintN i;
1922 size_t len;
1923
1924 16028 re = NULL;
1925 16028 mark = JS_ARENA_MARK(&cx->tempPool);
1926 16028 len = JSSTRING_LENGTH(str);
1927
1928 16028 state.context = cx;
1929 16028 state.tokenStream = ts;
1930 16028 state.cpbegin = state.cp = JSSTRING_CHARS(str);
1931 16028 state.cpend = state.cp + len;
1932 16028 state.flags = flags;
1933 16028 state.parenCount = 0;
1934 16028 state.classCount = 0;
1935 16028 state.progLength = 0;
1936 16028 state.treeDepth = 0;
1937 16028 state.classBitmapsMem = 0;
1938 80140 for (i = 0; i < CLASS_CACHE_SIZE; i++)
1939 64112 state.classCache[i].start = NULL;
1940
1941 16028 if (len != 0 && flat) {
1942 0 state.result = NewRENode(&state, REOP_FLAT);
1943 0 state.result->u.flat.chr = *state.cpbegin;
1944 0 state.result->u.flat.length = len;
1945 0 state.result->kid = (void *) state.cpbegin;
1946 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
1947 0 state.progLength += 1 + GetCompactIndexWidth(0)
1948 + GetCompactIndexWidth(len);
1949 } else {
1950 16028 if (!ParseRegExp(&state))
1951 16028 goto out;
1952 }
1953 16028 resize = offsetof(JSRegExp, program) + state.progLength + 1;
1954 16028 re = (JSRegExp *) JS_malloc(cx, resize);
1955 16028 if (!re)
1956 16028 goto out;
1957
1958 16028 re->nrefs = 1;
1959 JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
1960 16028 re->classCount = state.classCount;
1961 16028 if (re->classCount) {
1962 0 re->classList = (RECharSet *)
1963 JS_malloc(cx, re->classCount * sizeof(RECharSet));
1964 0 if (!re->classList) {
1965 0 js_DestroyRegExp(cx, re);
1966 0 re = NULL;
1967 0 goto out;
1968 }
1969 } else {
1970 16028 re->classList = NULL;
1971 }
1972 16028 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
1973 16028 if (!endPC) {
1974 0 js_DestroyRegExp(cx, re);
1975 0 re = NULL;
1976 0 goto out;
1977 }
1978 16028 *endPC++ = REOP_END;
1979 /*
1980 * Check whether size was overestimated and shrink using realloc.
1981 * This is safe since no pointers to newly parsed regexp or its parts
1982 * besides re exist here.
1983 */
1984 16028 if ((size_t)(endPC - re->program) != state.progLength + 1) {
1985 JSRegExp *tmp;
1986 JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1);
1987 16011 resize = offsetof(JSRegExp, program) + (endPC - re->program);
1988 16011 tmp = (JSRegExp *) JS_realloc(cx, re, resize);
1989 16011 if (tmp)
1990 16011 re = tmp;
1991 }
1992
1993 16028 re->flags = flags;
1994 16028 re->cloneIndex = 0;
1995 16028 re->parenCount = state.parenCount;
1996 16028 re->source = str;
1997
1998 16028 out:
1999 16028 JS_ARENA_RELEASE(&cx->tempPool, mark);
2000 16028 return re;
2001 }
2002
2003 JSRegExp *
2004 js_NewRegExpOpt(JSContext *cx, JSTokenStream *ts,
2005 JSString *str, JSString *opt, JSBool flat)
2006 15620 {
2007 uintN flags;
2008 jschar *s;
2009 size_t i, n;
2010 char charBuf[2];
2011
2012 15620 flags = 0;
2013 15620 if (opt) {
2014 0 s = JSSTRING_CHARS(opt);
2015 0 for (i = 0, n = JSSTRING_LENGTH(opt); i < n; i++) {
2016 0 switch (s[i]) {
2017 case 'g':
2018 0 flags |= JSREG_GLOB;
2019 0 break;
2020 case 'i':
2021 0 flags |= JSREG_FOLD;
2022 0 break;
2023 case 'm':
2024 0 flags |= JSREG_MULTILINE;
2025 0 break;
2026 default:
2027 0 charBuf[0] = (char)s[i];
2028 0 charBuf[1] = '\0';
2029 0 js_ReportCompileErrorNumber(cx, ts,
2030 JSREPORT_TS | JSREPORT_ERROR,
2031 JSMSG_BAD_FLAG, charBuf);
2032 0 return NULL;
2033 }
2034 }
2035 }
2036 15620 return js_NewRegExp(cx, ts, str, flags, flat);
2037 }
2038
2039 /*
2040 * Save the current state of the match - the position in the input
2041 * text as well as the position in the bytecode. The state of any
2042 * parent expressions is also saved (preceding state).
2043 * Contents of parenCount parentheses from parenIndex are also saved.
2044 */
2045 static REBackTrackData *
2046 PushBackTrackState(REGlobalData *gData, REOp op,
2047 jsbytecode *target, REMatchState *x, const jschar *cp,
2048 size_t parenIndex, size_t parenCount)
2049 0 {
2050 size_t i;
2051 REBackTrackData *result =
2052 0 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
2053
2054 size_t sz = sizeof(REBackTrackData) +
2055 gData->stateStackTop * sizeof(REProgState) +
2056 0 parenCount * sizeof(RECapture);
2057
2058 0 ptrdiff_t btsize = gData->backTrackStackSize;
2059 ptrdiff_t btincr = ((char *)result + sz) -
2060 0 ((char *)gData->backTrackStack + btsize);
2061
2062 0 if (btincr > 0) {
2063 0 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
2064
2065 0 btincr = JS_ROUNDUP(btincr, btsize);
2066 0 JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
2067 &gData->pool, btsize, btincr);
2068 0 if (!gData->backTrackStack)
2069 0 return NULL;
2070 0 gData->backTrackStackSize = btsize + btincr;
2071 0 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
2072 }
2073 0 gData->backTrackSP = result;
2074 0 result->sz = gData->cursz;
2075 0 gData->cursz = sz;
2076
2077 0 result->backtrack_op = op;
2078 0 result->backtrack_pc = target;
2079 0 result->cp = cp;
2080 0 result->parenCount = parenCount;
2081
2082 0 result->saveStateStackTop = gData->stateStackTop;
2083 JS_ASSERT(gData->stateStackTop);
2084 0 memcpy(result + 1, gData->stateStack,
2085 sizeof(REProgState) * result->saveStateStackTop);
2086
2087 0 if (parenCount != 0) {
2088 0 result->parenIndex = parenIndex;
2089 0 memcpy((char *)(result + 1) +
2090 sizeof(REProgState) * result->saveStateStackTop,
2091 &x->parens[parenIndex],
2092 sizeof(RECapture) * parenCount);
2093 0 for (i = 0; i != parenCount; i++)
2094 0 x->parens[parenIndex + i].index = -1;
2095 }
2096
2097 0 return result;
2098 }
2099
2100
2101 /*
2102 * Consecutive literal characters.
2103 */
2104 #if 0
2105 static REMatchState *
2106 FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2107 size_t length)
2108 {
2109 size_t i;
2110 if (length > gData->cpend - x->cp)
2111 return NULL;
2112 for (i = 0; i != length; i++) {
2113 if (matchChars[i] != x->cp[i])
2114 return NULL;
2115 }
2116 x->cp += length;
2117 return x;
2118 }
2119 #endif
2120
2121 static REMatchState *
2122 FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2123 size_t length)
2124 0 {
2125 size_t i;
2126 JS_ASSERT(gData->cpend >= x->cp);
2127 0 if (length > (size_t)(gData->cpend - x->cp))
2128 0 return NULL;
2129 0 for (i = 0; i != length; i++) {
2130 0 if (upcase(matchChars[i]) != upcase(x->cp[i]))
2131 0 return NULL;
2132 }
2133 0 x->cp += length;
2134 0 return x;
2135 }
2136
2137 /*
2138 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2139 * 2. If E is not a character then go to step 6.
2140 * 3. Let ch be E's character.
2141 * 4. Let A be a one-element RECharSet containing the character ch.
2142 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2143 * 6. E must be an integer. Let n be that integer.
2144 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2145 * 8. Return an internal Matcher closure that takes two arguments, a State x
2146 * and a Continuation c, and performs the following:
2147 * 1. Let cap be x's captures internal array.
2148 * 2. Let s be cap[n].
2149 * 3. If s is undefined, then call c(x) and return its result.
2150 * 4. Let e be x's endIndex.
2151 * 5. Let len be s's length.
2152 * 6. Let f be e+len.
2153 * 7. If f>InputLength, return failure.
2154 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2155 * such that Canonicalize(s[i]) is not the same character as
2156 * Canonicalize(Input [e+i]), then return failure.
2157 * 9. Let y be the State (f, cap).
2158 * 10. Call c(y) and return its result.
2159 */
2160 static REMatchState *
2161 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2162 0 {
2163 size_t len, i;
2164 const jschar *parenContent;
2165 0 RECapture *cap = &x->parens[parenIndex];
2166
2167 0 if (cap->index == -1)
2168 0 return x;
2169
2170 0 len = cap->length;
2171 0 if (x->cp + len > gData->cpend)
2172 0 return NULL;
2173
2174 0 parenContent = &gData->cpbegin[cap->index];
2175 0 if (gData->regexp->flags & JSREG_FOLD) {
2176 0 for (i = 0; i < len; i++) {
2177 0 if (upcase(parenContent[i]) != upcase(x->cp[i]))
2178 0 return NULL;
2179 }
2180 } else {
2181 0 for (i = 0; i < len; i++) {
2182 0 if (parenContent[i] != x->cp[i])
2183 0 return NULL;
2184 }
2185 }
2186 0 x->cp += len;
2187 0 return x;
2188 }
2189
2190
2191 /* Add a single character to the RECharSet */
2192 static void
2193 AddCharacterToCharSet(RECharSet *cs, jschar c)
2194 0 {
2195 0 uintN byteIndex = (uintN)(c >> 3);
2196 JS_ASSERT(c <= cs->length);
2197 0 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2198 }
2199
2200
2201 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2202 static void
2203 AddCharacterRangeToCharSet(RECharSet *cs, jschar c1, jschar c2)
2204 0 {
2205 uintN i;
2206
2207 0 uintN byteIndex1 = (uintN)(c1 >> 3);
2208 0 uintN byteIndex2 = (uintN)(c2 >> 3);
2209
2210 JS_ASSERT((c2 <= cs->length) && (c1 <= c2));
2211
2212 0 c1 &= 0x7;
2213 0 c2 &= 0x7;
2214
2215 0 if (byteIndex1 == byteIndex2) {
2216 0 cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
2217 } else {
2218 0 cs->u.bits[byteIndex1] |= 0xFF << c1;
2219 0 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2220 0 cs->u.bits[i] = 0xFF;
2221 0 cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
2222 }
2223 }
2224
2225 /* Compile the source of the class into a RECharSet */
2226 static JSBool
2227 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2228 0 {
2229 const jschar *src, *end;
2230 0 JSBool inRange = JS_FALSE;
2231 0 jschar rangeStart = 0;
2232 uintN byteLength, n;
2233 jschar c, thisCh;
2234 intN nDigits, i;
2235
2236 JS_ASSERT(!charSet->converted);
2237 /*
2238 * Assert that startIndex and length points to chars inside [] inside
2239 * source string.
2240 */
2241 JS_ASSERT(1 <= charSet->u.src.startIndex);
2242 JS_ASSERT(charSet->u.src.startIndex
2243 < JSSTRING_LENGTH(gData->regexp->source));
2244 JS_ASSERT(charSet->u.src.length <= JSSTRING_LENGTH(gData->regexp->source)
2245 - 1 - charSet->u.src.startIndex);
2246
2247 0 charSet->converted = JS_TRUE;
2248 0 src = JSSTRING_CHARS(gData->regexp->source) + charSet->u.src.startIndex;
2249 0 end = src + charSet->u.src.length;
2250 JS_ASSERT(src[-1] == '[');
2251 JS_ASSERT(end[0] == ']');
2252
2253 0 byteLength = (charSet->length >> 3) + 1;
2254 0 charSet->u.bits = (uint8 *)JS_malloc(gData->cx, byteLength);
2255 0 if (!charSet->u.bits)
2256 0 return JS_FALSE;
2257 0 memset(charSet->u.bits, 0, byteLength);
2258
2259 0 if (src == end)
2260 0 return JS_TRUE;
2261
2262 0 if (*src == '^') {
2263 JS_ASSERT(charSet->sense == JS_FALSE);
2264 0 ++src;
2265 } else {
2266 JS_ASSERT(charSet->sense == JS_TRUE);
2267 }
2268
2269 0 while (src != end) {
2270 0 switch (*src) {
2271 case '\\':
2272 0 ++src;
2273 0 c = *src++;
2274 0 switch (c) {
2275 case 'b':
2276 0 thisCh = 0x8;
2277 0 break;
2278 case 'f':
2279 0 thisCh = 0xC;
2280 0 break;
2281 case 'n':
2282 0 thisCh = 0xA;
2283 0 break;
2284 case 'r':
2285 0 thisCh = 0xD;
2286 0 break;
2287 case 't':
2288 0 thisCh = 0x9;
2289 0 break;
2290 case 'v':
2291 0 thisCh = 0xB;
2292 0 break;
2293 case 'c':
2294 0 if (src + 1 < end && JS_ISWORD(src[1])) {
2295 0 thisCh = (jschar)(*src++ & 0x1F);
2296 } else {
2297 0 --src;
2298 0 thisCh = '\\';
2299 }
2300 break;
2301 case 'x':
2302 0 nDigits = 2;
2303 0 goto lexHex;
2304 case 'u':
2305 0 nDigits = 4;
2306 0 lexHex:
2307 0 n = 0;
2308 0 for (i = 0; (i < nDigits) && (src < end); i++) {
2309 uintN digit;
2310 0 c = *src++;
2311 0 if (!isASCIIHexDigit(c, &digit)) {
2312 /*
2313 * Back off to accepting the original '\'
2314 * as a literal
2315 */
2316 0 src -= i + 1;
2317 0 n = '\\';
2318 0 break;
2319 }
2320 0 n = (n << 4) | digit;
2321 }
2322 0 thisCh = (jschar)n;
2323 0 break;
2324 case '0':
2325 case '1':
2326 case '2':
2327 case '3':
2328 case '4':
2329 case '5':
2330 case '6':
2331 case '7':
2332 /*
2333 * This is a non-ECMA extension - decimal escapes (in this
2334 * case, octal!) are supposed to be an error inside class
2335 * ranges, but supported here for backwards compatibility.
2336 */
2337 0 n = JS7_UNDEC(c);
2338 0 c = *src;
2339 0 if ('0' <= c && c <= '7') {
2340 0 src++;
2341 0 n = 8 * n + JS7_UNDEC(c);
2342 0 c = *src;
2343 0 if ('0' <= c && c <= '7') {
2344 0 src++;
2345 0 i = 8 * n + JS7_UNDEC(c);
2346 0 if (i <= 0377)
2347 0 n = i;
2348 else
2349 0 src--;
2350 }
2351 }
2352 0 thisCh = (jschar)n;
2353 0 break;
2354
2355 case 'd':
2356 0 AddCharacterRangeToCharSet(charSet, '0', '9');
2357 0 continue; /* don't need range processing */
2358 case 'D':
2359 0 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2360 0 AddCharacterRangeToCharSet(charSet,
2361 (jschar)('9' + 1),
2362 (jschar)charSet->length);
2363 0 continue;
2364 case 's':
2365 0 for (i = (intN)charSet->length; i >= 0; i--)
2366 0 if (JS_ISSPACE(i))
2367 0 AddCharacterToCharSet(charSet, (jschar)i);
2368 continue;
2369 case 'S':
2370 0 for (i = (intN)charSet->length; i >= 0; i--)
2371 0 if (!JS_ISSPACE(i))
2372 0 AddCharacterToCharSet(charSet, (jschar)i);
2373 continue;
2374 case 'w':
2375 0 for (i = (intN)charSet->length; i >= 0; i--)
2376 0 if (JS_ISWORD(i))
2377 0 AddCharacterToCharSet(charSet, (jschar)i);
2378 continue;
2379 case 'W':
2380 0 for (i = (intN)charSet->length; i >= 0; i--)
2381 0 if (!JS_ISWORD(i))
2382 0 AddCharacterToCharSet(charSet, (jschar)i);
2383 continue;
2384 default:
2385 0 thisCh = c;
2386 break;
2387
2388 }
2389 break;
2390
2391 default:
2392 0 thisCh = *src++;
2393 break;
2394
2395 }
2396 0 if (inRange) {
2397 0 if (gData->regexp->flags & JSREG_FOLD) {
2398 0 AddCharacterRangeToCharSet(charSet, upcase(rangeStart),
2399 upcase(thisCh));
2400 0 AddCharacterRangeToCharSet(charSet, downcase(rangeStart),
2401 downcase(thisCh));
2402 } else {
2403 0 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2404 }
2405 0 inRange = JS_FALSE;
2406 } else {
2407 0 if (gData->regexp->flags & JSREG_FOLD) {
2408 0 AddCharacterToCharSet(charSet, upcase(thisCh));
2409 0 AddCharacterToCharSet(charSet, downcase(thisCh));
2410 } else {
2411 0 AddCharacterToCharSet(charSet, thisCh);
2412 }
2413 0 if (src < end - 1) {
2414 0 if (*src == '-') {
2415 0 ++src;
2416 0 inRange = JS_TRUE;
2417 0 rangeStart = thisCh;
2418 }
2419 }
2420 }
2421 }
2422 0 return JS_TRUE;
2423 }
2424
2425 void
2426 js_DestroyRegExp(JSContext *cx, JSRegExp *re)
2427 56441 {
2428 56441 if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
2429 16028 if (re->classList) {
2430 uintN i;
2431 0 for (i = 0; i < re->classCount; i++) {
2432 0 if (re->classList[i].converted)
2433 0 JS_free(cx, re->classList[i].u.bits);
2434 0 re->classList[i].u.bits = NULL;
2435 }
2436 0 JS_free(cx, re->classList);
2437 }
2438 16028 JS_free(cx, re);
2439 }
2440 }
2441
2442 static JSBool
2443 ReallocStateStack(REGlobalData *gData)
2444 0 {
2445 0 size_t limit = gData->stateStackLimit;
2446 0 size_t sz = sizeof(REProgState) * limit;
2447
2448 0 JS_ARENA_GROW_CAST(gData->stateStack, REProgState *, &gData->pool, sz, sz);
2449 0 if (!gData->stateStack) {
2450 0 gData->ok = JS_FALSE;
2451 0 return JS_FALSE;
2452 }
2453 0 gData->stateStackLimit = limit + limit;
2454 0 return JS_TRUE;
2455 }
2456
2457 #define PUSH_STATE_STACK(data) \
2458 JS_BEGIN_MACRO \
2459 ++(data)->stateStackTop; \
2460 if ((data)->stateStackTop == (data)->stateStackLimit && \
2461 !ReallocStateStack((data))) { \
2462 return NULL; \
2463 } \
2464 JS_END_MACRO
2465
2466 /*
2467 * Apply the current op against the given input to see if it's going to match
2468 * or fail. Return false if we don't get a match, true if we do and update the
2469 * state of the input and pc if the update flag is true.
2470 */
2471 static REMatchState *
2472 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2473 jsbytecode **startpc, JSBool update)
2474 1548365 {
2475 1548365 REMatchState *result = NULL;
2476 jschar matchCh;
2477 size_t parenIndex;
2478 size_t offset, length, index;
2479 1548365 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2480 jschar *source;
2481 1548365 const jschar *startcp = x->cp;
2482 jschar ch;
2483 RECharSet *charSet;
2484
2485 1548365 switch (op) {
2486 case REOP_BOL:
2487 318395 if (x->cp != gData->cpbegin) {
2488 297467 if (!gData->cx->regExpStatics.multiline &&
2489 !(gData->regexp->flags & JSREG_MULTILINE)) {
2490 0 break;
2491 }
2492 0 if (!RE_IS_LINE_TERM(x->cp[-1]))
2493 20928 break;
2494 }
2495 20928 result = x;
2496 20928 break;
2497 case REOP_EOL:
2498 0 if (x->cp != gData->cpend) {
2499 0 if (!gData->cx->regExpStatics.multiline &&
2500 !(gData->regexp->flags & JSREG_MULTILINE)) {
2501 0 break;
2502 }
2503 0 if (!RE_IS_LINE_TERM(*x->cp))
2504 0 break;
2505 }
2506 0 result = x;
2507 0 break;
2508 case REOP_WBDRY:
2509 0 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2510 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2511 0 result = x;
2512 }
2513 break;
2514 case REOP_WNONBDRY:
2515 0 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2516 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2517 0 result = x;
2518 }
2519 break;
2520 case REOP_DOT:
2521 38 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2522 38 result = x;
2523 38 result->cp++;
2524 }
2525 break;
2526 case REOP_DIGIT:
2527 0 if (x->cp != gData->cpend && JS_ISDIGIT(*x->cp)) {
2528 0 result = x;
2529 0 result->cp++;
2530 }
2531 break;
2532 case REOP_NONDIGIT:
2533 0 if (x->cp != gData->cpend && !JS_ISDIGIT(*x->cp)) {
2534 0 result = x;
2535 0 result->cp++;
2536 }
2537 break;
2538 case REOP_ALNUM:
2539 0 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2540 0 result = x;
2541 0 result->cp++;
2542 }
2543 break;
2544 case REOP_NONALNUM:
2545 0 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2546 0 result = x;
2547 0 result->cp++;
2548 }
2549 break;
2550 case REOP_SPACE:
2551 0 if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
2552 0 result = x;
2553 0 result->cp++;
2554 }
2555 break;
2556 case REOP_NONSPACE:
2557 0 if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
2558 0 result = x;
2559 0 result->cp++;
2560 }
2561 break;
2562 case REOP_BACKREF:
2563 0 pc = ReadCompactIndex(pc, &parenIndex);
2564 JS_ASSERT(parenIndex < gData->regexp->parenCount);
2565 0 result = BackrefMatcher(gData, x, parenIndex);
2566 0 break;
2567 case REOP_FLAT:
2568 1224634 pc = ReadCompactIndex(pc, &offset);
2569 JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2570 1224634 pc = ReadCompactIndex(pc, &length);
2571 JS_ASSERT(1 <= length);
2572 JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2573 1224634 if (length <= (size_t)(gData->cpend - x->cp)) {
2574 994326 source = JSSTRING_CHARS(gData->regexp->source) + offset;
2575 1083296 for (index = 0; index != length; index++) {
2576 1068931 if (source[index] != x->cp[index])
2577 979961 return NULL;
2578 }
2579 14365 x->cp += length;
2580 14365 result = x;
2581 }
2582 break;
2583 case REOP_FLAT1:
2584 5298 matchCh = *pc++;
2585 5298 if (x->cp != gData->cpend && *x->cp == matchCh) {
2586 5298 result = x;
2587 5298 result->cp++;
2588 }
2589 break;
2590 case REOP_FLATi:
2591 0 pc = ReadCompactIndex(pc, &offset);
2592 JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2593 0 pc = ReadCompactIndex(pc, &length);
2594 JS_ASSERT(1 <= length);
2595 JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2596 0 source = JSSTRING_CHARS(gData->regexp->source);
2597 0 result = FlatNIMatcher(gData, x, source + offset, length);
2598 0 break;
2599 case REOP_FLAT1i:
2600 0 matchCh = *pc++;
2601 0 if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2602 0 result = x;
2603 0 result->cp++;
2604 }
2605 break;
2606 case REOP_UCFLAT1:
2607 0 matchCh = GET_ARG(pc);
2608 0 pc += ARG_LEN;
2609 0 if (x->cp != gData->cpend && *x->cp == matchCh) {
2610 0 result = x;
2611 0 result->cp++;
2612 }
2613 break;
2614 case REOP_UCFLAT1i:
2615 0 matchCh = GET_ARG(pc);
2616 0 pc += ARG_LEN;
2617 0 if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2618 0 result = x;
2619 0 result->cp++;
2620 }
2621 break;
2622 case REOP_CLASS:
2623 0 pc = ReadCompactIndex(pc, &index);
2624 JS_ASSERT(index < gData->regexp->classCount);
2625 0 if (x->cp != gData->cpend) {
2626 0 charSet = &gData->regexp->classList[index];
2627 JS_ASSERT(charSet->converted);
2628 0 ch = *x->cp;
2629 0 index = ch >> 3;
2630 0 if (charSet->length != 0 &&
2631 ch <= charSet->length &&
2632 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2633 0 result = x;
2634 0 result->cp++;
2635 }
2636 }
2637 break;
2638 case REOP_NCLASS:
2639 0 pc = ReadCompactIndex(pc, &index);
2640 JS_ASSERT(index < gData->regexp->classCount);
2641 0 if (x->cp != gData->cpend) {
2642 0 charSet = &gData->regexp->classList[index];
2643 JS_ASSERT(charSet->converted);
2644 0 ch = *x->cp;
2645 0 index = ch >> 3;
2646 0 if (charSet->length == 0 ||
2647 ch > charSet->length ||
2648 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2649 0 result = x;
2650 0 result->cp++;
2651 }
2652 }
2653 break;
2654 default:
2655 JS_ASSERT(JS_FALSE);
2656 }
2657 568404 if (result) {
2658 40629 if (update)
2659 40629 *startpc = pc;
2660 else
2661 0 x->cp = startcp;
2662 40629 return result;
2663 }
2664 527775 x->cp = startcp;
2665 527775 return NULL;
2666 }
2667
2668 static REMatchState *
2669 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2670 65176 {
2671 65176 REMatchState *result = NULL;
2672 REBackTrackData *backTrackData;
2673 jsbytecode *nextpc;
2674 REOp nextop;
2675 RECapture *cap;
2676 REProgState *curState;
2677 const jschar *startcp;
2678 size_t parenIndex, k;
2679 65176 size_t parenSoFar = 0;
2680
2681 jschar matchCh1, matchCh2;
2682 RECharSet *charSet;
2683
2684 JSBool anchor;
2685 65176 jsbytecode *pc = gData->regexp->program;
2686 65176 REOp op = (REOp) *pc++;
2687
2688 /*
2689 * If the first node is a simple match, step the index into the string
2690 * until that match is made, or fail if it can't be found at all.
2691 */
2692 65176 if (REOP_IS_SIMPLE(op)) {
2693 65176 anchor = JS_FALSE;
2694 926697 while (x->cp <= gData->cpend) {
2695 823314 nextpc = pc; /* reset back to start each time */
2696 823314 result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
2697 823314 if (result) {
2698 26969 anchor = JS_TRUE;
2699 26969 x = result;
2700 26969 pc = nextpc; /* accept skip to next opcode */
2701 26969 op = (REOp) *pc++;
2702 26969 break;
2703 }
2704 796345 gData->skipped++;
2705 796345 x->cp++;
2706 }
2707 65176 if (!anchor)
2708 38207 return NULL;
2709 }
2710
2711 for (;;) {
2712 758475 if (REOP_IS_SIMPLE(op)) {
2713 22856 result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
2714 } else {
2715 735619 curState = &gData->stateStack[gData->stateStackTop];
2716 735619 switch (op) {
2717 case REOP_EMPTY:
2718 0 result = x;
2719 0 break;
2720
2721 case REOP_ALTPREREQ2:
2722 0 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2723 0 pc += ARG_LEN;
2724 0 matchCh2 = GET_ARG(pc);
2725 0 pc += ARG_LEN;
2726 0 k = GET_ARG(pc);
2727 0 pc += ARG_LEN;
2728
2729 0 if (x->cp != gData->cpend) {
2730 0 if (*x->cp == matchCh2)
2731 0 goto doAlt;
2732
2733 0 charSet = &gData->regexp->classList[k];
2734 0 if (!charSet->converted)
2735 0 if (!ProcessCharSet(gData, charSet))
2736 0 return NULL;
2737 0 matchCh1 = *x->cp;
2738 0 k = matchCh1 >> 3;
2739 0 if ((charSet->length == 0 ||
2740 matchCh1 > charSet->length ||
2741 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2742 charSet->sense) {
2743 0 goto doAlt;
2744 }
2745 }
2746 0 result = NULL;
2747 0 break;
2748
2749 case REOP_ALTPREREQ:
2750 15615 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2751 15615 pc += ARG_LEN;
2752 15615 matchCh1 = GET_ARG(pc);
2753 15615 pc += ARG_LEN;
2754 15615 matchCh2 = GET_ARG(pc);
2755 15615 pc += ARG_LEN;
2756 15615 if (x->cp == gData->cpend ||
2757 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2758 15567 result = NULL;
2759 15567 break;
2760 }
2761 /* else false thru... */
2762
2763 case REOP_ALT:
2764 702195 doAlt:
2765 702195 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2766 702195 pc += ARG_LEN; /* start of this alternate */
2767 702195 curState->parenSoFar = parenSoFar;
2768 702195 PUSH_STATE_STACK(gData);
2769 702195 op = (REOp) *pc++;
2770 702195 startcp = x->cp;
2771 702195 if (REOP_IS_SIMPLE(op)) {
2772 702195 if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
2773 702195 op = (REOp) *nextpc++;
2774 702195 pc = nextpc;
2775 702195 continue;
2776 }
2777 0 result = x;
2778 0 op = (REOp) *pc++;
2779 }
2780 0 nextop = (REOp) *nextpc++;
2781 0 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2782 0 return NULL;
2783 continue;
2784
2785 /*
2786 * Occurs at (succesful) end of REOP_ALT,
2787 */
2788 case REOP_JUMP:
2789 0 --gData->stateStackTop;
2790 0 pc += GET_OFFSET(pc);
2791 0 op = (REOp) *pc++;
2792 0 continue;
2793
2794 /*
2795 * Occurs at last (succesful) end of REOP_ALT,
2796 */
2797 case REOP_ENDALT:
2798 24 --gData->stateStackTop;
2799 24 op = (REOp) *pc++;
2800 24 continue;
2801
2802 case REOP_LPAREN:
2803 15615 pc = ReadCompactIndex(pc, &parenIndex);
2804 JS_ASSERT(parenIndex < gData->regexp->parenCount);
2805 15615 if (parenIndex + 1 > parenSoFar)
2806 15615 parenSoFar = parenIndex + 1;
2807 15615 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2808 15615 x->parens[parenIndex].length = 0;
2809 15615 op = (REOp) *pc++;
2810 15615 continue;
2811
2812 case REOP_RPAREN:
2813 12 pc = ReadCompactIndex(pc, &parenIndex);
2814 JS_ASSERT(parenIndex < gData->regexp->parenCount);
2815 12 cap = &x->parens[parenIndex];
2816
2817 /*
2818 * FIXME: https://bugzilla.mozilla.org/show_bug.cgi?id=346090
2819 * This wallpaper prevents a case where we somehow took a step
2820 * backward in input while minimally-matching an empty string.
2821 */
2822 12 if (x->cp < gData->cpbegin + cap->index)
2823 0 cap->index = -1;
2824 12 cap->length = x->cp - (gData->cpbegin + cap->index);
2825 12 op = (REOp) *pc++;
2826 12 continue;
2827
2828 case REOP_ASSERT:
2829 0 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2830 0 pc += ARG_LEN; /* start of ASSERT child */
2831 0 op = (REOp) *pc++;
2832 0 if (REOP_IS_SIMPLE(op) &&
2833 !SimpleMatch(gData, x, op, &pc, JS_FALSE)) {
2834 0 result = NULL;
2835 0 break;
2836 }
2837 0 curState->u.assertion.top =
2838 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2839 0 curState->u.assertion.sz = gData->cursz;
2840 0 curState->index = x->cp - gData->cpbegin;
2841 0 curState->parenSoFar = parenSoFar;
2842 0 PUSH_STATE_STACK(gData);
2843 0 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2844 nextpc, x, x->cp, 0, 0)) {
2845 0 return NULL;
2846 }
2847 continue;
2848
2849 case REOP_ASSERT_NOT:
2850 0 nextpc = pc + GET_OFFSET(pc);
2851 0 pc += ARG_LEN;
2852 0 op = (REOp) *pc++;
2853 0 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2854 SimpleMatch(gData, x, op, &pc, JS_FALSE) &&
2855 pc == nextpc) {
2856 0 result = NULL;
2857 0 break;
2858 }
2859 0 curState->u.assertion.top
2860 = (char *)gData->backTrackSP -
2861 (char *)gData->backTrackStack;
2862 0 curState->u.assertion.sz = gData->cursz;
2863 0 curState->index = x->cp - gData->cpbegin;
2864 0 curState->parenSoFar = parenSoFar;
2865 0 PUSH_STATE_STACK(gData);
2866 0 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2867 nextpc, x, x->cp, 0, 0))
2868 0 return NULL;
2869 continue;
2870
2871 case REOP_ASSERTTEST:
2872 0 --gData->stateStackTop;
2873 0 --curState;
2874 0 x->cp = gData->cpbegin + curState->index;
2875 0 gData->backTrackSP =
2876 (REBackTrackData *) ((char *)gData->backTrackStack +
2877 curState->u.assertion.top);
2878 0 gData->cursz = curState->u.assertion.sz;
2879 0 if (result)
2880 0 result = x;
2881 break;
2882
2883 case REOP_ASSERTNOTTEST:
2884 0 --gData->stateStackTop;
2885 0 --curState;
2886 0 x->cp = gData->cpbegin + curState->index;
2887 0 gData->backTrackSP =
2888 (REBackTrackData *) ((char *)gData->backTrackStack +
2889 curState->u.assertion.top);
2890 0 gData->cursz = curState->u.assertion.sz;
2891 0 result = (!result) ? x : NULL;
2892 0 break;
2893
2894 case REOP_END:
2895 2206 if (x)
2896 2206 return x;
2897 break;
2898
2899 case REOP_STAR:
2900 0 curState->u.quantifier.min = 0;
2901 0 curState->u.quantifier.max = (uintN)-1;
2902 0 goto quantcommon;
2903 case REOP_PLUS:
2904 0 curState->u.quantifier.min = 1;
2905 0 curState->u.quantifier.max = (uintN)-1;
2906 0 goto quantcommon;
2907 case REOP_OPT:
2908 0 curState->u.quantifier.min = 0;
2909 0 curState->u.quantifier.max = 1;
2910 0 goto quantcommon;
2911 case REOP_QUANT:
2912 0 pc = ReadCompactIndex(pc, &k);
2913 0 curState->u.quantifier.min = k;
2914 0 pc = ReadCompactIndex(pc, &k);
2915 /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
2916 0 curState->u.quantifier.max = k - 1;
2917 JS_ASSERT(curState->u.quantifier.min
2918 <= curState->u.quantifier.max);
2919 0 quantcommon:
2920 0 if (curState->u.quantifier.max == 0) {
2921 0 pc = pc + GET_OFFSET(pc);
2922 0 op = (REOp) *pc++;
2923 0 result = x;
2924 0 continue;
2925 }
2926 /* Step over <next> */
2927 0 nextpc = pc + ARG_LEN;
2928 0 op = (REOp) *nextpc++;
2929 0 startcp = x->cp;
2930 0 if (REOP_IS_SIMPLE(op)) {
2931 0 if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
2932 0 if (curState->u.quantifier.min == 0)
2933 0 result = x;
2934 else
2935 0 result = NULL;
2936 0 pc = pc + GET_OFFSET(pc);
2937 0 break;
2938 }
2939 0 op = (REOp) *nextpc++;
2940 0 result = x;
2941 }
2942 0 curState->index = startcp - gData->cpbegin;
2943 0 curState->continue_op = REOP_REPEAT;
2944 0 curState->continue_pc = pc;
2945 0 curState->parenSoFar = parenSoFar;
2946 0 PUSH_STATE_STACK(gData);
2947 0 if (curState->u.quantifier.min == 0 &&
2948 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2949 0, 0)) {
2950 0 return NULL;
2951 }
2952 0 pc = nextpc;
2953 0 continue;
2954
2955 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2956 0 pc = curState[-1].continue_pc;
2957 0 op = curState[-1].continue_op;
2958 0 continue;
2959
2960 case REOP_REPEAT:
2961 0 --curState;
2962 do {
2963 0 --gData->stateStackTop;
2964 0 if (!result) {
2965 /* Failed, see if we have enough children. */
2966 0 if (curState->u.quantifier.min == 0)
2967 goto repeatDone;
2968 goto break_switch;
2969 }
2970 0 if (curState->u.quantifier.min == 0 &&
2971 x->cp == gData->cpbegin + curState->index) {
2972 /* matched an empty string, that'll get us nowhere */
2973 0 result = NULL;
2974 0 goto break_switch;
2975 }
2976 0 if (curState->u.quantifier.min != 0)
2977 0 curState->u.quantifier.min--;
2978 0 if (curState->u.quantifier.max != (uintN) -1)
2979 0 curState->u.quantifier.max--;
2980 0 if (curState->u.quantifier.max == 0)
2981 0 goto repeatDone;
2982 0 nextpc = pc + ARG_LEN;
2983 0 nextop = (REOp) *nextpc;
2984 0 startcp = x->cp;
2985 0 if (REOP_IS_SIMPLE(nextop)) {
2986 0 nextpc++;
2987 0 if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
2988 0 if (curState->u.quantifier.min == 0)
2989 0 goto repeatDone;
2990 0 result = NULL;
2991 0 goto break_switch;
2992 }
2993 0 result = x;
2994 }
2995 0 curState->index = startcp - gData->cpbegin;
2996 0 PUSH_STATE_STACK(gData);
2997 0 if (curState->u.quantifier.min == 0 &&
2998 !PushBackTrackState(gData, REOP_REPEAT,
2999 pc, x, startcp,
3000 curState->parenSoFar,
3001 parenSoFar -
3002 curState->parenSoFar)) {
3003 0 return NULL;
3004 }
3005 0 } while (*nextpc == REOP_ENDCHILD);
3006 0 pc = nextpc;
3007 0 op = (REOp) *pc++;
3008 0 parenSoFar = curState->parenSoFar;
3009 0 continue;
3010
3011 0 repeatDone:
3012 0 result = x;
3013 0 pc += GET_OFFSET(pc);
3014 0 goto break_switch;
3015
3016 case REOP_MINIMALSTAR:
3017 0 curState->u.quantifier.min = 0;
3018 0 curState->u.quantifier.max = (uintN)-1;
3019 0 goto minimalquantcommon;
3020 case REOP_MINIMALPLUS:
3021 0 curState->u.quantifier.min = 1;
3022 0 curState->u.quantifier.max = (uintN)-1;
3023 0 goto minimalquantcommon;
3024 case REOP_MINIMALOPT:
3025 0 curState->u.quantifier.min = 0;
3026 0 curState->u.quantifier.max = 1;
3027 0 goto minimalquantcommon;
3028 case REOP_MINIMALQUANT:
3029 0 pc = ReadCompactIndex(pc, &k);
3030 0 curState->u.quantifier.min = k;
3031 0 pc = ReadCompactIndex(pc, &k);
3032 /* See REOP_QUANT comments about k - 1. */
3033 0 curState->u.quantifier.max = k - 1;
3034 JS_ASSERT(curState->u.quantifier.min
3035 <= curState->u.quantifier.max);
3036 0 minimalquantcommon:
3037 0 curState->index = x->cp - gData->cpbegin;
3038 0 curState->parenSoFar = parenSoFar;
3039 0 PUSH_STATE_STACK(gData);
3040 0 if (curState->u.quantifier.min != 0) {
3041 0 curState->continue_op = REOP_MINIMALREPEAT;
3042 0 curState->continue_pc = pc;
3043 /* step over <next> */
3044 0 pc += OFFSET_LEN;
3045 0 op = (REOp) *pc++;
3046 } else {
3047 0 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3048 pc, x, x->cp, 0, 0)) {
3049 0 return NULL;
3050 }
3051 0 --gData->stateStackTop;
3052 0 pc = pc + GET_OFFSET(pc);
3053 0 op = (REOp) *pc++;
3054 }
3055 continue;
3056
3057 case REOP_MINIMALREPEAT:
3058 0 --gData->stateStackTop;
3059 0 --curState;
3060
3061 0 if (!result) {
3062 /*
3063 * Non-greedy failure - try to consume another child.
3064 */
3065 0 if (curState->u.quantifier.max == (uintN) -1 ||
3066 curState->u.quantifier.max > 0) {
3067 0 curState->index = x->cp - gData->cpbegin;
3068 0 curState->continue_op = REOP_MINIMALREPEAT;
3069 0 curState->continue_pc = pc;
3070 0 pc += ARG_LEN;
3071 0 for (k = curState->parenSoFar; k < parenSoFar; k++)
3072 0 x->parens[k].index = -1;
3073 0 PUSH_STATE_STACK(gData);
3074 0 op = (REOp) *pc++;
3075 0 continue;
3076 }
3077 /* Don't need to adjust pc since we're going to pop. */
3078 break;
3079 }
3080 0 if (curState->u.quantifier.min == 0 &&
3081 x->cp == gData->cpbegin + curState->index) {
3082 /* Matched an empty string, that'll get us nowhere. */
3083 0 result = NULL;
3084 0 break;
3085 }
3086 0 if (curState->u.quantifier.min != 0)
3087 0 curState->u.quantifier.min--;
3088 0 if (curState->u.quantifier.max != (uintN) -1)
3089 0 curState->u.quantifier.max--;
3090 0 if (curState->u.quantifier.min != 0) {
3091 0 curState->continue_op = REOP_MINIMALREPEAT;
3092 0 curState->continue_pc = pc;
3093 0 pc += ARG_LEN;
3094 0 for (k = curState->parenSoFar; k < parenSoFar; k++)
3095 0 x->parens[k].index = -1;
3096 0 curState->index = x->cp - gData->cpbegin;
3097 0 PUSH_STATE_STACK(gData);
3098 0 op = (REOp) *pc++;
3099 0 continue;
3100 }
3101 0 curState->index = x->cp - gData->cpbegin;
3102 0 curState->parenSoFar = parenSoFar;
3103 0 PUSH_STATE_STACK(gData);
3104 0 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3105 pc, x, x->cp,
3106 curState->parenSoFar,
3107 parenSoFar - curState->parenSoFar)) {
3108 0 return NULL;
3109 }
3110 0 --gData->stateStackTop;
3111 0 pc = pc + GET_OFFSET(pc);
3112 0 op = (REOp) *pc++;
3113 0 continue;
3114
3115 default:
3116 JS_ASSERT(JS_FALSE);
3117 0 result = NULL;
3118 }
3119 38423 break_switch:;
3120 }
3121
3122 /*
3123 * If the match failed and there's a backtrack option, take it.
3124 * Otherwise this is a complete and utter failure.
3125 */
3126 38423 if (!result) {
3127 24763 if (gData->cursz == 0)
3128 24763 return NULL;
3129 0 backTrackData = gData->backTrackSP;
3130 0 gData->cursz = backTrackData->sz;
3131 0 gData->backTrackSP =
3132 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3133 0 x->cp = backTrackData->cp;
3134 0 pc = backTrackData->backtrack_pc;
3135 0 op = backTrackData->backtrack_op;
3136 0 gData->stateStackTop = backTrackData->saveStateStackTop;
3137 JS_ASSERT(gData->stateStackTop);
3138
3139 0 memcpy(gData->stateStack, backTrackData + 1,
3140 sizeof(REProgState) * backTrackData->saveStateStackTop);
3141 0 curState = &gData->stateStack[gData->stateStackTop - 1];
3142
3143 0 if (backTrackData->parenCount) {
3144 0 memcpy(&x->parens[backTrackData->parenIndex],
3145 (char *)(backTrackData + 1) +
3146 sizeof(REProgState) * backTrackData->saveStateStackTop,
3147 sizeof(RECapture) * backTrackData->parenCount);
3148 0 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3149 } else {
3150 0 for (k = curState->parenSoFar; k < parenSoFar; k++)
3151 0 x->parens[k].index = -1;
3152 0 parenSoFar = curState->parenSoFar;
3153 }
3154 continue;
3155 }
3156 13660 x = result;
3157
3158 /*
3159 * Continue with the expression.
3160 */
3161 13660 op = (REOp)*pc++;
3162 }
3163 return NULL;
3164 }
3165
3166 static REMatchState *
3167 MatchRegExp(REGlobalData *gData, REMatchState *x)
3168 40413 {
3169 REMatchState *result;
3170 40413 const jschar *cp = x->cp;
3171 const jschar *cp2;
3172 uintN j;
3173
3174 /*
3175 * Have to include the position beyond the last character
3176 * in order to detect end-of-input/line condition.
3177 */
3178 103383 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3179 65176 gData->skipped = cp2 - cp;
3180 65176 x->cp = cp2;
3181 106370 for (j = 0; j < gData->regexp->parenCount; j++)
3182 41194 x->parens[j].index = -1;
3183 65176 result = ExecuteREBytecode(gData, x);
3184 65176 if (!gData->ok || result)
3185 2206 return result;
3186 62970 gData->backTrackSP = gData->backTrackStack;
3187 62970 gData->cursz = 0;
3188 62970 gData->stateStackTop = 0;
3189 62970 cp2 = cp + gData->skipped;
3190 }
3191 38207 return NULL;
3192 }
3193
3194
3195 static REMatchState *
3196 InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re)
3197 40413 {
3198 REMatchState *result;
3199 uintN i;
3200
3201 40413 gData->backTrackStackSize = INITIAL_BACKTRACK;
3202 40413 JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
3203 &gData->pool,
3204 INITIAL_BACKTRACK);
3205 40413 if (!gData->backTrackStack)
3206 0 return NULL;
3207 40413 gData->backTrackSP = gData->backTrackStack;
3208 40413 gData->cursz = 0;
3209
3210
3211 40413 gData->stateStackLimit = INITIAL_STATESTACK;
3212 40413 JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
3213 &gData->pool,
3214 sizeof(REProgState) * INITIAL_STATESTACK);
3215 40413 if (!gData->stateStack)
3216 0 return NULL;
3217 40413 gData->stateStackTop = 0;
3218
3219 40413 gData->cx = cx;
3220 40413 gData->regexp = re;
3221 40413 gData->ok = JS_TRUE;
3222
3223 40413 JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
3224 &gData->pool,
3225 offsetof(REMatchState, parens)
3226 + re->parenCount * sizeof(RECapture));
3227 40413 if (!result)
3228 0 return NULL;
3229
3230 40413 for (i = 0; i < re->classCount; i++)
3231 0 if (!re->classList[i].converted)
3232 0 if (!ProcessCharSet(gData, &re->classList[i]))
3233 0 return NULL;
3234
3235 40413 return result;
3236 }
3237
3238 JSBool
3239 js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
3240 JSBool test, jsval *rval)
3241 40413 {
3242 REGlobalData gData;
3243 REMatchState *x, *result;
3244
3245 const jschar *cp, *ep;
3246 size_t i, length, start;
3247 JSSubString *morepar;
3248 JSBool ok;
3249 JSRegExpStatics *res;
3250 ptrdiff_t matchlen;
3251 uintN num, morenum;
3252 JSString *parstr, *matchstr;
3253 JSObject *obj;
3254
3255 40413 RECapture *parsub = NULL;
3256
3257 /*
3258 * It's safe to load from cp because JSStrings have a zero at the end,
3259 * and we never let cp get beyond cpend.
3260 */
3261 40413 start = *indexp;
3262 40413 length = JSSTRING_LENGTH(str);
3263 40413 if (start > length)
3264 0 start = length;
3265 40413 cp = JSSTRING_CHARS(str);
3266 40413 gData.cpbegin = cp;
3267 40413 gData.cpend = cp + length;
3268 40413 cp += start;
3269 40413 gData.start = start;
3270 40413 gData.skipped = 0;
3271
3272 40413 JS_InitArenaPool(&gData.pool, "RegExpPool", 8096, 4);
3273 40413 x = InitMatch(cx, &gData, re);
3274 40413 if (!x)
3275 0 return JS_FALSE;
3276 40413 x->cp = cp;
3277
3278 /*
3279 * Call the recursive matcher to do the real work. Return null on mismatch
3280 * whether testing or not. On match, return an extended Array object.
3281 */
3282 40413 result = MatchRegExp(&gData, x);
3283 40413 ok = gData.ok;
3284 40413 if (!ok)
3285 40413 goto out;
3286 40413 if (!result) {
3287 38207 *rval = JSVAL_NULL;
3288 38207 goto out;
3289 }
3290 2206 cp = result->cp;
3291 2206 i = cp - gData.cpbegin;
3292 2206 *indexp = i;
3293 2206 matchlen = i - (start + gData.skipped);
3294 2206 ep = cp;
3295 2206 cp -= matchlen;
3296
3297 2206 if (test) {
3298 /*
3299 * Testing for a match and updating cx->regExpStatics: don't allocate
3300 * an array object, do return true.
3301 */
3302 2206 *rval = JSVAL_TRUE;
3303
3304 /* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
3305 2206 obj = NULL;
3306 } else {
3307 /*
3308 * The array returned on match has element 0 bound to the matched
3309 * string, elements 1 through state.parenCount bound to the paren
3310 * matches, an index property telling the length of the left context,
3311 * and an input property referring to the input string.
3312 */
3313 0 obj = js_NewArrayObject(cx, 0, NULL);
3314 0 if (!obj) {
3315 0 ok = JS_FALSE;
3316 0 goto out;
3317 }
3318 0 *rval = OBJECT_TO_JSVAL(obj);
3319
3320 #define DEFVAL(val, id) { \
3321 ok = js_DefineProperty(cx, obj, id, val, \
3322 JS_PropertyStub, JS_PropertyStub, \
3323 JSPROP_ENUMERATE, NULL); \
3324 if (!ok) { \
3325 cx->newborn[GCX_OBJECT] = NULL; \
3326 cx->newborn[GCX_STRING] = NULL; \
3327 goto out; \
3328 } \
3329 }
3330
3331 0 matchstr = js_NewStringCopyN(cx, cp, matchlen, 0);
3332 0 if (!matchstr) {
3333 0 cx->newborn[GCX_OBJECT] = NULL;
3334 0 ok = JS_FALSE;
3335 0 goto out;
3336 }
3337 0 DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
3338 }
3339
3340 2206 res = &cx->regExpStatics;
3341 2206 res->input = str;
3342 2206 res->parenCount = re->parenCount;
3343 2206 if (re->parenCount == 0) {
3344 2194 res->lastParen = js_EmptySubString;
3345 } else {
3346 24 for (num = 0; num < re->parenCount; num++) {
3347 12 parsub = &result->parens[num];
3348 12 if (num < 9) {
3349 12 if (parsub->index == -1) {
3350 0 res->parens[num].chars = NULL;
3351 0 res->parens[num].length = 0;
3352 } else {
3353 12 res->parens[num].chars = gData.cpbegin + parsub->index;
3354 12 res->parens[num].length = parsub->length;
3355 }
3356 } else {
3357 0 morenum = num - 9;
3358 0 morepar = res->moreParens;
3359 0 if (!morepar) {
3360 0 res->moreLength = 10;
3361 0 morepar = (JSSubString*)
3362 JS_malloc(cx, 10 * sizeof(JSSubString));
3363 0 } else if (morenum >= res->moreLength) {
3364 0 res->moreLength += 10;
3365 0 morepar = (JSSubString*)
3366 JS_realloc(cx, morepar,
3367 res->moreLength * sizeof(JSSubString));
3368 }
3369 0 if (!morepar) {
3370 0 cx->newborn[GCX_OBJECT] = NULL;
3371 0 cx->newborn[GCX_STRING] = NULL;
3372 0 ok = JS_FALSE;
3373 0 goto out;
3374 }
3375 0 res->moreParens = morepar;
3376 0 if (parsub->index == -1) {
3377 0 morepar[morenum].chars = NULL;
3378 0 morepar[morenum].length = 0;
3379 } else {
3380 0 morepar[morenum].chars = gData.cpbegin + parsub->index;
3381 0 morepar[morenum].length = parsub->length;
3382 }
3383 }
3384 12 if (test)
3385 0 continue;
3386 0 if (parsub->index == -1) {
3387 0 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3388 JSVAL_VOID, NULL, NULL,
3389 JSPROP_ENUMERATE, NULL);
3390 } else {
3391 0 parstr = js_NewStringCopyN(cx, gData.cpbegin + parsub->index,
3392 parsub->length, 0);
3393 0 if (!parstr) {
3394 0 cx->newborn[GCX_OBJECT] = NULL;
3395 0 cx->newborn[GCX_STRING] = NULL;
3396 0 ok = JS_FALSE;
3397 0 goto out;
3398 }
3399 0 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3400 STRING_TO_JSVAL(parstr), NULL, NULL,
3401 JSPROP_ENUMERATE, NULL);
3402 }
3403 0 if (!ok) {
3404 0 cx->newborn[GCX_OBJECT] = NULL;
3405 0 cx->newborn[GCX_STRING] = NULL;
3406 0 goto out;
3407 }
3408 }
3409 12 if (parsub->index == -1) {
3410 0 res->lastParen = js_EmptySubString;
3411 } else {
3412 12 res->lastParen.chars = gData.cpbegin + parsub->index;
3413 12 res->lastParen.length = parsub->length;
3414 }
3415 }
3416
3417 2206 if (!test) {
3418 /*
3419 * Define the index and input properties last for better for/in loop
3420 * order (so they come after the elements).
3421 */
3422 0 DEFVAL(INT_TO_JSVAL(start + gData.skipped),
3423 ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
3424 0 DEFVAL(STRING_TO_JSVAL(str),
3425 ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
3426 }
3427
3428 #undef DEFVAL
3429
3430 2206 res->lastMatch.chars = cp;
3431 2206 res->lastMatch.length = matchlen;
3432 2206 if (JS_VERSION_IS_1_2(cx)) {
3433 /*
3434 * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used
3435 * in scalar contexts, and unintentionally for the string.match "list"
3436 * pseudo-context. On "hi there bye", the following would result:
3437 *
3438 * Language while(/ /g){print("$`");} s/ /$`/g
3439 * perl4.036 "hi", "there" "hihitherehi therebye"
3440 * perl5 "hi", "hi there" "hihitherehi therebye"
3441 * js1.2 "hi", "there" "hihitheretherebye"
3442 */
3443 0 res->leftContext.chars = JSSTRING_CHARS(str) + start;
3444 0 res->leftContext.length = gData.skipped;
3445 } else {
3446 /*
3447 * For JS1.3 and ECMAv2, emulate Perl5 exactly:
3448 *
3449 * js1.3 "hi", "hi there" "hihitherehi therebye"
3450 */
3451 2206 res->leftContext.chars = JSSTRING_CHARS(str);
3452 2206 res->leftContext.length = start + gData.skipped;
3453 }
3454 2206 res->rightContext.chars = ep;
3455 2206 res->rightContext.length = gData.cpend - ep;
3456
3457 40413 out:
3458 40413 JS_FinishArenaPool(&gData.pool);
3459 40413 return ok;
3460 }
3461
3462 /************************************************************************/
3463
3464 enum regexp_tinyid {
3465 REGEXP_SOURCE = -1,
3466 REGEXP_GLOBAL = -2,
3467 REGEXP_IGNORE_CASE = -3,
3468 REGEXP_LAST_INDEX = -4,
3469 REGEXP_MULTILINE = -5
3470 };
3471
3472 #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT|JSPROP_SHARED)
3473
3474 static JSPropertySpec regexp_props[] = {
3475 {"source", REGEXP_SOURCE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3476 {"global", REGEXP_GLOBAL, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3477 {"ignoreCase", REGEXP_IGNORE_CASE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3478 {"lastIndex", REGEXP_LAST_INDEX, REGEXP_PROP_ATTRS,0,0},
3479 {"multiline", REGEXP_MULTILINE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3480 {0,0,0,0,0}
3481 };
3482
3483 static JSBool
3484 regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3485 0 {
3486 jsint slot;
3487 JSRegExp *re;
3488
3489 0 if (!JSVAL_IS_INT(id))
3490 0 return JS_TRUE;
3491 0 slot = JSVAL_TO_INT(id);
3492 0 if (slot == REGEXP_LAST_INDEX)
3493 0 return JS_GetReservedSlot(cx, obj, 0, vp);
3494
3495 JS_LOCK_OBJ(cx, obj);
3496 0 re = (JSRegExp *) JS_GetInstancePrivate(cx, obj, &js_RegExpClass, NULL);
3497 0 if (re) {
3498 0 switch (slot) {
3499 case REGEXP_SOURCE:
3500 0 *vp = STRING_TO_JSVAL(re->source);
3501 0 break;
3502 case REGEXP_GLOBAL:
3503 0 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
3504 0 break;
3505 case REGEXP_IGNORE_CASE:
3506 0 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
3507 0 break;
3508 case REGEXP_MULTILINE:
3509 0 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
3510 break;
3511 }
3512 }
3513 JS_UNLOCK_OBJ(cx, obj);
3514 0 return JS_TRUE;
3515 }
3516
3517 static JSBool
3518 regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3519 0 {
3520 JSBool ok;
3521 jsint slot;
3522 jsdouble lastIndex;
3523
3524 0 ok = JS_TRUE;
3525 0 if (!JSVAL_IS_INT(id))
3526 0 return ok;
3527 0 slot = JSVAL_TO_INT(id);
3528 0 if (slot == REGEXP_LAST_INDEX) {
3529 0 if (!js_ValueToNumber(cx, *vp, &lastIndex))
3530 0 return JS_FALSE;
3531 0 lastIndex = js_DoubleToInteger(lastIndex);
3532 0 ok = js_NewNumberValue(cx, lastIndex, vp) &&
3533 JS_SetReservedSlot(cx, obj, 0, *vp);
3534 }
3535 0 return ok;
3536 }
3537
3538 /*
3539 * RegExp class static properties and their Perl counterparts:
3540 *
3541 * RegExp.input $_
3542 * RegExp.multiline $*
3543 * RegExp.lastMatch $&
3544 * RegExp.lastParen $+
3545 * RegExp.leftContext $`
3546 * RegExp.rightContext $'
3547 */
3548 enum regexp_static_tinyid {
3549 REGEXP_STATIC_INPUT = -1,
3550 REGEXP_STATIC_MULTILINE = -2,
3551 REGEXP_STATIC_LAST_MATCH = -3,
3552 REGEXP_STATIC_LAST_PAREN = -4,
3553 REGEXP_STATIC_LEFT_CONTEXT = -5,
3554 REGEXP_STATIC_RIGHT_CONTEXT = -6
3555 };
3556
3557 JSBool
3558 js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3559 17 {
3560 17 JS_ClearRegExpStatics(cx);
3561 17 return js_AddRoot(cx, &res->input, "res->input");
3562 }
3563
3564 void
3565 js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3566 17 {
3567 17 if (res->moreParens) {
3568 0 JS_free(cx, res->moreParens);
3569 0 res->moreParens = NULL;
3570 }
3571 17 js_RemoveRoot(cx->runtime, &res->input);
3572 }
3573
3574 static JSBool
3575 regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3576 0 {
3577 jsint slot;
3578 JSRegExpStatics *res;
3579 JSString *str;
3580 JSSubString *sub;
3581
3582 0 res = &cx->regExpStatics;
3583 0 if (!JSVAL_IS_INT(id))
3584 0 return JS_TRUE;
3585 0 slot = JSVAL_TO_INT(id);
3586 0 switch (slot) {
3587 case REGEXP_STATIC_INPUT:
3588 0 *vp = res->input ? STRING_TO_JSVAL(res->input)
3589 : JS_GetEmptyStringValue(cx);
3590 0 return JS_TRUE;
3591 case REGEXP_STATIC_MULTILINE:
3592 0 *vp = BOOLEAN_TO_JSVAL(res->multiline);
3593 0 return JS_TRUE;
3594 case REGEXP_STATIC_LAST_MATCH:
3595 0 sub = &res->lastMatch;
3596 0 break;
3597 case REGEXP_STATIC_LAST_PAREN:
3598 0 sub = &res->lastParen;
3599 0 break;
3600 case REGEXP_STATIC_LEFT_CONTEXT:
3601 0 sub = &res->leftContext;
3602 0 break;
3603 case REGEXP_STATIC_RIGHT_CONTEXT:
3604 0 sub = &res->rightContext;
3605 0 break;
3606 default:
3607 0 sub = REGEXP_PAREN_SUBSTRING(res, slot);
3608 break;
3609 }
3610 0 str = js_NewStringCopyN(cx, sub->chars, sub->length, 0);
3611 0 if (!str)
3612 0 return JS_FALSE;
3613 0 *vp = STRING_TO_JSVAL(str);
3614 0 return JS_TRUE;
3615 }
3616
3617 static JSBool
3618 regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3619 0 {
3620 JSRegExpStatics *res;
3621
3622 0 if (!JSVAL_IS_INT(id))
3623 0 return JS_TRUE;
3624 0 res = &cx->regExpStatics;
3625 /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
3626 0 if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
3627 0 if (!JSVAL_IS_STRING(*vp) &&
3628 !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
3629 0 return JS_FALSE;
3630 }
3631 0 res->input = JSVAL_TO_STRING(*vp);
3632 0 } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
3633 0 if (!JSVAL_IS_BOOLEAN(*vp) &&
3634 !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
3635 0 return JS_FALSE;
3636 }
3637 0 res->multiline = JSVAL_TO_BOOLEAN(*vp);
3638 }
3639 0 return JS_TRUE;
3640 }
3641
3642 static JSPropertySpec regexp_static_props[] = {
3643 {"input",
3644 REGEXP_STATIC_INPUT,
3645 JSPROP_ENUMERATE|JSPROP_SHARED,
3646 regexp_static_getProperty, regexp_static_setProperty},
3647 {"multiline",
3648 REGEXP_STATIC_MULTILINE,
3649 JSPROP_ENUMERATE|JSPROP_SHARED,
3650 regexp_static_getProperty, regexp_static_setProperty},
3651 {"lastMatch",
3652 REGEXP_STATIC_LAST_MATCH,
3653 JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3654 regexp_static_getProperty, regexp_static_getProperty},
3655 {"lastParen",
3656 REGEXP_STATIC_LAST_PAREN,
3657 JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3658 regexp_static_getProperty, regexp_static_getProperty},
3659 {"leftContext",
3660 REGEXP_STATIC_LEFT_CONTEXT,
3661 JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3662 regexp_static_getProperty, regexp_static_getProperty},
3663 {"rightContext",
3664 REGEXP_STATIC_RIGHT_CONTEXT,
3665 JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3666 regexp_static_getProperty, regexp_static_getProperty},
3667
3668 /* XXX should have block scope and local $1, etc. */
3669 {"$1", 0, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3670 regexp_static_getProperty, regexp_static_getProperty},
3671 {"$2", 1, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3672 regexp_static_getProperty, regexp_static_getProperty},
3673 {"$3", 2, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3674 regexp_static_getProperty, regexp_static_getProperty},
3675 {"$4", 3, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3676 regexp_static_getProperty, regexp_static_getProperty},
3677 {"$5", 4, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3678 regexp_static_getProperty, regexp_static_getProperty},
3679 {"$6", 5, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3680 regexp_static_getProperty, regexp_static_getProperty},
3681 {"$7", 6, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3682 regexp_static_getProperty, regexp_static_getProperty},
3683 {"$8", 7, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3684 regexp_static_getProperty, regexp_static_getProperty},
3685 {"$9", 8, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3686 regexp_static_getProperty, regexp_static_getProperty},
3687
3688 {0,0,0,0,0}
3689 };
3690
3691 static void
3692 regexp_finalize(JSContext *cx, JSObject *obj)
3693 16028 {
3694 JSRegExp *re;
3695
3696 16028 re = (JSRegExp *) JS_GetPrivate(cx, obj);
3697 16028 if (!re)
3698 16028 return;
3699 16028 js_DestroyRegExp(cx, re);
3700 }
3701
3702 /* Forward static prototype. */
3703 static JSBool
3704 regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3705 jsval *rval);
3706
3707 static JSBool
3708 regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3709 0 {
3710 0 return regexp_exec(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv, rval);
3711 }
3712
3713 #if JS_HAS_XDR
3714
3715 #include "jsxdrapi.h"
3716
3717 static JSBool
3718 regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
3719 0 {
3720 JSRegExp *re;
3721 JSString *source;
3722 uint32 flagsword;
3723 JSObject *obj;
3724
3725 0 if (xdr->mode == JSXDR_ENCODE) {
3726 0 re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp);
3727 0 if (!re)
3728 0 return JS_FALSE;
3729 0 source = re->source;
3730 0 flagsword = ((uint32)re->cloneIndex << 16) | re->flags;
3731 }
3732 0 if (!JS_XDRString(xdr, &source) ||
3733 !JS_XDRUint32(xdr, &flagsword)) {
3734 0 return JS_FALSE;
3735 }
3736 0 if (xdr->mode == JSXDR_DECODE) {
3737 0 obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL);
3738 0 if (!obj)
3739 0 return JS_FALSE;
3740 0 re = js_NewRegExp(xdr->cx, NULL, source, (uint16)flagsword, JS_FALSE);
3741 0 if (!re)
3742 0 return JS_FALSE;
3743 0 if (!JS_SetPrivate(xdr->cx, obj, re) ||
3744 !js_SetLastIndex(xdr->cx, obj, 0)) {
3745 0 js_DestroyRegExp(xdr->cx, re);
3746 0 return JS_FALSE;
3747 }
3748 0 re->cloneIndex = (uint16)(flagsword >> 16);
3749 0 *objp = obj;
3750 }
3751 0 return JS_TRUE;
3752 }
3753
3754 #else /* !JS_HAS_XDR */
3755
3756 #define regexp_xdrObject NULL
3757
3758 #endif /* !JS_HAS_XDR */
3759
3760 static uint32
3761 regexp_mark(JSContext *cx, JSObject *obj, void *arg)
3762 221 {
3763 221 JSRegExp *re = (JSRegExp *) JS_GetPrivate(cx, obj);
3764 221 if (re)
3765 221 JS_MarkGCThing(cx, re->source, "source", arg);
3766 221 return 0;
3767 }
3768
3769 JSClass js_RegExpClass = {
3770 js_RegExp_str,
3771 JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1),
3772 JS_PropertyStub, JS_PropertyStub,
3773 regexp_getProperty, regexp_setProperty,
3774 JS_EnumerateStub, JS_ResolveStub,
3775 JS_ConvertStub, regexp_finalize,
3776 NULL, NULL,
3777 regexp_call, NULL,
3778 regexp_xdrObject, NULL,
3779 regexp_mark, 0
3780 };
3781
3782 static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
3783
3784 JSBool
3785 js_regexp_toString(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3786 jsval *rval)
3787 0 {
3788 JSRegExp *re;
3789 const jschar *source;
3790 jschar *chars;
3791 size_t length, nflags;
3792 uintN flags;
3793 JSString *str;
3794
3795 0 if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
3796 0 return JS_FALSE;
3797 JS_LOCK_OBJ(cx, obj);
3798 0 re = (JSRegExp *) JS_GetPrivate(cx, obj);
3799 0 if (!re) {
3800 JS_UNLOCK_OBJ(cx, obj);
3801 0 *rval = STRING_TO_JSVAL(cx->runtime->emptyString);
3802 0 return JS_TRUE;
3803 }
3804
3805 0 source = JSSTRING_CHARS(re->source);
3806 0 length = JSSTRING_LENGTH(re->source);
3807 0 if (length == 0) {
3808 0 source = empty_regexp_ucstr;
3809 0 length = sizeof(empty_regexp_ucstr) / sizeof(jschar) - 1;
3810 }
3811 0 length += 2;
3812 0 nflags = 0;
3813 0 for (flags = re->flags; flags != 0; flags &= flags - 1)
3814 0 nflags++;
3815 0 chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
3816 0 if (!chars) {
3817 JS_UNLOCK_OBJ(cx, obj);
3818 0 return JS_FALSE;
3819 }
3820
3821 0 chars[0] = '/';
3822 0 js_strncpy(&chars[1], source, length - 2);
3823 0 chars[length-1] = '/';
3824 0 if (nflags) {
3825 0 if (re->flags & JSREG_GLOB)
3826 0 chars[length++] = 'g';
3827 0 if (re->flags & JSREG_FOLD)
3828 0 chars[length++] = 'i';
3829 0 if (re->flags & JSREG_MULTILINE)
3830 0 chars[length++] = 'm';
3831 }
3832 JS_UNLOCK_OBJ(cx, obj);
3833 0 chars[length] = 0;
3834
3835 0 str = js_NewString(cx, chars, length, 0);
3836 0 if (!str) {
3837 0 JS_free(cx, chars);
3838 0 return JS_FALSE;
3839 }
3840 0 *rval = STRING_TO_JSVAL(str);
3841 0 return JS_TRUE;
3842 }
3843
3844 static JSBool
3845 regexp_compile(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3846 jsval *rval)
3847 15620 {
3848 JSString *opt, *str;
3849 JSRegExp *oldre, *re;
3850 JSBool ok, ok2;
3851 JSObject *obj2;
3852 size_t length, nbytes;
3853 const jschar *cp, *start, *end;
3854 jschar *nstart, *ncp, *tmp;
3855
3856 15620 if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
3857 0 return JS_FALSE;
3858 15620 opt = NULL;
3859 15620 if (argc == 0) {
3860 17 str = cx->runtime->emptyString;
3861 } else {
3862 15603 if (JSVAL_IS_OBJECT(argv[0])) {
3863 /*
3864 * If we get passed in a RegExp object we construct a new
3865 * RegExp that is a duplicate of it by re-compiling the
3866 * original source code. ECMA requires that it be an error
3867 * here if the flags are specified. (We must use the flags
3868 * from the original RegExp also).
3869 */
3870 0 obj2 = JSVAL_TO_OBJECT(argv[0]);
3871 0 if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
3872 0 if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
3873 0 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3874 JSMSG_NEWREGEXP_FLAGGED);
3875 0 return JS_FALSE;
3876 }
3877 JS_LOCK_OBJ(cx, obj2);
3878 0 re = (JSRegExp *) JS_GetPrivate(cx, obj2);
3879 0 if (!re) {
3880 JS_UNLOCK_OBJ(cx, obj2);
3881 0 return JS_FALSE;
3882 }
3883 0 re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
3884 JS_UNLOCK_OBJ(cx, obj2);
3885 0 goto created;
3886 }
3887 }
3888 15603 str = js_ValueToString(cx, argv[0]);
3889 15603 if (!str)
3890 0 return JS_FALSE;
3891 15603 argv[0] = STRING_TO_JSVAL(str);
3892 15603 if (argc > 1) {
3893 0 if (JSVAL_IS_VOID(argv[1])) {
3894 0 opt = NULL;
3895 } else {
3896 0 opt = js_ValueToString(cx, argv[1]);
3897 0 if (!opt)
3898 0 return JS_FALSE;
3899 0 argv[1] = STRING_TO_JSVAL(opt);
3900 }
3901 }
3902
3903 /* Escape any naked slashes in the regexp source. */
3904 15603 length = JSSTRING_LENGTH(str);
3905 15603 start = JSSTRING_CHARS(str);
3906 15603 end = start + length;
3907 15603 nstart = ncp = NULL;
3908 4868136 for (cp = start; cp < end; cp++) {
3909 4852533 if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
3910 0 nbytes = (++length + 1) * sizeof(jschar);
3911 0 if (!nstart) {
3912 0 nstart = (jschar *) JS_malloc(cx, nbytes);
3913 0 if (!nstart)
3914 0 return JS_FALSE;
3915 0 ncp = nstart + (cp - start);
3916 0 js_strncpy(nstart, start, cp - start);
3917 } else {
3918 0 tmp = (jschar *) JS_realloc(cx, nstart, nbytes);
3919 0 if (!tmp) {
3920 0 JS_free(cx, nstart);
3921 0 return JS_FALSE;
3922 }
3923 0 ncp = tmp + (ncp - nstart);
3924 0 nstart = tmp;
3925 }
3926 0 *ncp++ = '\\';
3927 }
3928 4852533 if (nstart)
3929 0 *ncp++ = *cp;
3930 }
3931
3932 15603 if (nstart) {
3933 /* Don't forget to store the backstop after the new string. */
3934 JS_ASSERT((size_t)(ncp - nstart) == length);
3935 0 *ncp = 0;
3936 0 str = js_NewString(cx, nstart, length, 0);
3937 0 if (!str) {
3938 0 JS_free(cx, nstart);
3939 0 return JS_FALSE;
3940 }
3941 0 argv[0] = STRING_TO_JSVAL(str);
3942 }
3943 }
3944
3945 15620 re = js_NewRegExpOpt(cx, NULL, str, opt, JS_FALSE);
3946 15620 created:
3947 15620 if (!re)
3948 0 return JS_FALSE;
3949 JS_LOCK_OBJ(cx, obj);
3950 15620 oldre = (JSRegExp *) JS_GetPrivate(cx, obj);
3951 15620 ok = JS_SetPrivate(cx, obj, re);
3952 15620 ok2 = js_SetLastIndex(cx, obj, 0);
3953 JS_UNLOCK_OBJ(cx, obj);
3954 15620 if (!ok) {
3955 0 js_DestroyRegExp(cx, re);
3956 0 return JS_FALSE;
3957 }
3958 15620 if (oldre)
3959 0 js_DestroyRegExp(cx, oldre);
3960 15620 *rval = OBJECT_TO_JSVAL(obj);
3961 15620 return ok2;
3962 }
3963
3964 static JSBool
3965 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3966 JSBool test, jsval *rval)
3967 0 {
3968 JSBool ok;
3969 JSRegExp *re;
3970 jsdouble lastIndex;
3971 JSString *str;
3972 size_t i;
3973
3974 0 ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
3975 0 if (!ok)
3976 0 return JS_FALSE;
3977 JS_LOCK_OBJ(cx, obj);
3978 0 re = (JSRegExp *) JS_GetPrivate(cx, obj);
3979 0 if (!re) {
3980 JS_UNLOCK_OBJ(cx, obj);
3981 0 return JS_TRUE;
3982 }
3983
3984 /* NB: we must reach out: after this paragraph, in order to drop re. */
3985 0 HOLD_REGEXP(cx, re);
3986 0 if (re->flags & JSREG_GLOB) {
3987 0 ok = js_GetLastIndex(cx, obj, &lastIndex);
3988 } else {
3989 0 lastIndex = 0;
3990 }
3991 JS_UNLOCK_OBJ(cx, obj);
3992 0 if (!ok)
3993 0 goto out;
3994
3995 /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
3996 0 if (argc == 0) {
3997 0 str = cx->regExpStatics.input;
3998 0 if (!str) {
3999 0 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4000 JSMSG_NO_INPUT,
4001 JS_GetStringBytes(re->source),
4002 (re->flags & JSREG_GLOB) ? "g" : "",
4003 (re->flags & JSREG_FOLD) ? "i" : "",
4004 (re->flags & JSREG_MULTILINE) ? "m" : "");
4005 0 ok = JS_FALSE;
4006 0 goto out;
4007 }
4008 } else {
4009 0 str = js_ValueToString(cx, argv[0]);
4010 0 if (!str)
4011 0 goto out;
4012 0 argv[0] = STRING_TO_JSVAL(str);
4013 }
4014
4015 0 if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) {
4016 0 ok = js_SetLastIndex(cx, obj, 0);
4017 0 *rval = JSVAL_NULL;
4018 } else {
4019 0 i = (size_t) lastIndex;
4020 0 ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
4021 0 if (ok && (re->flags & JSREG_GLOB))
4022 0 ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i);
4023 }
4024
4025 0 out:
4026 0 DROP_REGEXP(cx, re);
4027 0 return ok;
4028 }
4029
4030 static JSBool
4031 regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4032 0 {
4033 0 return regexp_exec_sub(cx, obj, argc, argv, JS_FALSE, rval);
4034 }
4035
4036 static JSBool
4037 regexp_test(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4038 0 {
4039 0 if (!regexp_exec_sub(cx, obj, argc, argv, JS_TRUE, rval))
4040 0 return JS_FALSE;
4041 0 if (*rval != JSVAL_TRUE)
4042 0 *rval = JSVAL_FALSE;
4043 0 return JS_TRUE;
4044 }
4045
4046 static JSFunctionSpec regexp_methods[] = {
4047 #if JS_HAS_TOSOURCE
4048 {js_toSource_str, js_regexp_toString, 0,0,0},
4049 #endif
4050 {js_toString_str, js_regexp_toString, 0,0,0},
4051 {"compile", regexp_compile, 1,0,0},
4052 {"exec", regexp_exec, 0,0,0},
4053 {"test", regexp_test, 0,0,0},
4054 {0,0,0,0,0}
4055 };
4056
4057 static JSBool
4058 RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4059 15603 {
4060 15603 if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) {
4061 /*
4062 * If first arg is regexp and no flags are given, just return the arg.
4063 * (regexp_compile detects the regexp + flags case and throws a
4064 * TypeError.) See 10.15.3.1.
4065 */
4066 0 if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
4067 !JSVAL_IS_PRIMITIVE(argv[0]) &&
4068 OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
4069 0 *rval = argv[0];
4070 0 return JS_TRUE;
4071 }
4072
4073 /* Otherwise, replace obj with a new RegExp object. */
4074 0 obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
4075 0 if (!obj)
4076 0 return JS_FALSE;
4077
4078 /*
4079 * regexp_compile does not use rval to root its temporaries
4080 * so we can use it to root obj.
4081 */
4082 0 *rval = OBJECT_TO_JSVAL(obj);
4083 }
4084 15603 return regexp_compile(cx, obj, argc, argv, rval);
4085 }
4086
4087 JSObject *
4088 js_InitRegExpClass(JSContext *cx, JSObject *obj)
4089 17 {
4090 JSObject *proto, *ctor;
4091 jsval rval;
4092
4093 17 proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
4094 regexp_props, regexp_methods,
4095 regexp_static_props, NULL);
4096
4097 17 if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
4098 0 return NULL;
4099 17 if (!JS_AliasProperty(cx, ctor, "input", "$_") ||
4100 !JS_AliasProperty(cx, ctor, "multiline", "$*") ||
4101 !JS_AliasProperty(cx, ctor, "lastMatch", "$&") ||
4102 !JS_AliasProperty(cx, ctor, "lastParen", "$+") ||
4103 !JS_AliasProperty(cx, ctor, "leftContext", "$`") ||
4104 !JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
4105 goto bad;
4106 }
4107
4108 /* Give RegExp.prototype private data so it matches the empty string. */
4109 17 if (!regexp_compile(cx, proto, 0, NULL, &rval))
4110 17 goto bad;
4111 17 return proto;
4112
4113 0 bad:
4114 0 JS_DeleteProperty(cx, obj, js_RegExpClass.name);
4115 0 return NULL;
4116 }
4117
4118 JSObject *
4119 js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
4120 jschar *chars, size_t length, uintN flags)
4121 408 {
4122 JSString *str;
4123 JSObject *obj;
4124 JSRegExp *re;
4125 JSTempValueRooter tvr;
4126
4127 408 str = js_NewStringCopyN(cx, chars, length, 0);
4128 408 if (!str)
4129 0 return NULL;
4130 408 re = js_NewRegExp(cx, ts, str, flags, JS_FALSE);
4131 408 if (!re)
4132 0 return NULL;
4133 408 JS_PUSH_SINGLE_TEMP_ROOT(cx, STRING_TO_JSVAL(str), &tvr);
4134 408 obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
4135 408 if (!obj || !JS_SetPrivate(cx, obj, re) || !js_SetLastIndex(cx, obj, 0)) {
4136 0 js_DestroyRegExp(cx, re);
4137 0 obj = NULL;
4138 }
4139 408 JS_POP_TEMP_ROOT(cx, &tvr);
4140 408 return obj;
4141 }
4142
4143 JSObject *
4144 js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent)
4145 0 {
4146 JSObject *clone;
4147 JSRegExp *re;
4148
4149 JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass);
4150 0 clone = js_NewObject(cx, &js_RegExpClass, NULL, parent);
4151 0 if (!clone)
4152 0 return NULL;
4153 0 re = JS_GetPrivate(cx, obj);
4154 0 if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) {
4155 0 cx->newborn[GCX_OBJECT] = NULL;
4156 0 return NULL;
4157 }
4158 0 HOLD_REGEXP(cx, re);
4159 0 return clone;
4160 }
4161
4162 JSBool
4163 js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex)
4164 0 {
4165 jsval v;
4166
4167 0 return JS_GetReservedSlot(cx, obj, 0, &v) &&
4168 js_ValueToNumber(cx, v, lastIndex);
4169 }
4170
4171 JSBool
4172 js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
4173 16028 {
4174 jsval v;
4175
4176 16028 return js_NewNumberValue(cx, lastIndex, &v) &&
4177 JS_SetReservedSlot(cx, obj, 0, v);
4178 }
4179