// Canonicalizes each pair of characters through the Ecma262Canonicalize
// mapping and compares the results (two-byte subject strings).
static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
                                 int from,
                                 int current,
                                 int len,
                                 Vector<const uc16> subject) {
  for (int i = 0; i < len; i++) {
    unibrow::uchar old_char = subject[from++];
    unibrow::uchar new_char = subject[current++];
    if (old_char == new_char) continue;
    unibrow::uchar old_string[1] = { old_char };
    unibrow::uchar new_string[1] = { new_char };
    interp_canonicalize->get(old_char, '\0', old_string);
    interp_canonicalize->get(new_char, '\0', new_string);
    if (old_string[0] != new_string[0]) {
      return false;
    }
  }
  return true;
}
// ASCII fast path: case-insensitive comparison that folds only 'A'..'Z'
// to lower case and does not consult the canonicalize mapping.
static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
                                 int from,
                                 int current,
                                 int len,
                                 Vector<const char> subject) {
  for (int i = 0; i < len; i++) {
    unsigned int old_char = subject[from++];
    unsigned int new_char = subject[current++];
    if (old_char == new_char) continue;
    if (old_char - 'A' <= 'Z' - 'A') old_char |= 0x20;
    if (new_char - 'A' <= 'Z' - 'A') new_char |= 0x20;
    if (old_char != new_char) return false;
  }
  return true;
}
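// Prints one line of interpreter state per executed bytecode when
// --trace_regexp_bytecodes is enabled (debug builds).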
#ifdef DEBUG
static void TraceInterpreter(const byte* code_base,
                             const byte* pc,
                             int stack_depth,
                             int current_position,
                             uint32_t current_char,
                             int bytecode_length,
                             const char* bytecode_name) {
  if (FLAG_trace_regexp_bytecodes) {
    bool printable = (current_char < 127 && current_char >= 32);
    const char* format =
        printable ?
        "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
        "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
    PrintF(format,
           pc - code_base, stack_depth, current_position, current_char,
           printable ? current_char : '.', bytecode_name);
    // Dump the raw bytes of the bytecode, then their printable characters.
    for (int i = 0; i < bytecode_length; i++) {
      printf(", %02x", pc[i]);
    }
    printf(" ");
    for (int i = 1; i < bytecode_length; i++) {
      unsigned char b = pc[i];
      if (b < 127 && b >= 32) {
        printf("%c", b);
      } else {
        printf(".");
      }
    }
    printf("\n");
  }
}
#define BYTECODE(name)                                                      \
  case BC_##name:                                                           \
    TraceInterpreter(code_base, pc,                                         \
                     static_cast<int>(backtrack_sp - backtrack_stack_base), \
                     current, current_char, BC_##name##_LENGTH, #name);
#else
#define BYTECODE(name) \
  case BC_##name:
#endif
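// Operand readers: 32-bit operands in the bytecode stream are kept 4-byte
// aligned and 16-bit operands 2-byte aligned, which the ASSERTs below check.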
static int32_t Load32Aligned(const byte* pc) {
  ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
  return *reinterpret_cast<const int32_t *>(pc);
}


static int32_t Load16Aligned(const byte* pc) {
  ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
  return *reinterpret_cast<const uint16_t *>(pc);
}
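// Fixed-size backtrack stack for the interpreter. One allocation is cached
// on the isolate and reused across matches to avoid repeated allocation.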
class BacktrackStack {
 public:
  explicit BacktrackStack(Isolate* isolate) : isolate_(isolate) {
    if (isolate->irregexp_interpreter_backtrack_stack_cache() != NULL) {
      // Reuse the previously allocated stack from the isolate's cache.
      data_ = isolate->irregexp_interpreter_backtrack_stack_cache();
      isolate->set_irregexp_interpreter_backtrack_stack_cache(NULL);
    } else {
      // Cache was empty; allocate a fresh backtrack stack.
      data_ = NewArray<int>(kBacktrackStackSize);
    }
  }
  ~BacktrackStack() {
    if (isolate_->irregexp_interpreter_backtrack_stack_cache() == NULL) {
      // Keep this stack in the cache for the next match.
      isolate_->set_irregexp_interpreter_backtrack_stack_cache(data_);
    } else {
      DeleteArray(data_);
    }
  }
  int* data() const { return data_; }
  int max_size() const { return kBacktrackStackSize; }

 private:
  static const int kBacktrackStackSize = 10000;
  int* data_;
  Isolate* isolate_;
};
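// The interpreter itself, templatized on the character type so the same
// dispatch loop serves both one-byte (ASCII) and two-byte (uc16) subjects.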
template <typename Char>
static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
                                           const byte* code_base,
                                           Vector<const Char> subject,
                                           int* registers,
                                           int current,
                                           uint32_t current_char) {
  const byte* pc = code_base;
  BacktrackStack backtrack_stack(isolate);
  int* backtrack_stack_base = backtrack_stack.data();
  int* backtrack_sp = backtrack_stack_base;
  int backtrack_stack_space = backtrack_stack.max_size();
#ifdef DEBUG
  if (FLAG_trace_regexp_bytecodes) {
    PrintF("\n\nStart bytecode interpreter\n\n");
  }
#endif
    int32_t insn = Load32Aligned(pc);
      return RegExpImpl::RE_FAILURE;
      if (--backtrack_stack_space < 0) {
        return RegExpImpl::RE_EXCEPTION;
      }
      *backtrack_sp++ = current;
      pc += BC_PUSH_CP_LENGTH;
      if (--backtrack_stack_space < 0) {
        return RegExpImpl::RE_EXCEPTION;
      }
      *backtrack_sp++ = Load32Aligned(pc + 4);
      pc += BC_PUSH_BT_LENGTH;
      if (--backtrack_stack_space < 0) {
        return RegExpImpl::RE_EXCEPTION;
      }
      pc += BC_PUSH_REGISTER_LENGTH;
      pc += BC_SET_REGISTER_LENGTH;
      pc += BC_ADVANCE_REGISTER_LENGTH;
      registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
      pc += BC_SET_REGISTER_TO_CP_LENGTH;
      pc += BC_SET_CP_TO_REGISTER_LENGTH;
      registers[insn >> BYTECODE_SHIFT] =
          static_cast<int>(backtrack_sp - backtrack_stack_base);
      pc += BC_SET_REGISTER_TO_SP_LENGTH;
      backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
      backtrack_stack_space = backtrack_stack.max_size() -
          static_cast<int>(backtrack_sp - backtrack_stack_base);
      pc += BC_SET_SP_TO_REGISTER_LENGTH;
      backtrack_stack_space++;
      current = *backtrack_sp;
      pc += BC_POP_CP_LENGTH;
      backtrack_stack_space++;
      pc = code_base + *backtrack_sp;
      backtrack_stack_space++;
      registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
      pc += BC_POP_REGISTER_LENGTH;
      return RegExpImpl::RE_FAILURE;
      return RegExpImpl::RE_SUCCESS;
      current += insn >> BYTECODE_SHIFT;
      pc += BC_ADVANCE_CP_LENGTH;
      pc = code_base + Load32Aligned(pc + 4);
      current += insn >> BYTECODE_SHIFT;
      pc = code_base + Load32Aligned(pc + 4);
      if (current == backtrack_sp[-1]) {
        backtrack_stack_space++;
        pc = code_base + Load32Aligned(pc + 4);
      pc += BC_CHECK_GREEDY_LENGTH;
      if (pos >= subject.length()) {
        pc = code_base + Load32Aligned(pc + 4);
      current_char = subject[pos];
      pc += BC_LOAD_CURRENT_CHAR_LENGTH;
      BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
        current_char = subject[pos];
        pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
      if (pos + 2 > subject.length()) {
        pc = code_base + Load32Aligned(pc + 4);
      Char next = subject[pos + 1];
      current_char =
          (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
      pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
      BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
        Char next = subject[pos + 1];
        current_char =
            (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
        pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
      ASSERT(sizeof(Char) == 1);
      if (pos + 4 > subject.length()) {
        pc = code_base + Load32Aligned(pc + 4);
      Char next1 = subject[pos + 1];
      Char next2 = subject[pos + 2];
      Char next3 = subject[pos + 3];
      current_char = (subject[pos] |
                      (next1 << 8) | (next2 << 16) | (next3 << 24));
      pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
      BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
        ASSERT(sizeof(Char) == 1);
        Char next1 = subject[pos + 1];
        Char next2 = subject[pos + 2];
        Char next3 = subject[pos + 3];
        current_char = (subject[pos] |
                        (next1 << 8) | (next2 << 16) | (next3 << 24));
        pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
      uint32_t c = Load32Aligned(pc + 4);
      if (c == current_char) {
        pc = code_base + Load32Aligned(pc + 8);
      pc += BC_CHECK_4_CHARS_LENGTH;
      if (c == current_char) {
        pc = code_base + Load32Aligned(pc + 4);
      pc += BC_CHECK_CHAR_LENGTH;
      uint32_t c = Load32Aligned(pc + 4);
      if (c != current_char) {
        pc = code_base + Load32Aligned(pc + 8);
      pc += BC_CHECK_NOT_4_CHARS_LENGTH;
      if (c != current_char) {
        pc = code_base + Load32Aligned(pc + 4);
      pc += BC_CHECK_NOT_CHAR_LENGTH;
      uint32_t c = Load32Aligned(pc + 4);
      if (c == (current_char & Load32Aligned(pc + 8))) {
        pc = code_base + Load32Aligned(pc + 12);
      pc += BC_AND_CHECK_4_CHARS_LENGTH;
      if (c == (current_char & Load32Aligned(pc + 4))) {
        pc = code_base + Load32Aligned(pc + 8);
      pc += BC_AND_CHECK_CHAR_LENGTH;
      uint32_t c = Load32Aligned(pc + 4);
      if (c != (current_char & Load32Aligned(pc + 8))) {
        pc = code_base + Load32Aligned(pc + 12);
      pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
      if (c != (current_char & Load32Aligned(pc + 4))) {
        pc = code_base + Load32Aligned(pc + 8);
      pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
      BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
        uint32_t minus = Load16Aligned(pc + 4);
        uint32_t mask = Load16Aligned(pc + 6);
        if (c != ((current_char - minus) & mask)) {
          pc = code_base + Load32Aligned(pc + 8);
        pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
      uint32_t from = Load16Aligned(pc + 4);
      uint32_t to = Load16Aligned(pc + 6);
      if (from <= current_char && current_char <= to) {
        pc = code_base + Load32Aligned(pc + 8);
      pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
      uint32_t from = Load16Aligned(pc + 4);
      uint32_t to = Load16Aligned(pc + 6);
      if (from > current_char || current_char > to) {
        pc = code_base + Load32Aligned(pc + 8);
      pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
      if ((b & (1 << bit)) != 0) {
        pc = code_base + Load32Aligned(pc + 4);
      pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
      if (current_char < limit) {
        pc = code_base + Load32Aligned(pc + 4);
      pc += BC_CHECK_LT_LENGTH;
      if (current_char > limit) {
        pc = code_base + Load32Aligned(pc + 4);
      pc += BC_CHECK_GT_LENGTH;
        pc = code_base + Load32Aligned(pc + 8);
      pc += BC_CHECK_REGISTER_LT_LENGTH;
        pc = code_base + Load32Aligned(pc + 8);
      pc += BC_CHECK_REGISTER_GE_LENGTH;
        pc = code_base + Load32Aligned(pc + 4);
      pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
      if (registers[insn >> BYTECODE_SHIFT] ==
          registers[Load32Aligned(pc + 4)]) {
        pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
        pc = code_base + Load32Aligned(pc + 8);
      if (from < 0 || len <= 0) {
        pc += BC_CHECK_NOT_BACK_REF_LENGTH;
      if (current + len > subject.length()) {
        pc = code_base + Load32Aligned(pc + 4);
      for (i = 0; i < len; i++) {
        if (subject[from + i] != subject[current + i]) {
          pc = code_base + Load32Aligned(pc + 4);
      pc += BC_CHECK_NOT_BACK_REF_LENGTH;
      BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
        if (from < 0 || len <= 0) {
          pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
        if (current + len > subject.length()) {
          pc = code_base + Load32Aligned(pc + 4);
        if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
                                 from, current, len, subject)) {
          pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
        pc = code_base + Load32Aligned(pc + 4);
        pc = code_base + Load32Aligned(pc + 4);
      pc += BC_CHECK_AT_START_LENGTH;
      pc += BC_CHECK_NOT_AT_START_LENGTH;
        pc = code_base + Load32Aligned(pc + 4);
      BYTECODE(SET_CURRENT_POSITION_FROM_END) {
        int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
        if (subject.length() - current > by) {
          current = subject.length() - by;
          current_char = subject[current - 1];
        }
        pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
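// Entry point called by RegExpImpl. The subject string must already be flat;
// this picks the ASCII or two-byte RawMatch instantiation to run.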
RegExpImpl::IrregexpResult IrregexpInterpreter::Match(Isolate* isolate,
                                                      Handle<ByteArray> code_array,
                                                      Handle<String> subject,
                                                      int* registers,
                                                      int start_position) {
  ASSERT(subject->IsFlat());
  const byte* code_base = code_array->GetDataStartAddress();
  uc16 previous_char = '\n';
  String::FlatContent subject_content = subject->GetFlatContent();
  if (subject_content.IsAscii()) {
    Vector<const char> subject_vector = subject_content.ToAsciiVector();
    if (start_position != 0) previous_char = subject_vector[start_position - 1];
    return RawMatch(isolate, code_base, subject_vector, registers,
                    start_position, previous_char);
  } else {
    Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
    if (start_position != 0) previous_char = subject_vector[start_position - 1];
    return RawMatch(isolate, code_base, subject_vector, registers,
                    start_position, previous_char);
  }
}