46 static bool BackRefMatchesNoCase(
Canonicalize* interp_canonicalize,
51 for (
int i = 0; i < len; i++) {
54 if (old_char == new_char)
continue;
57 interp_canonicalize->
get(old_char,
'\0', old_string);
58 interp_canonicalize->
get(new_char,
'\0', new_string);
59 if (old_string[0] != new_string[0]) {
67 static bool BackRefMatchesNoCase(
Canonicalize* interp_canonicalize,
71 Vector<const char> subject) {
72 for (
int i = 0; i < len; i++) {
73 unsigned int old_char = subject[from++];
74 unsigned int new_char = subject[current++];
75 if (old_char == new_char)
continue;
76 if (old_char -
'A' <=
'Z' -
'A') old_char |= 0x20;
77 if (new_char -
'A' <=
'Z' -
'A') new_char |= 0x20;
78 if (old_char != new_char)
return false;
85 static void TraceInterpreter(
const byte* code_base,
89 uint32_t current_char,
91 const char* bytecode_name) {
92 if (FLAG_trace_regexp_bytecodes) {
93 bool printable = (current_char < 127 && current_char >= 32);
96 "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
97 "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
103 printable ? current_char :
'.',
105 for (
int i = 0; i < bytecode_length; i++) {
106 printf(
", %02x", pc[i]);
109 for (
int i = 1; i < bytecode_length; i++) {
110 unsigned char b = pc[i];
111 if (b < 127 && b >= 32) {
122 #define BYTECODE(name) \
124 TraceInterpreter(code_base, \
126 static_cast<int>(backtrack_sp - backtrack_stack_base), \
129 BC_##name##_LENGTH, \
132 #define BYTECODE(name) \
138 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
139 return *
reinterpret_cast<const int32_t *
>(
pc);
144 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
145 return *
reinterpret_cast<const uint16_t *
>(
pc);
156 if (isolate->irregexp_interpreter_backtrack_stack_cache() !=
NULL) {
158 data_ = isolate->irregexp_interpreter_backtrack_stack_cache();
159 isolate->set_irregexp_interpreter_backtrack_stack_cache(
NULL);
162 data_ = NewArray<int>(kBacktrackStackSize);
167 if (isolate_->irregexp_interpreter_backtrack_stack_cache() ==
NULL) {
169 isolate_->set_irregexp_interpreter_backtrack_stack_cache(data_);
// Accessor for the backtrack stack storage: returns the data_ pointer as-is.
// In the code visible nearby, data_ is assigned either from the isolate's
// irregexp_interpreter_backtrack_stack_cache() or from
// NewArray<int>(kBacktrackStackSize).
176 int*
data()
const {
return data_; }
// Returns the fixed capacity of the backtrack stack (kBacktrackStackSize).
// The interpreter initialises its remaining-space counter from this value
// and recomputes that counter from it when the stack pointer is restored
// from a register.
178 int max_size()
const {
return kBacktrackStackSize; }
// Capacity of the backtrack stack, counted in int entries: it is the element
// count passed to NewArray<int> and the value reported by max_size().
181 static const int kBacktrackStackSize = 10000;
190 template <
typename Char>
192 const byte* code_base,
193 Vector<const Char> subject,
196 uint32_t current_char) {
197 const byte* pc = code_base;
201 BacktrackStack backtrack_stack(isolate);
202 int* backtrack_stack_base = backtrack_stack.data();
203 int* backtrack_sp = backtrack_stack_base;
204 int backtrack_stack_space = backtrack_stack.max_size();
206 if (FLAG_trace_regexp_bytecodes) {
207 PrintF(
"\n\nStart bytecode interpreter\n\n");
211 int32_t insn = Load32Aligned(pc);
215 return RegExpImpl::RE_FAILURE;
217 if (--backtrack_stack_space < 0) {
220 *backtrack_sp++ = current;
221 pc += BC_PUSH_CP_LENGTH;
224 if (--backtrack_stack_space < 0) {
227 *backtrack_sp++ = Load32Aligned(pc + 4);
228 pc += BC_PUSH_BT_LENGTH;
231 if (--backtrack_stack_space < 0) {
235 pc += BC_PUSH_REGISTER_LENGTH;
239 pc += BC_SET_REGISTER_LENGTH;
243 pc += BC_ADVANCE_REGISTER_LENGTH;
246 registers[insn >>
BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
247 pc += BC_SET_REGISTER_TO_CP_LENGTH;
251 pc += BC_SET_CP_TO_REGISTER_LENGTH;
254 registers[insn >> BYTECODE_SHIFT] =
255 static_cast<
int>(backtrack_sp - backtrack_stack_base);
256 pc += BC_SET_REGISTER_TO_SP_LENGTH;
259 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
260 backtrack_stack_space = backtrack_stack.max_size() -
261 static_cast<
int>(backtrack_sp - backtrack_stack_base);
262 pc += BC_SET_SP_TO_REGISTER_LENGTH;
265 backtrack_stack_space++;
267 current = *backtrack_sp;
268 pc += BC_POP_CP_LENGTH;
271 backtrack_stack_space++;
273 pc = code_base + *backtrack_sp;
276 backtrack_stack_space++;
278 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
279 pc += BC_POP_REGISTER_LENGTH;
282 return RegExpImpl::RE_FAILURE;
284 return RegExpImpl::RE_SUCCESS;
286 current += insn >> BYTECODE_SHIFT;
287 pc += BC_ADVANCE_CP_LENGTH;
290 pc = code_base + Load32Aligned(pc + 4);
293 current += insn >> BYTECODE_SHIFT;
294 pc = code_base + Load32Aligned(pc + 4);
297 if (current == backtrack_sp[-1]) {
299 backtrack_stack_space++;
300 pc = code_base + Load32Aligned(pc + 4);
302 pc += BC_CHECK_GREEDY_LENGTH;
307 if (pos >= subject.length()) {
308 pc = code_base + Load32Aligned(pc + 4);
310 current_char = subject[pos];
311 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
315 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
317 current_char = subject[pos];
318 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
323 if (pos + 2 > subject.length()) {
324 pc = code_base + Load32Aligned(pc + 4);
326 Char next = subject[pos + 1];
328 (subject[pos] | (next << (
kBitsPerByte *
sizeof(Char))));
329 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
333 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
335 Char next = subject[pos + 1];
336 current_char = (subject[pos] | (next << (
kBitsPerByte *
sizeof(Char))));
337 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
341 ASSERT(
sizeof(Char) == 1);
343 if (pos + 4 > subject.length()) {
344 pc = code_base + Load32Aligned(pc + 4);
346 Char next1 = subject[pos + 1];
347 Char next2 = subject[pos + 2];
348 Char next3 = subject[pos + 3];
349 current_char = (subject[pos] |
353 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
357 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
358 ASSERT(
sizeof(Char) == 1);
360 Char next1 = subject[pos + 1];
361 Char next2 = subject[pos + 2];
362 Char next3 = subject[pos + 3];
363 current_char = (subject[pos] |
367 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
371 uint32_t c = Load32Aligned(pc + 4);
372 if (c == current_char) {
373 pc = code_base + Load32Aligned(pc + 8);
375 pc += BC_CHECK_4_CHARS_LENGTH;
381 if (c == current_char) {
382 pc = code_base + Load32Aligned(pc + 4);
384 pc += BC_CHECK_CHAR_LENGTH;
389 uint32_t c = Load32Aligned(pc + 4);
390 if (c != current_char) {
391 pc = code_base + Load32Aligned(pc + 8);
393 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
399 if (c != current_char) {
400 pc = code_base + Load32Aligned(pc + 4);
402 pc += BC_CHECK_NOT_CHAR_LENGTH;
407 uint32_t c = Load32Aligned(pc + 4);
408 if (c == (current_char & Load32Aligned(pc + 8))) {
409 pc = code_base + Load32Aligned(pc + 12);
411 pc += BC_AND_CHECK_4_CHARS_LENGTH;
417 if (c == (current_char & Load32Aligned(pc + 4))) {
418 pc = code_base + Load32Aligned(pc + 8);
420 pc += BC_AND_CHECK_CHAR_LENGTH;
425 uint32_t c = Load32Aligned(pc + 4);
426 if (c != (current_char & Load32Aligned(pc + 8))) {
427 pc = code_base + Load32Aligned(pc + 12);
429 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
435 if (c != (current_char & Load32Aligned(pc + 4))) {
436 pc = code_base + Load32Aligned(pc + 8);
438 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
442 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
444 uint32_t minus = Load16Aligned(pc + 4);
445 uint32_t mask = Load16Aligned(pc + 6);
446 if (c != ((current_char - minus) & mask)) {
447 pc = code_base + Load32Aligned(pc + 8);
449 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
454 uint32_t from = Load16Aligned(pc + 4);
455 uint32_t to = Load16Aligned(pc + 6);
456 if (from <= current_char && current_char <= to) {
457 pc = code_base + Load32Aligned(pc + 8);
459 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
464 uint32_t from = Load16Aligned(pc + 4);
465 uint32_t to = Load16Aligned(pc + 6);
466 if (from > current_char || current_char > to) {
467 pc = code_base + Load32Aligned(pc + 8);
469 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
477 if ((b & (1 << bit)) != 0) {
478 pc = code_base + Load32Aligned(pc + 4);
480 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
486 if (current_char < limit) {
487 pc = code_base + Load32Aligned(pc + 4);
489 pc += BC_CHECK_LT_LENGTH;
495 if (current_char > limit) {
496 pc = code_base + Load32Aligned(pc + 4);
498 pc += BC_CHECK_GT_LENGTH;
504 pc = code_base + Load32Aligned(pc + 8);
506 pc += BC_CHECK_REGISTER_LT_LENGTH;
511 pc = code_base + Load32Aligned(pc + 8);
513 pc += BC_CHECK_REGISTER_GE_LENGTH;
518 pc = code_base + Load32Aligned(pc + 4);
520 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
525 registers[Load32Aligned(pc + 4)]) {
526 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
528 pc = code_base + Load32Aligned(pc + 8);
534 if (from < 0 || len <= 0) {
535 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
538 if (current + len > subject.length()) {
539 pc = code_base + Load32Aligned(pc + 4);
543 for (i = 0; i < len; i++) {
544 if (subject[from + i] != subject[current + i]) {
545 pc = code_base + Load32Aligned(pc + 4);
552 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
555 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
558 if (from < 0 || len <= 0) {
559 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
562 if (current + len > subject.length()) {
563 pc = code_base + Load32Aligned(pc + 4);
566 if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
567 from, current, len, subject)) {
569 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
571 pc = code_base + Load32Aligned(pc + 4);
578 pc = code_base + Load32Aligned(pc + 4);
580 pc += BC_CHECK_AT_START_LENGTH;
585 pc += BC_CHECK_NOT_AT_START_LENGTH;
587 pc = code_base + Load32Aligned(pc + 4);
590 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
592 if (subject.length() - current > by) {
593 current = subject.length() - by;
594 current_char = subject[current - 1];
596 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
612 int start_position) {
613 ASSERT(subject->IsFlat());
616 const byte* code_base = code_array->GetDataStartAddress();
617 uc16 previous_char =
'\n';
619 if (subject_content.
IsAscii()) {
621 if (start_position != 0) previous_char = subject_vector[start_position - 1];
622 return RawMatch(isolate,
631 if (start_position != 0) previous_char = subject_vector[start_position - 1];
632 return RawMatch(isolate,
void PrintF(const char *format,...)
Vector< const char > ToAsciiVector()
const int kBitsPerByteLog2
unibrow::Mapping< unibrow::Ecma262Canonicalize > Canonicalize
#define ASSERT(condition)
int get(uchar c, uchar n, uchar *result)
static RegExpImpl::IrregexpResult Match(Isolate *isolate, Handle< ByteArray > code, Handle< String > subject, int *captures, int start_position)
Vector< const uc16 > ToUC16Vector()
static const int kTableMask
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination trace on stack replacement optimize closures functions with arguments object optimize functions containing for in loops profiler considers IC stability primitive functions trigger their own optimization re try self optimization if it failed insert an interrupt check at function exit execution budget before interrupt is triggered call count before self optimization self_optimization count_based_interrupts weighted_back_edges trace_opt emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 enable use of ARMv7 instructions if enable use of MIPS FPU instructions if NULL
BacktrackStack(Isolate *isolate)
void DeleteArray(T *array)