46 static bool BackRefMatchesNoCase(
Canonicalize* interp_canonicalize,
51 for (
int i = 0; i < len; i++) {
54 if (old_char == new_char)
continue;
57 interp_canonicalize->
get(old_char,
'\0', old_string);
58 interp_canonicalize->
get(new_char,
'\0', new_string);
59 if (old_string[0] != new_string[0]) {
67 static bool BackRefMatchesNoCase(
Canonicalize* interp_canonicalize,
71 Vector<const uint8_t> subject) {
72 for (
int i = 0; i < len; i++) {
73 unsigned int old_char = subject[from++];
74 unsigned int new_char = subject[current++];
75 if (old_char == new_char)
continue;
79 if (old_char != new_char)
return false;
81 if (!(old_char -
'a' <=
'z' -
'a') &&
82 !(old_char - 224 <= 254 - 224 && old_char != 247)) {
91 static void TraceInterpreter(
const byte* code_base,
95 uint32_t current_char,
97 const char* bytecode_name) {
98 if (FLAG_trace_regexp_bytecodes) {
99 bool printable = (current_char < 127 && current_char >= 32);
102 "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
103 "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
109 printable ? current_char :
'.',
111 for (
int i = 0; i < bytecode_length; i++) {
112 printf(
", %02x", pc[i]);
115 for (
int i = 1; i < bytecode_length; i++) {
116 unsigned char b = pc[i];
117 if (b < 127 && b >= 32) {
128 #define BYTECODE(name) \
130 TraceInterpreter(code_base, \
132 static_cast<int>(backtrack_sp - backtrack_stack_base), \
135 BC_##name##_LENGTH, \
138 #define BYTECODE(name) \
144 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
145 return *
reinterpret_cast<const int32_t *
>(
pc);
150 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
151 return *
reinterpret_cast<const uint16_t *
>(
pc);
162 data_ = NewArray<int>(kBacktrackStackSize);
169 int*
data()
const {
return data_; }
171 int max_size()
const {
return kBacktrackStackSize; }
174 static const int kBacktrackStackSize = 10000;
182 template <
typename Char>
184 const byte* code_base,
185 Vector<const Char> subject,
188 uint32_t current_char) {
189 const byte* pc = code_base;
193 BacktrackStack backtrack_stack;
194 int* backtrack_stack_base = backtrack_stack.data();
195 int* backtrack_sp = backtrack_stack_base;
196 int backtrack_stack_space = backtrack_stack.max_size();
198 if (FLAG_trace_regexp_bytecodes) {
199 PrintF(
"\n\nStart bytecode interpreter\n\n");
203 int32_t insn = Load32Aligned(pc);
207 return RegExpImpl::RE_FAILURE;
209 if (--backtrack_stack_space < 0) {
212 *backtrack_sp++ = current;
213 pc += BC_PUSH_CP_LENGTH;
216 if (--backtrack_stack_space < 0) {
219 *backtrack_sp++ = Load32Aligned(pc + 4);
220 pc += BC_PUSH_BT_LENGTH;
223 if (--backtrack_stack_space < 0) {
227 pc += BC_PUSH_REGISTER_LENGTH;
231 pc += BC_SET_REGISTER_LENGTH;
235 pc += BC_ADVANCE_REGISTER_LENGTH;
238 registers[insn >>
BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
239 pc += BC_SET_REGISTER_TO_CP_LENGTH;
243 pc += BC_SET_CP_TO_REGISTER_LENGTH;
246 registers[insn >> BYTECODE_SHIFT] =
247 static_cast<
int>(backtrack_sp - backtrack_stack_base);
248 pc += BC_SET_REGISTER_TO_SP_LENGTH;
251 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
252 backtrack_stack_space = backtrack_stack.max_size() -
253 static_cast<
int>(backtrack_sp - backtrack_stack_base);
254 pc += BC_SET_SP_TO_REGISTER_LENGTH;
257 backtrack_stack_space++;
259 current = *backtrack_sp;
260 pc += BC_POP_CP_LENGTH;
263 backtrack_stack_space++;
265 pc = code_base + *backtrack_sp;
268 backtrack_stack_space++;
270 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
271 pc += BC_POP_REGISTER_LENGTH;
274 return RegExpImpl::RE_FAILURE;
276 return RegExpImpl::RE_SUCCESS;
278 current += insn >> BYTECODE_SHIFT;
279 pc += BC_ADVANCE_CP_LENGTH;
282 pc = code_base + Load32Aligned(pc + 4);
285 current += insn >> BYTECODE_SHIFT;
286 pc = code_base + Load32Aligned(pc + 4);
289 if (current == backtrack_sp[-1]) {
291 backtrack_stack_space++;
292 pc = code_base + Load32Aligned(pc + 4);
294 pc += BC_CHECK_GREEDY_LENGTH;
299 if (pos >= subject.length()) {
300 pc = code_base + Load32Aligned(pc + 4);
302 current_char = subject[pos];
303 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
307 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
309 current_char = subject[pos];
310 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
315 if (pos + 2 > subject.length()) {
316 pc = code_base + Load32Aligned(pc + 4);
318 Char next = subject[pos + 1];
320 (subject[pos] | (next << (
kBitsPerByte *
sizeof(Char))));
321 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
325 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
327 Char next = subject[pos + 1];
328 current_char = (subject[pos] | (next << (
kBitsPerByte *
sizeof(Char))));
329 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
333 ASSERT(
sizeof(Char) == 1);
335 if (pos + 4 > subject.length()) {
336 pc = code_base + Load32Aligned(pc + 4);
338 Char next1 = subject[pos + 1];
339 Char next2 = subject[pos + 2];
340 Char next3 = subject[pos + 3];
341 current_char = (subject[pos] |
345 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
349 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
350 ASSERT(
sizeof(Char) == 1);
352 Char next1 = subject[pos + 1];
353 Char next2 = subject[pos + 2];
354 Char next3 = subject[pos + 3];
355 current_char = (subject[pos] |
359 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
363 uint32_t c = Load32Aligned(pc + 4);
364 if (c == current_char) {
365 pc = code_base + Load32Aligned(pc + 8);
367 pc += BC_CHECK_4_CHARS_LENGTH;
373 if (c == current_char) {
374 pc = code_base + Load32Aligned(pc + 4);
376 pc += BC_CHECK_CHAR_LENGTH;
381 uint32_t c = Load32Aligned(pc + 4);
382 if (c != current_char) {
383 pc = code_base + Load32Aligned(pc + 8);
385 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
391 if (c != current_char) {
392 pc = code_base + Load32Aligned(pc + 4);
394 pc += BC_CHECK_NOT_CHAR_LENGTH;
399 uint32_t c = Load32Aligned(pc + 4);
400 if (c == (current_char & Load32Aligned(pc + 8))) {
401 pc = code_base + Load32Aligned(pc + 12);
403 pc += BC_AND_CHECK_4_CHARS_LENGTH;
409 if (c == (current_char & Load32Aligned(pc + 4))) {
410 pc = code_base + Load32Aligned(pc + 8);
412 pc += BC_AND_CHECK_CHAR_LENGTH;
417 uint32_t c = Load32Aligned(pc + 4);
418 if (c != (current_char & Load32Aligned(pc + 8))) {
419 pc = code_base + Load32Aligned(pc + 12);
421 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
427 if (c != (current_char & Load32Aligned(pc + 4))) {
428 pc = code_base + Load32Aligned(pc + 8);
430 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
434 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
436 uint32_t minus = Load16Aligned(pc + 4);
437 uint32_t mask = Load16Aligned(pc + 6);
438 if (c != ((current_char - minus) & mask)) {
439 pc = code_base + Load32Aligned(pc + 8);
441 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
446 uint32_t from = Load16Aligned(pc + 4);
447 uint32_t to = Load16Aligned(pc + 6);
448 if (from <= current_char && current_char <= to) {
449 pc = code_base + Load32Aligned(pc + 8);
451 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
456 uint32_t from = Load16Aligned(pc + 4);
457 uint32_t to = Load16Aligned(pc + 6);
458 if (from > current_char || current_char > to) {
459 pc = code_base + Load32Aligned(pc + 8);
461 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
469 if ((b & (1 << bit)) != 0) {
470 pc = code_base + Load32Aligned(pc + 4);
472 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
478 if (current_char < limit) {
479 pc = code_base + Load32Aligned(pc + 4);
481 pc += BC_CHECK_LT_LENGTH;
487 if (current_char > limit) {
488 pc = code_base + Load32Aligned(pc + 4);
490 pc += BC_CHECK_GT_LENGTH;
496 pc = code_base + Load32Aligned(pc + 8);
498 pc += BC_CHECK_REGISTER_LT_LENGTH;
503 pc = code_base + Load32Aligned(pc + 8);
505 pc += BC_CHECK_REGISTER_GE_LENGTH;
510 pc = code_base + Load32Aligned(pc + 4);
512 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
517 registers[Load32Aligned(pc + 4)]) {
518 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
520 pc = code_base + Load32Aligned(pc + 8);
526 if (from < 0 || len <= 0) {
527 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
530 if (current + len > subject.length()) {
531 pc = code_base + Load32Aligned(pc + 4);
535 for (i = 0; i < len; i++) {
536 if (subject[from + i] != subject[current + i]) {
537 pc = code_base + Load32Aligned(pc + 4);
544 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
547 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
550 if (from < 0 || len <= 0) {
551 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
554 if (current + len > subject.length()) {
555 pc = code_base + Load32Aligned(pc + 4);
558 if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
559 from, current, len, subject)) {
561 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
563 pc = code_base + Load32Aligned(pc + 4);
570 pc = code_base + Load32Aligned(pc + 4);
572 pc += BC_CHECK_AT_START_LENGTH;
577 pc += BC_CHECK_NOT_AT_START_LENGTH;
579 pc = code_base + Load32Aligned(pc + 4);
582 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
584 if (subject.length() - current > by) {
585 current = subject.length() - by;
586 current_char = subject[current - 1];
588 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
604 int start_position) {
605 ASSERT(subject->IsFlat());
608 const byte* code_base = code_array->GetDataStartAddress();
609 uc16 previous_char =
'\n';
611 if (subject_content.
IsAscii()) {
613 if (start_position != 0) previous_char = subject_vector[start_position - 1];
614 return RawMatch(isolate,
623 if (start_position != 0) previous_char = subject_vector[start_position - 1];
624 return RawMatch(isolate,
void PrintF(const char *format,...)
const int kBitsPerByteLog2
unibrow::Mapping< unibrow::Ecma262Canonicalize > Canonicalize
#define ASSERT(condition)
int get(uchar c, uchar n, uchar *result)
static RegExpImpl::IrregexpResult Match(Isolate *isolate, Handle< ByteArray > code, Handle< String > subject, int *captures, int start_position)
Vector< const uc16 > ToUC16Vector()
static const int kTableMask
Vector< const uint8_t > ToOneByteVector()
void DeleteArray(T *array)