46 #ifndef V8_INTERPRETED_REGEXP
47 #if V8_TARGET_ARCH_IA32
49 #elif V8_TARGET_ARCH_X64
51 #elif V8_TARGET_ARCH_ARM
53 #elif V8_TARGET_ARCH_MIPS
56 #error Unsupported target architecture.
69 bool* has_pending_exception) {
73 has_pending_exception);
79 for (
int i = 0; i < str->length(); i++) {
80 switch (str->Get(i)) {
92 return JSRegExp::Flags(flags);
96 static inline void ThrowRegExpException(Handle<JSRegExp> re,
97 Handle<String> pattern,
98 Handle<String> error_text,
100 Isolate* isolate = re->GetIsolate();
101 Factory* factory = isolate->factory();
102 Handle<FixedArray> elements = factory->NewFixedArray(2);
103 elements->set(0, *pattern);
104 elements->set(1, *error_text);
105 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
106 Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
107 isolate->Throw(*regexp_err);
115 ASSERT((ranges_length & 1) == 1);
120 for (
int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
123 if (ranges[i] <= new_range.
from())
continue;
126 if (last <= new_range.
from() && new_range.
to() < ranges[i]) {
147 const int kMod = 128;
148 bool character_found[kMod];
150 memset(&character_found[0], 0,
sizeof(character_found));
151 for (
int i = 0; i < length; i++) {
152 int ch = (pattern->Get(i) & (kMod - 1));
153 if (!character_found[ch]) {
154 character_found[ch] =
true;
158 if (different * 3 > length)
return false;
173 Isolate* isolate = re->GetIsolate();
177 bool in_cache = !cached.
is_null();
178 LOG(isolate, RegExpCompileEvent(re, in_cache));
182 re->set_data(*cached);
186 PostponeInterruptsScope postpone(isolate);
190 &parse_result, zone)) {
192 ThrowRegExpException(re,
199 bool has_been_compiled =
false;
201 if (parse_result.
simple &&
203 !HasFewDifferentCharacters(pattern)) {
206 has_been_compiled =
true;
207 }
else if (parse_result.
tree->IsAtom() &&
214 if (!HasFewDifferentCharacters(atom_string)) {
216 has_been_compiled =
true;
219 if (!has_been_compiled) {
222 ASSERT(re->data()->IsFixedArray());
226 compilation_cache->
PutRegExp(pattern, flags, data);
236 switch (regexp->TypeTag()) {
238 return AtomExec(regexp, subject, index, last_match_info);
243 regexp->GetIsolate()->has_pending_exception());
260 re->GetIsolate()->factory()->SetRegExpAtomData(re,
268 static void SetAtomLastCapture(
FixedArray* array,
272 NoHandleAllocation no_handles;
286 Isolate* isolate = regexp->GetIsolate();
289 ASSERT(index <= subject->length());
295 int needle_len = needle->
length();
299 if (index + needle_len > subject->length()) {
303 for (
int i = 0; i < output_size; i += 2) {
309 index = (needle_content.
IsAscii()
332 output[i+1] = index + needle_len;
336 return output_size / 2;
344 Isolate* isolate = re->GetIsolate();
348 int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
350 int res =
AtomExecRaw(re, subject, index, output_registers, kNumRegisters);
355 NoHandleAllocation no_handles;
357 SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);
358 return last_match_info;
370 bool RegExpImpl::EnsureCompiledIrregexp(
373 #ifdef V8_INTERPRETED_REGEXP
374 if (compiled_code->IsByteArray())
return true;
375 #else // V8_INTERPRETED_REGEXP (RegExp native code)
376 if (compiled_code->IsCode())
return true;
381 if (saved_code->IsCode()) {
384 ASSERT(compiled_code->IsSmi());
387 return CompileIrregexp(re, sample_subject, is_ascii);
391 static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
393 Handle<String> error_message,
395 Factory* factory = isolate->factory();
396 Handle<FixedArray> elements = factory->NewFixedArray(2);
397 elements->set(0, re->Pattern());
398 elements->set(1, *error_message);
399 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
400 Handle<Object> regexp_err =
401 factory->NewSyntaxError(
"malformed_regexp", array);
402 isolate->Throw(*regexp_err);
407 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
408 Handle<String> sample_subject,
411 Isolate* isolate = re->GetIsolate();
413 PostponeInterruptsScope postpone(isolate);
424 (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
431 ASSERT(error_string->IsString());
432 Handle<String> error_message(
String::cast(error_string));
433 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
437 JSRegExp::Flags flags = re->GetFlags();
439 Handle<String> pattern(re->Pattern());
441 RegExpCompileData compile_data;
442 FlatStringReader reader(isolate, pattern);
443 Zone* zone = isolate->runtime_zone();
449 ThrowRegExpException(re,
455 RegExpEngine::CompilationResult result =
457 flags.is_ignore_case(),
459 flags.is_multiline(),
464 if (result.error_message !=
NULL) {
466 Handle<String> error_message =
467 isolate->factory()->NewStringFromUtf8(
CStrVector(result.error_message));
468 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
475 if (result.num_registers > register_max) {
519 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
532 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
533 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii))
return -1;
535 #ifdef V8_INTERPRETED_REGEXP
542 #else // V8_INTERPRETED_REGEXP
546 #endif // V8_INTERPRETED_REGEXP
555 Isolate* isolate = regexp->GetIsolate();
560 ASSERT(index <= subject->length());
561 ASSERT(subject->IsFlat());
563 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
565 #ifndef V8_INTERPRETED_REGEXP
568 EnsureCompiledIrregexp(regexp, subject, is_ascii);
599 is_ascii = subject->IsAsciiRepresentationUnderneath();
603 #else // V8_INTERPRETED_REGEXP
608 int number_of_capture_registers =
610 int32_t* raw_output = &output[number_of_capture_registers];
614 for (
int i = number_of_capture_registers - 1; i >= 0; i--) {
626 memcpy(output, raw_output, number_of_capture_registers *
sizeof(
int32_t));
633 #endif // V8_INTERPRETED_REGEXP
641 Isolate* isolate = regexp->GetIsolate();
645 #if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG)
646 if (FLAG_trace_regexp_bytecodes) {
647 String* pattern = regexp->Pattern();
649 PrintF(
"\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
653 if (required_registers < 0) {
661 output_registers = NewArray<int32_t>(required_registers);
664 if (output_registers ==
NULL) {
665 output_registers = isolate->jsregexp_static_offsets_vector();
669 regexp, subject, previous_index, output_registers, required_registers);
674 last_match_info, subject, capture_count, output_registers);
681 return isolate->
factory()->null_value();
689 int capture_register_count = (capture_count + 1) * 2;
694 for (
int i = 0; i < capture_register_count; i += 2) {
702 return last_match_info;
710 : register_array_(
NULL),
711 register_array_size_(0),
714 #ifdef V8_INTERPRETED_REGEXP
715 bool interpreted =
true;
717 bool interpreted =
false;
718 #endif // V8_INTERPRETED_REGEXP
721 static const int kAtomRegistersPerMatch = 2;
722 registers_per_match_ = kAtomRegistersPerMatch;
727 if (registers_per_match_ < 0) {
733 if (is_global && !interpreted) {
734 register_array_size_ =
736 max_matches_ = register_array_size_ / registers_per_match_;
740 register_array_size_ = registers_per_match_;
745 register_array_ = NewArray<int32_t>(register_array_size_);
747 register_array_ = isolate->jsregexp_static_offsets_vector();
752 current_match_index_ = max_matches_ - 1;
753 num_matches_ = max_matches_;
754 ASSERT(registers_per_match_ >= 2);
755 ASSERT_GE(register_array_size_, registers_per_match_);
757 ®ister_array_[current_match_index_ * registers_per_match_];
773 current_match_index_++;
774 if (current_match_index_ >= num_matches_) {
777 if (num_matches_ < max_matches_) {
783 ®ister_array_[(current_match_index_ - 1) * registers_per_match_];
784 int last_end_index = last_match[1];
791 register_array_size_);
793 int last_start_index = last_match[0];
794 if (last_start_index == last_end_index) last_end_index++;
795 if (last_end_index > subject_->length()) {
803 register_array_size_);
806 if (num_matches_ <= 0)
return NULL;
807 current_match_index_ = 0;
808 return register_array_;
810 return ®ister_array_[current_match_index_ * registers_per_match_];
816 int index = current_match_index_ * registers_per_match_;
817 if (num_matches_ == 0) {
819 index -= registers_per_match_;
821 return ®ister_array_[index];
991 for (
int i = 0; i < elements()->length(); i++)
1013 return data.u_atom->length();
1015 ASSERT(type == CHAR_CLASS);
1022 if (table_ ==
NULL) {
1035 frequencies_[i] = CharacterFrequency(i);
1041 frequencies_[index].Increment();
1049 if (total_samples_ < 1)
return 1;
1050 int freq_in_per128 =
1051 (frequencies_[in_character].counter() * 128) / total_samples_;
1052 return freq_in_per128;
1056 class CharacterFrequency {
1058 CharacterFrequency() : counter_(0), character_(-1) { }
1059 explicit CharacterFrequency(
int character)
1060 : counter_(0), character_(character) { }
1062 void Increment() { counter_++; }
1063 int counter() {
return counter_; }
1064 int character() {
return character_; }
1080 RegExpCompiler(
int capture_count,
bool ignore_case,
bool is_ascii,
1085 reg_exp_too_big_ =
true;
1086 return next_register_;
1088 return next_register_++;
1098 static const int kImplementationOffset = 0;
1099 static const int kNumberOfRegistersOffset = 0;
1100 static const int kCodeOffset = 1;
1105 static const int kMaxRecursion = 100;
1118 current_expansion_factor_ = value;
1129 int recursion_depth_;
1133 bool reg_exp_too_big_;
1134 int current_expansion_factor_;
1151 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
1152 return RegExpEngine::CompilationResult(
"RegExp too big");
1160 : next_register_(2 * (capture_count + 1)),
1162 recursion_depth_(0),
1163 ignore_case_(ignore_case),
1165 reg_exp_too_big_(
false),
1166 current_expansion_factor_(1),
1167 frequency_collator_(),
1179 Heap* heap = pattern->GetHeap();
1181 bool use_slow_safe_regexp_compiler =
false;
1186 use_slow_safe_regexp_compiler =
true;
1189 macro_assembler->
set_slow_safe(use_slow_safe_regexp_compiler);
1192 if (FLAG_trace_regexp_assembler)
1199 work_list_ = &work_list;
1203 start->
Emit(
this, &new_trace);
1204 macro_assembler_->
Bind(&fail);
1205 macro_assembler_->
Fail();
1206 while (!work_list.is_empty()) {
1207 work_list.RemoveLast()->Emit(
this, &new_trace);
1209 if (reg_exp_too_big_)
return IrregexpRegExpTooBig();
1215 if (FLAG_print_code) {
1218 if (FLAG_trace_regexp_assembler) {
1219 delete macro_assembler_;
1231 return reg() == that;
1239 action = action->next()) {
1240 if (action->Mentions(reg))
1251 action = action->next()) {
1252 if (action->Mentions(reg)) {
1265 int Trace::FindAffectedRegisters(
OutSet* affected_registers,
1268 for (DeferredAction* action = actions_;
1270 action = action->next()) {
1272 Interval range =
static_cast<DeferredClearCaptures*
>(action)->range();
1273 for (
int i = range.
from(); i <= range.
to(); i++)
1274 affected_registers->Set(i, zone);
1275 if (range.
to() > max_register) max_register = range.
to();
1277 affected_registers->Set(action->reg(), zone);
1278 if (action->reg() > max_register) max_register = action->reg();
1281 return max_register;
1285 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
1287 OutSet& registers_to_pop,
1288 OutSet& registers_to_clear) {
1289 for (
int reg = max_register; reg >= 0; reg--) {
1290 if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
1291 else if (registers_to_clear.Get(reg)) {
1293 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
1296 assembler->ClearRegisters(reg, clear_to);
1302 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
1304 OutSet& affected_registers,
1305 OutSet* registers_to_pop,
1306 OutSet* registers_to_clear,
1309 const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
1314 for (
int reg = 0; reg <= max_register; reg++) {
1315 if (!affected_registers.Get(reg)) {
1322 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
1323 DeferredActionUndoType undo_action = IGNORE;
1326 bool absolute =
false;
1328 int store_position = -1;
1331 for (DeferredAction* action = actions_;
1333 action = action->next()) {
1334 if (action->Mentions(reg)) {
1335 switch (action->type()) {
1337 Trace::DeferredSetRegister* psr =
1338 static_cast<Trace::DeferredSetRegister*
>(action);
1340 value += psr->value();
1348 undo_action = RESTORE;
1359 undo_action = RESTORE;
1362 Trace::DeferredCapture*
pc =
1363 static_cast<Trace::DeferredCapture*
>(action);
1364 if (!clear && store_position == -1) {
1365 store_position = pc->cp_offset();
1376 undo_action = IGNORE;
1378 undo_action = pc->is_capture() ? CLEAR : RESTORE;
1388 if (store_position == -1) {
1391 undo_action = RESTORE;
1403 if (undo_action == RESTORE) {
1407 if (pushes == push_limit) {
1412 assembler->PushRegister(reg, stack_check);
1413 registers_to_pop->Set(reg, zone);
1414 }
else if (undo_action == CLEAR) {
1415 registers_to_clear->Set(reg, zone);
1419 if (store_position != -1) {
1420 assembler->WriteCurrentPositionToRegister(reg, store_position);
1422 assembler->ClearRegisters(reg, reg);
1423 }
else if (absolute) {
1424 assembler->SetRegister(reg, value);
1425 }
else if (value != 0) {
1426 assembler->AdvanceRegister(reg, value);
1447 successor->
Emit(compiler, &new_state);
1452 OutSet affected_registers;
1461 int max_register = FindAffectedRegisters(&affected_registers,
1464 OutSet registers_to_clear;
1465 PerformDeferredActions(assembler,
1469 ®isters_to_clear,
1471 if (cp_offset_ != 0) {
1479 successor->
Emit(compiler, &new_state);
1482 assembler->
Bind(&undo);
1483 RestoreAffectedRegisters(assembler,
1486 registers_to_clear);
1501 if (!label()->is_bound()) {
1504 assembler->
Bind(label());
1511 if (clear_capture_count_ > 0) {
1514 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
1515 assembler->
ClearRegisters(clear_capture_start_, clear_capture_end);
1525 trace->
Flush(compiler,
this);
1529 if (!label()->is_bound()) {
1530 assembler->
Bind(label());
1539 case NEGATIVE_SUBMATCH_SUCCESS:
1548 if (guards_ ==
NULL)
1550 guards_->
Add(guard, zone);
1567 new(on_success->
zone())
ActionNode(INCREMENT_REGISTER, on_success);
1599 result->data_.
u_submatch.stack_pointer_register = stack_reg;
1600 result->data_.
u_submatch.current_position_register = position_reg;
1607 int clear_register_count,
1608 int clear_register_from,
1611 new(on_success->
zone())
ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
1612 result->data_.
u_submatch.stack_pointer_register = stack_reg;
1613 result->data_.
u_submatch.current_position_register = position_reg;
1614 result->data_.
u_submatch.clear_register_count = clear_register_count;
1615 result->data_.
u_submatch.clear_register_from = clear_register_from;
1621 int repetition_register,
1622 int repetition_limit,
1625 new(on_success->
zone())
ActionNode(EMPTY_MATCH_CHECK, on_success);
1633 #define DEFINE_ACCEPT(Type) \
1634 void Type##Node::Accept(NodeVisitor* visitor) { \
1635 visitor->Visit##Type(this); \
1638 #undef DEFINE_ACCEPT
1653 switch (guard->
op()) {
1672 static int GetCaseIndependentLetters(Isolate* isolate,
1677 isolate->jsregexp_uncanonicalize()->get(character,
'\0', letters);
1681 letters[0] = character;
1693 static inline bool EmitSimpleCharacter(Isolate* isolate,
1694 RegExpCompiler* compiler,
1700 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1701 bool bound_checked =
false;
1703 assembler->LoadCurrentCharacter(
1707 bound_checked =
true;
1709 assembler->CheckNotCharacter(c, on_failure);
1710 return bound_checked;
1716 static inline bool EmitAtomNonLetter(Isolate* isolate,
1717 RegExpCompiler* compiler,
1723 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1724 bool ascii = compiler->ascii();
1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1732 bool checked =
false;
1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1743 macro_assembler->CheckNotCharacter(c, on_failure);
1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1753 Label* on_failure) {
1760 uc16 exor = c1 ^ c2;
1762 if (((exor - 1) & exor) == 0) {
1766 uc16 mask = char_mask ^ exor;
1767 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
1771 uc16 diff = c2 - c1;
1772 if (((diff - 1) & diff) == 0 && c1 >= diff) {
1777 uc16 mask = char_mask ^ diff;
1778 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1798 static inline bool EmitAtomLetter(
Isolate* isolate,
1806 bool ascii = compiler->
ascii();
1808 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1809 if (length <= 1)
return false;
1819 if (ShortCutEmitCharacterPair(macro_assembler,
1827 macro_assembler->
Bind(&ok);
1838 macro_assembler->
Bind(&ok);
1848 static void EmitBoundaryTest(RegExpMacroAssembler* masm,
1850 Label* fall_through,
1851 Label* above_or_equal,
1853 if (below != fall_through) {
1854 masm->CheckCharacterLT(border, below);
1855 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
1857 masm->CheckCharacterGT(border - 1, above_or_equal);
1862 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
1865 Label* fall_through,
1867 Label* out_of_range) {
1868 if (in_range == fall_through) {
1869 if (first == last) {
1870 masm->CheckNotCharacter(first, out_of_range);
1872 masm->CheckCharacterNotInRange(first, last, out_of_range);
1875 if (first == last) {
1876 masm->CheckCharacter(first, in_range);
1878 masm->CheckCharacterInRange(first, last, in_range);
1880 if (out_of_range != fall_through) masm->GoTo(out_of_range);
1887 static void EmitUseLookupTable(
1888 RegExpMacroAssembler* masm,
1889 ZoneList<int>* ranges,
1893 Label* fall_through,
1899 int base = (min_char & ~kMask);
1903 for (
int i = start_index; i <= end_index; i++) {
1904 ASSERT_EQ(ranges->at(i) & ~kMask, base);
1906 ASSERT(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
1910 Label* on_bit_clear;
1912 if (even_label == fall_through) {
1913 on_bit_set = odd_label;
1914 on_bit_clear = even_label;
1917 on_bit_set = even_label;
1918 on_bit_clear = odd_label;
1921 for (
int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) {
1926 for (
int i = start_index; i < end_index; i++) {
1927 for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
1932 for (
int i = j; i < kSize; i++) {
1937 for (
int i = 0; i < kSize; i++) {
1938 ba->set(i, templ[i]);
1940 masm->CheckBitInTable(ba, on_bit_set);
1941 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1945 static void CutOutRange(RegExpMacroAssembler* masm,
1946 ZoneList<int>* ranges,
1952 bool odd = (((cut_index - start_index) & 1) == 1);
1953 Label* in_range_label = odd ? odd_label : even_label;
1955 EmitDoubleBoundaryTest(masm,
1956 ranges->at(cut_index),
1957 ranges->at(cut_index + 1) - 1,
1961 ASSERT(!dummy.is_linked());
1965 for (
int j = cut_index; j > start_index; j--) {
1966 ranges->at(j) = ranges->at(j - 1);
1968 for (
int j = cut_index + 1; j < end_index; j++) {
1969 ranges->at(j) = ranges->at(j + 1);
1976 static void SplitSearchSpace(ZoneList<int>* ranges,
1979 int* new_start_index,
1985 int first = ranges->at(start_index);
1986 int last = ranges->at(end_index) - 1;
1988 *new_start_index = start_index;
1989 *border = (ranges->at(start_index) & ~kMask) + kSize;
1990 while (*new_start_index < end_index) {
1991 if (ranges->at(*new_start_index) > *border)
break;
1992 (*new_start_index)++;
2005 int binary_chop_index = (end_index + start_index) / 2;
2011 end_index - start_index > (*new_start_index - start_index) * 2 &&
2012 last - first > kSize * 2 &&
2013 binary_chop_index > *new_start_index &&
2014 ranges->at(binary_chop_index) >= first + 2 * kSize) {
2015 int scan_forward_for_section_border = binary_chop_index;;
2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
2018 while (scan_forward_for_section_border < end_index) {
2019 if (ranges->at(scan_forward_for_section_border) > new_border) {
2020 *new_start_index = scan_forward_for_section_border;
2021 *border = new_border;
2024 scan_forward_for_section_border++;
2028 ASSERT(*new_start_index > start_index);
2029 *new_end_index = *new_start_index - 1;
2030 if (ranges->at(*new_end_index) == *border) {
2033 if (*border >= ranges->at(end_index)) {
2034 *border = ranges->at(end_index);
2035 *new_start_index = end_index;
2036 *new_end_index = end_index - 1;
2047 static void GenerateBranches(RegExpMacroAssembler* masm,
2048 ZoneList<int>* ranges,
2053 Label* fall_through,
2056 int first = ranges->at(start_index);
2057 int last = ranges->at(end_index) - 1;
2063 if (start_index == end_index) {
2064 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
2070 if (start_index + 1 == end_index) {
2071 EmitDoubleBoundaryTest(
2072 masm, first, last, fall_through, even_label, odd_label);
2078 if (end_index - start_index <= 6) {
2081 static int kNoCutIndex = -1;
2082 int cut = kNoCutIndex;
2083 for (
int i = start_index; i < end_index; i++) {
2084 if (ranges->at(i) == ranges->at(i + 1) - 1) {
2089 if (cut == kNoCutIndex) cut = start_index;
2091 masm, ranges, start_index, end_index, cut, even_label, odd_label);
2093 GenerateBranches(masm,
2109 if ((max_char >> kBits) == (min_char >> kBits)) {
2110 EmitUseLookupTable(masm,
2121 if ((min_char >> kBits) != (first >> kBits)) {
2122 masm->CheckCharacterLT(first, odd_label);
2123 GenerateBranches(masm,
2135 int new_start_index = 0;
2136 int new_end_index = 0;
2139 SplitSearchSpace(ranges,
2147 Label*
above = &handle_rest;
2148 if (border == last + 1) {
2151 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
2152 ASSERT(new_end_index == end_index - 1);
2157 ASSERT_LT(start_index, new_start_index);
2159 ASSERT(new_end_index + 1 == new_start_index ||
2160 (new_end_index + 2 == new_start_index &&
2161 border == ranges->at(new_end_index + 1)));
2164 ASSERT_LT(ranges->at(new_end_index), border);
2165 ASSERT(border < ranges->at(new_start_index) ||
2166 (border == ranges->at(new_start_index) &&
2167 new_start_index == end_index &&
2168 new_end_index == end_index - 1 &&
2169 border == last + 1));
2170 ASSERT(new_start_index == 0 || border >= ranges->at(new_start_index - 1));
2172 masm->CheckCharacterGT(border - 1, above);
2174 GenerateBranches(masm,
2183 if (handle_rest.is_linked()) {
2184 masm->Bind(&handle_rest);
2185 bool flip = (new_start_index & 1) != (start_index & 1);
2186 GenerateBranches(masm,
2193 flip ? odd_label : even_label,
2194 flip ? even_label : odd_label);
2199 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
2200 RegExpCharacterClass*
cc,
2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone);
2219 int range_count = ranges->length();
2221 int last_valid_range = range_count - 1;
2222 while (last_valid_range >= 0) {
2223 CharacterRange& range = ranges->at(last_valid_range);
2224 if (range.from() <= max_char) {
2230 if (last_valid_range < 0) {
2231 if (!cc->is_negated()) {
2232 macro_assembler->GoTo(on_failure);
2235 macro_assembler->CheckPosition(cp_offset, on_failure);
2240 if (last_valid_range == 0 &&
2241 ranges->at(0).IsEverything(max_char)) {
2242 if (cc->is_negated()) {
2243 macro_assembler->GoTo(on_failure);
2247 macro_assembler->CheckPosition(cp_offset, on_failure);
2252 if (last_valid_range == 0 &&
2253 !cc->is_negated() &&
2254 ranges->at(0).IsEverything(max_char)) {
2257 macro_assembler->CheckPosition(cp_offset, on_failure);
2263 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
2266 if (cc->is_standard(zone) &&
2267 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
2279 ZoneList<int>* range_boundaries =
2280 new(zone) ZoneList<int>(last_valid_range, zone);
2282 bool zeroth_entry_is_failure = !cc->is_negated();
2284 for (
int i = 0; i <= last_valid_range; i++) {
2285 CharacterRange& range = ranges->at(i);
2286 if (range.from() == 0) {
2288 zeroth_entry_is_failure = !zeroth_entry_is_failure;
2290 range_boundaries->Add(range.from(), zone);
2292 range_boundaries->Add(range.to() + 1, zone);
2294 int end_index = range_boundaries->length() - 1;
2295 if (range_boundaries->at(end_index) > max_char) {
2300 GenerateBranches(macro_assembler,
2307 zeroth_entry_is_failure ? &fall_through : on_failure,
2308 zeroth_entry_is_failure ? on_failure : &fall_through);
2309 macro_assembler->Bind(&fall_through);
2326 if (label_.is_bound()) {
2329 macro_assembler->
GoTo(&label_);
2336 macro_assembler->
GoTo(&label_);
2340 macro_assembler->
Bind(&label_);
2347 if (FLAG_regexp_optimization &&
2348 trace_count_ < kMaxCopiesCodeGenerated &&
2356 trace->
Flush(compiler,
this);
2362 int recursion_depth,
2363 bool not_at_start) {
2365 if (type_ == POSITIVE_SUBMATCH_SUCCESS)
return 0;
2366 return on_success()->EatsAtLeast(still_to_find,
2367 recursion_depth + 1,
2373 int recursion_depth,
2376 bool not_at_start) {
2377 if (type_ == BEGIN_SUBMATCH) {
2379 }
else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
2380 on_success()->FillInBMInfo(
2381 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2383 SaveBMInfo(bm, not_at_start, offset);
2388 int recursion_depth,
2389 bool not_at_start) {
2396 if (type() == AT_START && not_at_start)
return still_to_find;
2397 return on_success()->EatsAtLeast(still_to_find,
2398 recursion_depth + 1,
2404 int recursion_depth,
2407 bool not_at_start) {
2409 if (type() == AT_START && not_at_start)
return;
2410 on_success()->FillInBMInfo(
2411 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2412 SaveBMInfo(bm, not_at_start, offset);
2417 int recursion_depth,
2418 bool not_at_start) {
2420 return on_success()->EatsAtLeast(still_to_find,
2421 recursion_depth + 1,
2427 int recursion_depth,
2428 bool not_at_start) {
2429 int answer = Length();
2430 if (answer >= still_to_find)
return answer;
2433 return answer + on_success()->EatsAtLeast(still_to_find - answer,
2434 recursion_depth + 1,
2440 int recursion_depth,
2441 bool not_at_start) {
2445 RegExpNode* node = alternatives_->at(1).node();
2446 return node->
EatsAtLeast(still_to_find, recursion_depth + 1, not_at_start);
2454 bool not_at_start) {
2457 RegExpNode* node = alternatives_->at(1).node();
2463 int recursion_depth,
2465 bool not_at_start) {
2468 int choice_count = alternatives_->length();
2469 for (
int i = 0; i < choice_count; i++) {
2470 RegExpNode* node = alternatives_->at(i).node();
2471 if (node == ignore_this_node)
continue;
2472 int node_eats_at_least = node->
EatsAtLeast(still_to_find,
2473 recursion_depth + 1,
2475 if (node_eats_at_least < min) min = node_eats_at_least;
2482 int recursion_depth,
2483 bool not_at_start) {
2484 return EatsAtLeastHelper(still_to_find,
2492 int recursion_depth,
2493 bool not_at_start) {
2494 return EatsAtLeastHelper(still_to_find,
2502 static inline uint32_t SmearBitsRight(uint32_t v) {
2513 bool found_useful_op =
false;
2523 for (
int i = 0; i < characters_; i++) {
2526 found_useful_op =
true;
2528 mask_ |= (pos->
mask & char_mask) << char_shift;
2529 value_ |= (pos->
value & char_mask) << char_shift;
2530 char_shift += asc ? 8 : 16;
2532 return found_useful_op;
2538 bool preload_has_checked_bounds,
2539 Label* on_possible_success,
2541 bool fall_through_on_failure) {
2542 if (details->
characters() == 0)
return false;
2548 uint32_t mask = details->
mask();
2549 uint32_t value = details->
value();
2556 !preload_has_checked_bounds,
2561 bool need_mask =
true;
2567 if (compiler->
ascii()) {
2572 if ((mask & char_mask) == char_mask) need_mask =
false;
2578 if ((mask & 0x7f7f) == 0x7f7f) need_mask =
false;
2580 if ((mask & 0xffff) == 0xffff) need_mask =
false;
2582 if (mask == 0xffffffff) need_mask =
false;
2586 if (fall_through_on_failure) {
2613 int characters_filled_in,
2614 bool not_at_start) {
2615 Isolate* isolate = Isolate::Current();
2616 ASSERT(characters_filled_in < details->characters());
2619 if (compiler->
ascii()) {
2624 for (
int k = 0; k < elms_->length(); k++) {
2628 for (
int i = 0; i < characters && i < quarks.
length(); i++) {
2630 details->
positions(characters_filled_in);
2632 if (c > char_mask) {
2643 int length = GetCaseIndependentLetters(isolate, c, compiler->
ascii(),
2650 pos->
mask = char_mask;
2654 uint32_t common_bits = char_mask;
2655 uint32_t bits = chars[0];
2656 for (
int j = 1; j < length; j++) {
2657 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
2658 common_bits ^= differing_bits;
2659 bits &= common_bits;
2665 uint32_t one_zero = (common_bits | ~char_mask);
2666 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
2669 pos->
mask = common_bits;
2676 pos->
mask = char_mask;
2680 characters_filled_in++;
2681 ASSERT(characters_filled_in <= details->characters());
2682 if (characters_filled_in == details->
characters()) {
2688 details->
positions(characters_filled_in);
2699 int first_range = 0;
2700 while (ranges->
at(first_range).from() > char_mask) {
2702 if (first_range == ranges->length()) {
2711 if (to > char_mask) {
2714 uint32_t differing_bits = (from ^ to);
2717 if ((differing_bits & (differing_bits + 1)) == 0 &&
2718 from + differing_bits == to) {
2721 uint32_t common_bits = ~SmearBitsRight(differing_bits);
2722 uint32_t bits = (from & common_bits);
2723 for (
int i = first_range + 1; i < ranges->length(); i++) {
2727 if (from > char_mask)
continue;
2728 if (to > char_mask) to = char_mask;
2735 uint32_t new_common_bits = (from ^ to);
2736 new_common_bits = ~SmearBitsRight(new_common_bits);
2737 common_bits &= new_common_bits;
2738 bits &= new_common_bits;
2739 uint32_t differing_bits = (from & common_bits) ^ bits;
2740 common_bits ^= differing_bits;
2741 bits &= common_bits;
2743 pos->
mask = common_bits;
2746 characters_filled_in++;
2747 ASSERT(characters_filled_in <= details->characters());
2748 if (characters_filled_in == details->
characters()) {
2755 on_success()-> GetQuickCheckDetails(details,
2757 characters_filled_in,
2764 for (
int i = 0; i < characters_; i++) {
2765 positions_[i].mask = 0;
2766 positions_[i].value = 0;
2767 positions_[i].determines_perfectly =
false;
2775 if (by >= characters_) {
2779 for (
int i = 0; i < characters_ - by; i++) {
2780 positions_[i] = positions_[by + i];
2782 for (
int i = characters_ - by; i < characters_; i++) {
2783 positions_[i].mask = 0;
2784 positions_[i].value = 0;
2785 positions_[i].determines_perfectly =
false;
2795 ASSERT(characters_ == other->characters_);
2796 if (other->cannot_match_) {
2799 if (cannot_match_) {
2803 for (
int i = from_index; i < characters_; i++) {
2806 if (pos->
mask != other_pos->
mask ||
2817 pos->
mask &= ~differing_bits;
2830 info_->visited =
false;
2838 if (info()->replacement_calculated)
return replacement();
2839 if (depth < 0)
return this;
2840 ASSERT(!info()->visited);
2842 return FilterSuccessor(depth - 1);
2848 if (next ==
NULL)
return set_replacement(
NULL);
2850 return set_replacement(
this);
2855 if (info()->replacement_calculated)
return replacement();
2856 if (depth < 0)
return this;
2857 ASSERT(!info()->visited);
2859 int element_count = elms_->length();
2860 for (
int i = 0; i < element_count; i++) {
2864 for (
int j = 0; j < quarks.
length(); j++) {
2869 return set_replacement(
NULL);
2880 int range_count = ranges->length();
2882 if (range_count != 0 &&
2883 ranges->
at(0).from() == 0 &&
2885 return set_replacement(
NULL);
2888 if (range_count == 0 ||
2890 return set_replacement(
NULL);
2895 return FilterSuccessor(depth - 1);
2900 if (info()->replacement_calculated)
return replacement();
2901 if (depth < 0)
return this;
2902 if (info()->visited)
return this;
2909 if (continue_replacement ==
NULL)
return set_replacement(
NULL);
2917 if (info()->replacement_calculated)
return replacement();
2918 if (depth < 0)
return this;
2919 if (info()->visited)
return this;
2921 int choice_count = alternatives_->length();
2923 for (
int i = 0; i < choice_count; i++) {
2926 set_replacement(
this);
2933 for (
int i = 0; i < choice_count; i++) {
2936 ASSERT(replacement !=
this);
2937 if (replacement !=
NULL) {
2938 alternatives_->at(i).set_node(replacement);
2940 survivor = replacement;
2943 if (surviving < 2)
return set_replacement(survivor);
2945 set_replacement(
this);
2946 if (surviving == choice_count) {
2953 for (
int i = 0; i < choice_count; i++) {
2955 alternatives_->at(i).node()->
FilterASCII(depth - 1);
2956 if (replacement !=
NULL) {
2957 alternatives_->at(i).set_node(replacement);
2958 new_alternatives->
Add(alternatives_->at(i), zone());
2961 alternatives_ = new_alternatives;
2967 if (info()->replacement_calculated)
return replacement();
2968 if (depth < 0)
return this;
2969 if (info()->visited)
return this;
2973 RegExpNode* node = alternatives_->at(1).node();
2975 if (replacement ==
NULL)
return set_replacement(
NULL);
2976 alternatives_->at(1).set_node(replacement);
2978 RegExpNode* neg_node = alternatives_->at(0).node();
2982 if (neg_replacement ==
NULL)
return set_replacement(replacement);
2983 alternatives_->at(0).set_node(neg_replacement);
2984 return set_replacement(
this);
2990 int characters_filled_in,
2991 bool not_at_start) {
2992 if (body_can_be_zero_length_ || info()->visited)
return;
2996 characters_filled_in,
3002 int recursion_depth,
3005 bool not_at_start) {
3006 if (body_can_be_zero_length_ ||
3010 SaveBMInfo(bm, not_at_start, offset);
3014 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
3015 SaveBMInfo(bm, not_at_start, offset);
3021 int characters_filled_in,
3022 bool not_at_start) {
3023 not_at_start = (not_at_start || not_at_start_);
3024 int choice_count = alternatives_->length();
3025 ASSERT(choice_count > 0);
3026 alternatives_->at(0).node()->GetQuickCheckDetails(details,
3028 characters_filled_in,
3030 for (
int i = 1; i < choice_count; i++) {
3032 RegExpNode* node = alternatives_->at(i).node();
3034 characters_filled_in,
3037 details->
Merge(&new_details, characters_filled_in);
3046 bool fall_through_on_word) {
3048 fall_through_on_word ?
'w' :
'W',
3049 fall_through_on_word ? non_word : word)) {
3059 if (fall_through_on_word) {
3069 static void EmitHat(RegExpCompiler* compiler,
3070 RegExpNode* on_success,
3072 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3075 Trace new_trace(*trace);
3076 new_trace.InvalidateCurrentCharacter();
3079 if (new_trace.cp_offset() == 0) {
3082 assembler->CheckAtStart(&ok);
3086 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
3087 new_trace.backtrack(),
3089 if (!assembler->CheckSpecialCharacterClass(
'n',
3090 new_trace.backtrack())) {
3092 if (!compiler->ascii()) {
3093 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
3095 assembler->CheckCharacter(
'\n', &ok);
3096 assembler->CheckNotCharacter(
'\r', new_trace.backtrack());
3098 assembler->Bind(&ok);
3099 on_success->Emit(compiler, &new_trace);
3104 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler,
Trace* trace) {
3105 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3107 bool not_at_start = (trace->at_start() ==
Trace::FALSE);
3108 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
3109 if (lookahead ==
NULL) {
3113 if (eats_at_least >= 1) {
3114 BoyerMooreLookahead* bm =
3115 new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
3116 FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
3117 if (bm->at(0)->is_non_word()) next_is_word_character =
Trace::FALSE;
3118 if (bm->at(0)->is_word()) next_is_word_character =
Trace::TRUE;
3121 if (lookahead->at(0)->is_non_word()) next_is_word_character =
Trace::FALSE;
3122 if (lookahead->at(0)->is_word()) next_is_word_character =
Trace::TRUE;
3126 Label before_non_word;
3128 if (trace->characters_preloaded() != 1) {
3129 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
3132 EmitWordCheck(assembler, &before_word, &before_non_word,
false);
3134 assembler->Bind(&before_non_word);
3136 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
3137 assembler->GoTo(&ok);
3139 assembler->Bind(&before_word);
3140 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
3141 assembler->Bind(&ok);
3142 }
else if (next_is_word_character ==
Trace::TRUE) {
3143 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
3146 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
3151 void AssertionNode::BacktrackIfPrevious(
3152 RegExpCompiler* compiler,
3154 AssertionNode::IfPrevious backtrack_if_previous) {
3155 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3156 Trace new_trace(*trace);
3157 new_trace.InvalidateCurrentCharacter();
3159 Label fall_through, dummy;
3161 Label* non_word = backtrack_if_previous == kIsNonWord ?
3162 new_trace.backtrack() :
3164 Label* word = backtrack_if_previous == kIsNonWord ?
3166 new_trace.backtrack();
3168 if (new_trace.cp_offset() == 0) {
3171 assembler->CheckAtStart(non_word);
3175 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy,
false);
3176 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
3178 assembler->Bind(&fall_through);
3179 on_success()->Emit(compiler, &new_trace);
3186 bool not_at_start) {
3187 if (type_ == AT_START && not_at_start) {
3191 return on_success()->GetQuickCheckDetails(details,
3205 assembler->
Bind(&ok);
3215 Trace at_start_trace = *trace;
3217 on_success()->Emit(compiler, &at_start_trace);
3223 EmitHat(compiler, on_success(), trace);
3226 case AT_NON_BOUNDARY: {
3227 EmitBoundaryCheck(compiler, trace);
3231 on_success()->Emit(compiler, trace);
3236 if (quick_check ==
NULL)
return false;
3237 if (offset >= quick_check->
characters())
return false;
3242 static void UpdateBoundsCheck(
int index,
int* checked_up_to) {
3243 if (index > *checked_up_to) {
3244 *checked_up_to = index;
3278 void TextNode::TextEmitPass(RegExpCompiler* compiler,
3279 TextEmitPassType pass,
3282 bool first_element_checked,
3283 int* checked_up_to) {
3284 Isolate* isolate = Isolate::Current();
3285 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3286 bool ascii = compiler->ascii();
3288 QuickCheckDetails* quick_check = trace->quick_check_performed();
3289 int element_count = elms_->length();
3290 for (
int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
3291 TextElement elm = elms_->at(i);
3292 int cp_offset = trace->cp_offset() + elm.cp_offset;
3294 Vector<const uc16> quarks = elm.data.u_atom->data();
3295 for (
int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3296 if (first_element_checked && i == 0 && j == 0)
continue;
3297 if (DeterminedAlready(quick_check, elm.cp_offset + j))
continue;
3300 case NON_ASCII_MATCH:
3303 assembler->GoTo(backtrack);
3307 case NON_LETTER_CHARACTER_MATCH:
3308 emit_function = &EmitAtomNonLetter;
3310 case SIMPLE_CHARACTER_MATCH:
3311 emit_function = &EmitSimpleCharacter;
3313 case CASE_CHARACTER_MATCH:
3314 emit_function = &EmitAtomLetter;
3319 if (emit_function !=
NULL) {
3320 bool bound_checked = emit_function(isolate,
3325 *checked_up_to < cp_offset + j,
3327 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
3332 if (pass == CHARACTER_CLASS_MATCH) {
3333 if (first_element_checked && i == 0)
continue;
3334 if (DeterminedAlready(quick_check, elm.cp_offset))
continue;
3335 RegExpCharacterClass* cc = elm.data.u_char_class;
3336 EmitCharClass(assembler,
3341 *checked_up_to < cp_offset,
3344 UpdateBoundsCheck(cp_offset, checked_up_to);
3351 int TextNode::Length() {
3352 TextElement elm = elms_->last();
3353 ASSERT(elm.cp_offset >= 0);
3355 return elm.cp_offset + elm.data.u_atom->data().length();
3357 return elm.cp_offset + 1;
3362 bool TextNode::SkipPass(
int int_pass,
bool ignore_case) {
3363 TextEmitPassType pass =
static_cast<TextEmitPassType
>(int_pass);
3365 return pass == SIMPLE_CHARACTER_MATCH;
3367 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
3379 LimitResult limit_result = LimitVersions(compiler, trace);
3380 if (limit_result ==
DONE)
return;
3381 ASSERT(limit_result == CONTINUE);
3388 if (compiler->
ascii()) {
3390 TextEmitPass(compiler, NON_ASCII_MATCH,
false, trace,
false, &dummy);
3393 bool first_elt_done =
false;
3394 int bound_checked_to = trace->
cp_offset() - 1;
3400 for (
int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3402 TextEmitPass(compiler,
3403 static_cast<TextEmitPassType>(pass),
3410 first_elt_done =
true;
3413 for (
int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3415 TextEmitPass(compiler,
3416 static_cast<TextEmitPassType>(pass),
3424 Trace successor_trace(*trace);
3428 on_success()->Emit(compiler, &successor_trace);
3433 characters_preloaded_ = 0;
3442 characters_preloaded_ = 0;
3452 bound_checked_up_to_ =
Max(0, bound_checked_up_to_ - by);
3457 int element_count = elms_->length();
3458 for (
int i = 0; i < element_count; i++) {
3466 int range_count = ranges->length();
3467 for (
int j = 0; j < range_count; j++) {
3468 ranges->
at(j).AddCaseEquivalents(ranges, is_ascii, zone());
3487 if (elms_->length() != 1)
return NULL;
3496 return ranges->length() == 0 ? on_success() :
NULL;
3498 if (ranges->length() != 1)
return NULL;
3500 if (compiler->
ascii()) {
3505 return ranges->
at(0).IsEverything(max_char) ? on_success() :
NULL;
3519 int recursion_depth = 0;
3520 while (node !=
this) {
3522 return kNodeIsTooComplexForGreedyLoops;
3525 if (node_length == kNodeIsTooComplexForGreedyLoops) {
3526 return kNodeIsTooComplexForGreedyLoops;
3528 length += node_length;
3538 AddAlternative(alt);
3539 loop_node_ = alt.
node();
3545 AddAlternative(alt);
3546 continue_node_ = alt.
node();
3554 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
3555 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
3565 trace->
Flush(compiler,
this);
3572 int ChoiceNode::CalculatePreloadCharacters(
RegExpCompiler* compiler,
3573 int eats_at_least) {
3574 int preload_characters =
Min(4, eats_at_least);
3576 bool ascii = compiler->
ascii();
3578 if (preload_characters > 4) preload_characters = 4;
3582 if (preload_characters == 3) preload_characters = 2;
3584 if (preload_characters > 2) preload_characters = 2;
3587 if (preload_characters > 1) preload_characters = 1;
3589 return preload_characters;
3598 : possible_success(),
3599 expects_preload(
false),
3601 quick_check_details() { }
3614 : alt_gens_(count, zone) {
3615 for (
int i = 0; i < count && i < kAFew; i++) {
3616 alt_gens_.Add(a_few_alt_gens_ + i, zone);
3618 for (
int i = kAFew; i < count; i++) {
3623 for (
int i = kAFew; i < alt_gens_.length(); i++) {
3624 delete alt_gens_[i];
3625 alt_gens_[i] =
NULL;
3630 return alt_gens_[i];
3634 static const int kAFew = 10;
3641 static const int kSpaceRanges[] = {
'\t',
'\r' + 1,
' ',
' ' + 1, 0x00A0,
3642 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A,
3643 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 };
3644 static const int kSpaceRangeCount =
ARRAY_SIZE(kSpaceRanges);
3646 static const int kWordRanges[] = {
3647 '0',
'9' + 1,
'A',
'Z' + 1,
'_',
'_' + 1,
'a',
'z' + 1, 0x10000 };
3648 static const int kWordRangeCount =
ARRAY_SIZE(kWordRanges);
3649 static const int kDigitRanges[] = {
'0',
'9' + 1, 0x10000 };
3650 static const int kDigitRangeCount =
ARRAY_SIZE(kDigitRanges);
3651 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };
3652 static const int kSurrogateRangeCount =
ARRAY_SIZE(kSurrogateRanges);
3653 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
3654 0x2028, 0x202A, 0x10000 };
3655 static const int kLineTerminatorRangeCount =
ARRAY_SIZE(kLineTerminatorRanges);
3659 SetInterval(
Interval(character, character));
3664 s_ =
AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
3665 w_ =
AddRange(w_, kWordRanges, kWordRangeCount, interval);
3666 d_ =
AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
3668 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
3669 if (interval.
to() - interval.
from() >= kMapSize - 1) {
3670 if (map_count_ != kMapSize) {
3671 map_count_ = kMapSize;
3672 for (
int i = 0; i < kMapSize; i++) map_->at(i) =
true;
3676 for (
int i = interval.
from(); i <= interval.
to(); i++) {
3677 int mod_character = (i & kMask);
3678 if (!map_->at(mod_character)) {
3680 map_->at(mod_character) =
true;
3682 if (map_count_ == kMapSize)
return;
3689 if (map_count_ != kMapSize) {
3690 map_count_ = kMapSize;
3691 for (
int i = 0; i < kMapSize; i++) map_->at(i) =
true;
3699 compiler_(compiler) {
3700 if (compiler->
ascii()) {
3706 for (
int i = 0; i <
length; i++) {
3715 bool BoyerMooreLookahead::FindWorthwhileInterval(
int* from,
int* to) {
3716 int biggest_points = 0;
3719 const int kMaxMax = 32;
3720 for (
int max_number_of_chars = 4;
3721 max_number_of_chars < kMaxMax;
3722 max_number_of_chars *= 2) {
3724 FindBestInterval(max_number_of_chars, biggest_points, from, to);
3726 if (biggest_points == 0)
return false;
3737 int BoyerMooreLookahead::FindBestInterval(
3738 int max_number_of_chars,
int old_biggest_points,
int* from,
int* to) {
3739 int biggest_points = old_biggest_points;
3741 for (
int i = 0; i < length_; ) {
3742 while (i < length_ &&
Count(i) > max_number_of_chars) i++;
3743 if (i == length_)
break;
3744 int remembered_from = i;
3745 bool union_map[kSize];
3746 for (
int j = 0; j < kSize; j++) union_map[j] =
false;
3747 while (i < length_ &&
Count(i) <= max_number_of_chars) {
3748 BoyerMoorePositionInfo* map = bitmaps_->at(i);
3749 for (
int j = 0; j < kSize; j++) union_map[j] |= map->at(j);
3753 for (
int j = 0; j < kSize; j++) {
3768 bool in_quickcheck_range = ((i - remembered_from < 4) ||
3769 (compiler_->
ascii() ? remembered_from <= 4 : remembered_from <= 2));
3772 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
3773 int points = (i - remembered_from) * probability;
3774 if (points > biggest_points) {
3775 *from = remembered_from;
3777 biggest_points = points;
3780 return biggest_points;
3789 int BoyerMooreLookahead::GetSkipTable(
int min_lookahead,
3791 Handle<ByteArray> boolean_skip_table) {
3794 const int kSkipArrayEntry = 0;
3795 const int kDontSkipArrayEntry = 1;
3797 for (
int i = 0; i < kSize; i++) {
3798 boolean_skip_table->set(i, kSkipArrayEntry);
3800 int skip = max_lookahead + 1 - min_lookahead;
3802 for (
int i = max_lookahead; i >= min_lookahead; i--) {
3803 BoyerMoorePositionInfo* map = bitmaps_->at(i);
3804 for (
int j = 0; j < kSize; j++) {
3806 boolean_skip_table->set(j, kDontSkipArrayEntry);
3819 int min_lookahead = 0;
3820 int max_lookahead = 0;
3822 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead))
return false;
3824 bool found_single_character =
false;
3825 int single_character = 0;
3826 for (
int i = max_lookahead; i >= min_lookahead; i--) {
3829 (found_single_character && map->
map_count() != 0)) {
3830 found_single_character =
false;
3833 for (
int j = 0; j < kSize; j++) {
3835 found_single_character =
true;
3836 single_character = j;
3842 int lookahead_width = max_lookahead + 1 - min_lookahead;
3844 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
3849 if (found_single_character) {
3853 if (max_char_ > kSize) {
3868 int skip_distance = GetSkipTable(
3869 min_lookahead, max_lookahead, boolean_skip_table);
3870 ASSERT(skip_distance != 0);
3966 for (
int i = 0; i < choice_count - 1; i++) {
3969 int guard_count = (guards ==
NULL) ? 0 : guards->length();
3970 for (
int j = 0; j < guard_count; j++) {
3977 if (limit_result ==
DONE)
return;
3980 int new_flush_budget = trace->
flush_budget() / choice_count;
3982 trace->
Flush(compiler,
this);
3988 Trace* current_trace = trace;
3991 bool greedy_loop =
false;
3992 Label greedy_loop_label;
3993 Trace counter_backtrack_trace;
4008 current_trace = &counter_backtrack_trace;
4009 Label greedy_match_failed;
4010 Trace greedy_match_trace;
4014 macro_assembler->
Bind(&loop_label);
4017 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
4018 macro_assembler->
Bind(&greedy_match_failed);
4021 Label second_choice;
4022 macro_assembler->
Bind(&second_choice);
4024 int first_normal_choice = greedy_loop ? 1 : 0;
4027 const int kEatsAtLeastNotYetInitialized = -1;
4028 int eats_at_least = kEatsAtLeastNotYetInitialized;
4030 bool skip_was_emitted =
false;
4032 if (!greedy_loop && choice_count == 2) {
4047 if (lookahead ==
NULL) {
4051 if (eats_at_least >= 1) {
4061 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
4067 if (eats_at_least == kEatsAtLeastNotYetInitialized) {
4071 int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
4073 bool preload_is_current = !skip_was_emitted &&
4075 bool preload_has_checked_bounds = preload_is_current;
4081 for (
int i = first_normal_choice; i < choice_count; i++) {
4086 int guard_count = (guards ==
NULL) ? 0 : guards->length();
4087 Trace new_trace(*current_trace);
4089 preload_characters :
4091 if (preload_has_checked_bounds) {
4097 bool generate_full_check_inline =
false;
4098 if (FLAG_regexp_optimization &&
4102 preload_has_checked_bounds,
4105 i < choice_count - 1)) {
4107 preload_is_current =
true;
4108 preload_has_checked_bounds =
true;
4112 if (i == choice_count - 1) {
4117 generate_full_check_inline =
true;
4120 if (i == choice_count - 1 && !greedy_loop) {
4130 if (i != first_normal_choice) {
4134 if (i < choice_count - 1) {
4137 generate_full_check_inline =
true;
4139 if (generate_full_check_inline) {
4143 for (
int j = 0; j < guard_count; j++) {
4144 GenerateGuard(macro_assembler, guards->
at(j), &new_trace);
4146 alternative.
node()->
Emit(compiler, &new_trace);
4147 preload_is_current =
false;
4152 macro_assembler->
Bind(&greedy_loop_label);
4157 macro_assembler->
GoTo(&second_choice);
4163 for (
int i = first_normal_choice; i < choice_count - 1; i++) {
4165 Trace new_trace(*current_trace);
4172 EmitOutOfLineContinuation(compiler,
4182 void ChoiceNode::EmitOutOfLineContinuation(
RegExpCompiler* compiler,
4186 int preload_characters,
4187 bool next_expects_preload) {
4192 Trace out_of_line_trace(*trace);
4193 out_of_line_trace.set_characters_preloaded(preload_characters);
4195 if (not_at_start_) out_of_line_trace.set_at_start(
Trace::FALSE);
4197 int guard_count = (guards ==
NULL) ? 0 : guards->length();
4198 if (next_expects_preload) {
4199 Label reload_current_char;
4200 out_of_line_trace.set_backtrack(&reload_current_char);
4201 for (
int j = 0; j < guard_count; j++) {
4202 GenerateGuard(macro_assembler, guards->
at(j), &out_of_line_trace);
4204 alternative.
node()->
Emit(compiler, &out_of_line_trace);
4205 macro_assembler->
Bind(&reload_current_char);
4212 preload_characters);
4213 macro_assembler->
GoTo(&(alt_gen->
after));
4215 out_of_line_trace.set_backtrack(&(alt_gen->
after));
4216 for (
int j = 0; j < guard_count; j++) {
4217 GenerateGuard(macro_assembler, guards->
at(j), &out_of_line_trace);
4219 alternative.
node()->
Emit(compiler, &out_of_line_trace);
4227 if (limit_result ==
DONE)
return;
4235 new_capture(data_.u_position_register.reg,
4236 data_.u_position_register.is_capture,
4238 Trace new_trace = *trace;
4245 new_increment(data_.u_increment_register.reg);
4246 Trace new_trace = *trace;
4253 new_set(data_.u_store_register.reg, data_.u_store_register.value);
4254 Trace new_trace = *trace;
4261 new_capture(
Interval(data_.u_clear_captures.range_from,
4262 data_.u_clear_captures.range_to));
4263 Trace new_trace = *trace;
4270 trace->
Flush(compiler,
this);
4273 data_.u_submatch.current_position_register, 0);
4275 data_.u_submatch.stack_pointer_register);
4280 int start_pos_reg = data_.u_empty_match_check.start_register;
4282 int rep_reg = data_.u_empty_match_check.repetition_register;
4285 if (know_dist && !has_minimum && stored_pos == trace->
cp_offset()) {
4289 }
else if (know_dist && stored_pos < trace->cp_offset()) {
4294 trace->
Flush(compiler,
this);
4296 Label skip_empty_check;
4300 int limit = data_.u_empty_match_check.repetition_limit;
4301 assembler->
IfRegisterLT(rep_reg, limit, &skip_empty_check);
4307 assembler->
Bind(&skip_empty_check);
4314 trace->
Flush(compiler,
this);
4318 data_.u_submatch.current_position_register);
4320 data_.u_submatch.stack_pointer_register);
4322 if (clear_register_count == 0) {
4326 int clear_registers_from = data_.u_submatch.clear_register_from;
4327 Label clear_registers_backtrack;
4328 Trace new_trace = *trace;
4332 assembler->
Bind(&clear_registers_backtrack);
4333 int clear_registers_to = clear_registers_from + clear_register_count - 1;
4334 assembler->
ClearRegisters(clear_registers_from, clear_registers_to);
4349 trace->
Flush(compiler,
this);
4354 if (limit_result ==
DONE)
return;
4379 explicit DotPrinter(
bool ignore_case)
4380 : ignore_case_(ignore_case),
4381 stream_(&alloc_) { }
4382 void PrintNode(
const char* label, RegExpNode* node);
4383 void Visit(RegExpNode* node);
4384 void PrintAttributes(RegExpNode* from);
4385 StringStream* stream() {
return &stream_; }
4386 void PrintOnFailure(RegExpNode* from, RegExpNode* to);
4387 #define DECLARE_VISIT(Type) \
4388 virtual void Visit##Type(Type##Node* that);
4390 #undef DECLARE_VISIT
4393 HeapStringAllocator alloc_;
4394 StringStream stream_;
4398 void DotPrinter::PrintNode(
const char* label, RegExpNode* node) {
4399 stream()->Add(
"digraph G {\n graph [label=\"");
4400 for (
int i = 0; label[i]; i++) {
4403 stream()->Add(
"\\\\");
4406 stream()->Add(
"\"");
4409 stream()->Put(label[i]);
4413 stream()->Add(
"\"];\n");
4415 stream()->Add(
"}\n");
4420 void DotPrinter::Visit(RegExpNode* node) {
4421 if (node->info()->visited)
return;
4422 node->info()->visited =
true;
4427 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
4428 stream()->Add(
" n%p -> n%p [style=dotted];\n", from, on_failure);
4433 class TableEntryBodyPrinter {
4435 TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice)
4436 : stream_(stream), choice_(choice) { }
4437 void Call(
uc16 from, DispatchTable::Entry entry) {
4438 OutSet* out_set = entry.out_set();
4440 if (out_set->Get(i)) {
4441 stream()->Add(
" n%p:s%io%i -> n%p;\n",
4445 choice()->alternatives()->at(i).node());
4450 StringStream* stream() {
return stream_; }
4451 ChoiceNode* choice() {
return choice_; }
4452 StringStream* stream_;
4453 ChoiceNode* choice_;
4457 class TableEntryHeaderPrinter {
4459 explicit TableEntryHeaderPrinter(StringStream* stream)
4460 : first_(
true), stream_(stream) { }
4461 void Call(
uc16 from, DispatchTable::Entry entry) {
4467 stream()->Add(
"{\\%k-\\%k|{", from, entry.to());
4468 OutSet* out_set = entry.out_set();
4471 if (out_set->Get(i)) {
4472 if (priority > 0) stream()->Add(
"|");
4473 stream()->Add(
"<s%io%i> %i", from, i, priority);
4477 stream()->Add(
"}}");
4482 StringStream* stream() {
return stream_; }
4483 StringStream* stream_;
4487 class AttributePrinter {
4489 explicit AttributePrinter(DotPrinter* out)
4490 : out_(out), first_(
true) { }
4491 void PrintSeparator() {
4495 out_->stream()->Add(
"|");
4498 void PrintBit(
const char* name,
bool value) {
4501 out_->stream()->Add(
"{%s}", name);
4503 void PrintPositive(
const char* name,
int value) {
4504 if (value < 0)
return;
4506 out_->stream()->Add(
"{%s|%x}", name, value);
4514 void DotPrinter::PrintAttributes(RegExpNode* that) {
4515 stream()->Add(
" a%p [shape=Mrecord, color=grey, fontcolor=grey, "
4516 "margin=0.1, fontsize=10, label=\"{",
4518 AttributePrinter printer(
this);
4519 NodeInfo* info = that->info();
4520 printer.PrintBit(
"NI", info->follows_newline_interest);
4521 printer.PrintBit(
"WI", info->follows_word_interest);
4522 printer.PrintBit(
"SI", info->follows_start_interest);
4523 Label* label = that->label();
4524 if (label->is_bound())
4525 printer.PrintPositive(
"@", label->pos());
4526 stream()->Add(
"}\"];\n");
4527 stream()->Add(
" a%p -> n%p [style=dashed, color=grey, "
4528 "arrowhead=none];\n", that, that);
4532 static const bool kPrintDispatchTable =
false;
4533 void DotPrinter::VisitChoice(ChoiceNode* that) {
4534 if (kPrintDispatchTable) {
4535 stream()->Add(
" n%p [shape=Mrecord, label=\"", that);
4536 TableEntryHeaderPrinter header_printer(stream());
4537 that->GetTable(ignore_case_)->ForEach(&header_printer);
4538 stream()->Add(
"\"]\n", that);
4539 PrintAttributes(that);
4540 TableEntryBodyPrinter body_printer(stream(), that);
4541 that->GetTable(ignore_case_)->ForEach(&body_printer);
4543 stream()->Add(
" n%p [shape=Mrecord, label=\"?\"];\n", that);
4544 for (
int i = 0; i < that->alternatives()->length(); i++) {
4545 GuardedAlternative alt = that->alternatives()->at(i);
4546 stream()->Add(
" n%p -> n%p;\n", that, alt.node());
4549 for (
int i = 0; i < that->alternatives()->length(); i++) {
4550 GuardedAlternative alt = that->alternatives()->at(i);
4551 alt.node()->Accept(
this);
4556 void DotPrinter::VisitText(TextNode* that) {
4557 Zone* zone = that->zone();
4558 stream()->Add(
" n%p [label=\"", that);
4559 for (
int i = 0; i < that->elements()->length(); i++) {
4560 if (i > 0) stream()->Add(
" ");
4561 TextElement elm = that->elements()->at(i);
4564 stream()->Add(
"'%w'", elm.data.u_atom->data());
4568 RegExpCharacterClass* node = elm.data.u_char_class;
4570 if (node->is_negated())
4572 for (
int j = 0; j < node->ranges(zone)->length(); j++) {
4573 CharacterRange range = node->ranges(zone)->at(j);
4574 stream()->Add(
"%k-%k", range.from(), range.to());
4583 stream()->Add(
"\", shape=box, peripheries=2];\n");
4584 PrintAttributes(that);
4585 stream()->Add(
" n%p -> n%p;\n", that, that->on_success());
4586 Visit(that->on_success());
4590 void DotPrinter::VisitBackReference(BackReferenceNode* that) {
4591 stream()->Add(
" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
4593 that->start_register(),
4594 that->end_register());
4595 PrintAttributes(that);
4596 stream()->Add(
" n%p -> n%p;\n", that, that->on_success());
4597 Visit(that->on_success());
4601 void DotPrinter::VisitEnd(EndNode* that) {
4602 stream()->Add(
" n%p [style=bold, shape=point];\n", that);
4603 PrintAttributes(that);
4607 void DotPrinter::VisitAssertion(AssertionNode* that) {
4608 stream()->Add(
" n%p [", that);
4609 switch (that->type()) {
4611 stream()->Add(
"label=\"$\", shape=septagon");
4614 stream()->Add(
"label=\"^\", shape=septagon");
4617 stream()->Add(
"label=\"\\b\", shape=septagon");
4620 stream()->Add(
"label=\"\\B\", shape=septagon");
4623 stream()->Add(
"label=\"(?<=\\n)\", shape=septagon");
4626 stream()->Add(
"];\n");
4627 PrintAttributes(that);
4628 RegExpNode* successor = that->on_success();
4629 stream()->Add(
" n%p -> n%p;\n", that, successor);
4634 void DotPrinter::VisitAction(ActionNode* that) {
4635 stream()->Add(
" n%p [", that);
4636 switch (that->type_) {
4638 stream()->Add(
"label=\"$%i:=%i\", shape=octagon",
4639 that->data_.u_store_register.reg,
4640 that->data_.u_store_register.value);
4643 stream()->Add(
"label=\"$%i++\", shape=octagon",
4644 that->data_.u_increment_register.reg);
4647 stream()->Add(
"label=\"$%i:=$pos\", shape=octagon",
4648 that->data_.u_position_register.reg);
4651 stream()->Add(
"label=\"$%i:=$pos,begin\", shape=septagon",
4652 that->data_.u_submatch.current_position_register);
4655 stream()->Add(
"label=\"escape\", shape=septagon");
4658 stream()->Add(
"label=\"$%i=$pos?,$%i<%i?\", shape=septagon",
4659 that->data_.u_empty_match_check.start_register,
4660 that->data_.u_empty_match_check.repetition_register,
4661 that->data_.u_empty_match_check.repetition_limit);
4664 stream()->Add(
"label=\"clear $%i to $%i\", shape=septagon",
4665 that->data_.u_clear_captures.range_from,
4666 that->data_.u_clear_captures.range_to);
4670 stream()->Add(
"];\n");
4671 PrintAttributes(that);
4672 RegExpNode* successor = that->on_success();
4673 stream()->Add(
" n%p -> n%p;\n", that, successor);
4678 class DispatchTableDumper {
4680 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }
4681 void Call(
uc16 key, DispatchTable::Entry entry);
4682 StringStream* stream() {
return stream_; }
4684 StringStream* stream_;
4688 void DispatchTableDumper::Call(
uc16 key, DispatchTable::Entry entry) {
4689 stream()->Add(
"[%k-%k]: {", key, entry.to());
4690 OutSet* set = entry.out_set();
4697 stream()->Add(
", ");
4699 stream()->Add(
"%i", i);
4702 stream()->Add(
"}\n");
4707 HeapStringAllocator alloc;
4708 StringStream stream(&alloc);
4709 DispatchTableDumper dumper(&stream);
4710 tree()->ForEach(&dumper);
4718 DotPrinter printer(ignore_case);
4719 printer.PrintNode(label, node);
4734 return new(compiler->
zone())
TextNode(elms, on_success);
4745 const int* special_class,
4748 ASSERT(special_class[length] == 0x10000);
4749 ASSERT(ranges->length() != 0);
4751 ASSERT(special_class[0] != 0);
4752 if (ranges->length() != (length >> 1) + 1) {
4755 CharacterRange range = ranges->
at(0);
4756 if (range.from() != 0) {
4759 for (
int i = 0; i < length; i += 2) {
4760 if (special_class[i] != (range.to() + 1)) {
4763 range = ranges->
at((i >> 1) + 1);
4764 if (special_class[i+1] != range.from()) {
4768 if (range.to() != 0xffff) {
4775 static bool CompareRanges(ZoneList<CharacterRange>* ranges,
4776 const int* special_class,
4779 ASSERT(special_class[length] == 0x10000);
4780 if (ranges->length() * 2 != length) {
4783 for (
int i = 0; i < length; i += 2) {
4784 CharacterRange range = ranges->at(i >> 1);
4785 if (range.from() != special_class[i] ||
4786 range.to() != special_class[i + 1] - 1) {
4800 if (set_.is_standard()) {
4803 if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4804 set_.set_standard_set_type(
's');
4807 if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4808 set_.set_standard_set_type(
'S');
4811 if (CompareInverseRanges(set_.ranges(zone),
4812 kLineTerminatorRanges,
4813 kLineTerminatorRangeCount)) {
4814 set_.set_standard_set_type(
'.');
4817 if (CompareRanges(set_.ranges(zone),
4818 kLineTerminatorRanges,
4819 kLineTerminatorRangeCount)) {
4820 set_.set_standard_set_type(
'n');
4823 if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4824 set_.set_standard_set_type(
'w');
4827 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4828 set_.set_standard_set_type(
'W');
4837 return new(compiler->
zone())
TextNode(
this, on_success);
4844 int length = alternatives->length();
4847 for (
int i = 0; i < length; i++) {
4873 : compiler_(compiler),
4874 saved_expansion_factor_(compiler->current_expansion_factor()),
4877 if (ok_to_expand_) {
4880 ok_to_expand_ =
false;
4883 int new_factor = saved_expansion_factor_ * factor;
4898 int saved_expansion_factor_;
4911 bool not_at_start) {
4932 static const int kMaxUnrolledMinMatches = 3;
4933 static const int kMaxUnrolledMaxMatches = 3;
4934 if (max == 0)
return on_success;
4935 bool body_can_be_empty = (body->
min_match() == 0);
4938 bool needs_capture_clearing = !capture_registers.
is_empty();
4941 if (body_can_be_empty) {
4943 }
else if (FLAG_regexp_optimization && !needs_capture_clearing) {
4948 compiler, min + ((max != min) ? 1 : 0));
4949 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.
ok_to_expand()) {
4950 int new_max = (max ==
kInfinity) ? max : max - min;
4954 0, new_max, is_greedy, body, compiler, on_success,
true);
4958 for (
int i = 0; i <
min; i++) {
4959 answer = body->
ToNode(compiler, answer);
4964 if (max <= kMaxUnrolledMaxMatches && min == 0) {
4970 for (
int i = 0; i <
max; i++) {
4981 answer = alternation;
4988 bool has_min = min > 0;
4990 bool needs_counter = has_min || has_max;
4991 int reg_ctr = needs_counter
4999 : static_cast<RegExpNode*>(center);
5000 if (body_can_be_empty) {
5009 if (body_can_be_empty) {
5014 if (needs_capture_clearing) {
5022 body_alt.
AddGuard(body_guard, zone);
5027 rest_alt.
AddGuard(rest_guard, zone);
5036 if (needs_counter) {
5082 stack_pointer_register,
5101 return new(compiler->
zone())
5119 const int registers_per_capture = 2;
5120 const int register_of_first_capture = 2;
5121 int register_count = capture_count_ * registers_per_capture;
5122 int register_start =
5123 register_of_first_capture + capture_from_ * registers_per_capture;
5128 stack_pointer_register,
5192 for (
int i = children->length() - 1; i >= 0; i--) {
5193 current = children->
at(i)->ToNode(compiler, current);
5199 static void AddClass(
const int* elmv,
5204 ASSERT(elmv[elmc] == 0x10000);
5205 for (
int i = 0; i < elmc; i += 2) {
5206 ASSERT(elmv[i] < elmv[i + 1]);
5212 static void AddClassNegated(
const int *elmv,
5214 ZoneList<CharacterRange>* ranges,
5217 ASSERT(elmv[elmc] == 0x10000);
5218 ASSERT(elmv[0] != 0x0000);
5221 for (
int i = 0; i < elmc; i += 2) {
5222 ASSERT(last <= elmv[i] - 1);
5223 ASSERT(elmv[i] < elmv[i + 1]);
5224 ranges->Add(CharacterRange(last, elmv[i] - 1), zone);
5236 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5239 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5242 AddClass(kWordRanges, kWordRangeCount, ranges, zone);
5245 AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
5248 AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
5251 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
5254 AddClassNegated(kLineTerminatorRanges,
5255 kLineTerminatorRangeCount,
5268 AddClass(kLineTerminatorRanges,
5269 kLineTerminatorRangeCount,
5289 : included_(included),
5290 excluded_(excluded),
5322 for (
int i = 0; i < base->length(); i++)
5324 for (
int i = 0; i < overlay.
length(); i += 2) {
5336 Isolate* isolate = Isolate::Current();
5344 if (top == bottom) {
5347 for (
int i = 0; i < length; i++) {
5348 uc32 chr = chars[i];
5349 if (chr != bottom) {
5374 while (pos <= top) {
5381 block_end = range[0];
5383 int end = (block_end > top) ? top : block_end;
5385 for (
int i = 0; i < length; i++) {
5387 uc16 range_from = c - (block_end - pos);
5388 uc16 range_to = c - (block_end - end);
5389 if (!(bottom <= range_from && range_to <= top)) {
5401 int n = ranges->length();
5402 if (n <= 1)
return true;
5403 int max = ranges->
at(0).to();
5404 for (
int i = 1; i < n; i++) {
5406 if (next_range.
from() <= max + 1)
return false;
5407 max = next_range.
to();
5414 if (ranges_ ==
NULL) {
5424 static void MoveRanges(ZoneList<CharacterRange>* list,
5430 for (
int i = count - 1; i >= 0; i--) {
5431 list->at(to + i) = list->at(from + i);
5434 for (
int i = 0; i < count; i++) {
5435 list->at(to + i) = list->at(from + i);
5441 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
5443 CharacterRange insert) {
5449 uc16 from = insert.from();
5450 uc16 to = insert.to();
5452 int end_pos = count;
5453 for (
int i = count - 1; i >= 0; i--) {
5454 CharacterRange current = list->at(i);
5455 if (current.from() > to + 1) {
5457 }
else if (current.to() + 1 < from) {
5470 if (start_pos == end_pos) {
5472 if (start_pos < count) {
5473 MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
5475 list->at(start_pos) = insert;
5478 if (start_pos + 1 == end_pos) {
5480 CharacterRange to_replace = list->at(start_pos);
5481 int new_from =
Min(to_replace.from(), from);
5482 int new_to =
Max(to_replace.to(), to);
5483 list->at(start_pos) = CharacterRange(new_from, new_to);
5489 int new_from =
Min(list->at(start_pos).from(), from);
5490 int new_to =
Max(list->at(end_pos - 1).to(), to);
5491 if (end_pos < count) {
5492 MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
5494 list->at(start_pos) = CharacterRange(new_from, new_to);
5495 return count - (end_pos - start_pos) + 1;
5502 if (ranges_ ==
NULL)
return;
5508 if (character_ranges->length() <= 1)
return;
5511 int n = character_ranges->length();
5512 int max = character_ranges->
at(0).to();
5516 if (current.
from() <= max + 1) {
5531 int num_canonical = i;
5533 num_canonical = InsertRangeInCanonicalList(character_ranges,
5535 character_ranges->
at(read));
5538 character_ranges->Rewind(num_canonical);
5549 int range_count = ranges->length();
5552 if (range_count > 0 && ranges->
at(0).from() == 0) {
5553 from = ranges->
at(0).to();
5556 while (i < range_count) {
5576 if (successors(zone) !=
NULL) {
5577 for (
int i = 0; i < successors(zone)->length(); i++) {
5578 OutSet* successor = successors(zone)->at(i);
5579 if (successor->
Get(value))
5586 result->Set(value, zone);
5587 successors(zone)->Add(result, zone);
5592 void OutSet::Set(
unsigned value,
Zone *zone) {
5594 first_ |= (1 << value);
5596 if (remaining_ ==
NULL)
5597 remaining_ =
new(zone) ZoneList<unsigned>(1, zone);
5598 if (remaining_->is_empty() || !remaining_->
Contains(value))
5599 remaining_->
Add(value, zone);
5606 return (first_ & (1 << value)) != 0;
5607 }
else if (remaining_ ==
NULL) {
5610 return remaining_->
Contains(value);
5621 if (tree()->is_empty()) {
5626 empty()->
Extend(value, zone)));
5632 if (tree()->FindGreatestLessThan(current.
from(), &loc)) {
5633 Entry* entry = &loc.value();
5638 if (entry->
from() < current.
from() && entry->
to() >= current.
from()) {
5645 entry->
set_to(left.to());
5651 loc.set_value(
Entry(right.from(),
5657 if (tree()->FindLeastGreaterThan(current.
from(), &loc) &&
5658 (loc.value().from() <= current.
to()) &&
5659 (loc.value().to() >= current.
from())) {
5660 Entry* entry = &loc.value();
5664 if (current.
from() < entry->
from()) {
5669 empty()->
Extend(value, zone)));
5675 if (entry->
to() > current.
to()) {
5678 ins.set_value(
Entry(current.
to() + 1,
5700 empty()->
Extend(value, zone)));
5709 if (!tree()->FindGreatestLessThan(value, &loc))
5711 Entry* entry = &loc.value();
5712 if (value <= entry->to())
5724 StackLimitCheck
check(Isolate::Current());
5725 if (check.HasOverflowed()) {
5726 fail(
"Stack overflow");
5738 void Analysis::VisitEnd(
EndNode* that) {
5744 int element_count =
elements()->length();
5748 for (
int i = 0; i < element_count; i++) {
5760 void Analysis::VisitText(
TextNode* that) {
5771 void Analysis::VisitAction(ActionNode* that) {
5772 RegExpNode* target = that->on_success();
5777 that->info()->AddFromFollowing(target->info());
5782 void Analysis::VisitChoice(ChoiceNode* that) {
5783 NodeInfo* info = that->info();
5784 for (
int i = 0; i < that->alternatives()->length(); i++) {
5785 RegExpNode* node = that->alternatives()->at(i).node();
5790 info->AddFromFollowing(node->info());
5797 for (
int i = 0; i < that->
alternatives()->length(); i++) {
5819 void Analysis::VisitAssertion(AssertionNode* that) {
5825 int recursion_depth,
5828 bool not_at_start) {
5841 int recursion_depth,
5844 bool not_at_start) {
5846 budget = (budget - 1) / alts->length();
5847 for (
int i = 0; i < alts->length(); i++) {
5855 offset, recursion_depth + 1, budget, bm, not_at_start);
5862 int recursion_depth,
5865 bool not_at_start) {
5866 if (initial_offset >= bm->
length())
return;
5867 int offset = initial_offset;
5869 for (
int i = 0; i <
elements()->length(); i++) {
5870 if (offset >= bm->
length()) {
5871 if (initial_offset == 0)
set_bm_info(not_at_start, bm);
5877 for (
int j = 0; j < atom->
length(); j++, offset++) {
5878 if (offset >= bm->
length()) {
5879 if (initial_offset == 0)
set_bm_info(not_at_start, bm);
5885 int length = GetCaseIndependentLetters(
5890 for (
int j = 0; j < length; j++) {
5891 bm->
Set(offset, chars[j]);
5894 if (character <= max_char) bm->
Set(offset, character);
5904 for (
int k = 0; k < ranges->length(); k++) {
5906 if (range.
from() > max_char)
continue;
5907 int to =
Min(max_char, static_cast<int>(range.
to()));
5914 if (offset >= bm->
length()) {
5915 if (initial_offset == 0)
set_bm_info(not_at_start, bm);
5919 recursion_depth + 1,
5923 if (initial_offset == 0)
set_bm_info(not_at_start, bm);
5931 void DispatchTableConstructor::VisitEnd(
EndNode* that) {
5939 for (
int i = 0; i < alternatives->length(); i++) {
5941 alternatives->
at(i).node()->Accept(
this);
5950 : constructor_(constructor) { }
5963 void DispatchTableConstructor::VisitChoice(
ChoiceNode* node) {
5972 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
5979 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
5980 RegExpNode* target = that->on_success();
5981 target->Accept(
this);
5985 static int CompareRangeByFrom(
const CharacterRange* a,
5986 const CharacterRange* b) {
5987 return Compare<uc16>(a->from(), b->from());
5992 ranges->
Sort(CompareRangeByFrom);
5994 for (
int i = 0; i < ranges->length(); i++) {
5996 if (last < range.
from())
5998 if (range.
to() >= last) {
6002 last = range.
to() + 1;
6010 void DispatchTableConstructor::VisitText(
TextNode* that) {
6020 ZoneList<CharacterRange>* ranges = tree->
ranges(that->
zone());
6021 if (tree->is_negated()) {
6024 for (
int i = 0; i < ranges->length(); i++)
6036 void DispatchTableConstructor::VisitAction(ActionNode* that) {
6037 RegExpNode* target = that->on_success();
6038 target->Accept(
this);
6052 return IrregexpRegExpTooBig();
6060 int chars_sampled = 0;
6061 int half_way = (sample_subject->length() -
kSampleSize) / 2;
6062 for (
int i =
Max(0, half_way);
6063 i < sample_subject->length() && chars_sampled <
kSampleSize;
6064 i++, chars_sampled++) {
6077 if (!is_start_anchored) {
6096 node = first_step_node;
6110 Analysis analysis(ignore_case, is_ascii);
6118 #ifndef V8_INTERPRETED_REGEXP
6125 #if V8_TARGET_ARCH_IA32
6128 #elif V8_TARGET_ARCH_X64
6131 #elif V8_TARGET_ARCH_ARM
6134 #elif V8_TARGET_ARCH_MIPS
6139 #else // V8_INTERPRETED_REGEXP
6142 RegExpMacroAssemblerIrregexp macro_assembler(codes, zone);
6143 #endif // V8_INTERPRETED_REGEXP
6147 static const int kMaxBacksearchLimit = 1024;
6148 if (is_end_anchored &&
6149 !is_start_anchored &&
6150 max_length < kMaxBacksearchLimit) {
6151 macro_assembler.SetCurrentPositionFromEnd(max_length);
6155 macro_assembler.set_global_mode(
6161 return compiler.
Assemble(¯o_assembler,
void SetInterval(const Interval &interval)
ZoneList< TextElement > * elements()
AlternativeGenerationList(int count, Zone *zone)
virtual void WriteStackPointerToRegister(int reg)=0
static Handle< Object > New(Handle< JSFunction > func, int argc, Handle< Object > argv[], bool *pending_exception)
bool EmitCharacterFunction(Isolate *isolate, RegExpCompiler *compiler, uc16 c, Label *on_failure, int cp_offset, bool check, bool preloaded)
Failure * StackOverflow()
static bool ParseRegExp(FlatStringReader *input, bool multiline, RegExpCompileData *result, Zone *zone)
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
void FlattenString(Handle< String > string)
int GreedyLoopTextLengthForAlternative(GuardedAlternative *alternative)
bool is_standard(Zone *zone)
static const int kIrregexpMaxRegisterCountIndex
double total_regexp_code_generated()
static const int kMaxAsciiCharCode
bool Contains(const T &elm) const
#define DECLARE_VISIT(type)
static Vector< const int > GetWordBounds()
#define ASSERT_RESULT(expr)
virtual void GoTo(Label *label)=0
static AssertionNode * AtStart(RegExpNode *on_success)
const char * ToCString(const v8::String::Utf8Value &value)
void SetInterval(int map_number, const Interval &interval)
static ActionNode * SetRegister(int reg, int val, RegExpNode *on_success)
RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler *assembler, RegExpNode *start, int capture_count, Handle< String > pattern)
static Code * IrregexpNativeCode(FixedArray *re, bool is_ascii)
void set(int index, Object *value)
CompilationCache * compilation_cache()
CharacterRangeSplitter(ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
void PrintF(const char *format,...)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
static String * cast(Object *obj)
static ActionNode * BeginSubmatch(int stack_pointer_reg, int position_reg, RegExpNode *on_success)
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
static const int kMaxUtf16CodeUnit
void set_characters(int characters)
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
unibrow::Mapping< unibrow::Ecma262UnCanonicalize > * jsregexp_uncanonicalize()
VisitMarker(NodeInfo *info)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
virtual void AppendToText(RegExpText *text, Zone *zone)
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
static Smi * FromInt(int value)
struct v8::internal::ActionNode::@10::@12 u_increment_register
#define LOG(isolate, Call)
ZoneList< CharacterRange > * ranges(Zone *zone)
void set_at_start(bool at_start)
GlobalCache(Handle< JSRegExp > regexp, Handle< String > subject, bool is_global, Isolate *isolate)
virtual void CheckNotBackReference(int start_reg, Label *on_no_match)=0
#define ASSERT_NOT_NULL(p)
bool EmitQuickCheck(RegExpCompiler *compiler, Trace *trace, bool preload_has_checked_bounds, Label *on_possible_success, QuickCheckDetails *details_return, bool fall_through_on_failure)
void set_loop_label(Label *label)
static Handle< T > cast(Handle< S > that)
static const int kJSRegexpStaticOffsetsVectorSize
static ActionNode * EmptyMatchCheck(int start_register, int repetition_register, int repetition_limit, RegExpNode *on_success)
virtual void ClearRegisters(int reg_from, int reg_to)=0
Position * positions(int index)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
LimitResult LimitVersions(RegExpCompiler *compiler, Trace *trace)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
static Handle< Object > CreateRegExpLiteral(Handle< JSFunction > constructor, Handle< String > pattern, Handle< String > flags, bool *has_pending_exception)
Vector< const char > ToAsciiVector()
static TextElement CharClass(RegExpCharacterClass *char_class)
virtual RegExpNode * FilterASCII(int depth)
void AddFromFollowing(NodeInfo *that)
void AddRange(CharacterRange range, int value, Zone *zone)
static ActionNode * IncrementRegister(int reg, RegExpNode *on_success)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)=0
static ByteArray * cast(Object *obj)
static int IrregexpNumberOfCaptures(FixedArray *re)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
virtual void LoadCurrentCharacter(int cp_offset, Label *on_end_of_input, bool check_bounds=true, int characters=1)=0
static void Canonicalize(ZoneList< CharacterRange > *ranges)
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Handle< FixedArray > LookupRegExp(Handle< String > source, JSRegExp::Flags flags)
void EnsureAnalyzed(RegExpNode *node)
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
void set_flush_budget(int to)
static CharacterRange Everything()
virtual bool IsAnchoredAtEnd()
RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii, Zone *zone)
virtual bool try_to_emit_quick_check_for_alternative(int i)
void AddValue(int value, Zone *zone)
unibrow::Mapping< unibrow::Ecma262Canonicalize > Canonicalize
#define ASSERT(condition)
static ActionNode * StorePosition(int reg, bool is_capture, RegExpNode *on_success)
int current_expansion_factor()
#define ASSERT_GE(v1, v2)
void set_characters_preloaded(int count)
const int kPatternTooShortForBoyerMoore
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler *compiler)
const char * error_message()
RegExpCompiler * compiler()
void BuildTable(ChoiceNode *node)
virtual void ReadCurrentPositionFromRegister(int reg)=0
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
bool EmitSkipInstructions(RegExpMacroAssembler *masm)
#define DEFINE_ACCEPT(Type)
ZoneList< TextElement > * elements()
void SetAll(int map_number)
virtual void PopCurrentPosition()=0
static Code * cast(Object *obj)
static void SetLastSubject(FixedArray *array, String *to)
static void Negate(ZoneList< CharacterRange > *src, ZoneList< CharacterRange > *dst, Zone *zone)
static Handle< Object > AtomExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
static Smi * cast(Object *object)
Handle< String > FlattenGetString(Handle< String > string)
static const int kRegExpExecutableMemoryLimit
virtual void Emit(RegExpCompiler *compiler, Trace *trace)=0
static ActionNode * PositiveSubmatchSuccess(int stack_pointer_reg, int restore_reg, int clear_capture_count, int clear_capture_from, RegExpNode *on_success)
static const int kNoRegister
static const int kMaxExpansionFactor
virtual void AdvanceCurrentPosition(int by)=0
int get(uchar c, uchar n, uchar *result)
RegExpMacroAssembler * macro_assembler()
static void Split(ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
struct v8::internal::ActionNode::@10::@14 u_submatch
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
RegExpCharacterClass * u_char_class
SmartArrayPointer< char > ToCString(AllowNullsFlag allow_nulls, RobustnessFlag robustness_flag, int offset, int length, int *length_output=0)
RegExpNode * FilterSuccessor(int depth)
static void SetLastInput(FixedArray *array, String *to)
union v8::internal::TextElement::@9 data
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
virtual Interval CaptureRegisters()
static const int kRegWxpCompiledLimit
void Advance(int by, bool ascii)
void Merge(QuickCheckDetails *other, int from_index)
void IncrementRecursionDepth()
virtual bool CanReadUnaligned()
RegExpExpansionLimiter(RegExpCompiler *compiler, int factor)
static int IrregexpPrepare(Handle< JSRegExp > regexp, Handle< String > subject)
void IncreaseTotalRegexpCodeGenerated(int size)
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
STATIC_ASSERT((FixedDoubleArray::kHeaderSize &kDoubleAlignmentMask)==0)
static const unsigned kFirstLimit
void AddAlternative(GuardedAlternative node)
static void SetLastCaptureCount(FixedArray *array, int to)
void fail(const char *error_message)
AlternativeGeneration * at(int i)
virtual void CheckCharacterGT(uc16 limit, Label *on_greater)=0
void Call(uc32 from, DispatchTable::Entry entry)
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
void set_bound_checked_up_to(int to)
void set_backtrack(Label *backtrack)
static const int kTableSizeBits
static const int kInfinity
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
virtual void Accept(NodeVisitor *visitor)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
v8::Handle< v8::Object > bottom
static const int kTableSize
MemoryAllocator * memory_allocator()
virtual RegExpNode * FilterASCII(int depth)
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
void set_slow_safe(bool ssc)
void AddWork(RegExpNode *node)
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
QuickCheckDetails * quick_check_performed()
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)=0
static Handle< JSArray > SetLastMatchInfo(Handle< JSArray > last_match_info, Handle< String > subject, int capture_count, int32_t *match)
void AddGuard(Guard *guard, Zone *zone)
static int StartRegister(int index)
int32_t * LastSuccessfulMatch()
void set_bm_info(bool not_at_start, BoyerMooreLookahead *bm)
void AddInverse(ZoneList< CharacterRange > *ranges)
struct v8::internal::ActionNode::@10::@13 u_position_register
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
virtual void VisitLoopChoice(LoopChoiceNode *that)
ZoneList< RegExpTree * > * alternatives()
virtual int min_match()=0
void AddCaseEquivalents(ZoneList< CharacterRange > *ranges, bool is_ascii, Zone *zone)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
virtual void ReadStackPointerFromRegister(int reg)=0
static void IrregexpInitialize(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, int capture_register_count)
virtual void IfRegisterLT(int reg, int comparand, Label *if_lt)=0
static Handle< Object > Compile(Handle< JSRegExp > re, Handle< String > pattern, Handle< String > flags, Zone *zone)
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, Label *on_no_match)=0
struct v8::internal::ActionNode::@10::@15 u_empty_match_check
static const int kInOverlay
static RegExpImpl::IrregexpResult Match(Isolate *isolate, Handle< ByteArray > code, Handle< String > subject, int *captures, int start_position)
Handle< String > NewStringFromTwoByte(Vector< const uc16 > str, PretenureFlag pretenure=NOT_TENURED)
QuickCheckDetails quick_check_details
static Handle< Object > Exec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Vector< const uc16 > ToUC16Vector()
virtual RegExpNode * FilterASCII(int depth)
static int IrregexpExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, int32_t *output, int output_size)
const int kMaxLookaheadForBoyerMoore
ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b)
static const int kMapSize
bool determines_perfectly
#define ASSERT_LE(v1, v2)
virtual void CheckNotAtStart(Label *on_not_at_start)=0
void add_action(DeferredAction *new_action)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
bool has_pending_exception()
BoyerMooreLookahead(int length, RegExpCompiler *compiler, Zone *zone)
virtual void CheckCharacter(unsigned c, Label *on_equal)=0
void SaveBMInfo(BoyerMooreLookahead *bm, bool not_at_start, int offset)
static const int kMaxRegister
static const int kIrregexpCaptureCountIndex
void AddRange(CharacterRange range)
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
void set_being_calculated(bool b)
static void AtomCompile(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, Handle< String > match_pattern)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
int characters_preloaded()
DeferredAction * actions()
activate correct semantics for inheriting readonliness false
static const int kNodeIsTooComplexForGreedyLoops
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
void PutRegExp(Handle< String > source, JSRegExp::Flags flags, Handle< FixedArray > data)
Vector< const char > CStrVector(const char *data)
DispatchTable * GetTable(bool ignore_case)
static int IrregexpMaxRegisterCount(FixedArray *re)
void CountCharacter(int character)
RegExpNode * on_success()
static void SetIrregexpMaxRegisterCount(FixedArray *re, int value)
virtual void PushCurrentPosition()=0
static const int kTableMask
BoyerMooreLookahead * bm_info(bool not_at_start)
static int AtomExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, int32_t *output, int output_size)
#define ASSERT_LT(v1, v2)
static const int kLastMatchOverhead
~AlternativeGenerationList()
void set_from(uc16 value)
FlatContent GetFlatContent()
virtual void AppendToText(RegExpText *text, Zone *zone)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
int bound_checked_up_to()
intptr_t SizeExecutable()
virtual int GreedyLoopTextLength()
virtual bool CheckSpecialCharacterClass(uc16 type, Label *on_no_match)
static const int kMaxRecursion
void AddContinueAlternative(GuardedAlternative alt)
bool GetStoredPosition(int reg, int *cp_offset)
static AssertionNode * AfterNewline(RegExpNode *on_success)
FrequencyCollator * frequency_collator()
void Sort(int(*cmp)(const T *x, const T *y))
static AssertionNode * AtNonBoundary(RegExpNode *on_success)
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset)=0
static const int kMaxWidth
virtual void VisitLoopChoice(LoopChoiceNode *that)
static const int kAtomPatternIndex
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
virtual void Bind(Label *label)=0
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)=0
int Count(int map_number)
void MakeCaseIndependent(bool is_ascii)
virtual void CheckCharacterLT(uc16 limit, Label *on_less)=0
ZoneList< GuardedAlternative > * alternatives()
virtual bool IsAnchoredAtStart()
static int code_index(bool is_ascii)
static void PrintError(const char *format,...)
void DecrementRecursionDepth()
static Handle< T > null()
int SearchString(Isolate *isolate, Vector< const SubjectChar > subject, Vector< const PatternChar > pattern, int start_index)
virtual RegExpNode * FilterASCII(int depth)
void SetRest(int from_map)
Vector< const uc16 > data()
static Result Match(Handle< Code > regexp, Handle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
virtual void Backtrack()=0
#define ASSERT_EQ(v1, v2)
void ForEach(Callback *callback)
virtual void AppendToText(RegExpText *text, Zone *zone)
OutSet * Extend(unsigned value, Zone *zone)
static void DotPrint(const char *label, RegExpNode *node, bool ignore_case)
static const uchar kBadChar
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 and VFP2 enable use of VFP2 instructions if available enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of MIPS FPU instructions if expose natives in global object expose gc extension number of stack frames to capture disable builtin natives files print a stack trace if an assertion failure occurs use random jit cookie to mask large constants trace lazy optimization use adaptive optimizations prepare for turning on always opt minimum length for automatic enable preparsing maximum number of optimization attempts before giving up cache prototype transitions automatically set the debug break flag when debugger commands are in the queue always cause a debug break before aborting maximum length of function source code printed in a stack trace max size of the new max size of the old max size of executable always perform global GCs print one trace line following each garbage collection do not print trace line after scavenger collection print more details following each garbage collection print amount of external allocated memory after each time it is adjusted flush code that we expect not to use again before full gc do incremental marking steps track object counts and memory usage use caching Perform compaction on every full GC Never perform compaction on full GC testing only Compact code space on full incremental collections Default seed for initializing random allows verbose printing trace parsing and preparsing Check icache flushes in ARM and MIPS simulator Stack alingment in bytes in print stack trace when throwing exceptions randomize hashes to avoid predictable hash Fixed seed to use to hash property activate a timer that switches between V8 threads testing_bool_flag float flag Seed used for threading test randomness A filename with extra code to be included in the Print usage message
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
ZoneList< GuardedAlternative > * alternatives_
static const unsigned kMaxAsciiCharCodeU
virtual void CheckCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_equal)=0
ContainedInLattice AddRange(ContainedInLattice containment, const int *ranges, int ranges_length, Interval new_range)
virtual void IfRegisterEqPos(int reg, Label *if_eq)=0
virtual int GreedyLoopTextLength()
static ByteArray * IrregexpByteCode(FixedArray *re, bool is_ascii)
static FixedArray * cast(Object *obj)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
void set_current_expansion_factor(int value)
static ActionNode * ClearCaptures(Interval range, RegExpNode *on_success)
void Set(int map_number, int character)
static int IrregexpNumberOfRegisters(FixedArray *re)
virtual void CheckBitInTable(Handle< ByteArray > table, Label *on_bit_set)=0
virtual RegExpNode * FilterASCII(int depth)
unibrow::Mapping< unibrow::CanonicalizationRange > * jsregexp_canonrange()
struct v8::internal::ActionNode::@10::@16 u_clear_captures
static const int kCompilationErrorValue
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
virtual void AppendToText(RegExpText *text, Zone *zone)
bool Rationalize(bool ascii)
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 and VFP2 enable use of VFP2 instructions if available enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of MIPS FPU instructions if NULL
virtual void CheckGreedyLoop(Label *on_tos_equals_current_position)=0
void AddLoopAlternative(GuardedAlternative alt)
void set_choice_index(int value)
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra code(assertions) for debugging") DEFINE_bool(code_comments
virtual void Accept(NodeVisitor *visitor)=0
int EatsAtLeastHelper(int still_to_find, int recursion_depth, RegExpNode *ignore_this_node, bool not_at_start)
static TextElement Atom(RegExpAtom *atom)
ZoneList< Guard * > * guards()
static const int kFillInBMBudget
static int EndRegister(int index)
struct v8::internal::ActionNode::@10::@11 u_store_register
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
static Handle< Object > IrregexpExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
#define FOR_EACH_NODE_TYPE(VISIT)
void DeleteArray(T *array)
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 and VFP2 enable use of VFP2 instructions if available enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of MIPS FPU instructions if expose natives in global object expose gc extension number of stack frames to capture disable builtin natives files print a stack trace if an assertion failure occurs use random jit cookie to mask large constants trace lazy optimization use adaptive optimizations prepare for turning on always opt minimum length for automatic enable preparsing maximum number of optimization attempts before giving up cache prototype transitions automatically set the debug break flag when debugger commands are in the queue always cause a debug break before aborting maximum length of function source code printed in a stack trace max size of the new max size of the old max size of executable always perform global GCs print one trace line following each garbage collection do not print trace line after scavenger collection print more details following each garbage collection print amount of external allocated memory after each time it is adjusted flush code that we expect not to use again before full gc do incremental marking steps track object counts and memory usage use caching Perform compaction on every full GC Never perform compaction on full GC testing only Compact code space on full incremental collections Default seed for initializing random allows verbose printing trace parsing and preparsing Check icache flushes in ARM and MIPS simulator Stack alingment in bytes in print stack trace when throwing exceptions randomize hashes to avoid predictable hash Fixed seed to use to hash property activate a timer that switches between V8 threads testing_bool_flag float flag Seed used for threading test randomness A filename with extra code to be included in the Print usage including flags
void Call(uc16 from, DispatchTable::Entry entry)
virtual int max_match()=0
~RegExpExpansionLimiter()
void Flush(RegExpCompiler *compiler, RegExpNode *successor)
static const int kUninitializedValue
ZoneList< RegExpTree * > * nodes()
void set_quick_check_performed(QuickCheckDetails *d)
void check(i::Vector< const char > string)
void InvalidateCurrentCharacter()
virtual void CheckNotCharacter(unsigned c, Label *on_not_equal)=0
static void AddClassEscape(uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
static int saved_code_index(bool is_ascii)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
int Frequency(int in_character)
virtual void CheckPosition(int cp_offset, Label *on_outside_input)
RecursionCheck(RegExpCompiler *compiler)
static CharacterRange Singleton(uc16 value)
static AssertionNode * AtEnd(RegExpNode *on_success)
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
void AddElement(TextElement elm, Zone *zone)
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_not_equal)=0
virtual Handle< HeapObject > GetCode(Handle< String > source)=0
AddDispatchRange(DispatchTableConstructor *constructor)
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start)
virtual void IfRegisterGE(int reg, int comparand, Label *if_ge)=0
void set_stop_node(RegExpNode *node)
virtual void PushBacktrack(Label *label)=0
static CompilationResult Compile(RegExpCompileData *input, bool ignore_case, bool global, bool multiline, Handle< String > pattern, Handle< String > sample_subject, bool is_ascii, Zone *zone)
virtual RegExpNode * FilterASCII(int depth)
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
static void SetCapture(FixedArray *array, int index, int to)
static AssertionNode * AtBoundary(RegExpNode *on_success)
bool mentions_reg(int reg)
static const int kMaxCPOffset