v8  3.25.30(node0.11.13)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
jsregexp.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #include "v8.h"
29 
30 #include "ast.h"
31 #include "compiler.h"
32 #include "execution.h"
33 #include "factory.h"
34 #include "jsregexp.h"
35 #include "jsregexp-inl.h"
36 #include "platform.h"
37 #include "string-search.h"
38 #include "runtime.h"
39 #include "compilation-cache.h"
40 #include "string-stream.h"
41 #include "parser.h"
42 #include "regexp-macro-assembler.h"
45 #include "regexp-stack.h"
46 
47 #ifndef V8_INTERPRETED_REGEXP
48 #if V8_TARGET_ARCH_IA32
50 #elif V8_TARGET_ARCH_X64
52 #elif V8_TARGET_ARCH_ARM64
54 #elif V8_TARGET_ARCH_ARM
56 #elif V8_TARGET_ARCH_MIPS
58 #else
59 #error Unsupported target architecture.
60 #endif
61 #endif
62 
63 #include "interpreter-irregexp.h"
64 
65 
66 namespace v8 {
67 namespace internal {
68 
70  Handle<String> pattern,
72  bool* has_pending_exception) {
73  // Call the construct code with 2 arguments.
74  Handle<Object> argv[] = { pattern, flags };
75  return Execution::New(constructor, ARRAY_SIZE(argv), argv,
76  has_pending_exception);
77 }
78 
79 
80 static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
81  int flags = JSRegExp::NONE;
82  for (int i = 0; i < str->length(); i++) {
83  switch (str->Get(i)) {
84  case 'i':
85  flags |= JSRegExp::IGNORE_CASE;
86  break;
87  case 'g':
88  flags |= JSRegExp::GLOBAL;
89  break;
90  case 'm':
91  flags |= JSRegExp::MULTILINE;
92  break;
93  }
94  }
95  return JSRegExp::Flags(flags);
96 }
97 
98 
99 static inline void ThrowRegExpException(Handle<JSRegExp> re,
100  Handle<String> pattern,
101  Handle<String> error_text,
102  const char* message) {
103  Isolate* isolate = re->GetIsolate();
104  Factory* factory = isolate->factory();
105  Handle<FixedArray> elements = factory->NewFixedArray(2);
106  elements->set(0, *pattern);
107  elements->set(1, *error_text);
108  Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
109  Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
110  isolate->Throw(*regexp_err);
111 }
112 
113 
115  const int* ranges,
116  int ranges_length,
117  Interval new_range) {
118  ASSERT((ranges_length & 1) == 1);
119  ASSERT(ranges[ranges_length - 1] == String::kMaxUtf16CodeUnit + 1);
120  if (containment == kLatticeUnknown) return containment;
121  bool inside = false;
122  int last = 0;
123  for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
124  // Consider the range from last to ranges[i].
125  // We haven't got to the new range yet.
126  if (ranges[i] <= new_range.from()) continue;
127  // New range is wholly inside last-ranges[i]. Note that new_range.to() is
128  // inclusive, but the values in ranges are not.
129  if (last <= new_range.from() && new_range.to() < ranges[i]) {
130  return Combine(containment, inside ? kLatticeIn : kLatticeOut);
131  }
132  return kLatticeUnknown;
133  }
134  return containment;
135 }
136 
137 
138 // More makes code generation slower, less makes V8 benchmark score lower.
140 // In a 3-character pattern you can maximally step forwards 3 characters
141 // at a time, which is not always enough to pay for the extra logic.
143 
144 
145 // Identifies the sort of regexps where the regexp engine is faster
146 // than the code used for atom matches.
147 static bool HasFewDifferentCharacters(Handle<String> pattern) {
148  int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
149  if (length <= kPatternTooShortForBoyerMoore) return false;
150  const int kMod = 128;
151  bool character_found[kMod];
152  int different = 0;
153  memset(&character_found[0], 0, sizeof(character_found));
154  for (int i = 0; i < length; i++) {
155  int ch = (pattern->Get(i) & (kMod - 1));
156  if (!character_found[ch]) {
157  character_found[ch] = true;
158  different++;
159  // We declare a regexp low-alphabet if it has at least 3 times as many
160  // characters as it has different characters.
161  if (different * 3 > length) return false;
162  }
163  }
164  return true;
165 }
166 
167 
168 // Generic RegExp methods. Dispatches to implementation specific methods.
169 
170 
172  Handle<String> pattern,
173  Handle<String> flag_str) {
174  Isolate* isolate = re->GetIsolate();
175  Zone zone(isolate);
176  JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
177  CompilationCache* compilation_cache = isolate->compilation_cache();
178  Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags);
179  bool in_cache = !cached.is_null();
180  LOG(isolate, RegExpCompileEvent(re, in_cache));
181 
182  Handle<Object> result;
183  if (in_cache) {
184  re->set_data(*cached);
185  return re;
186  }
187  pattern = FlattenGetString(pattern);
188  PostponeInterruptsScope postpone(isolate);
189  RegExpCompileData parse_result;
190  FlatStringReader reader(isolate, pattern);
191  if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
192  &parse_result, &zone)) {
193  // Throw an exception if we fail to parse the pattern.
194  ThrowRegExpException(re,
195  pattern,
196  parse_result.error,
197  "malformed_regexp");
198  return Handle<Object>::null();
199  }
200 
201  bool has_been_compiled = false;
202 
203  if (parse_result.simple &&
204  !flags.is_ignore_case() &&
205  !HasFewDifferentCharacters(pattern)) {
206  // Parse-tree is a single atom that is equal to the pattern.
207  AtomCompile(re, pattern, flags, pattern);
208  has_been_compiled = true;
209  } else if (parse_result.tree->IsAtom() &&
210  !flags.is_ignore_case() &&
211  parse_result.capture_count == 0) {
212  RegExpAtom* atom = parse_result.tree->AsAtom();
213  Vector<const uc16> atom_pattern = atom->data();
214  Handle<String> atom_string =
215  isolate->factory()->NewStringFromTwoByte(atom_pattern);
216  if (!HasFewDifferentCharacters(atom_string)) {
217  AtomCompile(re, pattern, flags, atom_string);
218  has_been_compiled = true;
219  }
220  }
221  if (!has_been_compiled) {
222  IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
223  }
224  ASSERT(re->data()->IsFixedArray());
225  // Compilation succeeded so the data is set on the regexp
226  // and we can store it in the cache.
227  Handle<FixedArray> data(FixedArray::cast(re->data()));
228  compilation_cache->PutRegExp(pattern, flags, data);
229 
230  return re;
231 }
232 
233 
235  Handle<String> subject,
236  int index,
237  Handle<JSArray> last_match_info) {
238  switch (regexp->TypeTag()) {
239  case JSRegExp::ATOM:
240  return AtomExec(regexp, subject, index, last_match_info);
241  case JSRegExp::IRREGEXP: {
242  Handle<Object> result =
243  IrregexpExec(regexp, subject, index, last_match_info);
244  ASSERT(!result.is_null() ||
245  regexp->GetIsolate()->has_pending_exception());
246  return result;
247  }
248  default:
249  UNREACHABLE();
250  return Handle<Object>::null();
251  }
252 }
253 
254 
255 // RegExp Atom implementation: Simple string search using indexOf.
256 
257 
259  Handle<String> pattern,
260  JSRegExp::Flags flags,
261  Handle<String> match_pattern) {
262  re->GetIsolate()->factory()->SetRegExpAtomData(re,
264  pattern,
265  flags,
266  match_pattern);
267 }
268 
269 
270 static void SetAtomLastCapture(FixedArray* array,
271  String* subject,
272  int from,
273  int to) {
274  SealHandleScope shs(array->GetIsolate());
276  RegExpImpl::SetLastSubject(array, subject);
277  RegExpImpl::SetLastInput(array, subject);
278  RegExpImpl::SetCapture(array, 0, from);
279  RegExpImpl::SetCapture(array, 1, to);
280 }
281 
282 
284  Handle<String> subject,
285  int index,
286  int32_t* output,
287  int output_size) {
288  Isolate* isolate = regexp->GetIsolate();
289 
290  ASSERT(0 <= index);
291  ASSERT(index <= subject->length());
292 
293  if (!subject->IsFlat()) FlattenString(subject);
294  DisallowHeapAllocation no_gc; // ensure vectors stay valid
295 
296  String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
297  int needle_len = needle->length();
298  ASSERT(needle->IsFlat());
299  ASSERT_LT(0, needle_len);
300 
301  if (index + needle_len > subject->length()) {
302  return RegExpImpl::RE_FAILURE;
303  }
304 
305  for (int i = 0; i < output_size; i += 2) {
306  String::FlatContent needle_content = needle->GetFlatContent();
307  String::FlatContent subject_content = subject->GetFlatContent();
308  ASSERT(needle_content.IsFlat());
309  ASSERT(subject_content.IsFlat());
310  // dispatch on type of strings
311  index = (needle_content.IsAscii()
312  ? (subject_content.IsAscii()
313  ? SearchString(isolate,
314  subject_content.ToOneByteVector(),
315  needle_content.ToOneByteVector(),
316  index)
317  : SearchString(isolate,
318  subject_content.ToUC16Vector(),
319  needle_content.ToOneByteVector(),
320  index))
321  : (subject_content.IsAscii()
322  ? SearchString(isolate,
323  subject_content.ToOneByteVector(),
324  needle_content.ToUC16Vector(),
325  index)
326  : SearchString(isolate,
327  subject_content.ToUC16Vector(),
328  needle_content.ToUC16Vector(),
329  index)));
330  if (index == -1) {
331  return i / 2; // Return number of matches.
332  } else {
333  output[i] = index;
334  output[i+1] = index + needle_len;
335  index += needle_len;
336  }
337  }
338  return output_size / 2;
339 }
340 
341 
343  Handle<String> subject,
344  int index,
345  Handle<JSArray> last_match_info) {
346  Isolate* isolate = re->GetIsolate();
347 
348  static const int kNumRegisters = 2;
350  int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
351 
352  int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters);
353 
354  if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value();
355 
357  SealHandleScope shs(isolate);
358  FixedArray* array = FixedArray::cast(last_match_info->elements());
359  SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);
360  return last_match_info;
361 }
362 
363 
364 // Irregexp implementation.
365 
366 // Ensures that the regexp object contains a compiled version of the
367 // source for either ASCII or non-ASCII strings.
368 // If the compiled version doesn't already exist, it is compiled
369 // from the source pattern.
370 // If compilation fails, an exception is thrown and this function
371 // returns false.
372 bool RegExpImpl::EnsureCompiledIrregexp(
373  Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) {
374  Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
375 #ifdef V8_INTERPRETED_REGEXP
376  if (compiled_code->IsByteArray()) return true;
377 #else // V8_INTERPRETED_REGEXP (RegExp native code)
378  if (compiled_code->IsCode()) return true;
379 #endif
380  // We could potentially have marked this as flushable, but have kept
381  // a saved version if we did not flush it yet.
382  Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
383  if (saved_code->IsCode()) {
384  // Reinstate the code in the original place.
385  re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
386  ASSERT(compiled_code->IsSmi());
387  return true;
388  }
389  return CompileIrregexp(re, sample_subject, is_ascii);
390 }
391 
392 
393 static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
394  bool is_ascii,
395  Handle<String> error_message,
396  Isolate* isolate) {
397  Factory* factory = isolate->factory();
398  Handle<FixedArray> elements = factory->NewFixedArray(2);
399  elements->set(0, re->Pattern());
400  elements->set(1, *error_message);
401  Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
402  Handle<Object> regexp_err =
403  factory->NewSyntaxError("malformed_regexp", array);
404  isolate->Throw(*regexp_err);
405  return false;
406 }
407 
408 
409 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
410  Handle<String> sample_subject,
411  bool is_ascii) {
412  // Compile the RegExp.
413  Isolate* isolate = re->GetIsolate();
414  Zone zone(isolate);
415  PostponeInterruptsScope postpone(isolate);
416  // If we had a compilation error the last time this is saved at the
417  // saved code index.
418  Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
419  // When arriving here entry can only be a smi, either representing an
420  // uncompiled regexp, a previous compilation error, or code that has
421  // been flushed.
422  ASSERT(entry->IsSmi());
423  int entry_value = Smi::cast(entry)->value();
424  ASSERT(entry_value == JSRegExp::kUninitializedValue ||
425  entry_value == JSRegExp::kCompilationErrorValue ||
426  (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
427 
428  if (entry_value == JSRegExp::kCompilationErrorValue) {
429  // A previous compilation failed and threw an error which we store in
430  // the saved code index (we store the error message, not the actual
431  // error). Recreate the error object and throw it.
432  Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
433  ASSERT(error_string->IsString());
434  Handle<String> error_message(String::cast(error_string));
435  CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
436  return false;
437  }
438 
439  JSRegExp::Flags flags = re->GetFlags();
440 
441  Handle<String> pattern(re->Pattern());
442  if (!pattern->IsFlat()) FlattenString(pattern);
443  RegExpCompileData compile_data;
444  FlatStringReader reader(isolate, pattern);
445  if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
446  &compile_data,
447  &zone)) {
448  // Throw an exception if we fail to parse the pattern.
449  // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
450  ThrowRegExpException(re,
451  pattern,
452  compile_data.error,
453  "malformed_regexp");
454  return false;
455  }
456  RegExpEngine::CompilationResult result =
457  RegExpEngine::Compile(&compile_data,
458  flags.is_ignore_case(),
459  flags.is_global(),
460  flags.is_multiline(),
461  pattern,
462  sample_subject,
463  is_ascii,
464  &zone);
465  if (result.error_message != NULL) {
466  // Unable to compile regexp.
467  Handle<String> error_message =
468  isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));
469  ASSERT(!error_message.is_null());
470  CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
471  return false;
472  }
473 
474  Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
475  data->set(JSRegExp::code_index(is_ascii), result.code);
476  int register_max = IrregexpMaxRegisterCount(*data);
477  if (result.num_registers > register_max) {
478  SetIrregexpMaxRegisterCount(*data, result.num_registers);
479  }
480 
481  return true;
482 }
483 
484 
486  return Smi::cast(
488 }
489 
490 
493 }
494 
495 
497  return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
498 }
499 
500 
503 }
504 
505 
507  return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
508 }
509 
510 
512  return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
513 }
514 
515 
517  Handle<String> pattern,
518  JSRegExp::Flags flags,
519  int capture_count) {
520  // Initialize compiled code entries to null.
521  re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
523  pattern,
524  flags,
525  capture_count);
526 }
527 
528 
530  Handle<String> subject) {
531  if (!subject->IsFlat()) FlattenString(subject);
532 
533  // Check the asciiness of the underlying storage.
534  bool is_ascii = subject->IsOneByteRepresentationUnderneath();
535  if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1;
536 
537 #ifdef V8_INTERPRETED_REGEXP
538  // Byte-code regexp needs space allocated for all its registers.
539  // The result captures are copied to the start of the registers array
540  // if the match succeeds. This way those registers are not clobbered
541  // when we set the last match info from last successful match.
542  return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
543  (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
544 #else // V8_INTERPRETED_REGEXP
545  // Native regexp only needs room to output captures. Registers are handled
546  // internally.
547  return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
548 #endif // V8_INTERPRETED_REGEXP
549 }
550 
551 
553  Handle<String> subject,
554  int index,
555  int32_t* output,
556  int output_size) {
557  Isolate* isolate = regexp->GetIsolate();
558 
559  Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
560 
561  ASSERT(index >= 0);
562  ASSERT(index <= subject->length());
563  ASSERT(subject->IsFlat());
564 
565  bool is_ascii = subject->IsOneByteRepresentationUnderneath();
566 
567 #ifndef V8_INTERPRETED_REGEXP
568  ASSERT(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
569  do {
570  EnsureCompiledIrregexp(regexp, subject, is_ascii);
571  Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
572  // The stack is used to allocate registers for the compiled regexp code.
573  // This means that in case of failure, the output registers array is left
574  // untouched and contains the capture results from the previous successful
575  // match. We can use that to set the last match info lazily.
578  subject,
579  output,
580  output_size,
581  index,
582  isolate);
585  isolate->has_pending_exception());
587  static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
589  static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
591  == RE_EXCEPTION);
592  return static_cast<IrregexpResult>(res);
593  }
594  // If result is RETRY, the string has changed representation, and we
595  // must restart from scratch.
596  // In this case, it means we must make sure we are prepared to handle
597  // the, potentially, different subject (the string can switch between
598  // being internal and external, and even between being ASCII and UC16,
599  // but the characters are always the same).
600  IrregexpPrepare(regexp, subject);
601  is_ascii = subject->IsOneByteRepresentationUnderneath();
602  } while (true);
603  UNREACHABLE();
604  return RE_EXCEPTION;
605 #else // V8_INTERPRETED_REGEXP
606 
607  ASSERT(output_size >= IrregexpNumberOfRegisters(*irregexp));
608  // We must have done EnsureCompiledIrregexp, so we can get the number of
609  // registers.
610  int number_of_capture_registers =
611  (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
612  int32_t* raw_output = &output[number_of_capture_registers];
613  // We do not touch the actual capture result registers until we know there
614  // has been a match so that we can use those capture results to set the
615  // last match info.
616  for (int i = number_of_capture_registers - 1; i >= 0; i--) {
617  raw_output[i] = -1;
618  }
619  Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
620 
622  byte_codes,
623  subject,
624  raw_output,
625  index);
626  if (result == RE_SUCCESS) {
627  // Copy capture results to the start of the registers array.
628  OS::MemCopy(
629  output, raw_output, number_of_capture_registers * sizeof(int32_t));
630  }
631  if (result == RE_EXCEPTION) {
632  ASSERT(!isolate->has_pending_exception());
633  isolate->StackOverflow();
634  }
635  return result;
636 #endif // V8_INTERPRETED_REGEXP
637 }
638 
639 
641  Handle<String> subject,
642  int previous_index,
643  Handle<JSArray> last_match_info) {
644  Isolate* isolate = regexp->GetIsolate();
645  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
646 
647  // Prepare space for the return values.
648 #if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG)
649  if (FLAG_trace_regexp_bytecodes) {
650  String* pattern = regexp->Pattern();
651  PrintF("\n\nRegexp match: /%s/\n\n", pattern->ToCString().get());
652  PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
653  }
654 #endif
655  int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
656  if (required_registers < 0) {
657  // Compiling failed with an exception.
658  ASSERT(isolate->has_pending_exception());
659  return Handle<Object>::null();
660  }
661 
662  int32_t* output_registers = NULL;
663  if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
664  output_registers = NewArray<int32_t>(required_registers);
665  }
666  SmartArrayPointer<int32_t> auto_release(output_registers);
667  if (output_registers == NULL) {
668  output_registers = isolate->jsregexp_static_offsets_vector();
669  }
670 
671  int res = RegExpImpl::IrregexpExecRaw(
672  regexp, subject, previous_index, output_registers, required_registers);
673  if (res == RE_SUCCESS) {
674  int capture_count =
676  return SetLastMatchInfo(
677  last_match_info, subject, capture_count, output_registers);
678  }
679  if (res == RE_EXCEPTION) {
680  ASSERT(isolate->has_pending_exception());
681  return Handle<Object>::null();
682  }
683  ASSERT(res == RE_FAILURE);
684  return isolate->factory()->null_value();
685 }
686 
687 
689  Handle<String> subject,
690  int capture_count,
691  int32_t* match) {
692  ASSERT(last_match_info->HasFastObjectElements());
693  int capture_register_count = (capture_count + 1) * 2;
694  JSArray::EnsureSize(last_match_info,
695  capture_register_count + kLastMatchOverhead);
696  DisallowHeapAllocation no_allocation;
697  FixedArray* array = FixedArray::cast(last_match_info->elements());
698  if (match != NULL) {
699  for (int i = 0; i < capture_register_count; i += 2) {
700  SetCapture(array, i, match[i]);
701  SetCapture(array, i + 1, match[i + 1]);
702  }
703  }
704  SetLastCaptureCount(array, capture_register_count);
705  SetLastSubject(array, *subject);
706  SetLastInput(array, *subject);
707  return last_match_info;
708 }
709 
710 
712  Handle<String> subject,
713  bool is_global,
714  Isolate* isolate)
715  : register_array_(NULL),
716  register_array_size_(0),
717  regexp_(regexp),
718  subject_(subject) {
719 #ifdef V8_INTERPRETED_REGEXP
720  bool interpreted = true;
721 #else
722  bool interpreted = false;
723 #endif // V8_INTERPRETED_REGEXP
724 
725  if (regexp_->TypeTag() == JSRegExp::ATOM) {
726  static const int kAtomRegistersPerMatch = 2;
727  registers_per_match_ = kAtomRegistersPerMatch;
728  // There is no distinction between interpreted and native for atom regexps.
729  interpreted = false;
730  } else {
731  registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_);
732  if (registers_per_match_ < 0) {
733  num_matches_ = -1; // Signal exception.
734  return;
735  }
736  }
737 
738  if (is_global && !interpreted) {
739  register_array_size_ =
740  Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
741  max_matches_ = register_array_size_ / registers_per_match_;
742  } else {
743  // Global loop in interpreted regexp is not implemented. We choose
744  // the size of the offsets vector so that it can only store one match.
745  register_array_size_ = registers_per_match_;
746  max_matches_ = 1;
747  }
748 
749  if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
750  register_array_ = NewArray<int32_t>(register_array_size_);
751  } else {
752  register_array_ = isolate->jsregexp_static_offsets_vector();
753  }
754 
755  // Set state so that fetching the results the first time triggers a call
756  // to the compiled regexp.
757  current_match_index_ = max_matches_ - 1;
758  num_matches_ = max_matches_;
759  ASSERT(registers_per_match_ >= 2); // Each match has at least one capture.
760  ASSERT_GE(register_array_size_, registers_per_match_);
761  int32_t* last_match =
762  &register_array_[current_match_index_ * registers_per_match_];
763  last_match[0] = -1;
764  last_match[1] = 0;
765 }
766 
767 
768 // -------------------------------------------------------------------
769 // Implementation of the Irregexp regular expression engine.
770 //
771 // The Irregexp regular expression engine is intended to be a complete
772 // implementation of ECMAScript regular expressions. It generates either
773 // bytecodes or native code.
774 
775 // The Irregexp regexp engine is structured in three steps.
776 // 1) The parser generates an abstract syntax tree. See ast.cc.
777 // 2) From the AST a node network is created. The nodes are all
778 // subclasses of RegExpNode. The nodes represent states when
779 // executing a regular expression. Several optimizations are
780 // performed on the node network.
781 // 3) From the nodes we generate either byte codes or native code
782 // that can actually execute the regular expression (perform
783 // the search). The code generation step is described in more
784 // detail below.
785 
786 // Code generation.
787 //
788 // The nodes are divided into four main categories.
789 // * Choice nodes
790 // These represent places where the regular expression can
791 // match in more than one way. For example on entry to an
792 // alternation (foo|bar) or a repetition (*, +, ? or {}).
793 // * Action nodes
794 // These represent places where some action should be
795 // performed. Examples include recording the current position
796 // in the input string to a register (in order to implement
797 // captures) or other actions on register for example in order
798 // to implement the counters needed for {} repetitions.
799 // * Matching nodes
800 // These attempt to match some element part of the input string.
801 // Examples of elements include character classes, plain strings
802 // or back references.
803 // * End nodes
804 // These are used to implement the actions required on finding
805 // a successful match or failing to find a match.
806 //
807 // The code generated (whether as byte codes or native code) maintains
808 // some state as it runs. This consists of the following elements:
809 //
810 // * The capture registers. Used for string captures.
811 // * Other registers. Used for counters etc.
812 // * The current position.
813 // * The stack of backtracking information. Used when a matching node
814 // fails to find a match and needs to try an alternative.
815 //
816 // Conceptual regular expression execution model:
817 //
818 // There is a simple conceptual model of regular expression execution
819 // which will be presented first. The actual code generated is a more
820 // efficient simulation of the simple conceptual model:
821 //
822 // * Choice nodes are implemented as follows:
823 // For each choice except the last {
824 // push current position
825 // push backtrack code location
826 // <generate code to test for choice>
827 // backtrack code location:
828 // pop current position
829 // }
830 // <generate code to test for last choice>
831 //
832 // * Actions nodes are generated as follows
833 // <push affected registers on backtrack stack>
834 // <generate code to perform action>
835 // push backtrack code location
836 // <generate code to test for following nodes>
837 // backtrack code location:
838 // <pop affected registers to restore their state>
839 // <pop backtrack location from stack and go to it>
840 //
841 // * Matching nodes are generated as follows:
842 // if input string matches at current position
843 // update current position
844 // <generate code to test for following nodes>
845 // else
846 // <pop backtrack location from stack and go to it>
847 //
848 // Thus it can be seen that the current position is saved and restored
849 // by the choice nodes, whereas the registers are saved and restored by
850 // by the action nodes that manipulate them.
851 //
852 // The other interesting aspect of this model is that nodes are generated
853 // at the point where they are needed by a recursive call to Emit(). If
854 // the node has already been code generated then the Emit() call will
855 // generate a jump to the previously generated code instead. In order to
856 // limit recursion it is possible for the Emit() function to put the node
857 // on a work list for later generation and instead generate a jump. The
858 // destination of the jump is resolved later when the code is generated.
859 //
860 // Actual regular expression code generation.
861 //
862 // Code generation is actually more complicated than the above. In order
863 // to improve the efficiency of the generated code some optimizations are
864 // performed
865 //
866 // * Choice nodes have 1-character lookahead.
867 // A choice node looks at the following character and eliminates some of
868 // the choices immediately based on that character. This is not yet
869 // implemented.
870 // * Simple greedy loops store reduced backtracking information.
871 // A quantifier like /.*foo/m will greedily match the whole input. It will
872 // then need to backtrack to a point where it can match "foo". The naive
873 // implementation of this would push each character position onto the
874 // backtracking stack, then pop them off one by one. This would use space
875 // proportional to the length of the input string. However since the "."
876 // can only match in one way and always has a constant length (in this case
877 // of 1) it suffices to store the current position on the top of the stack
878 // once. Matching now becomes merely incrementing the current position and
879 // backtracking becomes decrementing the current position and checking the
880 // result against the stored current position. This is faster and saves
881 // space.
882 // * The current state is virtualized.
883 // This is used to defer expensive operations until it is clear that they
884 // are needed and to generate code for a node more than once, allowing
885 // specialized an efficient versions of the code to be created. This is
886 // explained in the section below.
887 //
888 // Execution state virtualization.
889 //
890 // Instead of emitting code, nodes that manipulate the state can record their
891 // manipulation in an object called the Trace. The Trace object can record a
892 // current position offset, an optional backtrack code location on the top of
893 // the virtualized backtrack stack and some register changes. When a node is
894 // to be emitted it can flush the Trace or update it. Flushing the Trace
895 // will emit code to bring the actual state into line with the virtual state.
896 // Avoiding flushing the state can postpone some work (e.g. updates of capture
897 // registers). Postponing work can save time when executing the regular
898 // expression since it may be found that the work never has to be done as a
899 // failure to match can occur. In addition it is much faster to jump to a
900 // known backtrack code location than it is to pop an unknown backtrack
901 // location from the stack and jump there.
902 //
903 // The virtual state found in the Trace affects code generation. For example
904 // the virtual state contains the difference between the actual current
905 // position and the virtual current position, and matching code needs to use
906 // this offset to attempt a match in the correct location of the input
907 // string. Therefore code generated for a non-trivial trace is specialized
908 // to that trace. The code generator therefore has the ability to generate
909 // code for each node several times. In order to limit the size of the
910 // generated code there is an arbitrary limit on how many specialized sets of
911 // code may be generated for a given node. If the limit is reached, the
912 // trace is flushed and a generic version of the code for a node is emitted.
913 // This is subsequently used for that node. The code emitted for non-generic
914 // trace is not recorded in the node and so it cannot currently be reused in
915 // the event that code generation is requested for an identical trace.
916 
917 
918 void RegExpTree::AppendToText(RegExpText* text, Zone* zone) {
919  UNREACHABLE();
920 }
921 
922 
923 void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
924  text->AddElement(TextElement::Atom(this), zone);
925 }
926 
927 
928 void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) {
929  text->AddElement(TextElement::CharClass(this), zone);
930 }
931 
932 
933 void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
934  for (int i = 0; i < elements()->length(); i++)
935  text->AddElement(elements()->at(i), zone);
936 }
937 
938 
939 TextElement TextElement::Atom(RegExpAtom* atom) {
940  return TextElement(ATOM, atom);
941 }
942 
943 
944 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
945  return TextElement(CHAR_CLASS, char_class);
946 }
947 
948 
949 int TextElement::length() const {
950  switch (text_type()) {
951  case ATOM:
952  return atom()->length();
953 
954  case CHAR_CLASS:
955  return 1;
956  }
957  UNREACHABLE();
958  return 0;
959 }
960 
961 
963  if (table_ == NULL) {
964  table_ = new(zone()) DispatchTable(zone());
965  DispatchTableConstructor cons(table_, ignore_case, zone());
966  cons.BuildTable(this);
967  }
968  return table_;
969 }
970 
971 
973  public:
974  FrequencyCollator() : total_samples_(0) {
975  for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
976  frequencies_[i] = CharacterFrequency(i);
977  }
978  }
979 
980  void CountCharacter(int character) {
981  int index = (character & RegExpMacroAssembler::kTableMask);
982  frequencies_[index].Increment();
983  total_samples_++;
984  }
985 
986  // Does not measure in percent, but rather per-128 (the table size from the
987  // regexp macro assembler).
988  int Frequency(int in_character) {
989  ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);
990  if (total_samples_ < 1) return 1; // Division by zero.
991  int freq_in_per128 =
992  (frequencies_[in_character].counter() * 128) / total_samples_;
993  return freq_in_per128;
994  }
995 
996  private:
997  class CharacterFrequency {
998  public:
999  CharacterFrequency() : counter_(0), character_(-1) { }
1000  explicit CharacterFrequency(int character)
1001  : counter_(0), character_(character) { }
1002 
1003  void Increment() { counter_++; }
1004  int counter() { return counter_; }
1005  int character() { return character_; }
1006 
1007  private:
1008  int counter_;
1009  int character_;
1010  };
1011 
1012 
1013  private:
1014  CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
1015  int total_samples_;
1016 };
1017 
1018 
1020  public:
1021  RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii,
1022  Zone* zone);
1023 
1025  if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
1026  reg_exp_too_big_ = true;
1027  return next_register_;
1028  }
1029  return next_register_++;
1030  }
1031 
1033  RegExpNode* start,
1034  int capture_count,
1035  Handle<String> pattern);
1036 
1037  inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
1038 
1039  static const int kImplementationOffset = 0;
1040  static const int kNumberOfRegistersOffset = 0;
1041  static const int kCodeOffset = 1;
1042 
1043  RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
1044  EndNode* accept() { return accept_; }
1045 
1046  static const int kMaxRecursion = 100;
1047  inline int recursion_depth() { return recursion_depth_; }
1048  inline void IncrementRecursionDepth() { recursion_depth_++; }
1049  inline void DecrementRecursionDepth() { recursion_depth_--; }
1050 
1051  void SetRegExpTooBig() { reg_exp_too_big_ = true; }
1052 
1053  inline bool ignore_case() { return ignore_case_; }
1054  inline bool ascii() { return ascii_; }
1055  FrequencyCollator* frequency_collator() { return &frequency_collator_; }
1056 
1057  int current_expansion_factor() { return current_expansion_factor_; }
1059  current_expansion_factor_ = value;
1060  }
1061 
1062  Zone* zone() const { return zone_; }
1063 
1064  static const int kNoRegister = -1;
1065 
1066  private:
1067  EndNode* accept_;
1068  int next_register_;
1069  List<RegExpNode*>* work_list_;
1070  int recursion_depth_;
1071  RegExpMacroAssembler* macro_assembler_;
1072  bool ignore_case_;
1073  bool ascii_;
1074  bool reg_exp_too_big_;
1075  int current_expansion_factor_;
1076  FrequencyCollator frequency_collator_;
1077  Zone* zone_;
1078 };
1079 
1080 
1082  public:
1083  explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
1084  compiler->IncrementRecursionDepth();
1085  }
1086  ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
1087  private:
1088  RegExpCompiler* compiler_;
1089 };
1090 
1091 
1092 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
1093  return RegExpEngine::CompilationResult(isolate, "RegExp too big");
1094 }
1095 
1096 
1097 // Attempts to compile the regexp using an Irregexp code generator. Returns
1098 // a fixed array or a null handle depending on whether it succeeded.
1099 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii,
1100  Zone* zone)
1101  : next_register_(2 * (capture_count + 1)),
1102  work_list_(NULL),
1103  recursion_depth_(0),
1104  ignore_case_(ignore_case),
1105  ascii_(ascii),
1106  reg_exp_too_big_(false),
1107  current_expansion_factor_(1),
1108  frequency_collator_(),
1109  zone_(zone) {
1110  accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
1111  ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
1112 }
1113 
1114 
1116  RegExpMacroAssembler* macro_assembler,
1117  RegExpNode* start,
1118  int capture_count,
1119  Handle<String> pattern) {
1120  Heap* heap = pattern->GetHeap();
1121 
1122  bool use_slow_safe_regexp_compiler = false;
1123  if (heap->total_regexp_code_generated() >
1125  heap->isolate()->memory_allocator()->SizeExecutable() >
1127  use_slow_safe_regexp_compiler = true;
1128  }
1129 
1130  macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
1131 
1132 #ifdef DEBUG
1133  if (FLAG_trace_regexp_assembler)
1134  macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
1135  else
1136 #endif
1137  macro_assembler_ = macro_assembler;
1138 
1139  List <RegExpNode*> work_list(0);
1140  work_list_ = &work_list;
1141  Label fail;
1142  macro_assembler_->PushBacktrack(&fail);
1143  Trace new_trace;
1144  start->Emit(this, &new_trace);
1145  macro_assembler_->Bind(&fail);
1146  macro_assembler_->Fail();
1147  while (!work_list.is_empty()) {
1148  work_list.RemoveLast()->Emit(this, &new_trace);
1149  }
1150  if (reg_exp_too_big_) return IrregexpRegExpTooBig(zone_->isolate());
1151 
1152  Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
1153  heap->IncreaseTotalRegexpCodeGenerated(code->Size());
1154  work_list_ = NULL;
1155 #ifdef DEBUG
1156  if (FLAG_print_code) {
1157  CodeTracer::Scope trace_scope(heap->isolate()->GetCodeTracer());
1158  Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(),
1159  trace_scope.file());
1160  }
1161  if (FLAG_trace_regexp_assembler) {
1162  delete macro_assembler_;
1163  }
1164 #endif
1165  return RegExpEngine::CompilationResult(*code, next_register_);
1166 }
1167 
1168 
1171  Interval range = static_cast<DeferredClearCaptures*>(this)->range();
1172  return range.Contains(that);
1173  } else {
1174  return reg() == that;
1175  }
1176 }
1177 
1178 
1179 bool Trace::mentions_reg(int reg) {
1180  for (DeferredAction* action = actions_;
1181  action != NULL;
1182  action = action->next()) {
1183  if (action->Mentions(reg))
1184  return true;
1185  }
1186  return false;
1187 }
1188 
1189 
1191  ASSERT_EQ(0, *cp_offset);
1192  for (DeferredAction* action = actions_;
1193  action != NULL;
1194  action = action->next()) {
1195  if (action->Mentions(reg)) {
1196  if (action->action_type() == ActionNode::STORE_POSITION) {
1197  *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
1198  return true;
1199  } else {
1200  return false;
1201  }
1202  }
1203  }
1204  return false;
1205 }
1206 
1207 
1208 int Trace::FindAffectedRegisters(OutSet* affected_registers,
1209  Zone* zone) {
1210  int max_register = RegExpCompiler::kNoRegister;
1211  for (DeferredAction* action = actions_;
1212  action != NULL;
1213  action = action->next()) {
1214  if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
1215  Interval range = static_cast<DeferredClearCaptures*>(action)->range();
1216  for (int i = range.from(); i <= range.to(); i++)
1217  affected_registers->Set(i, zone);
1218  if (range.to() > max_register) max_register = range.to();
1219  } else {
1220  affected_registers->Set(action->reg(), zone);
1221  if (action->reg() > max_register) max_register = action->reg();
1222  }
1223  }
1224  return max_register;
1225 }
1226 
1227 
1228 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
1229  int max_register,
1230  OutSet& registers_to_pop,
1231  OutSet& registers_to_clear) {
1232  for (int reg = max_register; reg >= 0; reg--) {
1233  if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
1234  else if (registers_to_clear.Get(reg)) {
1235  int clear_to = reg;
1236  while (reg > 0 && registers_to_clear.Get(reg - 1)) {
1237  reg--;
1238  }
1239  assembler->ClearRegisters(reg, clear_to);
1240  }
1241  }
1242 }
1243 
1244 
1245 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
1246  int max_register,
1247  OutSet& affected_registers,
1248  OutSet* registers_to_pop,
1249  OutSet* registers_to_clear,
1250  Zone* zone) {
1251  // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
1252  const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
1253 
1254  // Count pushes performed to force a stack limit check occasionally.
1255  int pushes = 0;
1256 
1257  for (int reg = 0; reg <= max_register; reg++) {
1258  if (!affected_registers.Get(reg)) {
1259  continue;
1260  }
1261 
1262  // The chronologically first deferred action in the trace
1263  // is used to infer the action needed to restore a register
1264  // to its previous state (or not, if it's safe to ignore it).
1265  enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
1266  DeferredActionUndoType undo_action = IGNORE;
1267 
1268  int value = 0;
1269  bool absolute = false;
1270  bool clear = false;
1271  int store_position = -1;
1272  // This is a little tricky because we are scanning the actions in reverse
1273  // historical order (newest first).
1274  for (DeferredAction* action = actions_;
1275  action != NULL;
1276  action = action->next()) {
1277  if (action->Mentions(reg)) {
1278  switch (action->action_type()) {
1279  case ActionNode::SET_REGISTER: {
1280  Trace::DeferredSetRegister* psr =
1281  static_cast<Trace::DeferredSetRegister*>(action);
1282  if (!absolute) {
1283  value += psr->value();
1284  absolute = true;
1285  }
1286  // SET_REGISTER is currently only used for newly introduced loop
1287  // counters. They can have a significant previous value if they
1288  // occour in a loop. TODO(lrn): Propagate this information, so
1289  // we can set undo_action to IGNORE if we know there is no value to
1290  // restore.
1291  undo_action = RESTORE;
1292  ASSERT_EQ(store_position, -1);
1293  ASSERT(!clear);
1294  break;
1295  }
1297  if (!absolute) {
1298  value++;
1299  }
1300  ASSERT_EQ(store_position, -1);
1301  ASSERT(!clear);
1302  undo_action = RESTORE;
1303  break;
1305  Trace::DeferredCapture* pc =
1306  static_cast<Trace::DeferredCapture*>(action);
1307  if (!clear && store_position == -1) {
1308  store_position = pc->cp_offset();
1309  }
1310 
1311  // For captures we know that stores and clears alternate.
1312  // Other register, are never cleared, and if the occur
1313  // inside a loop, they might be assigned more than once.
1314  if (reg <= 1) {
1315  // Registers zero and one, aka "capture zero", is
1316  // always set correctly if we succeed. There is no
1317  // need to undo a setting on backtrack, because we
1318  // will set it again or fail.
1319  undo_action = IGNORE;
1320  } else {
1321  undo_action = pc->is_capture() ? CLEAR : RESTORE;
1322  }
1323  ASSERT(!absolute);
1324  ASSERT_EQ(value, 0);
1325  break;
1326  }
1328  // Since we're scanning in reverse order, if we've already
1329  // set the position we have to ignore historically earlier
1330  // clearing operations.
1331  if (store_position == -1) {
1332  clear = true;
1333  }
1334  undo_action = RESTORE;
1335  ASSERT(!absolute);
1336  ASSERT_EQ(value, 0);
1337  break;
1338  }
1339  default:
1340  UNREACHABLE();
1341  break;
1342  }
1343  }
1344  }
1345  // Prepare for the undo-action (e.g., push if it's going to be popped).
1346  if (undo_action == RESTORE) {
1347  pushes++;
1350  if (pushes == push_limit) {
1352  pushes = 0;
1353  }
1354 
1355  assembler->PushRegister(reg, stack_check);
1356  registers_to_pop->Set(reg, zone);
1357  } else if (undo_action == CLEAR) {
1358  registers_to_clear->Set(reg, zone);
1359  }
1360  // Perform the chronologically last action (or accumulated increment)
1361  // for the register.
1362  if (store_position != -1) {
1363  assembler->WriteCurrentPositionToRegister(reg, store_position);
1364  } else if (clear) {
1365  assembler->ClearRegisters(reg, reg);
1366  } else if (absolute) {
1367  assembler->SetRegister(reg, value);
1368  } else if (value != 0) {
1369  assembler->AdvanceRegister(reg, value);
1370  }
1371  }
1372 }
1373 
1374 
1375 // This is called as we come into a loop choice node and some other tricky
1376 // nodes. It normalizes the state of the code generator to ensure we can
1377 // generate generic code.
1378 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
1379  RegExpMacroAssembler* assembler = compiler->macro_assembler();
1380 
1381  ASSERT(!is_trivial());
1382 
1383  if (actions_ == NULL && backtrack() == NULL) {
1384  // Here we just have some deferred cp advances to fix and we are back to
1385  // a normal situation. We may also have to forget some information gained
1386  // through a quick check that was already performed.
1387  if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
1388  // Create a new trivial state and generate the node with that.
1389  Trace new_state;
1390  successor->Emit(compiler, &new_state);
1391  return;
1392  }
1393 
1394  // Generate deferred actions here along with code to undo them again.
1395  OutSet affected_registers;
1396 
1397  if (backtrack() != NULL) {
1398  // Here we have a concrete backtrack location. These are set up by choice
1399  // nodes and so they indicate that we have a deferred save of the current
1400  // position which we may need to emit here.
1401  assembler->PushCurrentPosition();
1402  }
1403 
1404  int max_register = FindAffectedRegisters(&affected_registers,
1405  compiler->zone());
1406  OutSet registers_to_pop;
1407  OutSet registers_to_clear;
1408  PerformDeferredActions(assembler,
1409  max_register,
1410  affected_registers,
1411  &registers_to_pop,
1412  &registers_to_clear,
1413  compiler->zone());
1414  if (cp_offset_ != 0) {
1415  assembler->AdvanceCurrentPosition(cp_offset_);
1416  }
1417 
1418  // Create a new trivial state and generate the node with that.
1419  Label undo;
1420  assembler->PushBacktrack(&undo);
1421  Trace new_state;
1422  successor->Emit(compiler, &new_state);
1423 
1424  // On backtrack we need to restore state.
1425  assembler->Bind(&undo);
1426  RestoreAffectedRegisters(assembler,
1427  max_register,
1428  registers_to_pop,
1429  registers_to_clear);
1430  if (backtrack() == NULL) {
1431  assembler->Backtrack();
1432  } else {
1433  assembler->PopCurrentPosition();
1434  assembler->GoTo(backtrack());
1435  }
1436 }
1437 
1438 
1440  RegExpMacroAssembler* assembler = compiler->macro_assembler();
1441 
1442  // Omit flushing the trace. We discard the entire stack frame anyway.
1443 
1444  if (!label()->is_bound()) {
1445  // We are completely independent of the trace, since we ignore it,
1446  // so this code can be used as the generic version.
1447  assembler->Bind(label());
1448  }
1449 
1450  // Throw away everything on the backtrack stack since the start
1451  // of the negative submatch and restore the character position.
1452  assembler->ReadCurrentPositionFromRegister(current_position_register_);
1453  assembler->ReadStackPointerFromRegister(stack_pointer_register_);
1454  if (clear_capture_count_ > 0) {
1455  // Clear any captures that might have been performed during the success
1456  // of the body of the negative look-ahead.
1457  int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
1458  assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
1459  }
1460  // Now that we have unwound the stack we find at the top of the stack the
1461  // backtrack that the BeginSubmatch node got.
1462  assembler->Backtrack();
1463 }
1464 
1465 
1466 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1467  if (!trace->is_trivial()) {
1468  trace->Flush(compiler, this);
1469  return;
1470  }
1471  RegExpMacroAssembler* assembler = compiler->macro_assembler();
1472  if (!label()->is_bound()) {
1473  assembler->Bind(label());
1474  }
1475  switch (action_) {
1476  case ACCEPT:
1477  assembler->Succeed();
1478  return;
1479  case BACKTRACK:
1480  assembler->GoTo(trace->backtrack());
1481  return;
1482  case NEGATIVE_SUBMATCH_SUCCESS:
1483  // This case is handled in a different virtual method.
1484  UNREACHABLE();
1485  }
1486  UNIMPLEMENTED();
1487 }
1488 
1489 
1491  if (guards_ == NULL)
1492  guards_ = new(zone) ZoneList<Guard*>(1, zone);
1493  guards_->Add(guard, zone);
1494 }
1495 
1496 
1498  int val,
1499  RegExpNode* on_success) {
1500  ActionNode* result =
1501  new(on_success->zone()) ActionNode(SET_REGISTER, on_success);
1502  result->data_.u_store_register.reg = reg;
1503  result->data_.u_store_register.value = val;
1504  return result;
1505 }
1506 
1507 
1509  ActionNode* result =
1510  new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);
1511  result->data_.u_increment_register.reg = reg;
1512  return result;
1513 }
1514 
1515 
1517  bool is_capture,
1518  RegExpNode* on_success) {
1519  ActionNode* result =
1520  new(on_success->zone()) ActionNode(STORE_POSITION, on_success);
1521  result->data_.u_position_register.reg = reg;
1522  result->data_.u_position_register.is_capture = is_capture;
1523  return result;
1524 }
1525 
1526 
1528  RegExpNode* on_success) {
1529  ActionNode* result =
1530  new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);
1531  result->data_.u_clear_captures.range_from = range.from();
1532  result->data_.u_clear_captures.range_to = range.to();
1533  return result;
1534 }
1535 
1536 
1538  int position_reg,
1539  RegExpNode* on_success) {
1540  ActionNode* result =
1541  new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);
1542  result->data_.u_submatch.stack_pointer_register = stack_reg;
1543  result->data_.u_submatch.current_position_register = position_reg;
1544  return result;
1545 }
1546 
1547 
1549  int position_reg,
1550  int clear_register_count,
1551  int clear_register_from,
1552  RegExpNode* on_success) {
1553  ActionNode* result =
1554  new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
1555  result->data_.u_submatch.stack_pointer_register = stack_reg;
1556  result->data_.u_submatch.current_position_register = position_reg;
1557  result->data_.u_submatch.clear_register_count = clear_register_count;
1558  result->data_.u_submatch.clear_register_from = clear_register_from;
1559  return result;
1560 }
1561 
1562 
1564  int repetition_register,
1565  int repetition_limit,
1566  RegExpNode* on_success) {
1567  ActionNode* result =
1568  new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);
1569  result->data_.u_empty_match_check.start_register = start_register;
1570  result->data_.u_empty_match_check.repetition_register = repetition_register;
1571  result->data_.u_empty_match_check.repetition_limit = repetition_limit;
1572  return result;
1573 }
1574 
1575 
1576 #define DEFINE_ACCEPT(Type) \
1577  void Type##Node::Accept(NodeVisitor* visitor) { \
1578  visitor->Visit##Type(this); \
1579  }
1581 #undef DEFINE_ACCEPT
1582 
1583 
1585  visitor->VisitLoopChoice(this);
1586 }
1587 
1588 
1589 // -------------------------------------------------------------------
1590 // Emit code.
1591 
1592 
1593 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
1594  Guard* guard,
1595  Trace* trace) {
1596  switch (guard->op()) {
1597  case Guard::LT:
1598  ASSERT(!trace->mentions_reg(guard->reg()));
1599  macro_assembler->IfRegisterGE(guard->reg(),
1600  guard->value(),
1601  trace->backtrack());
1602  break;
1603  case Guard::GEQ:
1604  ASSERT(!trace->mentions_reg(guard->reg()));
1605  macro_assembler->IfRegisterLT(guard->reg(),
1606  guard->value(),
1607  trace->backtrack());
1608  break;
1609  }
1610 }
1611 
1612 
1613 // Returns the number of characters in the equivalence class, omitting those
1614 // that cannot occur in the source string because it is ASCII.
1615 static int GetCaseIndependentLetters(Isolate* isolate,
1616  uc16 character,
1617  bool ascii_subject,
1618  unibrow::uchar* letters) {
1619  int length =
1620  isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
1621  // Unibrow returns 0 or 1 for characters where case independence is
1622  // trivial.
1623  if (length == 0) {
1624  letters[0] = character;
1625  length = 1;
1626  }
1627  if (!ascii_subject || character <= String::kMaxOneByteCharCode) {
1628  return length;
1629  }
1630  // The standard requires that non-ASCII characters cannot have ASCII
1631  // character codes in their equivalence class.
1632  return 0;
1633 }
1634 
1635 
1636 static inline bool EmitSimpleCharacter(Isolate* isolate,
1637  RegExpCompiler* compiler,
1638  uc16 c,
1639  Label* on_failure,
1640  int cp_offset,
1641  bool check,
1642  bool preloaded) {
1643  RegExpMacroAssembler* assembler = compiler->macro_assembler();
1644  bool bound_checked = false;
1645  if (!preloaded) {
1646  assembler->LoadCurrentCharacter(
1647  cp_offset,
1648  on_failure,
1649  check);
1650  bound_checked = true;
1651  }
1652  assembler->CheckNotCharacter(c, on_failure);
1653  return bound_checked;
1654 }
1655 
1656 
1657 // Only emits non-letters (things that don't have case). Only used for case
1658 // independent matches.
1659 static inline bool EmitAtomNonLetter(Isolate* isolate,
1660  RegExpCompiler* compiler,
1661  uc16 c,
1662  Label* on_failure,
1663  int cp_offset,
1664  bool check,
1665  bool preloaded) {
1666  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1667  bool ascii = compiler->ascii();
1669  int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1670  if (length < 1) {
1671  // This can't match. Must be an ASCII subject and a non-ASCII character.
1672  // We do not need to do anything since the ASCII pass already handled this.
1673  return false; // Bounds not checked.
1674  }
1675  bool checked = false;
1676  // We handle the length > 1 case in a later pass.
1677  if (length == 1) {
1678  if (ascii && c > String::kMaxOneByteCharCodeU) {
1679  // Can't match - see above.
1680  return false; // Bounds not checked.
1681  }
1682  if (!preloaded) {
1683  macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1684  checked = check;
1685  }
1686  macro_assembler->CheckNotCharacter(c, on_failure);
1687  }
1688  return checked;
1689 }
1690 
1691 
1692 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1693  bool ascii,
1694  uc16 c1,
1695  uc16 c2,
1696  Label* on_failure) {
1697  uc16 char_mask;
1698  if (ascii) {
1699  char_mask = String::kMaxOneByteCharCode;
1700  } else {
1701  char_mask = String::kMaxUtf16CodeUnit;
1702  }
1703  uc16 exor = c1 ^ c2;
1704  // Check whether exor has only one bit set.
1705  if (((exor - 1) & exor) == 0) {
1706  // If c1 and c2 differ only by one bit.
1707  // Ecma262UnCanonicalize always gives the highest number last.
1708  ASSERT(c2 > c1);
1709  uc16 mask = char_mask ^ exor;
1710  macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
1711  return true;
1712  }
1713  ASSERT(c2 > c1);
1714  uc16 diff = c2 - c1;
1715  if (((diff - 1) & diff) == 0 && c1 >= diff) {
1716  // If the characters differ by 2^n but don't differ by one bit then
1717  // subtract the difference from the found character, then do the or
1718  // trick. We avoid the theoretical case where negative numbers are
1719  // involved in order to simplify code generation.
1720  uc16 mask = char_mask ^ diff;
1721  macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1722  diff,
1723  mask,
1724  on_failure);
1725  return true;
1726  }
1727  return false;
1728 }
1729 
1730 
1731 typedef bool EmitCharacterFunction(Isolate* isolate,
1732  RegExpCompiler* compiler,
1733  uc16 c,
1734  Label* on_failure,
1735  int cp_offset,
1736  bool check,
1737  bool preloaded);
1738 
1739 // Only emits letters (things that have case). Only used for case independent
1740 // matches.
1741 static inline bool EmitAtomLetter(Isolate* isolate,
1742  RegExpCompiler* compiler,
1743  uc16 c,
1744  Label* on_failure,
1745  int cp_offset,
1746  bool check,
1747  bool preloaded) {
1748  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1749  bool ascii = compiler->ascii();
1751  int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1752  if (length <= 1) return false;
1753  // We may not need to check against the end of the input string
1754  // if this character lies before a character that matched.
1755  if (!preloaded) {
1756  macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1757  }
1758  Label ok;
1760  switch (length) {
1761  case 2: {
1762  if (ShortCutEmitCharacterPair(macro_assembler,
1763  ascii,
1764  chars[0],
1765  chars[1],
1766  on_failure)) {
1767  } else {
1768  macro_assembler->CheckCharacter(chars[0], &ok);
1769  macro_assembler->CheckNotCharacter(chars[1], on_failure);
1770  macro_assembler->Bind(&ok);
1771  }
1772  break;
1773  }
1774  case 4:
1775  macro_assembler->CheckCharacter(chars[3], &ok);
1776  // Fall through!
1777  case 3:
1778  macro_assembler->CheckCharacter(chars[0], &ok);
1779  macro_assembler->CheckCharacter(chars[1], &ok);
1780  macro_assembler->CheckNotCharacter(chars[2], on_failure);
1781  macro_assembler->Bind(&ok);
1782  break;
1783  default:
1784  UNREACHABLE();
1785  break;
1786  }
1787  return true;
1788 }
1789 
1790 
1791 static void EmitBoundaryTest(RegExpMacroAssembler* masm,
1792  int border,
1793  Label* fall_through,
1794  Label* above_or_equal,
1795  Label* below) {
1796  if (below != fall_through) {
1797  masm->CheckCharacterLT(border, below);
1798  if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
1799  } else {
1800  masm->CheckCharacterGT(border - 1, above_or_equal);
1801  }
1802 }
1803 
1804 
1805 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
1806  int first,
1807  int last,
1808  Label* fall_through,
1809  Label* in_range,
1810  Label* out_of_range) {
1811  if (in_range == fall_through) {
1812  if (first == last) {
1813  masm->CheckNotCharacter(first, out_of_range);
1814  } else {
1815  masm->CheckCharacterNotInRange(first, last, out_of_range);
1816  }
1817  } else {
1818  if (first == last) {
1819  masm->CheckCharacter(first, in_range);
1820  } else {
1821  masm->CheckCharacterInRange(first, last, in_range);
1822  }
1823  if (out_of_range != fall_through) masm->GoTo(out_of_range);
1824  }
1825 }
1826 
1827 
1828 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
1829 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
1830 static void EmitUseLookupTable(
1831  RegExpMacroAssembler* masm,
1832  ZoneList<int>* ranges,
1833  int start_index,
1834  int end_index,
1835  int min_char,
1836  Label* fall_through,
1837  Label* even_label,
1838  Label* odd_label) {
1839  static const int kSize = RegExpMacroAssembler::kTableSize;
1840  static const int kMask = RegExpMacroAssembler::kTableMask;
1841 
1842  int base = (min_char & ~kMask);
1843  USE(base);
1844 
1845  // Assert that everything is on one kTableSize page.
1846  for (int i = start_index; i <= end_index; i++) {
1847  ASSERT_EQ(ranges->at(i) & ~kMask, base);
1848  }
1849  ASSERT(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
1850 
1851  char templ[kSize];
1852  Label* on_bit_set;
1853  Label* on_bit_clear;
1854  int bit;
1855  if (even_label == fall_through) {
1856  on_bit_set = odd_label;
1857  on_bit_clear = even_label;
1858  bit = 1;
1859  } else {
1860  on_bit_set = even_label;
1861  on_bit_clear = odd_label;
1862  bit = 0;
1863  }
1864  for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) {
1865  templ[i] = bit;
1866  }
1867  int j = 0;
1868  bit ^= 1;
1869  for (int i = start_index; i < end_index; i++) {
1870  for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
1871  templ[j] = bit;
1872  }
1873  bit ^= 1;
1874  }
1875  for (int i = j; i < kSize; i++) {
1876  templ[i] = bit;
1877  }
1878  Factory* factory = masm->zone()->isolate()->factory();
1879  // TODO(erikcorry): Cache these.
1880  Handle<ByteArray> ba = factory->NewByteArray(kSize, TENURED);
1881  for (int i = 0; i < kSize; i++) {
1882  ba->set(i, templ[i]);
1883  }
1884  masm->CheckBitInTable(ba, on_bit_set);
1885  if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1886 }
1887 
1888 
1889 static void CutOutRange(RegExpMacroAssembler* masm,
1890  ZoneList<int>* ranges,
1891  int start_index,
1892  int end_index,
1893  int cut_index,
1894  Label* even_label,
1895  Label* odd_label) {
1896  bool odd = (((cut_index - start_index) & 1) == 1);
1897  Label* in_range_label = odd ? odd_label : even_label;
1898  Label dummy;
1899  EmitDoubleBoundaryTest(masm,
1900  ranges->at(cut_index),
1901  ranges->at(cut_index + 1) - 1,
1902  &dummy,
1903  in_range_label,
1904  &dummy);
1905  ASSERT(!dummy.is_linked());
1906  // Cut out the single range by rewriting the array. This creates a new
1907  // range that is a merger of the two ranges on either side of the one we
1908  // are cutting out. The oddity of the labels is preserved.
1909  for (int j = cut_index; j > start_index; j--) {
1910  ranges->at(j) = ranges->at(j - 1);
1911  }
1912  for (int j = cut_index + 1; j < end_index; j++) {
1913  ranges->at(j) = ranges->at(j + 1);
1914  }
1915 }
1916 
1917 
1918 // Unicode case. Split the search space into kSize spaces that are handled
1919 // with recursion.
1920 static void SplitSearchSpace(ZoneList<int>* ranges,
1921  int start_index,
1922  int end_index,
1923  int* new_start_index,
1924  int* new_end_index,
1925  int* border) {
1926  static const int kSize = RegExpMacroAssembler::kTableSize;
1927  static const int kMask = RegExpMacroAssembler::kTableMask;
1928 
1929  int first = ranges->at(start_index);
1930  int last = ranges->at(end_index) - 1;
1931 
1932  *new_start_index = start_index;
1933  *border = (ranges->at(start_index) & ~kMask) + kSize;
1934  while (*new_start_index < end_index) {
1935  if (ranges->at(*new_start_index) > *border) break;
1936  (*new_start_index)++;
1937  }
1938  // new_start_index is the index of the first edge that is beyond the
1939  // current kSize space.
1940 
1941  // For very large search spaces we do a binary chop search of the non-ASCII
1942  // space instead of just going to the end of the current kSize space. The
1943  // heuristics are complicated a little by the fact that any 128-character
1944  // encoding space can be quickly tested with a table lookup, so we don't
1945  // wish to do binary chop search at a smaller granularity than that. A
1946  // 128-character space can take up a lot of space in the ranges array if,
1947  // for example, we only want to match every second character (eg. the lower
1948  // case characters on some Unicode pages).
1949  int binary_chop_index = (end_index + start_index) / 2;
1950  // The first test ensures that we get to the code that handles the ASCII
1951  // range with a single not-taken branch, speeding up this important
1952  // character range (even non-ASCII charset-based text has spaces and
1953  // punctuation).
1954  if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case.
1955  end_index - start_index > (*new_start_index - start_index) * 2 &&
1956  last - first > kSize * 2 &&
1957  binary_chop_index > *new_start_index &&
1958  ranges->at(binary_chop_index) >= first + 2 * kSize) {
1959  int scan_forward_for_section_border = binary_chop_index;;
1960  int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1961 
1962  while (scan_forward_for_section_border < end_index) {
1963  if (ranges->at(scan_forward_for_section_border) > new_border) {
1964  *new_start_index = scan_forward_for_section_border;
1965  *border = new_border;
1966  break;
1967  }
1968  scan_forward_for_section_border++;
1969  }
1970  }
1971 
1972  ASSERT(*new_start_index > start_index);
1973  *new_end_index = *new_start_index - 1;
1974  if (ranges->at(*new_end_index) == *border) {
1975  (*new_end_index)--;
1976  }
1977  if (*border >= ranges->at(end_index)) {
1978  *border = ranges->at(end_index);
1979  *new_start_index = end_index; // Won't be used.
1980  *new_end_index = end_index - 1;
1981  }
1982 }
1983 
1984 
1985 // Gets a series of segment boundaries representing a character class. If the
1986 // character is in the range between an even and an odd boundary (counting from
1987 // start_index) then go to even_label, otherwise go to odd_label. We already
1988 // know that the character is in the range of min_char to max_char inclusive.
1989 // Either label can be NULL indicating backtracking. Either label can also be
1990 // equal to the fall_through label.
1991 static void GenerateBranches(RegExpMacroAssembler* masm,
1992  ZoneList<int>* ranges,
1993  int start_index,
1994  int end_index,
1995  uc16 min_char,
1996  uc16 max_char,
1997  Label* fall_through,
1998  Label* even_label,
1999  Label* odd_label) {
2000  int first = ranges->at(start_index);
2001  int last = ranges->at(end_index) - 1;
2002 
2003  ASSERT_LT(min_char, first);
2004 
2005  // Just need to test if the character is before or on-or-after
2006  // a particular character.
2007  if (start_index == end_index) {
2008  EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
2009  return;
2010  }
2011 
2012  // Another almost trivial case: There is one interval in the middle that is
2013  // different from the end intervals.
2014  if (start_index + 1 == end_index) {
2015  EmitDoubleBoundaryTest(
2016  masm, first, last, fall_through, even_label, odd_label);
2017  return;
2018  }
2019 
2020  // It's not worth using table lookup if there are very few intervals in the
2021  // character class.
2022  if (end_index - start_index <= 6) {
2023  // It is faster to test for individual characters, so we look for those
2024  // first, then try arbitrary ranges in the second round.
2025  static int kNoCutIndex = -1;
2026  int cut = kNoCutIndex;
2027  for (int i = start_index; i < end_index; i++) {
2028  if (ranges->at(i) == ranges->at(i + 1) - 1) {
2029  cut = i;
2030  break;
2031  }
2032  }
2033  if (cut == kNoCutIndex) cut = start_index;
2034  CutOutRange(
2035  masm, ranges, start_index, end_index, cut, even_label, odd_label);
2036  ASSERT_GE(end_index - start_index, 2);
2037  GenerateBranches(masm,
2038  ranges,
2039  start_index + 1,
2040  end_index - 1,
2041  min_char,
2042  max_char,
2043  fall_through,
2044  even_label,
2045  odd_label);
2046  return;
2047  }
2048 
2049  // If there are a lot of intervals in the regexp, then we will use tables to
2050  // determine whether the character is inside or outside the character class.
2051  static const int kBits = RegExpMacroAssembler::kTableSizeBits;
2052 
2053  if ((max_char >> kBits) == (min_char >> kBits)) {
2054  EmitUseLookupTable(masm,
2055  ranges,
2056  start_index,
2057  end_index,
2058  min_char,
2059  fall_through,
2060  even_label,
2061  odd_label);
2062  return;
2063  }
2064 
2065  if ((min_char >> kBits) != (first >> kBits)) {
2066  masm->CheckCharacterLT(first, odd_label);
2067  GenerateBranches(masm,
2068  ranges,
2069  start_index + 1,
2070  end_index,
2071  first,
2072  max_char,
2073  fall_through,
2074  odd_label,
2075  even_label);
2076  return;
2077  }
2078 
2079  int new_start_index = 0;
2080  int new_end_index = 0;
2081  int border = 0;
2082 
2083  SplitSearchSpace(ranges,
2084  start_index,
2085  end_index,
2086  &new_start_index,
2087  &new_end_index,
2088  &border);
2089 
2090  Label handle_rest;
2091  Label* above = &handle_rest;
2092  if (border == last + 1) {
2093  // We didn't find any section that started after the limit, so everything
2094  // above the border is one of the terminal labels.
2095  above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
2096  ASSERT(new_end_index == end_index - 1);
2097  }
2098 
2099  ASSERT_LE(start_index, new_end_index);
2100  ASSERT_LE(new_start_index, end_index);
2101  ASSERT_LT(start_index, new_start_index);
2102  ASSERT_LT(new_end_index, end_index);
2103  ASSERT(new_end_index + 1 == new_start_index ||
2104  (new_end_index + 2 == new_start_index &&
2105  border == ranges->at(new_end_index + 1)));
2106  ASSERT_LT(min_char, border - 1);
2107  ASSERT_LT(border, max_char);
2108  ASSERT_LT(ranges->at(new_end_index), border);
2109  ASSERT(border < ranges->at(new_start_index) ||
2110  (border == ranges->at(new_start_index) &&
2111  new_start_index == end_index &&
2112  new_end_index == end_index - 1 &&
2113  border == last + 1));
2114  ASSERT(new_start_index == 0 || border >= ranges->at(new_start_index - 1));
2115 
2116  masm->CheckCharacterGT(border - 1, above);
2117  Label dummy;
2118  GenerateBranches(masm,
2119  ranges,
2120  start_index,
2121  new_end_index,
2122  min_char,
2123  border - 1,
2124  &dummy,
2125  even_label,
2126  odd_label);
2127  if (handle_rest.is_linked()) {
2128  masm->Bind(&handle_rest);
2129  bool flip = (new_start_index & 1) != (start_index & 1);
2130  GenerateBranches(masm,
2131  ranges,
2132  new_start_index,
2133  end_index,
2134  border,
2135  max_char,
2136  &dummy,
2137  flip ? odd_label : even_label,
2138  flip ? even_label : odd_label);
2139  }
2140 }
2141 
2142 
2143 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
2144  RegExpCharacterClass* cc,
2145  bool ascii,
2146  Label* on_failure,
2147  int cp_offset,
2148  bool check_offset,
2149  bool preloaded,
2150  Zone* zone) {
2151  ZoneList<CharacterRange>* ranges = cc->ranges(zone);
2152  if (!CharacterRange::IsCanonical(ranges)) {
2154  }
2155 
2156  int max_char;
2157  if (ascii) {
2158  max_char = String::kMaxOneByteCharCode;
2159  } else {
2160  max_char = String::kMaxUtf16CodeUnit;
2161  }
2162 
2163  int range_count = ranges->length();
2164 
2165  int last_valid_range = range_count - 1;
2166  while (last_valid_range >= 0) {
2167  CharacterRange& range = ranges->at(last_valid_range);
2168  if (range.from() <= max_char) {
2169  break;
2170  }
2171  last_valid_range--;
2172  }
2173 
2174  if (last_valid_range < 0) {
2175  if (!cc->is_negated()) {
2176  macro_assembler->GoTo(on_failure);
2177  }
2178  if (check_offset) {
2179  macro_assembler->CheckPosition(cp_offset, on_failure);
2180  }
2181  return;
2182  }
2183 
2184  if (last_valid_range == 0 &&
2185  ranges->at(0).IsEverything(max_char)) {
2186  if (cc->is_negated()) {
2187  macro_assembler->GoTo(on_failure);
2188  } else {
2189  // This is a common case hit by non-anchored expressions.
2190  if (check_offset) {
2191  macro_assembler->CheckPosition(cp_offset, on_failure);
2192  }
2193  }
2194  return;
2195  }
2196  if (last_valid_range == 0 &&
2197  !cc->is_negated() &&
2198  ranges->at(0).IsEverything(max_char)) {
2199  // This is a common case hit by non-anchored expressions.
2200  if (check_offset) {
2201  macro_assembler->CheckPosition(cp_offset, on_failure);
2202  }
2203  return;
2204  }
2205 
2206  if (!preloaded) {
2207  macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
2208  }
2209 
2210  if (cc->is_standard(zone) &&
2211  macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
2212  on_failure)) {
2213  return;
2214  }
2215 
2216 
2217  // A new list with ascending entries. Each entry is a code unit
2218  // where there is a boundary between code units that are part of
2219  // the class and code units that are not. Normally we insert an
2220  // entry at zero which goes to the failure label, but if there
2221  // was already one there we fall through for success on that entry.
2222  // Subsequent entries have alternating meaning (success/failure).
2223  ZoneList<int>* range_boundaries =
2224  new(zone) ZoneList<int>(last_valid_range, zone);
2225 
2226  bool zeroth_entry_is_failure = !cc->is_negated();
2227 
2228  for (int i = 0; i <= last_valid_range; i++) {
2229  CharacterRange& range = ranges->at(i);
2230  if (range.from() == 0) {
2231  ASSERT_EQ(i, 0);
2232  zeroth_entry_is_failure = !zeroth_entry_is_failure;
2233  } else {
2234  range_boundaries->Add(range.from(), zone);
2235  }
2236  range_boundaries->Add(range.to() + 1, zone);
2237  }
2238  int end_index = range_boundaries->length() - 1;
2239  if (range_boundaries->at(end_index) > max_char) {
2240  end_index--;
2241  }
2242 
2243  Label fall_through;
2244  GenerateBranches(macro_assembler,
2245  range_boundaries,
2246  0, // start_index.
2247  end_index,
2248  0, // min_char.
2249  max_char,
2250  &fall_through,
2251  zeroth_entry_is_failure ? &fall_through : on_failure,
2252  zeroth_entry_is_failure ? on_failure : &fall_through);
2253  macro_assembler->Bind(&fall_through);
2254 }
2255 
2256 
2258 }
2259 
2260 
2262  Trace* trace) {
2263  // If we are generating a greedy loop then don't stop and don't reuse code.
2264  if (trace->stop_node() != NULL) {
2265  return CONTINUE;
2266  }
2267 
2268  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2269  if (trace->is_trivial()) {
2270  if (label_.is_bound()) {
2271  // We are being asked to generate a generic version, but that's already
2272  // been done so just go to it.
2273  macro_assembler->GoTo(&label_);
2274  return DONE;
2275  }
2276  if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
2277  // To avoid too deep recursion we push the node to the work queue and just
2278  // generate a goto here.
2279  compiler->AddWork(this);
2280  macro_assembler->GoTo(&label_);
2281  return DONE;
2282  }
2283  // Generate generic version of the node and bind the label for later use.
2284  macro_assembler->Bind(&label_);
2285  return CONTINUE;
2286  }
2287 
2288  // We are being asked to make a non-generic version. Keep track of how many
2289  // non-generic versions we generate so as not to overdo it.
2290  trace_count_++;
2291  if (FLAG_regexp_optimization &&
2292  trace_count_ < kMaxCopiesCodeGenerated &&
2294  return CONTINUE;
2295  }
2296 
2297  // If we get here code has been generated for this node too many times or
2298  // recursion is too deep. Time to switch to a generic version. The code for
2299  // generic versions above can handle deep recursion properly.
2300  trace->Flush(compiler, this);
2301  return DONE;
2302 }
2303 
2304 
2305 int ActionNode::EatsAtLeast(int still_to_find,
2306  int budget,
2307  bool not_at_start) {
2308  if (budget <= 0) return 0;
2309  if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
2310  return on_success()->EatsAtLeast(still_to_find,
2311  budget - 1,
2312  not_at_start);
2313 }
2314 
2315 
2317  int budget,
2318  BoyerMooreLookahead* bm,
2319  bool not_at_start) {
2320  if (action_type_ == BEGIN_SUBMATCH) {
2321  bm->SetRest(offset);
2322  } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
2323  on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
2324  }
2325  SaveBMInfo(bm, not_at_start, offset);
2326 }
2327 
2328 
2329 int AssertionNode::EatsAtLeast(int still_to_find,
2330  int budget,
2331  bool not_at_start) {
2332  if (budget <= 0) return 0;
2333  // If we know we are not at the start and we are asked "how many characters
2334  // will you match if you succeed?" then we can answer anything since false
2335  // implies false. So lets just return the max answer (still_to_find) since
2336  // that won't prevent us from preloading a lot of characters for the other
2337  // branches in the node graph.
2338  if (assertion_type() == AT_START && not_at_start) return still_to_find;
2339  return on_success()->EatsAtLeast(still_to_find,
2340  budget - 1,
2341  not_at_start);
2342 }
2343 
2344 
2346  int budget,
2347  BoyerMooreLookahead* bm,
2348  bool not_at_start) {
2349  // Match the behaviour of EatsAtLeast on this node.
2350  if (assertion_type() == AT_START && not_at_start) return;
2351  on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
2352  SaveBMInfo(bm, not_at_start, offset);
2353 }
2354 
2355 
2356 int BackReferenceNode::EatsAtLeast(int still_to_find,
2357  int budget,
2358  bool not_at_start) {
2359  if (budget <= 0) return 0;
2360  return on_success()->EatsAtLeast(still_to_find,
2361  budget - 1,
2362  not_at_start);
2363 }
2364 
2365 
2366 int TextNode::EatsAtLeast(int still_to_find,
2367  int budget,
2368  bool not_at_start) {
2369  int answer = Length();
2370  if (answer >= still_to_find) return answer;
2371  if (budget <= 0) return answer;
2372  // We are not at start after this node so we set the last argument to 'true'.
2373  return answer + on_success()->EatsAtLeast(still_to_find - answer,
2374  budget - 1,
2375  true);
2376 }
2377 
2378 
2380  int budget,
2381  bool not_at_start) {
2382  if (budget <= 0) return 0;
2383  // Alternative 0 is the negative lookahead, alternative 1 is what comes
2384  // afterwards.
2385  RegExpNode* node = alternatives_->at(1).node();
2386  return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
2387 }
2388 
2389 
2391  QuickCheckDetails* details,
2392  RegExpCompiler* compiler,
2393  int filled_in,
2394  bool not_at_start) {
2395  // Alternative 0 is the negative lookahead, alternative 1 is what comes
2396  // afterwards.
2397  RegExpNode* node = alternatives_->at(1).node();
2398  return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
2399 }
2400 
2401 
2402 int ChoiceNode::EatsAtLeastHelper(int still_to_find,
2403  int budget,
2404  RegExpNode* ignore_this_node,
2405  bool not_at_start) {
2406  if (budget <= 0) return 0;
2407  int min = 100;
2408  int choice_count = alternatives_->length();
2409  budget = (budget - 1) / choice_count;
2410  for (int i = 0; i < choice_count; i++) {
2411  RegExpNode* node = alternatives_->at(i).node();
2412  if (node == ignore_this_node) continue;
2413  int node_eats_at_least =
2414  node->EatsAtLeast(still_to_find, budget, not_at_start);
2415  if (node_eats_at_least < min) min = node_eats_at_least;
2416  if (min == 0) return 0;
2417  }
2418  return min;
2419 }
2420 
2421 
2422 int LoopChoiceNode::EatsAtLeast(int still_to_find,
2423  int budget,
2424  bool not_at_start) {
2425  return EatsAtLeastHelper(still_to_find,
2426  budget - 1,
2427  loop_node_,
2428  not_at_start);
2429 }
2430 
2431 
2432 int ChoiceNode::EatsAtLeast(int still_to_find,
2433  int budget,
2434  bool not_at_start) {
2435  return EatsAtLeastHelper(still_to_find,
2436  budget,
2437  NULL,
2438  not_at_start);
2439 }
2440 
2441 
2442 // Takes the left-most 1-bit and smears it out, setting all bits to its right.
2443 static inline uint32_t SmearBitsRight(uint32_t v) {
2444  v |= v >> 1;
2445  v |= v >> 2;
2446  v |= v >> 4;
2447  v |= v >> 8;
2448  v |= v >> 16;
2449  return v;
2450 }
2451 
2452 
2454  bool found_useful_op = false;
2455  uint32_t char_mask;
2456  if (asc) {
2457  char_mask = String::kMaxOneByteCharCode;
2458  } else {
2459  char_mask = String::kMaxUtf16CodeUnit;
2460  }
2461  mask_ = 0;
2462  value_ = 0;
2463  int char_shift = 0;
2464  for (int i = 0; i < characters_; i++) {
2465  Position* pos = &positions_[i];
2466  if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
2467  found_useful_op = true;
2468  }
2469  mask_ |= (pos->mask & char_mask) << char_shift;
2470  value_ |= (pos->value & char_mask) << char_shift;
2471  char_shift += asc ? 8 : 16;
2472  }
2473  return found_useful_op;
2474 }
2475 
2476 
2478  Trace* trace,
2479  bool preload_has_checked_bounds,
2480  Label* on_possible_success,
2481  QuickCheckDetails* details,
2482  bool fall_through_on_failure) {
2483  if (details->characters() == 0) return false;
2484  GetQuickCheckDetails(
2485  details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
2486  if (details->cannot_match()) return false;
2487  if (!details->Rationalize(compiler->ascii())) return false;
2488  ASSERT(details->characters() == 1 ||
2489  compiler->macro_assembler()->CanReadUnaligned());
2490  uint32_t mask = details->mask();
2491  uint32_t value = details->value();
2492 
2493  RegExpMacroAssembler* assembler = compiler->macro_assembler();
2494 
2495  if (trace->characters_preloaded() != details->characters()) {
2496  assembler->LoadCurrentCharacter(trace->cp_offset(),
2497  trace->backtrack(),
2498  !preload_has_checked_bounds,
2499  details->characters());
2500  }
2501 
2502 
2503  bool need_mask = true;
2504 
2505  if (details->characters() == 1) {
2506  // If number of characters preloaded is 1 then we used a byte or 16 bit
2507  // load so the value is already masked down.
2508  uint32_t char_mask;
2509  if (compiler->ascii()) {
2510  char_mask = String::kMaxOneByteCharCode;
2511  } else {
2512  char_mask = String::kMaxUtf16CodeUnit;
2513  }
2514  if ((mask & char_mask) == char_mask) need_mask = false;
2515  mask &= char_mask;
2516  } else {
2517  // For 2-character preloads in ASCII mode or 1-character preloads in
2518  // TWO_BYTE mode we also use a 16 bit load with zero extend.
2519  if (details->characters() == 2 && compiler->ascii()) {
2520  if ((mask & 0xffff) == 0xffff) need_mask = false;
2521  } else if (details->characters() == 1 && !compiler->ascii()) {
2522  if ((mask & 0xffff) == 0xffff) need_mask = false;
2523  } else {
2524  if (mask == 0xffffffff) need_mask = false;
2525  }
2526  }
2527 
2528  if (fall_through_on_failure) {
2529  if (need_mask) {
2530  assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
2531  } else {
2532  assembler->CheckCharacter(value, on_possible_success);
2533  }
2534  } else {
2535  if (need_mask) {
2536  assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
2537  } else {
2538  assembler->CheckNotCharacter(value, trace->backtrack());
2539  }
2540  }
2541  return true;
2542 }
2543 
2544 
2545 // Here is the meat of GetQuickCheckDetails (see also the comment on the
2546 // super-class in the .h file).
2547 //
2548 // We iterate along the text object, building up for each character a
2549 // mask and value that can be used to test for a quick failure to match.
2550 // The masks and values for the positions will be combined into a single
2551 // machine word for the current character width in order to be used in
2552 // generating a quick check.
2554  RegExpCompiler* compiler,
2555  int characters_filled_in,
2556  bool not_at_start) {
2557  Isolate* isolate = compiler->macro_assembler()->zone()->isolate();
2558  ASSERT(characters_filled_in < details->characters());
2559  int characters = details->characters();
2560  int char_mask;
2561  if (compiler->ascii()) {
2562  char_mask = String::kMaxOneByteCharCode;
2563  } else {
2564  char_mask = String::kMaxUtf16CodeUnit;
2565  }
2566  for (int k = 0; k < elms_->length(); k++) {
2567  TextElement elm = elms_->at(k);
2568  if (elm.text_type() == TextElement::ATOM) {
2569  Vector<const uc16> quarks = elm.atom()->data();
2570  for (int i = 0; i < characters && i < quarks.length(); i++) {
2572  details->positions(characters_filled_in);
2573  uc16 c = quarks[i];
2574  if (c > char_mask) {
2575  // If we expect a non-ASCII character from an ASCII string,
2576  // there is no way we can match. Not even case independent
2577  // matching can turn an ASCII character into non-ASCII or
2578  // vice versa.
2579  details->set_cannot_match();
2580  pos->determines_perfectly = false;
2581  return;
2582  }
2583  if (compiler->ignore_case()) {
2585  int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
2586  chars);
2587  ASSERT(length != 0); // Can only happen if c > char_mask (see above).
2588  if (length == 1) {
2589  // This letter has no case equivalents, so it's nice and simple
2590  // and the mask-compare will determine definitely whether we have
2591  // a match at this character position.
2592  pos->mask = char_mask;
2593  pos->value = c;
2594  pos->determines_perfectly = true;
2595  } else {
2596  uint32_t common_bits = char_mask;
2597  uint32_t bits = chars[0];
2598  for (int j = 1; j < length; j++) {
2599  uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
2600  common_bits ^= differing_bits;
2601  bits &= common_bits;
2602  }
2603  // If length is 2 and common bits has only one zero in it then
2604  // our mask and compare instruction will determine definitely
2605  // whether we have a match at this character position. Otherwise
2606  // it can only be an approximate check.
2607  uint32_t one_zero = (common_bits | ~char_mask);
2608  if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
2609  pos->determines_perfectly = true;
2610  }
2611  pos->mask = common_bits;
2612  pos->value = bits;
2613  }
2614  } else {
2615  // Don't ignore case. Nice simple case where the mask-compare will
2616  // determine definitely whether we have a match at this character
2617  // position.
2618  pos->mask = char_mask;
2619  pos->value = c;
2620  pos->determines_perfectly = true;
2621  }
2622  characters_filled_in++;
2623  ASSERT(characters_filled_in <= details->characters());
2624  if (characters_filled_in == details->characters()) {
2625  return;
2626  }
2627  }
2628  } else {
2630  details->positions(characters_filled_in);
2631  RegExpCharacterClass* tree = elm.char_class();
2632  ZoneList<CharacterRange>* ranges = tree->ranges(zone());
2633  if (tree->is_negated()) {
2634  // A quick check uses multi-character mask and compare. There is no
2635  // useful way to incorporate a negative char class into this scheme
2636  // so we just conservatively create a mask and value that will always
2637  // succeed.
2638  pos->mask = 0;
2639  pos->value = 0;
2640  } else {
2641  int first_range = 0;
2642  while (ranges->at(first_range).from() > char_mask) {
2643  first_range++;
2644  if (first_range == ranges->length()) {
2645  details->set_cannot_match();
2646  pos->determines_perfectly = false;
2647  return;
2648  }
2649  }
2650  CharacterRange range = ranges->at(first_range);
2651  uc16 from = range.from();
2652  uc16 to = range.to();
2653  if (to > char_mask) {
2654  to = char_mask;
2655  }
2656  uint32_t differing_bits = (from ^ to);
2657  // A mask and compare is only perfect if the differing bits form a
2658  // number like 00011111 with one single block of trailing 1s.
2659  if ((differing_bits & (differing_bits + 1)) == 0 &&
2660  from + differing_bits == to) {
2661  pos->determines_perfectly = true;
2662  }
2663  uint32_t common_bits = ~SmearBitsRight(differing_bits);
2664  uint32_t bits = (from & common_bits);
2665  for (int i = first_range + 1; i < ranges->length(); i++) {
2666  CharacterRange range = ranges->at(i);
2667  uc16 from = range.from();
2668  uc16 to = range.to();
2669  if (from > char_mask) continue;
2670  if (to > char_mask) to = char_mask;
2671  // Here we are combining more ranges into the mask and compare
2672  // value. With each new range the mask becomes more sparse and
2673  // so the chances of a false positive rise. A character class
2674  // with multiple ranges is assumed never to be equivalent to a
2675  // mask and compare operation.
2676  pos->determines_perfectly = false;
2677  uint32_t new_common_bits = (from ^ to);
2678  new_common_bits = ~SmearBitsRight(new_common_bits);
2679  common_bits &= new_common_bits;
2680  bits &= new_common_bits;
2681  uint32_t differing_bits = (from & common_bits) ^ bits;
2682  common_bits ^= differing_bits;
2683  bits &= common_bits;
2684  }
2685  pos->mask = common_bits;
2686  pos->value = bits;
2687  }
2688  characters_filled_in++;
2689  ASSERT(characters_filled_in <= details->characters());
2690  if (characters_filled_in == details->characters()) {
2691  return;
2692  }
2693  }
2694  }
2695  ASSERT(characters_filled_in != details->characters());
2696  if (!details->cannot_match()) {
2697  on_success()-> GetQuickCheckDetails(details,
2698  compiler,
2699  characters_filled_in,
2700  true);
2701  }
2702 }
2703 
2704 
2706  for (int i = 0; i < characters_; i++) {
2707  positions_[i].mask = 0;
2708  positions_[i].value = 0;
2709  positions_[i].determines_perfectly = false;
2710  }
2711  characters_ = 0;
2712 }
2713 
2714 
2715 void QuickCheckDetails::Advance(int by, bool ascii) {
2716  ASSERT(by >= 0);
2717  if (by >= characters_) {
2718  Clear();
2719  return;
2720  }
2721  for (int i = 0; i < characters_ - by; i++) {
2722  positions_[i] = positions_[by + i];
2723  }
2724  for (int i = characters_ - by; i < characters_; i++) {
2725  positions_[i].mask = 0;
2726  positions_[i].value = 0;
2727  positions_[i].determines_perfectly = false;
2728  }
2729  characters_ -= by;
2730  // We could change mask_ and value_ here but we would never advance unless
2731  // they had already been used in a check and they won't be used again because
2732  // it would gain us nothing. So there's no point.
2733 }
2734 
2735 
2736 void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
2737  ASSERT(characters_ == other->characters_);
2738  if (other->cannot_match_) {
2739  return;
2740  }
2741  if (cannot_match_) {
2742  *this = *other;
2743  return;
2744  }
2745  for (int i = from_index; i < characters_; i++) {
2746  QuickCheckDetails::Position* pos = positions(i);
2747  QuickCheckDetails::Position* other_pos = other->positions(i);
2748  if (pos->mask != other_pos->mask ||
2749  pos->value != other_pos->value ||
2750  !other_pos->determines_perfectly) {
2751  // Our mask-compare operation will be approximate unless we have the
2752  // exact same operation on both sides of the alternation.
2753  pos->determines_perfectly = false;
2754  }
2755  pos->mask &= other_pos->mask;
2756  pos->value &= pos->mask;
2757  other_pos->value &= pos->mask;
2758  uc16 differing_bits = (pos->value ^ other_pos->value);
2759  pos->mask &= ~differing_bits;
2760  pos->value &= pos->mask;
2761  }
2762 }
2763 
2764 
2766  public:
2767  explicit VisitMarker(NodeInfo* info) : info_(info) {
2768  ASSERT(!info->visited);
2769  info->visited = true;
2770  }
2772  info_->visited = false;
2773  }
2774  private:
2775  NodeInfo* info_;
2776 };
2777 
2778 
2779 RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) {
2780  if (info()->replacement_calculated) return replacement();
2781  if (depth < 0) return this;
2782  ASSERT(!info()->visited);
2783  VisitMarker marker(info());
2784  return FilterSuccessor(depth - 1, ignore_case);
2785 }
2786 
2787 
2788 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
2789  RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);
2790  if (next == NULL) return set_replacement(NULL);
2791  on_success_ = next;
2792  return set_replacement(this);
2793 }
2794 
2795 
2796 // We need to check for the following characters: 0x39c 0x3bc 0x178.
2797 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
2798  // TODO(dcarney): this could be a lot more efficient.
2799  return range.Contains(0x39c) ||
2800  range.Contains(0x3bc) || range.Contains(0x178);
2801 }
2802 
2803 
2804 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
2805  for (int i = 0; i < ranges->length(); i++) {
2806  // TODO(dcarney): this could be a lot more efficient.
2807  if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
2808  }
2809  return false;
2810 }
2811 
2812 
2813 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
2814  if (info()->replacement_calculated) return replacement();
2815  if (depth < 0) return this;
2816  ASSERT(!info()->visited);
2817  VisitMarker marker(info());
2818  int element_count = elms_->length();
2819  for (int i = 0; i < element_count; i++) {
2820  TextElement elm = elms_->at(i);
2821  if (elm.text_type() == TextElement::ATOM) {
2822  Vector<const uc16> quarks = elm.atom()->data();
2823  for (int j = 0; j < quarks.length(); j++) {
2824  uint16_t c = quarks[j];
2825  if (c <= String::kMaxOneByteCharCode) continue;
2826  if (!ignore_case) return set_replacement(NULL);
2827  // Here, we need to check for characters whose upper and lower cases
2828  // are outside the Latin-1 range.
2830  // Character is outside Latin-1 completely
2831  if (converted == 0) return set_replacement(NULL);
2832  // Convert quark to Latin-1 in place.
2833  uint16_t* copy = const_cast<uint16_t*>(quarks.start());
2834  copy[j] = converted;
2835  }
2836  } else {
2837  ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
2838  RegExpCharacterClass* cc = elm.char_class();
2839  ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2840  if (!CharacterRange::IsCanonical(ranges)) {
2842  }
2843  // Now they are in order so we only need to look at the first.
2844  int range_count = ranges->length();
2845  if (cc->is_negated()) {
2846  if (range_count != 0 &&
2847  ranges->at(0).from() == 0 &&
2848  ranges->at(0).to() >= String::kMaxOneByteCharCode) {
2849  // This will be handled in a later filter.
2850  if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2851  return set_replacement(NULL);
2852  }
2853  } else {
2854  if (range_count == 0 ||
2855  ranges->at(0).from() > String::kMaxOneByteCharCode) {
2856  // This will be handled in a later filter.
2857  if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2858  return set_replacement(NULL);
2859  }
2860  }
2861  }
2862  }
2863  return FilterSuccessor(depth - 1, ignore_case);
2864 }
2865 
2866 
2867 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {
2868  if (info()->replacement_calculated) return replacement();
2869  if (depth < 0) return this;
2870  if (info()->visited) return this;
2871  {
2872  VisitMarker marker(info());
2873 
2874  RegExpNode* continue_replacement =
2875  continue_node_->FilterASCII(depth - 1, ignore_case);
2876  // If we can't continue after the loop then there is no sense in doing the
2877  // loop.
2878  if (continue_replacement == NULL) return set_replacement(NULL);
2879  }
2880 
2881  return ChoiceNode::FilterASCII(depth - 1, ignore_case);
2882 }
2883 
2884 
2885 RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
2886  if (info()->replacement_calculated) return replacement();
2887  if (depth < 0) return this;
2888  if (info()->visited) return this;
2889  VisitMarker marker(info());
2890  int choice_count = alternatives_->length();
2891 
2892  for (int i = 0; i < choice_count; i++) {
2893  GuardedAlternative alternative = alternatives_->at(i);
2894  if (alternative.guards() != NULL && alternative.guards()->length() != 0) {
2895  set_replacement(this);
2896  return this;
2897  }
2898  }
2899 
2900  int surviving = 0;
2901  RegExpNode* survivor = NULL;
2902  for (int i = 0; i < choice_count; i++) {
2903  GuardedAlternative alternative = alternatives_->at(i);
2904  RegExpNode* replacement =
2905  alternative.node()->FilterASCII(depth - 1, ignore_case);
2906  ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK.
2907  if (replacement != NULL) {
2908  alternatives_->at(i).set_node(replacement);
2909  surviving++;
2910  survivor = replacement;
2911  }
2912  }
2913  if (surviving < 2) return set_replacement(survivor);
2914 
2915  set_replacement(this);
2916  if (surviving == choice_count) {
2917  return this;
2918  }
2919  // Only some of the nodes survived the filtering. We need to rebuild the
2920  // alternatives list.
2921  ZoneList<GuardedAlternative>* new_alternatives =
2922  new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
2923  for (int i = 0; i < choice_count; i++) {
2924  RegExpNode* replacement =
2925  alternatives_->at(i).node()->FilterASCII(depth - 1, ignore_case);
2926  if (replacement != NULL) {
2927  alternatives_->at(i).set_node(replacement);
2928  new_alternatives->Add(alternatives_->at(i), zone());
2929  }
2930  }
2931  alternatives_ = new_alternatives;
2932  return this;
2933 }
2934 
2935 
2937  bool ignore_case) {
2938  if (info()->replacement_calculated) return replacement();
2939  if (depth < 0) return this;
2940  if (info()->visited) return this;
2941  VisitMarker marker(info());
2942  // Alternative 0 is the negative lookahead, alternative 1 is what comes
2943  // afterwards.
2944  RegExpNode* node = alternatives_->at(1).node();
2945  RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case);
2946  if (replacement == NULL) return set_replacement(NULL);
2947  alternatives_->at(1).set_node(replacement);
2948 
2949  RegExpNode* neg_node = alternatives_->at(0).node();
2950  RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case);
2951  // If the negative lookahead is always going to fail then
2952  // we don't need to check it.
2953  if (neg_replacement == NULL) return set_replacement(replacement);
2954  alternatives_->at(0).set_node(neg_replacement);
2955  return set_replacement(this);
2956 }
2957 
2958 
2960  RegExpCompiler* compiler,
2961  int characters_filled_in,
2962  bool not_at_start) {
2963  if (body_can_be_zero_length_ || info()->visited) return;
2964  VisitMarker marker(info());
2965  return ChoiceNode::GetQuickCheckDetails(details,
2966  compiler,
2967  characters_filled_in,
2968  not_at_start);
2969 }
2970 
2971 
2973  int budget,
2974  BoyerMooreLookahead* bm,
2975  bool not_at_start) {
2976  if (body_can_be_zero_length_ || budget <= 0) {
2977  bm->SetRest(offset);
2978  SaveBMInfo(bm, not_at_start, offset);
2979  return;
2980  }
2981  ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);
2982  SaveBMInfo(bm, not_at_start, offset);
2983 }
2984 
2985 
2987  RegExpCompiler* compiler,
2988  int characters_filled_in,
2989  bool not_at_start) {
2990  not_at_start = (not_at_start || not_at_start_);
2991  int choice_count = alternatives_->length();
2992  ASSERT(choice_count > 0);
2993  alternatives_->at(0).node()->GetQuickCheckDetails(details,
2994  compiler,
2995  characters_filled_in,
2996  not_at_start);
2997  for (int i = 1; i < choice_count; i++) {
2998  QuickCheckDetails new_details(details->characters());
2999  RegExpNode* node = alternatives_->at(i).node();
3000  node->GetQuickCheckDetails(&new_details, compiler,
3001  characters_filled_in,
3002  not_at_start);
3003  // Here we merge the quick match details of the two branches.
3004  details->Merge(&new_details, characters_filled_in);
3005  }
3006 }
3007 
3008 
3009 // Check for [0-9A-Z_a-z].
3010 static void EmitWordCheck(RegExpMacroAssembler* assembler,
3011  Label* word,
3012  Label* non_word,
3013  bool fall_through_on_word) {
3014  if (assembler->CheckSpecialCharacterClass(
3015  fall_through_on_word ? 'w' : 'W',
3016  fall_through_on_word ? non_word : word)) {
3017  // Optimized implementation available.
3018  return;
3019  }
3020  assembler->CheckCharacterGT('z', non_word);
3021  assembler->CheckCharacterLT('0', non_word);
3022  assembler->CheckCharacterGT('a' - 1, word);
3023  assembler->CheckCharacterLT('9' + 1, word);
3024  assembler->CheckCharacterLT('A', non_word);
3025  assembler->CheckCharacterLT('Z' + 1, word);
3026  if (fall_through_on_word) {
3027  assembler->CheckNotCharacter('_', non_word);
3028  } else {
3029  assembler->CheckCharacter('_', word);
3030  }
3031 }
3032 
3033 
3034 // Emit the code to check for a ^ in multiline mode (1-character lookbehind
3035 // that matches newline or the start of input).
3036 static void EmitHat(RegExpCompiler* compiler,
3037  RegExpNode* on_success,
3038  Trace* trace) {
3039  RegExpMacroAssembler* assembler = compiler->macro_assembler();
3040  // We will be loading the previous character into the current character
3041  // register.
3042  Trace new_trace(*trace);
3043  new_trace.InvalidateCurrentCharacter();
3044 
3045  Label ok;
3046  if (new_trace.cp_offset() == 0) {
3047  // The start of input counts as a newline in this context, so skip to
3048  // ok if we are at the start.
3049  assembler->CheckAtStart(&ok);
3050  }
3051  // We already checked that we are not at the start of input so it must be
3052  // OK to load the previous character.
3053  assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
3054  new_trace.backtrack(),
3055  false);
3056  if (!assembler->CheckSpecialCharacterClass('n',
3057  new_trace.backtrack())) {
3058  // Newline means \n, \r, 0x2028 or 0x2029.
3059  if (!compiler->ascii()) {
3060  assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
3061  }
3062  assembler->CheckCharacter('\n', &ok);
3063  assembler->CheckNotCharacter('\r', new_trace.backtrack());
3064  }
3065  assembler->Bind(&ok);
3066  on_success->Emit(compiler, &new_trace);
3067 }
3068 
3069 
3070 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
3071 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
3072  RegExpMacroAssembler* assembler = compiler->macro_assembler();
3073  Trace::TriBool next_is_word_character = Trace::UNKNOWN;
3074  bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
3075  BoyerMooreLookahead* lookahead = bm_info(not_at_start);
3076  if (lookahead == NULL) {
3077  int eats_at_least =
3079  kRecursionBudget,
3080  not_at_start));
3081  if (eats_at_least >= 1) {
3082  BoyerMooreLookahead* bm =
3083  new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
3084  FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
3085  if (bm->at(0)->is_non_word())
3086  next_is_word_character = Trace::FALSE_VALUE;
3087  if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
3088  }
3089  } else {
3090  if (lookahead->at(0)->is_non_word())
3091  next_is_word_character = Trace::FALSE_VALUE;
3092  if (lookahead->at(0)->is_word())
3093  next_is_word_character = Trace::TRUE_VALUE;
3094  }
3095  bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
3096  if (next_is_word_character == Trace::UNKNOWN) {
3097  Label before_non_word;
3098  Label before_word;
3099  if (trace->characters_preloaded() != 1) {
3100  assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
3101  }
3102  // Fall through on non-word.
3103  EmitWordCheck(assembler, &before_word, &before_non_word, false);
3104  // Next character is not a word character.
3105  assembler->Bind(&before_non_word);
3106  Label ok;
3107  BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
3108  assembler->GoTo(&ok);
3109 
3110  assembler->Bind(&before_word);
3111  BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
3112  assembler->Bind(&ok);
3113  } else if (next_is_word_character == Trace::TRUE_VALUE) {
3114  BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
3115  } else {
3116  ASSERT(next_is_word_character == Trace::FALSE_VALUE);
3117  BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
3118  }
3119 }
3120 
3121 
3122 void AssertionNode::BacktrackIfPrevious(
3123  RegExpCompiler* compiler,
3124  Trace* trace,
3125  AssertionNode::IfPrevious backtrack_if_previous) {
3126  RegExpMacroAssembler* assembler = compiler->macro_assembler();
3127  Trace new_trace(*trace);
3128  new_trace.InvalidateCurrentCharacter();
3129 
3130  Label fall_through, dummy;
3131 
3132  Label* non_word = backtrack_if_previous == kIsNonWord ?
3133  new_trace.backtrack() :
3134  &fall_through;
3135  Label* word = backtrack_if_previous == kIsNonWord ?
3136  &fall_through :
3137  new_trace.backtrack();
3138 
3139  if (new_trace.cp_offset() == 0) {
3140  // The start of input counts as a non-word character, so the question is
3141  // decided if we are at the start.
3142  assembler->CheckAtStart(non_word);
3143  }
3144  // We already checked that we are not at the start of input so it must be
3145  // OK to load the previous character.
3146  assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
3147  EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
3148 
3149  assembler->Bind(&fall_through);
3150  on_success()->Emit(compiler, &new_trace);
3151 }
3152 
3153 
3155  RegExpCompiler* compiler,
3156  int filled_in,
3157  bool not_at_start) {
3158  if (assertion_type_ == AT_START && not_at_start) {
3159  details->set_cannot_match();
3160  return;
3161  }
3162  return on_success()->GetQuickCheckDetails(details,
3163  compiler,
3164  filled_in,
3165  not_at_start);
3166 }
3167 
3168 
3169 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3170  RegExpMacroAssembler* assembler = compiler->macro_assembler();
3171  switch (assertion_type_) {
3172  case AT_END: {
3173  Label ok;
3174  assembler->CheckPosition(trace->cp_offset(), &ok);
3175  assembler->GoTo(trace->backtrack());
3176  assembler->Bind(&ok);
3177  break;
3178  }
3179  case AT_START: {
3180  if (trace->at_start() == Trace::FALSE_VALUE) {
3181  assembler->GoTo(trace->backtrack());
3182  return;
3183  }
3184  if (trace->at_start() == Trace::UNKNOWN) {
3185  assembler->CheckNotAtStart(trace->backtrack());
3186  Trace at_start_trace = *trace;
3187  at_start_trace.set_at_start(true);
3188  on_success()->Emit(compiler, &at_start_trace);
3189  return;
3190  }
3191  }
3192  break;
3193  case AFTER_NEWLINE:
3194  EmitHat(compiler, on_success(), trace);
3195  return;
3196  case AT_BOUNDARY:
3197  case AT_NON_BOUNDARY: {
3198  EmitBoundaryCheck(compiler, trace);
3199  return;
3200  }
3201  }
3202  on_success()->Emit(compiler, trace);
3203 }
3204 
3205 
3206 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
3207  if (quick_check == NULL) return false;
3208  if (offset >= quick_check->characters()) return false;
3209  return quick_check->positions(offset)->determines_perfectly;
3210 }
3211 
3212 
3213 static void UpdateBoundsCheck(int index, int* checked_up_to) {
3214  if (index > *checked_up_to) {
3215  *checked_up_to = index;
3216  }
3217 }
3218 
3219 
3220 // We call this repeatedly to generate code for each pass over the text node.
3221 // The passes are in increasing order of difficulty because we hope one
3222 // of the first passes will fail in which case we are saved the work of the
3223 // later passes. for example for the case independent regexp /%[asdfghjkl]a/
3224 // we will check the '%' in the first pass, the case independent 'a' in the
3225 // second pass and the character class in the last pass.
3226 //
3227 // The passes are done from right to left, so for example to test for /bar/
3228 // we will first test for an 'r' with offset 2, then an 'a' with offset 1
3229 // and then a 'b' with offset 0. This means we can avoid the end-of-input
3230 // bounds check most of the time. In the example we only need to check for
3231 // end-of-input when loading the putative 'r'.
3232 //
3233 // A slight complication involves the fact that the first character may already
3234 // be fetched into a register by the previous node. In this case we want to
3235 // do the test for that character first. We do this in separate passes. The
3236 // 'preloaded' argument indicates that we are doing such a 'pass'. If such a
3237 // pass has been performed then subsequent passes will have true in
3238 // first_element_checked to indicate that that character does not need to be
3239 // checked again.
3240 //
3241 // In addition to all this we are passed a Trace, which can
3242 // contain an AlternativeGeneration object. In this AlternativeGeneration
3243 // object we can see details of any quick check that was already passed in
3244 // order to get to the code we are now generating. The quick check can involve
3245 // loading characters, which means we do not need to recheck the bounds
3246 // up to the limit the quick check already checked. In addition the quick
3247 // check can have involved a mask and compare operation which may simplify
3248 // or obviate the need for further checks at some character positions.
3249 void TextNode::TextEmitPass(RegExpCompiler* compiler,
3250  TextEmitPassType pass,
3251  bool preloaded,
3252  Trace* trace,
3253  bool first_element_checked,
3254  int* checked_up_to) {
3255  RegExpMacroAssembler* assembler = compiler->macro_assembler();
3256  Isolate* isolate = assembler->zone()->isolate();
3257  bool ascii = compiler->ascii();
3258  Label* backtrack = trace->backtrack();
3259  QuickCheckDetails* quick_check = trace->quick_check_performed();
3260  int element_count = elms_->length();
3261  for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
3262  TextElement elm = elms_->at(i);
3263  int cp_offset = trace->cp_offset() + elm.cp_offset();
3264  if (elm.text_type() == TextElement::ATOM) {
3265  Vector<const uc16> quarks = elm.atom()->data();
3266  for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3267  if (first_element_checked && i == 0 && j == 0) continue;
3268  if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
3269  EmitCharacterFunction* emit_function = NULL;
3270  switch (pass) {
3271  case NON_ASCII_MATCH:
3272  ASSERT(ascii);
3273  if (quarks[j] > String::kMaxOneByteCharCode) {
3274  assembler->GoTo(backtrack);
3275  return;
3276  }
3277  break;
3278  case NON_LETTER_CHARACTER_MATCH:
3279  emit_function = &EmitAtomNonLetter;
3280  break;
3281  case SIMPLE_CHARACTER_MATCH:
3282  emit_function = &EmitSimpleCharacter;
3283  break;
3284  case CASE_CHARACTER_MATCH:
3285  emit_function = &EmitAtomLetter;
3286  break;
3287  default:
3288  break;
3289  }
3290  if (emit_function != NULL) {
3291  bool bound_checked = emit_function(isolate,
3292  compiler,
3293  quarks[j],
3294  backtrack,
3295  cp_offset + j,
3296  *checked_up_to < cp_offset + j,
3297  preloaded);
3298  if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
3299  }
3300  }
3301  } else {
3302  ASSERT_EQ(TextElement::CHAR_CLASS, elm.text_type());
3303  if (pass == CHARACTER_CLASS_MATCH) {
3304  if (first_element_checked && i == 0) continue;
3305  if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
3306  RegExpCharacterClass* cc = elm.char_class();
3307  EmitCharClass(assembler,
3308  cc,
3309  ascii,
3310  backtrack,
3311  cp_offset,
3312  *checked_up_to < cp_offset,
3313  preloaded,
3314  zone());
3315  UpdateBoundsCheck(cp_offset, checked_up_to);
3316  }
3317  }
3318  }
3319 }
3320 
3321 
3322 int TextNode::Length() {
3323  TextElement elm = elms_->last();
3324  ASSERT(elm.cp_offset() >= 0);
3325  return elm.cp_offset() + elm.length();
3326 }
3327 
3328 
3329 bool TextNode::SkipPass(int int_pass, bool ignore_case) {
3330  TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
3331  if (ignore_case) {
3332  return pass == SIMPLE_CHARACTER_MATCH;
3333  } else {
3334  return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
3335  }
3336 }
3337 
3338 
3339 // This generates the code to match a text node. A text node can contain
3340 // straight character sequences (possibly to be matched in a case-independent
3341 // way) and character classes. For efficiency we do not do this in a single
3342 // pass from left to right. Instead we pass over the text node several times,
3343 // emitting code for some character positions every time. See the comment on
3344 // TextEmitPass for details.
3345 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3346  LimitResult limit_result = LimitVersions(compiler, trace);
3347  if (limit_result == DONE) return;
3348  ASSERT(limit_result == CONTINUE);
3349 
3350  if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
3351  compiler->SetRegExpTooBig();
3352  return;
3353  }
3354 
3355  if (compiler->ascii()) {
3356  int dummy = 0;
3357  TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
3358  }
3359 
3360  bool first_elt_done = false;
3361  int bound_checked_to = trace->cp_offset() - 1;
3362  bound_checked_to += trace->bound_checked_up_to();
3363 
3364  // If a character is preloaded into the current character register then
3365  // check that now.
3366  if (trace->characters_preloaded() == 1) {
3367  for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3368  if (!SkipPass(pass, compiler->ignore_case())) {
3369  TextEmitPass(compiler,
3370  static_cast<TextEmitPassType>(pass),
3371  true,
3372  trace,
3373  false,
3374  &bound_checked_to);
3375  }
3376  }
3377  first_elt_done = true;
3378  }
3379 
3380  for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3381  if (!SkipPass(pass, compiler->ignore_case())) {
3382  TextEmitPass(compiler,
3383  static_cast<TextEmitPassType>(pass),
3384  false,
3385  trace,
3386  first_elt_done,
3387  &bound_checked_to);
3388  }
3389  }
3390 
3391  Trace successor_trace(*trace);
3392  successor_trace.set_at_start(false);
3393  successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
3394  RecursionCheck rc(compiler);
3395  on_success()->Emit(compiler, &successor_trace);
3396 }
3397 
3398 
3400  characters_preloaded_ = 0;
3401 }
3402 
3403 
3405  ASSERT(by > 0);
3406  // We don't have an instruction for shifting the current character register
3407  // down or for using a shifted value for anything so lets just forget that
3408  // we preloaded any characters into it.
3409  characters_preloaded_ = 0;
3410  // Adjust the offsets of the quick check performed information. This
3411  // information is used to find out what we already determined about the
3412  // characters by means of mask and compare.
3413  quick_check_performed_.Advance(by, compiler->ascii());
3414  cp_offset_ += by;
3415  if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
3416  compiler->SetRegExpTooBig();
3417  cp_offset_ = 0;
3418  }
3419  bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
3420 }
3421 
3422 
3423 void TextNode::MakeCaseIndependent(bool is_ascii) {
3424  int element_count = elms_->length();
3425  for (int i = 0; i < element_count; i++) {
3426  TextElement elm = elms_->at(i);
3427  if (elm.text_type() == TextElement::CHAR_CLASS) {
3428  RegExpCharacterClass* cc = elm.char_class();
3429  // None of the standard character classes is different in the case
3430  // independent case and it slows us down if we don't know that.
3431  if (cc->is_standard(zone())) continue;
3432  ZoneList<CharacterRange>* ranges = cc->ranges(zone());
3433  int range_count = ranges->length();
3434  for (int j = 0; j < range_count; j++) {
3435  ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());
3436  }
3437  }
3438  }
3439 }
3440 
3441 
3443  TextElement elm = elms_->at(elms_->length() - 1);
3444  return elm.cp_offset() + elm.length();
3445 }
3446 
3447 
3449  RegExpCompiler* compiler) {
3450  if (elms_->length() != 1) return NULL;
3451  TextElement elm = elms_->at(0);
3452  if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
3453  RegExpCharacterClass* node = elm.char_class();
3454  ZoneList<CharacterRange>* ranges = node->ranges(zone());
3455  if (!CharacterRange::IsCanonical(ranges)) {
3457  }
3458  if (node->is_negated()) {
3459  return ranges->length() == 0 ? on_success() : NULL;
3460  }
3461  if (ranges->length() != 1) return NULL;
3462  uint32_t max_char;
3463  if (compiler->ascii()) {
3464  max_char = String::kMaxOneByteCharCode;
3465  } else {
3466  max_char = String::kMaxUtf16CodeUnit;
3467  }
3468  return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;
3469 }
3470 
3471 
3472 // Finds the fixed match length of a sequence of nodes that goes from
3473 // this alternative and back to this choice node. If there are variable
3474 // length nodes or other complications in the way then return a sentinel
3475 // value indicating that a greedy loop cannot be constructed.
3477  GuardedAlternative* alternative) {
3478  int length = 0;
3479  RegExpNode* node = alternative->node();
3480  // Later we will generate code for all these text nodes using recursion
3481  // so we have to limit the max number.
3482  int recursion_depth = 0;
3483  while (node != this) {
3484  if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
3485  return kNodeIsTooComplexForGreedyLoops;
3486  }
3487  int node_length = node->GreedyLoopTextLength();
3488  if (node_length == kNodeIsTooComplexForGreedyLoops) {
3489  return kNodeIsTooComplexForGreedyLoops;
3490  }
3491  length += node_length;
3492  SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
3493  node = seq_node->on_success();
3494  }
3495  return length;
3496 }
3497 
3498 
3500  ASSERT_EQ(loop_node_, NULL);
3501  AddAlternative(alt);
3502  loop_node_ = alt.node();
3503 }
3504 
3505 
3507  ASSERT_EQ(continue_node_, NULL);
3508  AddAlternative(alt);
3509  continue_node_ = alt.node();
3510 }
3511 
3512 
3513 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3514  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3515  if (trace->stop_node() == this) {
3516  int text_length =
3517  GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
3518  ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
3519  // Update the counter-based backtracking info on the stack. This is an
3520  // optimization for greedy loops (see below).
3521  ASSERT(trace->cp_offset() == text_length);
3522  macro_assembler->AdvanceCurrentPosition(text_length);
3523  macro_assembler->GoTo(trace->loop_label());
3524  return;
3525  }
3526  ASSERT(trace->stop_node() == NULL);
3527  if (!trace->is_trivial()) {
3528  trace->Flush(compiler, this);
3529  return;
3530  }
3531  ChoiceNode::Emit(compiler, trace);
3532 }
3533 
3534 
3535 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
3536  int eats_at_least) {
3537  int preload_characters = Min(4, eats_at_least);
3538  if (compiler->macro_assembler()->CanReadUnaligned()) {
3539  bool ascii = compiler->ascii();
3540  if (ascii) {
3541  if (preload_characters > 4) preload_characters = 4;
3542  // We can't preload 3 characters because there is no machine instruction
3543  // to do that. We can't just load 4 because we could be reading
3544  // beyond the end of the string, which could cause a memory fault.
3545  if (preload_characters == 3) preload_characters = 2;
3546  } else {
3547  if (preload_characters > 2) preload_characters = 2;
3548  }
3549  } else {
3550  if (preload_characters > 1) preload_characters = 1;
3551  }
3552  return preload_characters;
3553 }
3554 
3555 
3556 // This class is used when generating the alternatives in a choice node. It
3557 // records the way the alternative is being code generated.
3559  public:
3561  : possible_success(),
3562  expects_preload(false),
3563  after(),
3564  quick_check_details() { }
3567  Label after;
3569 };
3570 
3571 
3572 // Creates a list of AlternativeGenerations. If the list has a reasonable
3573 // size then it is on the stack, otherwise the excess is on the heap.
3575  public:
3577  : alt_gens_(count, zone) {
3578  for (int i = 0; i < count && i < kAFew; i++) {
3579  alt_gens_.Add(a_few_alt_gens_ + i, zone);
3580  }
3581  for (int i = kAFew; i < count; i++) {
3582  alt_gens_.Add(new AlternativeGeneration(), zone);
3583  }
3584  }
3586  for (int i = kAFew; i < alt_gens_.length(); i++) {
3587  delete alt_gens_[i];
3588  alt_gens_[i] = NULL;
3589  }
3590  }
3591 
3593  return alt_gens_[i];
3594  }
3595 
3596  private:
3597  static const int kAFew = 10;
3599  AlternativeGeneration a_few_alt_gens_[kAFew];
3600 };
3601 
3602 
3603 // The '2' variant is has inclusive from and exclusive to.
3604 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
3605 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.
3606 static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
3607  0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
3608  0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
3609  0xFEFF, 0xFF00, 0x10000 };
3610 static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
3611 
3612 static const int kWordRanges[] = {
3613  '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
3614 static const int kWordRangeCount = ARRAY_SIZE(kWordRanges);
3615 static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };
3616 static const int kDigitRangeCount = ARRAY_SIZE(kDigitRanges);
3617 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };
3618 static const int kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);
3619 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
3620  0x2028, 0x202A, 0x10000 };
3621 static const int kLineTerminatorRangeCount = ARRAY_SIZE(kLineTerminatorRanges);
3622 
3623 
3624 void BoyerMoorePositionInfo::Set(int character) {
3625  SetInterval(Interval(character, character));
3626 }
3627 
3628 
3630  s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
3631  w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
3632  d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
3633  surrogate_ =
3634  AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
3635  if (interval.to() - interval.from() >= kMapSize - 1) {
3636  if (map_count_ != kMapSize) {
3637  map_count_ = kMapSize;
3638  for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
3639  }
3640  return;
3641  }
3642  for (int i = interval.from(); i <= interval.to(); i++) {
3643  int mod_character = (i & kMask);
3644  if (!map_->at(mod_character)) {
3645  map_count_++;
3646  map_->at(mod_character) = true;
3647  }
3648  if (map_count_ == kMapSize) return;
3649  }
3650 }
3651 
3652 
3654  s_ = w_ = d_ = kLatticeUnknown;
3655  if (map_count_ != kMapSize) {
3656  map_count_ = kMapSize;
3657  for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
3658  }
3659 }
3660 
3661 
3663  int length, RegExpCompiler* compiler, Zone* zone)
3664  : length_(length),
3665  compiler_(compiler) {
3666  if (compiler->ascii()) {
3667  max_char_ = String::kMaxOneByteCharCode;
3668  } else {
3669  max_char_ = String::kMaxUtf16CodeUnit;
3670  }
3671  bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
3672  for (int i = 0; i < length; i++) {
3673  bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);
3674  }
3675 }
3676 
3677 
3678 // Find the longest range of lookahead that has the fewest number of different
3679 // characters that can occur at a given position. Since we are optimizing two
3680 // different parameters at once this is a tradeoff.
3681 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
3682  int biggest_points = 0;
3683  // If more than 32 characters out of 128 can occur it is unlikely that we can
3684  // be lucky enough to step forwards much of the time.
3685  const int kMaxMax = 32;
3686  for (int max_number_of_chars = 4;
3687  max_number_of_chars < kMaxMax;
3688  max_number_of_chars *= 2) {
3689  biggest_points =
3690  FindBestInterval(max_number_of_chars, biggest_points, from, to);
3691  }
3692  if (biggest_points == 0) return false;
3693  return true;
3694 }
3695 
3696 
3697 // Find the highest-points range between 0 and length_ where the character
3698 // information is not too vague. 'Too vague' means that there are more than
3699 // max_number_of_chars that can occur at this position. Calculates the number
3700 // of points as the product of width-of-the-range and
3701 // probability-of-finding-one-of-the-characters, where the probability is
3702 // calculated using the frequency distribution of the sample subject string.
3703 int BoyerMooreLookahead::FindBestInterval(
3704  int max_number_of_chars, int old_biggest_points, int* from, int* to) {
3705  int biggest_points = old_biggest_points;
3706  static const int kSize = RegExpMacroAssembler::kTableSize;
3707  for (int i = 0; i < length_; ) {
3708  while (i < length_ && Count(i) > max_number_of_chars) i++;
3709  if (i == length_) break;
3710  int remembered_from = i;
3711  bool union_map[kSize];
3712  for (int j = 0; j < kSize; j++) union_map[j] = false;
3713  while (i < length_ && Count(i) <= max_number_of_chars) {
3714  BoyerMoorePositionInfo* map = bitmaps_->at(i);
3715  for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j);
3716  i++;
3717  }
3718  int frequency = 0;
3719  for (int j = 0; j < kSize; j++) {
3720  if (union_map[j]) {
3721  // Add 1 to the frequency to give a small per-character boost for
3722  // the cases where our sampling is not good enough and many
3723  // characters have a frequency of zero. This means the frequency
3724  // can theoretically be up to 2*kSize though we treat it mostly as
3725  // a fraction of kSize.
3726  frequency += compiler_->frequency_collator()->Frequency(j) + 1;
3727  }
3728  }
3729  // We use the probability of skipping times the distance we are skipping to
3730  // judge the effectiveness of this. Actually we have a cut-off: By
3731  // dividing by 2 we switch off the skipping if the probability of skipping
3732  // is less than 50%. This is because the multibyte mask-and-compare
3733  // skipping in quickcheck is more likely to do well on this case.
3734  bool in_quickcheck_range = ((i - remembered_from < 4) ||
3735  (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));
3736  // Called 'probability' but it is only a rough estimate and can actually
3737  // be outside the 0-kSize range.
3738  int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
3739  int points = (i - remembered_from) * probability;
3740  if (points > biggest_points) {
3741  *from = remembered_from;
3742  *to = i - 1;
3743  biggest_points = points;
3744  }
3745  }
3746  return biggest_points;
3747 }
3748 
3749 
3750 // Take all the characters that will not prevent a successful match if they
3751 // occur in the subject string in the range between min_lookahead and
3752 // max_lookahead (inclusive) measured from the current position. If the
3753 // character at max_lookahead offset is not one of these characters, then we
3754 // can safely skip forwards by the number of characters in the range.
3755 int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
3756  int max_lookahead,
3757  Handle<ByteArray> boolean_skip_table) {
3758  const int kSize = RegExpMacroAssembler::kTableSize;
3759 
3760  const int kSkipArrayEntry = 0;
3761  const int kDontSkipArrayEntry = 1;
3762 
3763  for (int i = 0; i < kSize; i++) {
3764  boolean_skip_table->set(i, kSkipArrayEntry);
3765  }
3766  int skip = max_lookahead + 1 - min_lookahead;
3767 
3768  for (int i = max_lookahead; i >= min_lookahead; i--) {
3769  BoyerMoorePositionInfo* map = bitmaps_->at(i);
3770  for (int j = 0; j < kSize; j++) {
3771  if (map->at(j)) {
3772  boolean_skip_table->set(j, kDontSkipArrayEntry);
3773  }
3774  }
3775  }
3776 
3777  return skip;
3778 }
3779 
3780 
3781 // See comment above on the implementation of GetSkipTable.
3783  const int kSize = RegExpMacroAssembler::kTableSize;
3784 
3785  int min_lookahead = 0;
3786  int max_lookahead = 0;
3787 
3788  if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;
3789 
3790  bool found_single_character = false;
3791  int single_character = 0;
3792  for (int i = max_lookahead; i >= min_lookahead; i--) {
3793  BoyerMoorePositionInfo* map = bitmaps_->at(i);
3794  if (map->map_count() > 1 ||
3795  (found_single_character && map->map_count() != 0)) {
3796  found_single_character = false;
3797  break;
3798  }
3799  for (int j = 0; j < kSize; j++) {
3800  if (map->at(j)) {
3801  found_single_character = true;
3802  single_character = j;
3803  break;
3804  }
3805  }
3806  }
3807 
3808  int lookahead_width = max_lookahead + 1 - min_lookahead;
3809 
3810  if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
3811  // The mask-compare can probably handle this better.
3812  return false;
3813  }
3814 
3815  if (found_single_character) {
3816  Label cont, again;
3817  masm->Bind(&again);
3818  masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3819  if (max_char_ > kSize) {
3820  masm->CheckCharacterAfterAnd(single_character,
3822  &cont);
3823  } else {
3824  masm->CheckCharacter(single_character, &cont);
3825  }
3826  masm->AdvanceCurrentPosition(lookahead_width);
3827  masm->GoTo(&again);
3828  masm->Bind(&cont);
3829  return true;
3830  }
3831 
3832  Factory* factory = masm->zone()->isolate()->factory();
3833  Handle<ByteArray> boolean_skip_table = factory->NewByteArray(kSize, TENURED);
3834  int skip_distance = GetSkipTable(
3835  min_lookahead, max_lookahead, boolean_skip_table);
3836  ASSERT(skip_distance != 0);
3837 
3838  Label cont, again;
3839  masm->Bind(&again);
3840  masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3841  masm->CheckBitInTable(boolean_skip_table, &cont);
3842  masm->AdvanceCurrentPosition(skip_distance);
3843  masm->GoTo(&again);
3844  masm->Bind(&cont);
3845 
3846  return true;
3847 }
3848 
3849 
3850 /* Code generation for choice nodes.
3851  *
3852  * We generate quick checks that do a mask and compare to eliminate a
3853  * choice. If the quick check succeeds then it jumps to the continuation to
3854  * do slow checks and check subsequent nodes. If it fails (the common case)
3855  * it falls through to the next choice.
3856  *
3857  * Here is the desired flow graph. Nodes directly below each other imply
3858  * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
3859  * 3 doesn't have a quick check so we have to call the slow check.
3860  * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
3861  * regexp continuation is generated directly after the Sn node, up to the
3862  * next GoTo if we decide to reuse some already generated code. Some
3863  * nodes expect preload_characters to be preloaded into the current
3864  * character register. R nodes do this preloading. Vertices are marked
3865  * F for failures and S for success (possible success in the case of quick
3866  * nodes). L, V, < and > are used as arrow heads.
3867  *
3868  * ----------> R
3869  * |
3870  * V
3871  * Q1 -----> S1
3872  * | S /
3873  * F| /
3874  * | F/
3875  * | /
3876  * | R
3877  * | /
3878  * V L
3879  * Q2 -----> S2
3880  * | S /
3881  * F| /
3882  * | F/
3883  * | /
3884  * | R
3885  * | /
3886  * V L
3887  * S3
3888  * |
3889  * F|
3890  * |
3891  * R
3892  * |
3893  * backtrack V
3894  * <----------Q4
3895  * \ F |
3896  * \ |S
3897  * \ F V
3898  * \-----S4
3899  *
3900  * For greedy loops we reverse our expectation and expect to match rather
3901  * than fail. Therefore we want the loop code to look like this (U is the
3902  * unwind code that steps back in the greedy loop). The following alternatives
3903  * look the same as above.
3904  * _____
3905  * / \
3906  * V |
3907  * ----------> S1 |
3908  * /| |
3909  * / |S |
3910  * F/ \_____/
3911  * /
3912  * |<-----------
3913  * | \
3914  * V \
3915  * Q2 ---> S2 \
3916  * | S / |
3917  * F| / |
3918  * | F/ |
3919  * | / |
3920  * | R |
3921  * | / |
3922  * F VL |
3923  * <------U |
3924  * back |S |
3925  * \______________/
3926  */
3927 
3928 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3929  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3930  int choice_count = alternatives_->length();
3931 #ifdef DEBUG
3932  for (int i = 0; i < choice_count - 1; i++) {
3933  GuardedAlternative alternative = alternatives_->at(i);
3934  ZoneList<Guard*>* guards = alternative.guards();
3935  int guard_count = (guards == NULL) ? 0 : guards->length();
3936  for (int j = 0; j < guard_count; j++) {
3937  ASSERT(!trace->mentions_reg(guards->at(j)->reg()));
3938  }
3939  }
3940 #endif
3941 
3942  LimitResult limit_result = LimitVersions(compiler, trace);
3943  if (limit_result == DONE) return;
3944  ASSERT(limit_result == CONTINUE);
3945 
3946  int new_flush_budget = trace->flush_budget() / choice_count;
3947  if (trace->flush_budget() == 0 && trace->actions() != NULL) {
3948  trace->Flush(compiler, this);
3949  return;
3950  }
3951 
3952  RecursionCheck rc(compiler);
3953 
3954  Trace* current_trace = trace;
3955 
3956  int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
3957  bool greedy_loop = false;
3958  Label greedy_loop_label;
3959  Trace counter_backtrack_trace;
3960  counter_backtrack_trace.set_backtrack(&greedy_loop_label);
3961  if (not_at_start()) counter_backtrack_trace.set_at_start(false);
3962 
3963  if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
3964  // Here we have special handling for greedy loops containing only text nodes
3965  // and other simple nodes. These are handled by pushing the current
3966  // position on the stack and then incrementing the current position each
3967  // time around the switch. On backtrack we decrement the current position
3968  // and check it against the pushed value. This avoids pushing backtrack
3969  // information for each iteration of the loop, which could take up a lot of
3970  // space.
3971  greedy_loop = true;
3972  ASSERT(trace->stop_node() == NULL);
3973  macro_assembler->PushCurrentPosition();
3974  current_trace = &counter_backtrack_trace;
3975  Label greedy_match_failed;
3976  Trace greedy_match_trace;
3977  if (not_at_start()) greedy_match_trace.set_at_start(false);
3978  greedy_match_trace.set_backtrack(&greedy_match_failed);
3979  Label loop_label;
3980  macro_assembler->Bind(&loop_label);
3981  greedy_match_trace.set_stop_node(this);
3982  greedy_match_trace.set_loop_label(&loop_label);
3983  alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
3984  macro_assembler->Bind(&greedy_match_failed);
3985  }
3986 
3987  Label second_choice; // For use in greedy matches.
3988  macro_assembler->Bind(&second_choice);
3989 
3990  int first_normal_choice = greedy_loop ? 1 : 0;
3991 
3992  bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;
3993  const int kEatsAtLeastNotYetInitialized = -1;
3994  int eats_at_least = kEatsAtLeastNotYetInitialized;
3995 
3996  bool skip_was_emitted = false;
3997 
3998  if (!greedy_loop && choice_count == 2) {
3999  GuardedAlternative alt1 = alternatives_->at(1);
4000  if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
4001  RegExpNode* eats_anything_node = alt1.node();
4002  if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==
4003  this) {
4004  // At this point we know that we are at a non-greedy loop that will eat
4005  // any character one at a time. Any non-anchored regexp has such a
4006  // loop prepended to it in order to find where it starts. We look for
4007  // a pattern of the form ...abc... where we can look 6 characters ahead
4008  // and step forwards 3 if the character is not one of abc. Abc need
4009  // not be atoms, they can be any reasonably limited character class or
4010  // small alternation.
4011  ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.
4012  BoyerMooreLookahead* lookahead = bm_info(not_at_start);
4013  if (lookahead == NULL) {
4014  eats_at_least = Min(kMaxLookaheadForBoyerMoore,
4017  not_at_start));
4018  if (eats_at_least >= 1) {
4019  BoyerMooreLookahead* bm =
4020  new(zone()) BoyerMooreLookahead(eats_at_least,
4021  compiler,
4022  zone());
4023  GuardedAlternative alt0 = alternatives_->at(0);
4024  alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
4025  skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
4026  }
4027  } else {
4028  skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
4029  }
4030  }
4031  }
4032  }
4033 
4034  if (eats_at_least == kEatsAtLeastNotYetInitialized) {
4035  // Save some time by looking at most one machine word ahead.
4036  eats_at_least =
4037  EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);
4038  }
4039  int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
4040 
4041  bool preload_is_current = !skip_was_emitted &&
4042  (current_trace->characters_preloaded() == preload_characters);
4043  bool preload_has_checked_bounds = preload_is_current;
4044 
4045  AlternativeGenerationList alt_gens(choice_count, zone());
4046 
4047  // For now we just call all choices one after the other. The idea ultimately
4048  // is to use the Dispatch table to try only the relevant ones.
4049  for (int i = first_normal_choice; i < choice_count; i++) {
4050  GuardedAlternative alternative = alternatives_->at(i);
4051  AlternativeGeneration* alt_gen = alt_gens.at(i);
4052  alt_gen->quick_check_details.set_characters(preload_characters);
4053  ZoneList<Guard*>* guards = alternative.guards();
4054  int guard_count = (guards == NULL) ? 0 : guards->length();
4055  Trace new_trace(*current_trace);
4056  new_trace.set_characters_preloaded(preload_is_current ?
4057  preload_characters :
4058  0);
4059  if (preload_has_checked_bounds) {
4060  new_trace.set_bound_checked_up_to(preload_characters);
4061  }
4062  new_trace.quick_check_performed()->Clear();
4063  if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
4064  alt_gen->expects_preload = preload_is_current;
4065  bool generate_full_check_inline = false;
4066  if (FLAG_regexp_optimization &&
4068  alternative.node()->EmitQuickCheck(compiler,
4069  &new_trace,
4070  preload_has_checked_bounds,
4071  &alt_gen->possible_success,
4072  &alt_gen->quick_check_details,
4073  i < choice_count - 1)) {
4074  // Quick check was generated for this choice.
4075  preload_is_current = true;
4076  preload_has_checked_bounds = true;
4077  // On the last choice in the ChoiceNode we generated the quick
4078  // check to fall through on possible success. So now we need to
4079  // generate the full check inline.
4080  if (i == choice_count - 1) {
4081  macro_assembler->Bind(&alt_gen->possible_success);
4082  new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
4083  new_trace.set_characters_preloaded(preload_characters);
4084  new_trace.set_bound_checked_up_to(preload_characters);
4085  generate_full_check_inline = true;
4086  }
4087  } else if (alt_gen->quick_check_details.cannot_match()) {
4088  if (i == choice_count - 1 && !greedy_loop) {
4089  macro_assembler->GoTo(trace->backtrack());
4090  }
4091  continue;
4092  } else {
4093  // No quick check was generated. Put the full code here.
4094  // If this is not the first choice then there could be slow checks from
4095  // previous cases that go here when they fail. There's no reason to
4096  // insist that they preload characters since the slow check we are about
4097  // to generate probably can't use it.
4098  if (i != first_normal_choice) {
4099  alt_gen->expects_preload = false;
4100  new_trace.InvalidateCurrentCharacter();
4101  }
4102  if (i < choice_count - 1) {
4103  new_trace.set_backtrack(&alt_gen->after);
4104  }
4105  generate_full_check_inline = true;
4106  }
4107  if (generate_full_check_inline) {
4108  if (new_trace.actions() != NULL) {
4109  new_trace.set_flush_budget(new_flush_budget);
4110  }
4111  for (int j = 0; j < guard_count; j++) {
4112  GenerateGuard(macro_assembler, guards->at(j), &new_trace);
4113  }
4114  alternative.node()->Emit(compiler, &new_trace);
4115  preload_is_current = false;
4116  }
4117  macro_assembler->Bind(&alt_gen->after);
4118  }
4119  if (greedy_loop) {
4120  macro_assembler->Bind(&greedy_loop_label);
4121  // If we have unwound to the bottom then backtrack.
4122  macro_assembler->CheckGreedyLoop(trace->backtrack());
4123  // Otherwise try the second priority at an earlier position.
4124  macro_assembler->AdvanceCurrentPosition(-text_length);
4125  macro_assembler->GoTo(&second_choice);
4126  }
4127 
4128  // At this point we need to generate slow checks for the alternatives where
4129  // the quick check was inlined. We can recognize these because the associated
4130  // label was bound.
4131  for (int i = first_normal_choice; i < choice_count - 1; i++) {
4132  AlternativeGeneration* alt_gen = alt_gens.at(i);
4133  Trace new_trace(*current_trace);
4134  // If there are actions to be flushed we have to limit how many times
4135  // they are flushed. Take the budget of the parent trace and distribute
4136  // it fairly amongst the children.
4137  if (new_trace.actions() != NULL) {
4138  new_trace.set_flush_budget(new_flush_budget);
4139  }
4140  EmitOutOfLineContinuation(compiler,
4141  &new_trace,
4142  alternatives_->at(i),
4143  alt_gen,
4144  preload_characters,
4145  alt_gens.at(i + 1)->expects_preload);
4146  }
4147 }
4148 
4149 
4150 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
4151  Trace* trace,
4152  GuardedAlternative alternative,
4153  AlternativeGeneration* alt_gen,
4154  int preload_characters,
4155  bool next_expects_preload) {
4156  if (!alt_gen->possible_success.is_linked()) return;
4157 
4158  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
4159  macro_assembler->Bind(&alt_gen->possible_success);
4160  Trace out_of_line_trace(*trace);
4161  out_of_line_trace.set_characters_preloaded(preload_characters);
4162  out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
4163  if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
4164  ZoneList<Guard*>* guards = alternative.guards();
4165  int guard_count = (guards == NULL) ? 0 : guards->length();
4166  if (next_expects_preload) {
4167  Label reload_current_char;
4168  out_of_line_trace.set_backtrack(&reload_current_char);
4169  for (int j = 0; j < guard_count; j++) {
4170  GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
4171  }
4172  alternative.node()->Emit(compiler, &out_of_line_trace);
4173  macro_assembler->Bind(&reload_current_char);
4174  // Reload the current character, since the next quick check expects that.
4175  // We don't need to check bounds here because we only get into this
4176  // code through a quick check which already did the checked load.
4177  macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
4178  NULL,
4179  false,
4180  preload_characters);
4181  macro_assembler->GoTo(&(alt_gen->after));
4182  } else {
4183  out_of_line_trace.set_backtrack(&(alt_gen->after));
4184  for (int j = 0; j < guard_count; j++) {
4185  GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
4186  }
4187  alternative.node()->Emit(compiler, &out_of_line_trace);
4188  }
4189 }
4190 
4191 
4192 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
4193  RegExpMacroAssembler* assembler = compiler->macro_assembler();
4194  LimitResult limit_result = LimitVersions(compiler, trace);
4195  if (limit_result == DONE) return;
4196  ASSERT(limit_result == CONTINUE);
4197 
4198  RecursionCheck rc(compiler);
4199 
4200  switch (action_type_) {
4201  case STORE_POSITION: {
4203  new_capture(data_.u_position_register.reg,
4204  data_.u_position_register.is_capture,
4205  trace);
4206  Trace new_trace = *trace;
4207  new_trace.add_action(&new_capture);
4208  on_success()->Emit(compiler, &new_trace);
4209  break;
4210  }
4211  case INCREMENT_REGISTER: {
4213  new_increment(data_.u_increment_register.reg);
4214  Trace new_trace = *trace;
4215  new_trace.add_action(&new_increment);
4216  on_success()->Emit(compiler, &new_trace);
4217  break;
4218  }
4219  case SET_REGISTER: {
4221  new_set(data_.u_store_register.reg, data_.u_store_register.value);
4222  Trace new_trace = *trace;
4223  new_trace.add_action(&new_set);
4224  on_success()->Emit(compiler, &new_trace);
4225  break;
4226  }
4227  case CLEAR_CAPTURES: {
4229  new_capture(Interval(data_.u_clear_captures.range_from,
4230  data_.u_clear_captures.range_to));
4231  Trace new_trace = *trace;
4232  new_trace.add_action(&new_capture);
4233  on_success()->Emit(compiler, &new_trace);
4234  break;
4235  }
4236  case BEGIN_SUBMATCH:
4237  if (!trace->is_trivial()) {
4238  trace->Flush(compiler, this);
4239  } else {
4240  assembler->WriteCurrentPositionToRegister(
4241  data_.u_submatch.current_position_register, 0);
4242  assembler->WriteStackPointerToRegister(
4243  data_.u_submatch.stack_pointer_register);
4244  on_success()->Emit(compiler, trace);
4245  }
4246  break;
4247  case EMPTY_MATCH_CHECK: {
4248  int start_pos_reg = data_.u_empty_match_check.start_register;
4249  int stored_pos = 0;
4250  int rep_reg = data_.u_empty_match_check.repetition_register;
4251  bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
4252  bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
4253  if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
4254  // If we know we haven't advanced and there is no minimum we
4255  // can just backtrack immediately.
4256  assembler->GoTo(trace->backtrack());
4257  } else if (know_dist && stored_pos < trace->cp_offset()) {
4258  // If we know we've advanced we can generate the continuation
4259  // immediately.
4260  on_success()->Emit(compiler, trace);
4261  } else if (!trace->is_trivial()) {
4262  trace->Flush(compiler, this);
4263  } else {
4264  Label skip_empty_check;
4265  // If we have a minimum number of repetitions we check the current
4266  // number first and skip the empty check if it's not enough.
4267  if (has_minimum) {
4268  int limit = data_.u_empty_match_check.repetition_limit;
4269  assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
4270  }
4271  // If the match is empty we bail out, otherwise we fall through
4272  // to the on-success continuation.
4273  assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
4274  trace->backtrack());
4275  assembler->Bind(&skip_empty_check);
4276  on_success()->Emit(compiler, trace);
4277  }
4278  break;
4279  }
4281  if (!trace->is_trivial()) {
4282  trace->Flush(compiler, this);
4283  return;
4284  }
4286  data_.u_submatch.current_position_register);
4287  assembler->ReadStackPointerFromRegister(
4288  data_.u_submatch.stack_pointer_register);
4289  int clear_register_count = data_.u_submatch.clear_register_count;
4290  if (clear_register_count == 0) {
4291  on_success()->Emit(compiler, trace);
4292  return;
4293  }
4294  int clear_registers_from = data_.u_submatch.clear_register_from;
4295  Label clear_registers_backtrack;
4296  Trace new_trace = *trace;
4297  new_trace.set_backtrack(&clear_registers_backtrack);
4298  on_success()->Emit(compiler, &new_trace);
4299 
4300  assembler->Bind(&clear_registers_backtrack);
4301  int clear_registers_to = clear_registers_from + clear_register_count - 1;
4302  assembler->ClearRegisters(clear_registers_from, clear_registers_to);
4303 
4304  ASSERT(trace->backtrack() == NULL);
4305  assembler->Backtrack();
4306  return;
4307  }
4308  default:
4309  UNREACHABLE();
4310  }
4311 }
4312 
4313 
4315  RegExpMacroAssembler* assembler = compiler->macro_assembler();
4316  if (!trace->is_trivial()) {
4317  trace->Flush(compiler, this);
4318  return;
4319  }
4320 
4321  LimitResult limit_result = LimitVersions(compiler, trace);
4322  if (limit_result == DONE) return;
4323  ASSERT(limit_result == CONTINUE);
4324 
4325  RecursionCheck rc(compiler);
4326 
4327  ASSERT_EQ(start_reg_ + 1, end_reg_);
4328  if (compiler->ignore_case()) {
4329  assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
4330  trace->backtrack());
4331  } else {
4332  assembler->CheckNotBackReference(start_reg_, trace->backtrack());
4333  }
4334  on_success()->Emit(compiler, trace);
4335 }
4336 
4337 
4338 // -------------------------------------------------------------------
4339 // Dot/dotty output
4340 
4341 
4342 #ifdef DEBUG
4343 
4344 
4345 class DotPrinter: public NodeVisitor {
4346  public:
4347  explicit DotPrinter(bool ignore_case)
4348  : ignore_case_(ignore_case),
4349  stream_(&alloc_) { }
4350  void PrintNode(const char* label, RegExpNode* node);
4351  void Visit(RegExpNode* node);
4352  void PrintAttributes(RegExpNode* from);
4353  StringStream* stream() { return &stream_; }
4354  void PrintOnFailure(RegExpNode* from, RegExpNode* to);
4355 #define DECLARE_VISIT(Type) \
4356  virtual void Visit##Type(Type##Node* that);
4358 #undef DECLARE_VISIT
4359  private:
4360  bool ignore_case_;
4361  HeapStringAllocator alloc_;
4362  StringStream stream_;
4363 };
4364 
4365 
4366 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
4367  stream()->Add("digraph G {\n graph [label=\"");
4368  for (int i = 0; label[i]; i++) {
4369  switch (label[i]) {
4370  case '\\':
4371  stream()->Add("\\\\");
4372  break;
4373  case '"':
4374  stream()->Add("\"");
4375  break;
4376  default:
4377  stream()->Put(label[i]);
4378  break;
4379  }
4380  }
4381  stream()->Add("\"];\n");
4382  Visit(node);
4383  stream()->Add("}\n");
4384  printf("%s", stream()->ToCString().get());
4385 }
4386 
4387 
4388 void DotPrinter::Visit(RegExpNode* node) {
4389  if (node->info()->visited) return;
4390  node->info()->visited = true;
4391  node->Accept(this);
4392 }
4393 
4394 
4395 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
4396  stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);
4397  Visit(on_failure);
4398 }
4399 
4400 
4401 class TableEntryBodyPrinter {
4402  public:
4403  TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice)
4404  : stream_(stream), choice_(choice) { }
4405  void Call(uc16 from, DispatchTable::Entry entry) {
4406  OutSet* out_set = entry.out_set();
4407  for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
4408  if (out_set->Get(i)) {
4409  stream()->Add(" n%p:s%io%i -> n%p;\n",
4410  choice(),
4411  from,
4412  i,
4413  choice()->alternatives()->at(i).node());
4414  }
4415  }
4416  }
4417  private:
4418  StringStream* stream() { return stream_; }
4419  ChoiceNode* choice() { return choice_; }
4420  StringStream* stream_;
4421  ChoiceNode* choice_;
4422 };
4423 
4424 
4425 class TableEntryHeaderPrinter {
4426  public:
4427  explicit TableEntryHeaderPrinter(StringStream* stream)
4428  : first_(true), stream_(stream) { }
4429  void Call(uc16 from, DispatchTable::Entry entry) {
4430  if (first_) {
4431  first_ = false;
4432  } else {
4433  stream()->Add("|");
4434  }
4435  stream()->Add("{\\%k-\\%k|{", from, entry.to());
4436  OutSet* out_set = entry.out_set();
4437  int priority = 0;
4438  for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
4439  if (out_set->Get(i)) {
4440  if (priority > 0) stream()->Add("|");
4441  stream()->Add("<s%io%i> %i", from, i, priority);
4442  priority++;
4443  }
4444  }
4445  stream()->Add("}}");
4446  }
4447 
4448  private:
4449  bool first_;
4450  StringStream* stream() { return stream_; }
4451  StringStream* stream_;
4452 };
4453 
4454 
4455 class AttributePrinter {
4456  public:
4457  explicit AttributePrinter(DotPrinter* out)
4458  : out_(out), first_(true) { }
4459  void PrintSeparator() {
4460  if (first_) {
4461  first_ = false;
4462  } else {
4463  out_->stream()->Add("|");
4464  }
4465  }
4466  void PrintBit(const char* name, bool value) {
4467  if (!value) return;
4468  PrintSeparator();
4469  out_->stream()->Add("{%s}", name);
4470  }
4471  void PrintPositive(const char* name, int value) {
4472  if (value < 0) return;
4473  PrintSeparator();
4474  out_->stream()->Add("{%s|%x}", name, value);
4475  }
4476  private:
4477  DotPrinter* out_;
4478  bool first_;
4479 };
4480 
4481 
4482 void DotPrinter::PrintAttributes(RegExpNode* that) {
4483  stream()->Add(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "
4484  "margin=0.1, fontsize=10, label=\"{",
4485  that);
4486  AttributePrinter printer(this);
4487  NodeInfo* info = that->info();
4488  printer.PrintBit("NI", info->follows_newline_interest);
4489  printer.PrintBit("WI", info->follows_word_interest);
4490  printer.PrintBit("SI", info->follows_start_interest);
4491  Label* label = that->label();
4492  if (label->is_bound())
4493  printer.PrintPositive("@", label->pos());
4494  stream()->Add("}\"];\n");
4495  stream()->Add(" a%p -> n%p [style=dashed, color=grey, "
4496  "arrowhead=none];\n", that, that);
4497 }
4498 
4499 
4500 static const bool kPrintDispatchTable = false;
4501 void DotPrinter::VisitChoice(ChoiceNode* that) {
4502  if (kPrintDispatchTable) {
4503  stream()->Add(" n%p [shape=Mrecord, label=\"", that);
4504  TableEntryHeaderPrinter header_printer(stream());
4505  that->GetTable(ignore_case_)->ForEach(&header_printer);
4506  stream()->Add("\"]\n", that);
4507  PrintAttributes(that);
4508  TableEntryBodyPrinter body_printer(stream(), that);
4509  that->GetTable(ignore_case_)->ForEach(&body_printer);
4510  } else {
4511  stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that);
4512  for (int i = 0; i < that->alternatives()->length(); i++) {
4513  GuardedAlternative alt = that->alternatives()->at(i);
4514  stream()->Add(" n%p -> n%p;\n", that, alt.node());
4515  }
4516  }
4517  for (int i = 0; i < that->alternatives()->length(); i++) {
4518  GuardedAlternative alt = that->alternatives()->at(i);
4519  alt.node()->Accept(this);
4520  }
4521 }
4522 
4523 
4524 void DotPrinter::VisitText(TextNode* that) {
4525  Zone* zone = that->zone();
4526  stream()->Add(" n%p [label=\"", that);
4527  for (int i = 0; i < that->elements()->length(); i++) {
4528  if (i > 0) stream()->Add(" ");
4529  TextElement elm = that->elements()->at(i);
4530  switch (elm.text_type()) {
4531  case TextElement::ATOM: {
4532  stream()->Add("'%w'", elm.atom()->data());
4533  break;
4534  }
4535  case TextElement::CHAR_CLASS: {
4536  RegExpCharacterClass* node = elm.char_class();
4537  stream()->Add("[");
4538  if (node->is_negated())
4539  stream()->Add("^");
4540  for (int j = 0; j < node->ranges(zone)->length(); j++) {
4541  CharacterRange range = node->ranges(zone)->at(j);
4542  stream()->Add("%k-%k", range.from(), range.to());
4543  }
4544  stream()->Add("]");
4545  break;
4546  }
4547  default:
4548  UNREACHABLE();
4549  }
4550  }
4551  stream()->Add("\", shape=box, peripheries=2];\n");
4552  PrintAttributes(that);
4553  stream()->Add(" n%p -> n%p;\n", that, that->on_success());
4554  Visit(that->on_success());
4555 }
4556 
4557 
4558 void DotPrinter::VisitBackReference(BackReferenceNode* that) {
4559  stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
4560  that,
4561  that->start_register(),
4562  that->end_register());
4563  PrintAttributes(that);
4564  stream()->Add(" n%p -> n%p;\n", that, that->on_success());
4565  Visit(that->on_success());
4566 }
4567 
4568 
4569 void DotPrinter::VisitEnd(EndNode* that) {
4570  stream()->Add(" n%p [style=bold, shape=point];\n", that);
4571  PrintAttributes(that);
4572 }
4573 
4574 
4575 void DotPrinter::VisitAssertion(AssertionNode* that) {
4576  stream()->Add(" n%p [", that);
4577  switch (that->assertion_type()) {
4578  case AssertionNode::AT_END:
4579  stream()->Add("label=\"$\", shape=septagon");
4580  break;
4582  stream()->Add("label=\"^\", shape=septagon");
4583  break;
4585  stream()->Add("label=\"\\b\", shape=septagon");
4586  break;
4588  stream()->Add("label=\"\\B\", shape=septagon");
4589  break;
4591  stream()->Add("label=\"(?<=\\n)\", shape=septagon");
4592  break;
4593  }
4594  stream()->Add("];\n");
4595  PrintAttributes(that);
4596  RegExpNode* successor = that->on_success();
4597  stream()->Add(" n%p -> n%p;\n", that, successor);
4598  Visit(successor);
4599 }
4600 
4601 
4602 void DotPrinter::VisitAction(ActionNode* that) {
4603  stream()->Add(" n%p [", that);
4604  switch (that->action_type_) {
4606  stream()->Add("label=\"$%i:=%i\", shape=octagon",
4607  that->data_.u_store_register.reg,
4608  that->data_.u_store_register.value);
4609  break;
4611  stream()->Add("label=\"$%i++\", shape=octagon",
4612  that->data_.u_increment_register.reg);
4613  break;
4615  stream()->Add("label=\"$%i:=$pos\", shape=octagon",
4616  that->data_.u_position_register.reg);
4617  break;
4619  stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
4620  that->data_.u_submatch.current_position_register);
4621  break;
4623  stream()->Add("label=\"escape\", shape=septagon");
4624  break;
4626  stream()->Add("label=\"$%i=$pos?,$%i<%i?\", shape=septagon",
4627  that->data_.u_empty_match_check.start_register,
4628  that->data_.u_empty_match_check.repetition_register,
4629  that->data_.u_empty_match_check.repetition_limit);
4630  break;
4632  stream()->Add("label=\"clear $%i to $%i\", shape=septagon",
4633  that->data_.u_clear_captures.range_from,
4634  that->data_.u_clear_captures.range_to);
4635  break;
4636  }
4637  }
4638  stream()->Add("];\n");
4639  PrintAttributes(that);
4640  RegExpNode* successor = that->on_success();
4641  stream()->Add(" n%p -> n%p;\n", that, successor);
4642  Visit(successor);
4643 }
4644 
4645 
4646 class DispatchTableDumper {
4647  public:
4648  explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }
4649  void Call(uc16 key, DispatchTable::Entry entry);
4650  StringStream* stream() { return stream_; }
4651  private:
4652  StringStream* stream_;
4653 };
4654 
4655 
4656 void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
4657  stream()->Add("[%k-%k]: {", key, entry.to());
4658  OutSet* set = entry.out_set();
4659  bool first = true;
4660  for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
4661  if (set->Get(i)) {
4662  if (first) {
4663  first = false;
4664  } else {
4665  stream()->Add(", ");
4666  }
4667  stream()->Add("%i", i);
4668  }
4669  }
4670  stream()->Add("}\n");
4671 }
4672 
4673 
4674 void DispatchTable::Dump() {
4675  HeapStringAllocator alloc;
4676  StringStream stream(&alloc);
4677  DispatchTableDumper dumper(&stream);
4678  tree()->ForEach(&dumper);
4679  OS::PrintError("%s", stream.ToCString().get());
4680 }
4681 
4682 
4683 void RegExpEngine::DotPrint(const char* label,
4684  RegExpNode* node,
4685  bool ignore_case) {
4686  DotPrinter printer(ignore_case);
4687  printer.PrintNode(label, node);
4688 }
4689 
4690 
4691 #endif // DEBUG
4692 
4693 
4694 // -------------------------------------------------------------------
4695 // Tree to graph conversion
4696 
4697 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
4698  RegExpNode* on_success) {
4699  ZoneList<TextElement>* elms =
4700  new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
4701  elms->Add(TextElement::Atom(this), compiler->zone());
4702  return new(compiler->zone()) TextNode(elms, on_success);
4703 }
4704 
4705 
4706 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
4707  RegExpNode* on_success) {
4708  return new(compiler->zone()) TextNode(elements(), on_success);
4709 }
4710 
4711 
4712 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
4713  const int* special_class,
4714  int length) {
4715  length--; // Remove final 0x10000.
4716  ASSERT(special_class[length] == 0x10000);
4717  ASSERT(ranges->length() != 0);
4718  ASSERT(length != 0);
4719  ASSERT(special_class[0] != 0);
4720  if (ranges->length() != (length >> 1) + 1) {
4721  return false;
4722  }
4723  CharacterRange range = ranges->at(0);
4724  if (range.from() != 0) {
4725  return false;
4726  }
4727  for (int i = 0; i < length; i += 2) {
4728  if (special_class[i] != (range.to() + 1)) {
4729  return false;
4730  }
4731  range = ranges->at((i >> 1) + 1);
4732  if (special_class[i+1] != range.from()) {
4733  return false;
4734  }
4735  }
4736  if (range.to() != 0xffff) {
4737  return false;
4738  }
4739  return true;
4740 }
4741 
4742 
4743 static bool CompareRanges(ZoneList<CharacterRange>* ranges,
4744  const int* special_class,
4745  int length) {
4746  length--; // Remove final 0x10000.
4747  ASSERT(special_class[length] == 0x10000);
4748  if (ranges->length() * 2 != length) {
4749  return false;
4750  }
4751  for (int i = 0; i < length; i += 2) {
4752  CharacterRange range = ranges->at(i >> 1);
4753  if (range.from() != special_class[i] ||
4754  range.to() != special_class[i + 1] - 1) {
4755  return false;
4756  }
4757  }
4758  return true;
4759 }
4760 
4761 
4762 bool RegExpCharacterClass::is_standard(Zone* zone) {
4763  // TODO(lrn): Remove need for this function, by not throwing away information
4764  // along the way.
4765  if (is_negated_) {
4766  return false;
4767  }
4768  if (set_.is_standard()) {
4769  return true;
4770  }
4771  if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4772  set_.set_standard_set_type('s');
4773  return true;
4774  }
4775  if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4776  set_.set_standard_set_type('S');
4777  return true;
4778  }
4779  if (CompareInverseRanges(set_.ranges(zone),
4780  kLineTerminatorRanges,
4781  kLineTerminatorRangeCount)) {
4782  set_.set_standard_set_type('.');
4783  return true;
4784  }
4785  if (CompareRanges(set_.ranges(zone),
4786  kLineTerminatorRanges,
4787  kLineTerminatorRangeCount)) {
4788  set_.set_standard_set_type('n');
4789  return true;
4790  }
4791  if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4792  set_.set_standard_set_type('w');
4793  return true;
4794  }
4795  if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4796  set_.set_standard_set_type('W');
4797  return true;
4798  }
4799  return false;
4800 }
4801 
4802 
4803 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
4804  RegExpNode* on_success) {
4805  return new(compiler->zone()) TextNode(this, on_success);
4806 }
4807 
4808 
4809 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
4810  RegExpNode* on_success) {
4811  ZoneList<RegExpTree*>* alternatives = this->alternatives();
4812  int length = alternatives->length();
4813  ChoiceNode* result =
4814  new(compiler->zone()) ChoiceNode(length, compiler->zone());
4815  for (int i = 0; i < length; i++) {
4816  GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
4817  on_success));
4818  result->AddAlternative(alternative);
4819  }
4820  return result;
4821 }
4822 
4823 
4824 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
4825  RegExpNode* on_success) {
4826  return ToNode(min(),
4827  max(),
4828  is_greedy(),
4829  body(),
4830  compiler,
4831  on_success);
4832 }
4833 
4834 
4835 // Scoped object to keep track of how much we unroll quantifier loops in the
4836 // regexp graph generator.
4838  public:
4839  static const int kMaxExpansionFactor = 6;
4841  : compiler_(compiler),
4842  saved_expansion_factor_(compiler->current_expansion_factor()),
4843  ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
4844  ASSERT(factor > 0);
4845  if (ok_to_expand_) {
4846  if (factor > kMaxExpansionFactor) {
4847  // Avoid integer overflow of the current expansion factor.
4848  ok_to_expand_ = false;
4850  } else {
4851  int new_factor = saved_expansion_factor_ * factor;
4852  ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
4853  compiler->set_current_expansion_factor(new_factor);
4854  }
4855  }
4856  }
4857 
4859  compiler_->set_current_expansion_factor(saved_expansion_factor_);
4860  }
4861 
4862  bool ok_to_expand() { return ok_to_expand_; }
4863 
4864  private:
4865  RegExpCompiler* compiler_;
4866  int saved_expansion_factor_;
4867  bool ok_to_expand_;
4868 
4869  DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
4870 };
4871 
4872 
4873 RegExpNode* RegExpQuantifier::ToNode(int min,
4874  int max,
4875  bool is_greedy,
4876  RegExpTree* body,
4877  RegExpCompiler* compiler,
4878  RegExpNode* on_success,
4879  bool not_at_start) {
4880  // x{f, t} becomes this:
4881  //
4882  // (r++)<-.
4883  // | `
4884  // | (x)
4885  // v ^
4886  // (r=0)-->(?)---/ [if r < t]
4887  // |
4888  // [if r >= f] \----> ...
4889  //
4890 
4891  // 15.10.2.5 RepeatMatcher algorithm.
4892  // The parser has already eliminated the case where max is 0. In the case
4893  // where max_match is zero the parser has removed the quantifier if min was
4894  // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
4895 
4896  // If we know that we cannot match zero length then things are a little
4897  // simpler since we don't need to make the special zero length match check
4898  // from step 2.1. If the min and max are small we can unroll a little in
4899  // this case.
4900  static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
4901  static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
4902  if (max == 0) return on_success; // This can happen due to recursion.
4903  bool body_can_be_empty = (body->min_match() == 0);
4904  int body_start_reg = RegExpCompiler::kNoRegister;
4905  Interval capture_registers = body->CaptureRegisters();
4906  bool needs_capture_clearing = !capture_registers.is_empty();
4907  Zone* zone = compiler->zone();
4908 
4909  if (body_can_be_empty) {
4910  body_start_reg = compiler->AllocateRegister();
4911  } else if (FLAG_regexp_optimization && !needs_capture_clearing) {
4912  // Only unroll if there are no captures and the body can't be
4913  // empty.
4914  {
4915  RegExpExpansionLimiter limiter(
4916  compiler, min + ((max != min) ? 1 : 0));
4917  if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
4918  int new_max = (max == kInfinity) ? max : max - min;
4919  // Recurse once to get the loop or optional matches after the fixed
4920  // ones.
4921  RegExpNode* answer = ToNode(
4922  0, new_max, is_greedy, body, compiler, on_success, true);
4923  // Unroll the forced matches from 0 to min. This can cause chains of
4924  // TextNodes (which the parser does not generate). These should be
4925  // combined if it turns out they hinder good code generation.
4926  for (int i = 0; i < min; i++) {
4927  answer = body->ToNode(compiler, answer);
4928  }
4929  return answer;
4930  }
4931  }
4932  if (max <= kMaxUnrolledMaxMatches && min == 0) {
4933  ASSERT(max > 0); // Due to the 'if' above.
4934  RegExpExpansionLimiter limiter(compiler, max);
4935  if (limiter.ok_to_expand()) {
4936  // Unroll the optional matches up to max.
4937  RegExpNode* answer = on_success;
4938  for (int i = 0; i < max; i++) {
4939  ChoiceNode* alternation = new(zone) ChoiceNode(2, zone);
4940  if (is_greedy) {
4941  alternation->AddAlternative(
4942  GuardedAlternative(body->ToNode(compiler, answer)));
4943  alternation->AddAlternative(GuardedAlternative(on_success));
4944  } else {
4945  alternation->AddAlternative(GuardedAlternative(on_success));
4946  alternation->AddAlternative(
4947  GuardedAlternative(body->ToNode(compiler, answer)));
4948  }
4949  answer = alternation;
4950  if (not_at_start) alternation->set_not_at_start();
4951  }
4952  return answer;
4953  }
4954  }
4955  }
4956  bool has_min = min > 0;
4957  bool has_max = max < RegExpTree::kInfinity;
4958  bool needs_counter = has_min || has_max;
4959  int reg_ctr = needs_counter
4960  ? compiler->AllocateRegister()
4962  LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0,
4963  zone);
4964  if (not_at_start) center->set_not_at_start();
4965  RegExpNode* loop_return = needs_counter
4966  ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
4967  : static_cast<RegExpNode*>(center);
4968  if (body_can_be_empty) {
4969  // If the body can be empty we need to check if it was and then
4970  // backtrack.
4971  loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
4972  reg_ctr,
4973  min,
4974  loop_return);
4975  }
4976  RegExpNode* body_node = body->ToNode(compiler, loop_return);
4977  if (body_can_be_empty) {
4978  // If the body can be empty we need to store the start position
4979  // so we can bail out if it was empty.
4980  body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
4981  }
4982  if (needs_capture_clearing) {
4983  // Before entering the body of this loop we need to clear captures.
4984  body_node = ActionNode::ClearCaptures(capture_registers, body_node);
4985  }
4986  GuardedAlternative body_alt(body_node);
4987  if (has_max) {
4988  Guard* body_guard =
4989  new(zone) Guard(reg_ctr, Guard::LT, max);
4990  body_alt.AddGuard(body_guard, zone);
4991  }
4992  GuardedAlternative rest_alt(on_success);
4993  if (has_min) {
4994  Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
4995  rest_alt.AddGuard(rest_guard, zone);
4996  }
4997  if (is_greedy) {
4998  center->AddLoopAlternative(body_alt);
4999  center->AddContinueAlternative(rest_alt);
5000  } else {
5001  center->AddContinueAlternative(rest_alt);
5002  center->AddLoopAlternative(body_alt);
5003  }
5004  if (needs_counter) {
5005  return ActionNode::SetRegister(reg_ctr, 0, center);
5006  } else {
5007  return center;
5008  }
5009 }
5010 
5011 
5012 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
5013  RegExpNode* on_success) {
5014  NodeInfo info;
5015  Zone* zone = compiler->zone();
5016 
5017  switch (assertion_type()) {
5018  case START_OF_LINE:
5019  return AssertionNode::AfterNewline(on_success);
5020  case START_OF_INPUT:
5021  return AssertionNode::AtStart(on_success);
5022  case BOUNDARY:
5023  return AssertionNode::AtBoundary(on_success);
5024  case NON_BOUNDARY:
5025  return AssertionNode::AtNonBoundary(on_success);
5026  case END_OF_INPUT:
5027  return AssertionNode::AtEnd(on_success);
5028  case END_OF_LINE: {
5029  // Compile $ in multiline regexps as an alternation with a positive
5030  // lookahead in one side and an end-of-input on the other side.
5031  // We need two registers for the lookahead.
5032  int stack_pointer_register = compiler->AllocateRegister();
5033  int position_register = compiler->AllocateRegister();
5034  // The ChoiceNode to distinguish between a newline and end-of-input.
5035  ChoiceNode* result = new(zone) ChoiceNode(2, zone);
5036  // Create a newline atom.
5037  ZoneList<CharacterRange>* newline_ranges =
5038  new(zone) ZoneList<CharacterRange>(3, zone);
5039  CharacterRange::AddClassEscape('n', newline_ranges, zone);
5040  RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n');
5041  TextNode* newline_matcher = new(zone) TextNode(
5042  newline_atom,
5043  ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
5044  position_register,
5045  0, // No captures inside.
5046  -1, // Ignored if no captures.
5047  on_success));
5048  // Create an end-of-input matcher.
5049  RegExpNode* end_of_line = ActionNode::BeginSubmatch(
5050  stack_pointer_register,
5051  position_register,
5052  newline_matcher);
5053  // Add the two alternatives to the ChoiceNode.
5054  GuardedAlternative eol_alternative(end_of_line);
5055  result->AddAlternative(eol_alternative);
5056  GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
5057  result->AddAlternative(end_alternative);
5058  return result;
5059  }
5060  default:
5061  UNREACHABLE();
5062  }
5063  return on_success;
5064 }
5065 
5066 
5067 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
5068  RegExpNode* on_success) {
5069  return new(compiler->zone())
5070  BackReferenceNode(RegExpCapture::StartRegister(index()),
5071  RegExpCapture::EndRegister(index()),
5072  on_success);
5073 }
5074 
5075 
5076 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
5077  RegExpNode* on_success) {
5078  return on_success;
5079 }
5080 
5081 
5082 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
5083  RegExpNode* on_success) {
5084  int stack_pointer_register = compiler->AllocateRegister();
5085  int position_register = compiler->AllocateRegister();
5086 
5087  const int registers_per_capture = 2;
5088  const int register_of_first_capture = 2;
5089  int register_count = capture_count_ * registers_per_capture;
5090  int register_start =
5091  register_of_first_capture + capture_from_ * registers_per_capture;
5092 
5093  RegExpNode* success;
5094  if (is_positive()) {
5095  RegExpNode* node = ActionNode::BeginSubmatch(
5096  stack_pointer_register,
5097  position_register,
5098  body()->ToNode(
5099  compiler,
5100  ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
5101  position_register,
5102  register_count,
5103  register_start,
5104  on_success)));
5105  return node;
5106  } else {
5107  // We use a ChoiceNode for a negative lookahead because it has most of
5108  // the characteristics we need. It has the body of the lookahead as its
5109  // first alternative and the expression after the lookahead of the second
5110  // alternative. If the first alternative succeeds then the
5111  // NegativeSubmatchSuccess will unwind the stack including everything the
5112  // choice node set up and backtrack. If the first alternative fails then
5113  // the second alternative is tried, which is exactly the desired result
5114  // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
5115  // ChoiceNode that knows to ignore the first exit when calculating quick
5116  // checks.
5117  Zone* zone = compiler->zone();
5118 
5119  GuardedAlternative body_alt(
5120  body()->ToNode(
5121  compiler,
5122  success = new(zone) NegativeSubmatchSuccess(stack_pointer_register,
5123  position_register,
5124  register_count,
5125  register_start,
5126  zone)));
5127  ChoiceNode* choice_node =
5128  new(zone) NegativeLookaheadChoiceNode(body_alt,
5129  GuardedAlternative(on_success),
5130  zone);
5131  return ActionNode::BeginSubmatch(stack_pointer_register,
5132  position_register,
5133  choice_node);
5134  }
5135 }
5136 
5137 
5138 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
5139  RegExpNode* on_success) {
5140  return ToNode(body(), index(), compiler, on_success);
5141 }
5142 
5143 
5144 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
5145  int index,
5146  RegExpCompiler* compiler,
5147  RegExpNode* on_success) {
5148  int start_reg = RegExpCapture::StartRegister(index);
5149  int end_reg = RegExpCapture::EndRegister(index);
5150  RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
5151  RegExpNode* body_node = body->ToNode(compiler, store_end);
5152  return ActionNode::StorePosition(start_reg, true, body_node);
5153 }
5154 
5155 
5156 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
5157  RegExpNode* on_success) {
5158  ZoneList<RegExpTree*>* children = nodes();
5159  RegExpNode* current = on_success;
5160  for (int i = children->length() - 1; i >= 0; i--) {
5161  current = children->at(i)->ToNode(compiler, current);
5162  }
5163  return current;
5164 }
5165 
5166 
5167 static void AddClass(const int* elmv,
5168  int elmc,
5169  ZoneList<CharacterRange>* ranges,
5170  Zone* zone) {
5171  elmc--;
5172  ASSERT(elmv[elmc] == 0x10000);
5173  for (int i = 0; i < elmc; i += 2) {
5174  ASSERT(elmv[i] < elmv[i + 1]);
5175  ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone);
5176  }
5177 }
5178 
5179 
5180 static void AddClassNegated(const int *elmv,
5181  int elmc,
5182  ZoneList<CharacterRange>* ranges,
5183  Zone* zone) {
5184  elmc--;
5185  ASSERT(elmv[elmc] == 0x10000);
5186  ASSERT(elmv[0] != 0x0000);
5187  ASSERT(elmv[elmc-1] != String::kMaxUtf16CodeUnit);
5188  uc16 last = 0x0000;
5189  for (int i = 0; i < elmc; i += 2) {
5190  ASSERT(last <= elmv[i] - 1);
5191  ASSERT(elmv[i] < elmv[i + 1]);
5192  ranges->Add(CharacterRange(last, elmv[i] - 1), zone);
5193  last = elmv[i + 1];
5194  }
5195  ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone);
5196 }
5197 
5198 
5200  ZoneList<CharacterRange>* ranges,
5201  Zone* zone) {
5202  switch (type) {
5203  case 's':
5204  AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5205  break;
5206  case 'S':
5207  AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5208  break;
5209  case 'w':
5210  AddClass(kWordRanges, kWordRangeCount, ranges, zone);
5211  break;
5212  case 'W':
5213  AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
5214  break;
5215  case 'd':
5216  AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
5217  break;
5218  case 'D':
5219  AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
5220  break;
5221  case '.':
5222  AddClassNegated(kLineTerminatorRanges,
5223  kLineTerminatorRangeCount,
5224  ranges,
5225  zone);
5226  break;
5227  // This is not a character range as defined by the spec but a
5228  // convenient shorthand for a character class that matches any
5229  // character.
5230  case '*':
5231  ranges->Add(CharacterRange::Everything(), zone);
5232  break;
5233  // This is the set of characters matched by the $ and ^ symbols
5234  // in multiline mode.
5235  case 'n':
5236  AddClass(kLineTerminatorRanges,
5237  kLineTerminatorRangeCount,
5238  ranges,
5239  zone);
5240  break;
5241  default:
5242  UNREACHABLE();
5243  }
5244 }
5245 
5246 
5248  return Vector<const int>(kWordRanges, kWordRangeCount - 1);
5249 }
5250 
5251 
5253  public:
5255  ZoneList<CharacterRange>** excluded,
5256  Zone* zone)
5257  : included_(included),
5258  excluded_(excluded),
5259  zone_(zone) { }
5260  void Call(uc16 from, DispatchTable::Entry entry);
5261 
5262  static const int kInBase = 0;
5263  static const int kInOverlay = 1;
5264 
5265  private:
5266  ZoneList<CharacterRange>** included_;
5267  ZoneList<CharacterRange>** excluded_;
5268  Zone* zone_;
5269 };
5270 
5271 
5273  if (!entry.out_set()->Get(kInBase)) return;
5274  ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
5275  ? included_
5276  : excluded_;
5277  if (*target == NULL) *target = new(zone_) ZoneList<CharacterRange>(2, zone_);
5278  (*target)->Add(CharacterRange(entry.from(), entry.to()), zone_);
5279 }
5280 
5281 
5283  Vector<const int> overlay,
5284  ZoneList<CharacterRange>** included,
5285  ZoneList<CharacterRange>** excluded,
5286  Zone* zone) {
5287  ASSERT_EQ(NULL, *included);
5288  ASSERT_EQ(NULL, *excluded);
5289  DispatchTable table(zone);
5290  for (int i = 0; i < base->length(); i++)
5291  table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone);
5292  for (int i = 0; i < overlay.length(); i += 2) {
5293  table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),
5295  }
5296  CharacterRangeSplitter callback(included, excluded, zone);
5297  table.ForEach(&callback);
5298 }
5299 
5300 
5302  bool is_ascii,
5303  Zone* zone) {
5304  Isolate* isolate = zone->isolate();
5305  uc16 bottom = from();
5306  uc16 top = to();
5307  if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {
5308  if (bottom > String::kMaxOneByteCharCode) return;
5310  }
5312  if (top == bottom) {
5313  // If this is a singleton we just expand the one character.
5314  int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
5315  for (int i = 0; i < length; i++) {
5316  uc32 chr = chars[i];
5317  if (chr != bottom) {
5318  ranges->Add(CharacterRange::Singleton(chars[i]), zone);
5319  }
5320  }
5321  } else {
5322  // If this is a range we expand the characters block by block,
5323  // expanding contiguous subranges (blocks) one at a time.
5324  // The approach is as follows. For a given start character we
5325  // look up the remainder of the block that contains it (represented
5326  // by the end point), for instance we find 'z' if the character
5327  // is 'c'. A block is characterized by the property
5328  // that all characters uncanonicalize in the same way, except that
5329  // each entry in the result is incremented by the distance from the first
5330  // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
5331  // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
5332  // Once we've found the end point we look up its uncanonicalization
5333  // and produce a range for each element. For instance for [c-f]
5334  // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
5335  // add a range if it is not already contained in the input, so [c-f]
5336  // will be skipped but [C-F] will be added. If this range is not
5337  // completely contained in a block we do this for all the blocks
5338  // covered by the range (handling characters that is not in a block
5339  // as a "singleton block").
5341  int pos = bottom;
5342  while (pos <= top) {
5343  int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
5344  uc16 block_end;
5345  if (length == 0) {
5346  block_end = pos;
5347  } else {
5348  ASSERT_EQ(1, length);
5349  block_end = range[0];
5350  }
5351  int end = (block_end > top) ? top : block_end;
5352  length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
5353  for (int i = 0; i < length; i++) {
5354  uc32 c = range[i];
5355  uc16 range_from = c - (block_end - pos);
5356  uc16 range_to = c - (block_end - end);
5357  if (!(bottom <= range_from && range_to <= top)) {
5358  ranges->Add(CharacterRange(range_from, range_to), zone);
5359  }
5360  }
5361  pos = end + 1;
5362  }
5363  }
5364 }
5365 
5366 
5368  ASSERT_NOT_NULL(ranges);
5369  int n = ranges->length();
5370  if (n <= 1) return true;
5371  int max = ranges->at(0).to();
5372  for (int i = 1; i < n; i++) {
5373  CharacterRange next_range = ranges->at(i);
5374  if (next_range.from() <= max + 1) return false;
5375  max = next_range.to();
5376  }
5377  return true;
5378 }
5379 
5380 
5381 ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
5382  if (ranges_ == NULL) {
5383  ranges_ = new(zone) ZoneList<CharacterRange>(2, zone);
5384  CharacterRange::AddClassEscape(standard_set_type_, ranges_, zone);
5385  }
5386  return ranges_;
5387 }
5388 
5389 
5390 // Move a number of elements in a zonelist to another position
5391 // in the same list. Handles overlapping source and target areas.
5392 static void MoveRanges(ZoneList<CharacterRange>* list,
5393  int from,
5394  int to,
5395  int count) {
5396  // Ranges are potentially overlapping.
5397  if (from < to) {
5398  for (int i = count - 1; i >= 0; i--) {
5399  list->at(to + i) = list->at(from + i);
5400  }
5401  } else {
5402  for (int i = 0; i < count; i++) {
5403  list->at(to + i) = list->at(from + i);
5404  }
5405  }
5406 }
5407 
5408 
5409 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
5410  int count,
5411  CharacterRange insert) {
5412  // Inserts a range into list[0..count[, which must be sorted
5413  // by from value and non-overlapping and non-adjacent, using at most
5414  // list[0..count] for the result. Returns the number of resulting
5415  // canonicalized ranges. Inserting a range may collapse existing ranges into
5416  // fewer ranges, so the return value can be anything in the range 1..count+1.
5417  uc16 from = insert.from();
5418  uc16 to = insert.to();
5419  int start_pos = 0;
5420  int end_pos = count;
5421  for (int i = count - 1; i >= 0; i--) {
5422  CharacterRange current = list->at(i);
5423  if (current.from() > to + 1) {
5424  end_pos = i;
5425  } else if (current.to() + 1 < from) {
5426  start_pos = i + 1;
5427  break;
5428  }
5429  }
5430 
5431  // Inserted range overlaps, or is adjacent to, ranges at positions
5432  // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
5433  // not affected by the insertion.
5434  // If start_pos == end_pos, the range must be inserted before start_pos.
5435  // if start_pos < end_pos, the entire range from start_pos to end_pos
5436  // must be merged with the insert range.
5437 
5438  if (start_pos == end_pos) {
5439  // Insert between existing ranges at position start_pos.
5440  if (start_pos < count) {
5441  MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
5442  }
5443  list->at(start_pos) = insert;
5444  return count + 1;
5445  }
5446  if (start_pos + 1 == end_pos) {
5447  // Replace single existing range at position start_pos.
5448  CharacterRange to_replace = list->at(start_pos);
5449  int new_from = Min(to_replace.from(), from);
5450  int new_to = Max(to_replace.to(), to);
5451  list->at(start_pos) = CharacterRange(new_from, new_to);
5452  return count;
5453  }
5454  // Replace a number of existing ranges from start_pos to end_pos - 1.
5455  // Move the remaining ranges down.
5456 
5457  int new_from = Min(list->at(start_pos).from(), from);
5458  int new_to = Max(list->at(end_pos - 1).to(), to);
5459  if (end_pos < count) {
5460  MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
5461  }
5462  list->at(start_pos) = CharacterRange(new_from, new_to);
5463  return count - (end_pos - start_pos) + 1;
5464 }
5465 
5466 
5468  // Special/default classes are always considered canonical. The result
5469  // of calling ranges() will be sorted.
5470  if (ranges_ == NULL) return;
5472 }
5473 
5474 
5476  if (character_ranges->length() <= 1) return;
5477  // Check whether ranges are already canonical (increasing, non-overlapping,
5478  // non-adjacent).
5479  int n = character_ranges->length();
5480  int max = character_ranges->at(0).to();
5481  int i = 1;
5482  while (i < n) {
5483  CharacterRange current = character_ranges->at(i);
5484  if (current.from() <= max + 1) {
5485  break;
5486  }
5487  max = current.to();
5488  i++;
5489  }
5490  // Canonical until the i'th range. If that's all of them, we are done.
5491  if (i == n) return;
5492 
5493  // The ranges at index i and forward are not canonicalized. Make them so by
5494  // doing the equivalent of insertion sort (inserting each into the previous
5495  // list, in order).
5496  // Notice that inserting a range can reduce the number of ranges in the
5497  // result due to combining of adjacent and overlapping ranges.
5498  int read = i; // Range to insert.
5499  int num_canonical = i; // Length of canonicalized part of list.
5500  do {
5501  num_canonical = InsertRangeInCanonicalList(character_ranges,
5502  num_canonical,
5503  character_ranges->at(read));
5504  read++;
5505  } while (read < n);
5506  character_ranges->Rewind(num_canonical);
5507 
5508  ASSERT(CharacterRange::IsCanonical(character_ranges));
5509 }
5510 
5511 
5513  ZoneList<CharacterRange>* negated_ranges,
5514  Zone* zone) {
5516  ASSERT_EQ(0, negated_ranges->length());
5517  int range_count = ranges->length();
5518  uc16 from = 0;
5519  int i = 0;
5520  if (range_count > 0 && ranges->at(0).from() == 0) {
5521  from = ranges->at(0).to();
5522  i = 1;
5523  }
5524  while (i < range_count) {
5525  CharacterRange range = ranges->at(i);
5526  negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);
5527  from = range.to();
5528  i++;
5529  }
5530  if (from < String::kMaxUtf16CodeUnit) {
5531  negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit),
5532  zone);
5533  }
5534 }
5535 
5536 
5537 // -------------------------------------------------------------------
5538 // Splay tree
5539 
5540 
5541 OutSet* OutSet::Extend(unsigned value, Zone* zone) {
5542  if (Get(value))
5543  return this;
5544  if (successors(zone) != NULL) {
5545  for (int i = 0; i < successors(zone)->length(); i++) {
5546  OutSet* successor = successors(zone)->at(i);
5547  if (successor->Get(value))
5548  return successor;
5549  }
5550  } else {
5551  successors_ = new(zone) ZoneList<OutSet*>(2, zone);
5552  }
5553  OutSet* result = new(zone) OutSet(first_, remaining_);
5554  result->Set(value, zone);
5555  successors(zone)->Add(result, zone);
5556  return result;
5557 }
5558 
5559 
5560 void OutSet::Set(unsigned value, Zone *zone) {
5561  if (value < kFirstLimit) {
5562  first_ |= (1 << value);
5563  } else {
5564  if (remaining_ == NULL)
5565  remaining_ = new(zone) ZoneList<unsigned>(1, zone);
5566  if (remaining_->is_empty() || !remaining_->Contains(value))
5567  remaining_->Add(value, zone);
5568  }
5569 }
5570 
5571 
5572 bool OutSet::Get(unsigned value) {
5573  if (value < kFirstLimit) {
5574  return (first_ & (1 << value)) != 0;
5575  } else if (remaining_ == NULL) {
5576  return false;
5577  } else {
5578  return remaining_->Contains(value);
5579  }
5580 }
5581 
5582 
5584 
5585 
5586 void DispatchTable::AddRange(CharacterRange full_range, int value,
5587  Zone* zone) {
5588  CharacterRange current = full_range;
5589  if (tree()->is_empty()) {
5590  // If this is the first range we just insert into the table.
5592  ASSERT_RESULT(tree()->Insert(current.from(), &loc));
5593  loc.set_value(Entry(current.from(), current.to(),
5594  empty()->Extend(value, zone)));
5595  return;
5596  }
5597  // First see if there is a range to the left of this one that
5598  // overlaps.
5600  if (tree()->FindGreatestLessThan(current.from(), &loc)) {
5601  Entry* entry = &loc.value();
5602  // If we've found a range that overlaps with this one, and it
5603  // starts strictly to the left of this one, we have to fix it
5604  // because the following code only handles ranges that start on
5605  // or after the start point of the range we're adding.
5606  if (entry->from() < current.from() && entry->to() >= current.from()) {
5607  // Snap the overlapping range in half around the start point of
5608  // the range we're adding.
5609  CharacterRange left(entry->from(), current.from() - 1);
5610  CharacterRange right(current.from(), entry->to());
5611  // The left part of the overlapping range doesn't overlap.
5612  // Truncate the whole entry to be just the left part.
5613  entry->set_to(left.to());
5614  // The right part is the one that overlaps. We add this part
5615  // to the map and let the next step deal with merging it with
5616  // the range we're adding.
5618  ASSERT_RESULT(tree()->Insert(right.from(), &loc));
5619  loc.set_value(Entry(right.from(),
5620  right.to(),
5621  entry->out_set()));
5622  }
5623  }
5624  while (current.is_valid()) {
5625  if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
5626  (loc.value().from() <= current.to()) &&
5627  (loc.value().to() >= current.from())) {
5628  Entry* entry = &loc.value();
5629  // We have overlap. If there is space between the start point of
5630  // the range we're adding and where the overlapping range starts
5631  // then we have to add a range covering just that space.
5632  if (current.from() < entry->from()) {
5634  ASSERT_RESULT(tree()->Insert(current.from(), &ins));
5635  ins.set_value(Entry(current.from(),
5636  entry->from() - 1,
5637  empty()->Extend(value, zone)));
5638  current.set_from(entry->from());
5639  }
5640  ASSERT_EQ(current.from(), entry->from());
5641  // If the overlapping range extends beyond the one we want to add
5642  // we have to snap the right part off and add it separately.
5643  if (entry->to() > current.to()) {
5645  ASSERT_RESULT(tree()->Insert(current.to() + 1, &ins));
5646  ins.set_value(Entry(current.to() + 1,
5647  entry->to(),
5648  entry->out_set()));
5649  entry->set_to(current.to());
5650  }
5651  ASSERT(entry->to() <= current.to());
5652  // The overlapping range is now completely contained by the range
5653  // we're adding so we can just update it and move the start point
5654  // of the range we're adding just past it.
5655  entry->AddValue(value, zone);
5656  // Bail out if the last interval ended at 0xFFFF since otherwise
5657  // adding 1 will wrap around to 0.
5658  if (entry->to() == String::kMaxUtf16CodeUnit)
5659  break;
5660  ASSERT(entry->to() + 1 > current.from());
5661  current.set_from(entry->to() + 1);
5662  } else {
5663  // There is no overlap so we can just add the range
5665  ASSERT_RESULT(tree()->Insert(current.from(), &ins));
5666  ins.set_value(Entry(current.from(),
5667  current.to(),
5668  empty()->Extend(value, zone)));
5669  break;
5670  }
5671  }
5672 }
5673 
5674 
5677  if (!tree()->FindGreatestLessThan(value, &loc))
5678  return empty();
5679  Entry* entry = &loc.value();
5680  if (value <= entry->to())
5681  return entry->out_set();
5682  else
5683  return empty();
5684 }
5685 
5686 
5687 // -------------------------------------------------------------------
5688 // Analysis
5689 
5690 
5692  StackLimitCheck check(that->zone()->isolate());
5693  if (check.HasOverflowed()) {
5694  fail("Stack overflow");
5695  return;
5696  }
5697  if (that->info()->been_analyzed || that->info()->being_analyzed)
5698  return;
5699  that->info()->being_analyzed = true;
5700  that->Accept(this);
5701  that->info()->being_analyzed = false;
5702  that->info()->been_analyzed = true;
5703 }
5704 
5705 
5706 void Analysis::VisitEnd(EndNode* that) {
5707  // nothing to do
5708 }
5709 
5710 
5712  int element_count = elements()->length();
5713  // Set up the offsets of the elements relative to the start. This is a fixed
5714  // quantity since a TextNode can only contain fixed-width things.
5715  int cp_offset = 0;
5716  for (int i = 0; i < element_count; i++) {
5717  TextElement& elm = elements()->at(i);
5718  elm.set_cp_offset(cp_offset);
5719  cp_offset += elm.length();
5720  }
5721 }
5722 
5723 
5724 void Analysis::VisitText(TextNode* that) {
5725  if (ignore_case_) {
5726  that->MakeCaseIndependent(is_ascii_);
5727  }
5728  EnsureAnalyzed(that->on_success());
5729  if (!has_failed()) {
5730  that->CalculateOffsets();
5731  }
5732 }
5733 
5734 
5735 void Analysis::VisitAction(ActionNode* that) {
5736  RegExpNode* target = that->on_success();
5737  EnsureAnalyzed(target);
5738  if (!has_failed()) {
5739  // If the next node is interested in what it follows then this node
5740  // has to be interested too so it can pass the information on.
5741  that->info()->AddFromFollowing(target->info());
5742  }
5743 }
5744 
5745 
5746 void Analysis::VisitChoice(ChoiceNode* that) {
5747  NodeInfo* info = that->info();
5748  for (int i = 0; i < that->alternatives()->length(); i++) {
5749  RegExpNode* node = that->alternatives()->at(i).node();
5750  EnsureAnalyzed(node);
5751  if (has_failed()) return;
5752  // Anything the following nodes need to know has to be known by
5753  // this node also, so it can pass it on.
5754  info->AddFromFollowing(node->info());
5755  }
5756 }
5757 
5758 
5760  NodeInfo* info = that->info();
5761  for (int i = 0; i < that->alternatives()->length(); i++) {
5762  RegExpNode* node = that->alternatives()->at(i).node();
5763  if (node != that->loop_node()) {
5764  EnsureAnalyzed(node);
5765  if (has_failed()) return;
5766  info->AddFromFollowing(node->info());
5767  }
5768  }
5769  // Check the loop last since it may need the value of this node
5770  // to get a correct result.
5771  EnsureAnalyzed(that->loop_node());
5772  if (!has_failed()) {
5773  info->AddFromFollowing(that->loop_node()->info());
5774  }
5775 }
5776 
5777 
5778 void Analysis::VisitBackReference(BackReferenceNode* that) {
5779  EnsureAnalyzed(that->on_success());
5780 }
5781 
5782 
5783 void Analysis::VisitAssertion(AssertionNode* that) {
5784  EnsureAnalyzed(that->on_success());
5785 }
5786 
5787 
5789  int budget,
5790  BoyerMooreLookahead* bm,
5791  bool not_at_start) {
5792  // Working out the set of characters that a backreference can match is too
5793  // hard, so we just say that any character can match.
5794  bm->SetRest(offset);
5795  SaveBMInfo(bm, not_at_start, offset);
5796 }
5797 
5798 
5801 
5802 
5804  int budget,
5805  BoyerMooreLookahead* bm,
5806  bool not_at_start) {
5808  budget = (budget - 1) / alts->length();
5809  for (int i = 0; i < alts->length(); i++) {
5810  GuardedAlternative& alt = alts->at(i);
5811  if (alt.guards() != NULL && alt.guards()->length() != 0) {
5812  bm->SetRest(offset); // Give up trying to fill in info.
5813  SaveBMInfo(bm, not_at_start, offset);
5814  return;
5815  }
5816  alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);
5817  }
5818  SaveBMInfo(bm, not_at_start, offset);
5819 }
5820 
5821 
5822 void TextNode::FillInBMInfo(int initial_offset,
5823  int budget,
5824  BoyerMooreLookahead* bm,
5825  bool not_at_start) {
5826  if (initial_offset >= bm->length()) return;
5827  int offset = initial_offset;
5828  int max_char = bm->max_char();
5829  for (int i = 0; i < elements()->length(); i++) {
5830  if (offset >= bm->length()) {
5831  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5832  return;
5833  }
5834  TextElement text = elements()->at(i);
5835  if (text.text_type() == TextElement::ATOM) {
5836  RegExpAtom* atom = text.atom();
5837  for (int j = 0; j < atom->length(); j++, offset++) {
5838  if (offset >= bm->length()) {
5839  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5840  return;
5841  }
5842  uc16 character = atom->data()[j];
5843  if (bm->compiler()->ignore_case()) {
5845  int length = GetCaseIndependentLetters(
5846  Isolate::Current(),
5847  character,
5849  chars);
5850  for (int j = 0; j < length; j++) {
5851  bm->Set(offset, chars[j]);
5852  }
5853  } else {
5854  if (character <= max_char) bm->Set(offset, character);
5855  }
5856  }
5857  } else {
5858  ASSERT_EQ(TextElement::CHAR_CLASS, text.text_type());
5859  RegExpCharacterClass* char_class = text.char_class();
5860  ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
5861  if (char_class->is_negated()) {
5862  bm->SetAll(offset);
5863  } else {
5864  for (int k = 0; k < ranges->length(); k++) {
5865  CharacterRange& range = ranges->at(k);
5866  if (range.from() > max_char) continue;
5867  int to = Min(max_char, static_cast<int>(range.to()));
5868  bm->SetInterval(offset, Interval(range.from(), to));
5869  }
5870  }
5871  offset++;
5872  }
5873  }
5874  if (offset >= bm->length()) {
5875  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5876  return;
5877  }
5878  on_success()->FillInBMInfo(offset,
5879  budget - 1,
5880  bm,
5881  true); // Not at start after a text node.
5882  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5883 }
5884 
5885 
5886 // -------------------------------------------------------------------
5887 // Dispatch table construction
5888 
5889 
5890 void DispatchTableConstructor::VisitEnd(EndNode* that) {
5892 }
5893 
5894 
5896  node->set_being_calculated(true);
5897  ZoneList<GuardedAlternative>* alternatives = node->alternatives();
5898  for (int i = 0; i < alternatives->length(); i++) {
5899  set_choice_index(i);
5900  alternatives->at(i).node()->Accept(this);
5901  }
5902  node->set_being_calculated(false);
5903 }
5904 
5905 
5907  public:
5909  : constructor_(constructor) { }
5910  void Call(uc32 from, DispatchTable::Entry entry);
5911  private:
5912  DispatchTableConstructor* constructor_;
5913 };
5914 
5915 
5917  CharacterRange range(from, entry.to());
5918  constructor_->AddRange(range);
5919 }
5920 
5921 
5922 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
5923  if (node->being_calculated())
5924  return;
5926  AddDispatchRange adder(this);
5927  table->ForEach(&adder);
5928 }
5929 
5930 
5931 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
5932  // TODO(160): Find the node that we refer back to and propagate its start
5933  // set back to here. For now we just accept anything.
5935 }
5936 
5937 
5938 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
5939  RegExpNode* target = that->on_success();
5940  target->Accept(this);
5941 }
5942 
5943 
5944 static int CompareRangeByFrom(const CharacterRange* a,
5945  const CharacterRange* b) {
5946  return Compare<uc16>(a->from(), b->from());
5947 }
5948 
5949 
5951  ranges->Sort(CompareRangeByFrom);
5952  uc16 last = 0;
5953  for (int i = 0; i < ranges->length(); i++) {
5954  CharacterRange range = ranges->at(i);
5955  if (last < range.from())
5956  AddRange(CharacterRange(last, range.from() - 1));
5957  if (range.to() >= last) {
5958  if (range.to() == String::kMaxUtf16CodeUnit) {
5959  return;
5960  } else {
5961  last = range.to() + 1;
5962  }
5963  }
5964  }
5966 }
5967 
5968 
5969 void DispatchTableConstructor::VisitText(TextNode* that) {
5970  TextElement elm = that->elements()->at(0);
5971  switch (elm.text_type()) {
5972  case TextElement::ATOM: {
5973  uc16 c = elm.atom()->data()[0];
5974  AddRange(CharacterRange(c, c));
5975  break;
5976  }
5977  case TextElement::CHAR_CLASS: {
5978  RegExpCharacterClass* tree = elm.char_class();
5979  ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
5980  if (tree->is_negated()) {
5981  AddInverse(ranges);
5982  } else {
5983  for (int i = 0; i < ranges->length(); i++)
5984  AddRange(ranges->at(i));
5985  }
5986  break;
5987  }
5988  default: {
5989  UNIMPLEMENTED();
5990  }
5991  }
5992 }
5993 
5994 
5995 void DispatchTableConstructor::VisitAction(ActionNode* that) {
5996  RegExpNode* target = that->on_success();
5997  target->Accept(this);
5998 }
5999 
6000 
6002  RegExpCompileData* data,
6003  bool ignore_case,
6004  bool is_global,
6005  bool is_multiline,
6006  Handle<String> pattern,
6007  Handle<String> sample_subject,
6008  bool is_ascii,
6009  Zone* zone) {
6010  if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
6011  return IrregexpRegExpTooBig(zone->isolate());
6012  }
6013  RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone);
6014 
6015  // Sample some characters from the middle of the string.
6016  static const int kSampleSize = 128;
6017 
6018  FlattenString(sample_subject);
6019  int chars_sampled = 0;
6020  int half_way = (sample_subject->length() - kSampleSize) / 2;
6021  for (int i = Max(0, half_way);
6022  i < sample_subject->length() && chars_sampled < kSampleSize;
6023  i++, chars_sampled++) {
6024  compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
6025  }
6026 
6027  // Wrap the body of the regexp in capture #0.
6028  RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
6029  0,
6030  &compiler,
6031  compiler.accept());
6032  RegExpNode* node = captured_body;
6033  bool is_end_anchored = data->tree->IsAnchoredAtEnd();
6034  bool is_start_anchored = data->tree->IsAnchoredAtStart();
6035  int max_length = data->tree->max_match();
6036  if (!is_start_anchored) {
6037  // Add a .*? at the beginning, outside the body capture, unless
6038  // this expression is anchored at the beginning.
6039  RegExpNode* loop_node =
6040  RegExpQuantifier::ToNode(0,
6041  RegExpTree::kInfinity,
6042  false,
6043  new(zone) RegExpCharacterClass('*'),
6044  &compiler,
6045  captured_body,
6046  data->contains_anchor);
6047 
6048  if (data->contains_anchor) {
6049  // Unroll loop once, to take care of the case that might start
6050  // at the start of input.
6051  ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
6052  first_step_node->AddAlternative(GuardedAlternative(captured_body));
6053  first_step_node->AddAlternative(GuardedAlternative(
6054  new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));
6055  node = first_step_node;
6056  } else {
6057  node = loop_node;
6058  }
6059  }
6060  if (is_ascii) {
6061  node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
6062  // Do it again to propagate the new nodes to places where they were not
6063  // put because they had not been calculated yet.
6064  if (node != NULL) {
6065  node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
6066  }
6067  }
6068 
6069  if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
6070  data->node = node;
6071  Analysis analysis(ignore_case, is_ascii);
6072  analysis.EnsureAnalyzed(node);
6073  if (analysis.has_failed()) {
6074  const char* error_message = analysis.error_message();
6075  return CompilationResult(zone->isolate(), error_message);
6076  }
6077 
6078  // Create the correct assembler for the architecture.
6079 #ifndef V8_INTERPRETED_REGEXP
6080  // Native regexp implementation.
6081 
6085 
6086 #if V8_TARGET_ARCH_IA32
6087  RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,
6088  zone);
6089 #elif V8_TARGET_ARCH_X64
6090  RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2,
6091  zone);
6092 #elif V8_TARGET_ARCH_ARM
6093  RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2,
6094  zone);
6095 #elif V8_TARGET_ARCH_ARM64
6096  RegExpMacroAssemblerARM64 macro_assembler(mode, (data->capture_count + 1) * 2,
6097  zone);
6098 #elif V8_TARGET_ARCH_MIPS
6099  RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2,
6100  zone);
6101 #else
6102 #error "Unsupported architecture"
6103 #endif
6104 
6105 #else // V8_INTERPRETED_REGEXP
6106  // Interpreted regexp implementation.
6108  RegExpMacroAssemblerIrregexp macro_assembler(codes, zone);
6109 #endif // V8_INTERPRETED_REGEXP
6110 
6111  // Inserted here, instead of in Assembler, because it depends on information
6112  // in the AST that isn't replicated in the Node structure.
6113  static const int kMaxBacksearchLimit = 1024;
6114  if (is_end_anchored &&
6115  !is_start_anchored &&
6116  max_length < kMaxBacksearchLimit) {
6117  macro_assembler.SetCurrentPositionFromEnd(max_length);
6118  }
6119 
6120  if (is_global) {
6121  macro_assembler.set_global_mode(
6122  (data->tree->min_match() > 0)
6125  }
6126 
6127  return compiler.Assemble(&macro_assembler,
6128  node,
6129  data->capture_count,
6130  pattern);
6131 }
6132 
6133 
6134 }} // namespace v8::internal
void SetInterval(const Interval &interval)
Definition: jsregexp.cc:3629
AlternativeGenerationList(int count, Zone *zone)
Definition: jsregexp.cc:3576
virtual void WriteStackPointerToRegister(int reg)=0
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter NULL
Definition: flags.cc:269
OutSet * Get(uc16 value)
Definition: jsregexp.cc:5675
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.h:648
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5803
static Handle< Object > New(Handle< JSFunction > func, int argc, Handle< Object > argv[], bool *pending_exception)
Definition: execution.cc:190
bool EmitCharacterFunction(Isolate *isolate, RegExpCompiler *compiler, uc16 c, Label *on_failure, int cp_offset, bool check, bool preloaded)
Definition: jsregexp.cc:1731
Failure * StackOverflow()
Definition: isolate.cc:871
struct v8::internal::ActionNode::@17::@22 u_empty_match_check
void FlattenString(Handle< String > string)
Definition: handles.cc:151
RegExpNode * stop_node()
Definition: jsregexp.h:1455
int GreedyLoopTextLengthForAlternative(GuardedAlternative *alternative)
Definition: jsregexp.cc:3476
static const int kIrregexpMaxRegisterCountIndex
Definition: objects.h:7954
double total_regexp_code_generated()
Definition: heap.h:1729
bool Contains(const T &elm) const
Definition: list-inl.h:196
#define DECLARE_VISIT(type)
Definition: full-codegen.h:629
static Vector< const int > GetWordBounds()
Definition: jsregexp.cc:5247
#define ASSERT_RESULT(expr)
Definition: checks.h:328
virtual void GoTo(Label *label)=0
static AssertionNode * AtStart(RegExpNode *on_success)
Definition: jsregexp.h:909
const char * ToCString(const v8::String::Utf8Value &value)
void SetInterval(int map_number, const Interval &interval)
Definition: jsregexp.h:1299
static ActionNode * SetRegister(int reg, int val, RegExpNode *on_success)
Definition: jsregexp.cc:1497
RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler *assembler, RegExpNode *start, int capture_count, Handle< String > pattern)
Definition: jsregexp.cc:1115
static Code * IrregexpNativeCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:511
void set(int index, Object *value)
Definition: objects-inl.h:2147
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths true
Definition: flags.cc:208
CompilationCache * compilation_cache()
Definition: isolate.h:867
CharacterRangeSplitter(ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5254
void PrintF(const char *format,...)
Definition: v8utils.cc:40
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function not JSFunction itself flushes the cache of optimized code for closures on every GC functions with arguments object maximum number of escape analysis fix point iterations allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms concurrent on stack replacement do not emit check maps for constant values that have a leaf map
Definition: flags.cc:350
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5788
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1439
static String * cast(Object *obj)
static ActionNode * BeginSubmatch(int stack_pointer_reg, int position_reg, RegExpNode *on_success)
Definition: jsregexp.cc:1537
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2356
CodeTracer * GetCodeTracer()
Definition: isolate.cc:2229
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3345
static const int kMaxUtf16CodeUnit
Definition: objects.h:8916
void set_characters(int characters)
Definition: jsregexp.h:562
unibrow::Mapping< unibrow::Ecma262UnCanonicalize > * jsregexp_uncanonicalize()
Definition: isolate.h:926
VisitMarker(NodeInfo *info)
Definition: jsregexp.cc:2767
const int kNumRegisters
Definition: constants-arm.h:57
Isolate * isolate()
Definition: heap-inl.h:624
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.h:627
static Smi * FromInt(int value)
Definition: objects-inl.h:1209
#define LOG(isolate, Call)
Definition: log.h:86
bool Contains(int value)
Definition: jsregexp.h:723
int to() const
Definition: jsregexp.h:728
void set_at_start(bool at_start)
Definition: jsregexp.h:1450
GlobalCache(Handle< JSRegExp > regexp, Handle< String > subject, bool is_global, Isolate *isolate)
Definition: jsregexp.cc:711
virtual void CheckNotBackReference(int start_reg, Label *on_no_match)=0
#define ASSERT_NOT_NULL(p)
Definition: checks.h:343
struct v8::internal::ActionNode::@17::@20 u_position_register
bool EmitQuickCheck(RegExpCompiler *compiler, Trace *trace, bool preload_has_checked_bounds, Label *on_possible_success, QuickCheckDetails *details_return, bool fall_through_on_failure)
Definition: jsregexp.cc:2477
void set_loop_label(Label *label)
Definition: jsregexp.h:1474
static Handle< T > cast(Handle< S > that)
Definition: handles.h:75
static const int kJSRegexpStaticOffsetsVectorSize
Definition: isolate.h:997
static ActionNode * EmptyMatchCheck(int start_register, int repetition_register, int repetition_limit, RegExpNode *on_success)
Definition: jsregexp.cc:1563
T Max(T a, T b)
Definition: utils.h:227
virtual void ClearRegisters(int reg_from, int reg_to)=0
Position * positions(int index)
Definition: jsregexp.h:563
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.cc:3448
int32_t uc32
Definition: globals.h:310
LimitResult LimitVersions(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:2261
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1466
static Handle< Object > CreateRegExpLiteral(Handle< JSFunction > constructor, Handle< String > pattern, Handle< String > flags, bool *has_pending_exception)
Definition: jsregexp.cc:69
void AddFromFollowing(NodeInfo *that)
Definition: jsregexp.h:507
kSerializedDataOffset Object
Definition: objects-inl.h:5016
void AddRange(CharacterRange range, int value, Zone *zone)
Definition: jsregexp.cc:5586
static ActionNode * IncrementRegister(int reg, RegExpNode *on_success)
Definition: jsregexp.cc:1508
uc16 to()
Definition: jsregexp.h:354
static ByteArray * cast(Object *obj)
static int IrregexpNumberOfCaptures(FixedArray *re)
Definition: jsregexp.cc:496
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function not JSFunction itself flushes the cache of optimized code for closures on every GC functions with arguments object maximum number of escape analysis fix point iterations allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms concurrent on stack replacement do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes number of stack frames inspected by the profiler percentage of ICs that must have type info to allow optimization extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long expose natives in global object expose freeBuffer extension expose gc extension under the specified name expose externalize string extension number of stack frames to capture disable builtin natives files print name of functions for which code is generated use random jit cookie to mask large constants trace lazy optimization use adaptive optimizations always try to OSR functions trace optimize function deoptimization minimum length for automatic enable preparsing maximum number of optimization attempts before giving up cache prototype transitions trace debugging JSON request response trace out of bounds accesses to external arrays trace_js_array_abuse automatically set the debug break flag when debugger commands are in the queue abort by crashing maximum length of function source code printed in a stack trace max size of the new max size of the old max size of executable always perform global GCs print one trace line following each garbage collection do not print trace line after scavenger collection print statistics of the maximum memory committed for the heap in only print modified registers Don t break for ASM_UNIMPLEMENTED_BREAK macros print stack trace when an illegal exception is thrown randomize hashes to avoid predictable hash Fixed seed to use to hash property Print the time it takes to deserialize the snapshot testing_bool_flag testing_int_flag string flag tmp file in which to serialize heap Print the time it takes to lazily compile hydrogen code stubs concurrent_recompilation concurrent_sweeping Print usage message
Definition: flags.cc:665
void set_to(uc16 value)
Definition: jsregexp.h:355
int int32_t
Definition: unicode.cc:47
static void EnsureSize(Handle< JSArray > array, int minimum_size_of_backing_fixed_array)
Definition: objects-inl.h:6610
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2779
virtual void LoadCurrentCharacter(int cp_offset, Label *on_end_of_input, bool check_bounds=true, int characters=1)=0
static void Canonicalize(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5475
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2959
Handle< FixedArray > LookupRegExp(Handle< String > source, JSRegExp::Flags flags)
RegExpNode * FilterSuccessor(int depth, bool ignore_case)
Definition: jsregexp.cc:2788
void EnsureAnalyzed(RegExpNode *node)
Definition: jsregexp.cc:5691
void set_flush_budget(int to)
Definition: jsregexp.h:1477
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2972
static CharacterRange Everything()
Definition: jsregexp.h:274
virtual bool IsAnchoredAtEnd()
Definition: ast.h:2509
RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:1099
virtual bool try_to_emit_quick_check_for_alternative(int i)
Definition: jsregexp.h:1099
void AddValue(int value, Zone *zone)
Definition: jsregexp.h:356
bool Get(unsigned value)
Definition: jsregexp.cc:5572
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2366
unibrow::Mapping< unibrow::Ecma262Canonicalize > Canonicalize
static uint16_t ConvertNonLatin1ToLatin1(uint16_t)
Definition: unicode-inl.h:83
#define ASSERT(condition)
Definition: checks.h:329
static ActionNode * StorePosition(int reg, bool is_capture, RegExpNode *on_success)
Definition: jsregexp.cc:1516
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2316
#define ASSERT_GE(v1, v2)
Definition: checks.h:332
void set_characters_preloaded(int count)
Definition: jsregexp.h:1475
const int kPatternTooShortForBoyerMoore
Definition: jsregexp.cc:142
unsigned short uint16_t
Definition: unicode.cc:46
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler *compiler)
Definition: jsregexp.cc:3404
const char * error_message()
Definition: jsregexp.h:1582
static const uint32_t kMaxOneByteCharCodeU
Definition: objects.h:8915
RegExpCompiler * compiler()
Definition: jsregexp.h:1285
void BuildTable(ChoiceNode *node)
Definition: jsregexp.cc:5895
virtual void ReadCurrentPositionFromRegister(int reg)=0
bool EmitSkipInstructions(RegExpMacroAssembler *masm)
Definition: jsregexp.cc:3782
#define DEFINE_ACCEPT(Type)
Definition: jsregexp.cc:1576
ZoneList< TextElement > * elements()
Definition: jsregexp.h:863
Factory * factory()
Definition: isolate.h:995
void SetAll(int map_number)
Definition: jsregexp.h:1309
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2379
static Code * cast(Object *obj)
static void SetLastSubject(FixedArray *array, String *to)
Definition: jsregexp.h:206
Isolate * isolate()
Definition: zone.h:90
struct v8::internal::ActionNode::@17::@21 u_submatch
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2867
static void Negate(ZoneList< CharacterRange > *src, ZoneList< CharacterRange > *dst, Zone *zone)
Definition: jsregexp.cc:5512
static Handle< Object > AtomExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:342
static Smi * cast(Object *object)
Handle< String > FlattenGetString(Handle< String > string)
Definition: handles.cc:156
static const int kRegExpExecutableMemoryLimit
Definition: jsregexp.h:235
virtual void Emit(RegExpCompiler *compiler, Trace *trace)=0
static ActionNode * PositiveSubmatchSuccess(int stack_pointer_reg, int restore_reg, int clear_capture_count, int clear_capture_from, RegExpNode *on_success)
Definition: jsregexp.cc:1548
static const int kNoRegister
Definition: jsregexp.cc:1064
virtual void AdvanceCurrentPosition(int by)=0
int get(uchar c, uchar n, uchar *result)
Definition: unicode-inl.h:50
RegExpMacroAssembler * macro_assembler()
Definition: jsregexp.cc:1043
static void Split(ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5282
SmartArrayPointer< char > ToCString(AllowNullsFlag allow_nulls, RobustnessFlag robustness_flag, int offset, int length, int *length_output=0)
Definition: objects.cc:8272
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2936
static void SetLastInput(FixedArray *array, String *to)
Definition: jsregexp.h:210
static const int kRegWxpCompiledLimit
Definition: jsregexp.h:236
void Advance(int by, bool ascii)
Definition: jsregexp.cc:2715
void Merge(QuickCheckDetails *other, int from_index)
Definition: jsregexp.cc:2736
Label * loop_label()
Definition: jsregexp.h:1454
#define UNREACHABLE()
Definition: checks.h:52
RegExpExpansionLimiter(RegExpCompiler *compiler, int factor)
Definition: jsregexp.cc:4840
static int IrregexpPrepare(Handle< JSRegExp > regexp, Handle< String > subject)
Definition: jsregexp.cc:529
void IncreaseTotalRegexpCodeGenerated(int size)
Definition: heap.h:1730
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function not JSFunction itself flushes the cache of optimized code for closures on every GC functions with arguments object maximum number of escape analysis fix point iterations allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms concurrent on stack replacement do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes number of stack frames inspected by the profiler percentage of ICs that must have type info to allow optimization extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long mode(MIPS only)") DEFINE_string(expose_natives_as
T * start() const
Definition: utils.h:426
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4314
static const int kRecursionBudget
Definition: jsregexp.h:637
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5822
static const unsigned kFirstLimit
Definition: jsregexp.h:320
void AddAlternative(GuardedAlternative node)
Definition: jsregexp.h:1075
STATIC_ASSERT(sizeof(CPURegister)==sizeof(Register))
static void SetLastCaptureCount(FixedArray *array, int to)
Definition: jsregexp.h:202
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function not JSFunction itself flushes the cache of optimized code for closures on every GC functions with arguments object maximum number of escape analysis fix point iterations allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms concurrent on stack replacement do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes number of stack frames inspected by the profiler percentage of ICs that must have type info to allow optimization extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long expose natives in global object expose freeBuffer extension expose gc extension under the specified name expose externalize string extension number of stack frames to capture disable builtin natives files print name of functions for which code is generated use random jit cookie to mask large constants trace lazy optimization use adaptive optimizations always try to OSR functions trace optimize function deoptimization minimum length for automatic enable preparsing maximum number of optimization attempts before giving up cache prototype transitions trace debugging JSON request response trace out of bounds accesses to external arrays trace_js_array_abuse automatically set the debug break flag when debugger commands are in the queue abort by crashing maximum length of function source code printed in a stack trace max size of the new max size of the old max size of executable always perform global GCs print one trace line following each garbage collection do not print trace line after scavenger collection print statistics of the maximum memory committed for the heap in only print modified registers Don t break for ASM_UNIMPLEMENTED_BREAK macros print stack trace when an illegal exception is thrown randomize hashes to avoid predictable hash Fixed seed to use to hash property Print the time it takes to deserialize the snapshot testing_bool_flag testing_int_flag string flag tmp file in which to serialize heap Print the time it takes to lazily compile hydrogen code stubs concurrent_recompilation concurrent_sweeping Print usage including flags
Definition: flags.cc:665
void fail(const char *error_message)
Definition: jsregexp.h:1586
AlternativeGeneration * at(int i)
Definition: jsregexp.cc:3592
static void MemCopy(void *dest, const void *src, size_t size)
Definition: platform.h:399
virtual void CheckCharacterGT(uc16 limit, Label *on_greater)=0
void Call(uc32 from, DispatchTable::Entry entry)
Definition: jsregexp.cc:5916
void set_bound_checked_up_to(int to)
Definition: jsregexp.h:1476
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2885
void set_backtrack(Label *backtrack)
Definition: jsregexp.h:1472
static const int kInfinity
Definition: ast.h:2502
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2986
virtual void Accept(NodeVisitor *visitor)
Definition: jsregexp.cc:1584
v8::Handle< v8::Object > bottom
Definition: test-api.cc:2280
MemoryAllocator * memory_allocator()
Definition: isolate.h:884
void check(i::Vector< const uint8_t > string)
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2390
void AddWork(RegExpNode *node)
Definition: jsregexp.cc:1037
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5367
QuickCheckDetails * quick_check_performed()
Definition: jsregexp.h:1459
static Handle< JSArray > SetLastMatchInfo(Handle< JSArray > last_match_info, Handle< String > subject, int capture_count, int32_t *match)
Definition: jsregexp.cc:688
void AddGuard(Guard *guard, Zone *zone)
Definition: jsregexp.cc:1490
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2329
void set_bm_info(bool not_at_start, BoyerMooreLookahead *bm)
Definition: jsregexp.h:690
void AddInverse(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5950
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.h:1516
virtual int min_match()=0
void AddCaseEquivalents(ZoneList< CharacterRange > *ranges, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5301
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4192
virtual void ReadStackPointerFromRegister(int reg)=0
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2305
static void IrregexpInitialize(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, int capture_register_count)
Definition: jsregexp.cc:516
virtual void IfRegisterLT(int reg, int comparand, Label *if_lt)=0
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function not JSFunction itself flushes the cache of optimized code for closures on every GC functions with arguments object maximum number of escape analysis fix point iterations allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms concurrent on stack replacement do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes number of stack frames inspected by the profiler percentage of ICs that must have type info to allow optimization extra verbose compilation tracing generate extra code(assertions) for debugging") DEFINE_bool(code_comments
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, Label *on_no_match)=0
TriBool at_start()
Definition: jsregexp.h:1449
Label * backtrack()
Definition: jsregexp.h:1453
static RegExpImpl::IrregexpResult Match(Isolate *isolate, Handle< ByteArray > code, Handle< String > subject, int *captures, int start_position)
Handle< String > NewStringFromTwoByte(Vector< const uc16 > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:282
Handle< ByteArray > NewByteArray(int length, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:738
const Register pc
QuickCheckDetails quick_check_details
Definition: jsregexp.cc:3568
static Handle< Object > Exec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:234
Vector< const uc16 > ToUC16Vector()
Definition: objects.h:8757
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2432
static int IrregexpExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, int32_t *output, int output_size)
Definition: jsregexp.cc:552
const int kMaxLookaheadForBoyerMoore
Definition: jsregexp.cc:139
Zone * zone() const
Definition: jsregexp.h:682
ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b)
Definition: jsregexp.h:1231
int length() const
Definition: utils.h:420
#define ASSERT_LE(v1, v2)
Definition: checks.h:334
virtual void CheckNotAtStart(Label *on_not_at_start)=0
void add_action(DeferredAction *new_action)
Definition: jsregexp.h:1467
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3513
bool has_pending_exception()
Definition: isolate.h:587
BoyerMooreLookahead(int length, RegExpCompiler *compiler, Zone *zone)
Definition: jsregexp.cc:3662
virtual void CheckCharacter(unsigned c, Label *on_equal)=0
void SaveBMInfo(BoyerMooreLookahead *bm, bool not_at_start, int offset)
Definition: jsregexp.h:664
static const int kIrregexpCaptureCountIndex
Definition: objects.h:7956
void AddRange(CharacterRange range)
Definition: jsregexp.h:1533
void set_being_calculated(bool b)
Definition: jsregexp.h:1098
static void AtomCompile(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, Handle< String > match_pattern)
Definition: jsregexp.cc:258
int characters_preloaded()
Definition: jsregexp.h:1456
DeferredAction * actions()
Definition: jsregexp.h:1429
static const int kNodeIsTooComplexForGreedyLoops
Definition: jsregexp.h:623
uc16 from()
Definition: jsregexp.h:353
void PutRegExp(Handle< String > source, JSRegExp::Flags flags, Handle< FixedArray > data)
Vector< const char > CStrVector(const char *data)
Definition: utils.h:574
DispatchTable * GetTable(bool ignore_case)
Definition: jsregexp.cc:962
static int IrregexpMaxRegisterCount(FixedArray *re)
Definition: jsregexp.cc:485
int EatsAtLeastHelper(int still_to_find, int budget, RegExpNode *ignore_this_node, bool not_at_start)
Definition: jsregexp.cc:2402
void CountCharacter(int character)
Definition: jsregexp.cc:980
RegExpNode * on_success()
Definition: jsregexp.h:741
static void SetIrregexpMaxRegisterCount(FixedArray *re, int value)
Definition: jsregexp.cc:491
BoyerMooreLookahead * bm_info(bool not_at_start)
Definition: jsregexp.h:678
static int AtomExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, int32_t *output, int output_size)
Definition: jsregexp.cc:283
#define ASSERT_LT(v1, v2)
Definition: checks.h:333
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2813
static const int kLastMatchOverhead
Definition: jsregexp.h:185
struct v8::internal::ActionNode::@17::@19 u_increment_register
void set_from(uc16 value)
Definition: jsregexp.h:279
FlatContent GetFlatContent()
Definition: objects.cc:8229
int bound_checked_up_to()
Definition: jsregexp.h:1457
virtual int GreedyLoopTextLength()
Definition: jsregexp.h:624
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2422
virtual bool CheckSpecialCharacterClass(uc16 type, Label *on_no_match)
static const int kMaxRecursion
Definition: jsregexp.cc:1046
void AddContinueAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3506
bool GetStoredPosition(int reg, int *cp_offset)
Definition: jsregexp.cc:1190
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2345
static AssertionNode * AfterNewline(RegExpNode *on_success)
Definition: jsregexp.h:918
int from() const
Definition: jsregexp.h:727
FrequencyCollator * frequency_collator()
Definition: jsregexp.cc:1055
void Sort(int(*cmp)(const T *x, const T *y))
static AssertionNode * AtNonBoundary(RegExpNode *on_success)
Definition: jsregexp.h:915
bool is_null() const
Definition: handles.h:81
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset)=0
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function info
Definition: flags.cc:317
static const int kMaxWidth
Definition: unicode.h:268
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.cc:5759
static const int kAtomPatternIndex
Definition: objects.h:7932
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2553
virtual void Bind(Label *label)=0
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)=0
int Count(int map_number)
Definition: jsregexp.h:1287
#define UNIMPLEMENTED()
Definition: checks.h:50
uint16_t uc16
Definition: globals.h:309
void MakeCaseIndependent(bool is_ascii)
Definition: jsregexp.cc:3423
virtual void CheckCharacterLT(uc16 limit, Label *on_less)=0
ZoneList< GuardedAlternative > * alternatives()
Definition: jsregexp.h:1078
virtual bool IsAnchoredAtStart()
Definition: ast.h:2508
static int code_index(bool is_ascii)
Definition: objects.h:7900
static void PrintError(const char *format,...)
static Handle< T > null()
Definition: handles.h:80
int SearchString(Isolate *isolate, Vector< const SubjectChar > subject, Vector< const PatternChar > pattern, int start_index)
void SetRest(int from_map)
Definition: jsregexp.h:1313
static Result Match(Handle< Code > regexp, Handle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
#define ASSERT_EQ(v1, v2)
Definition: checks.h:330
void ForEach(Callback *callback)
Definition: jsregexp.h:387
virtual void AppendToText(RegExpText *text, Zone *zone)
Definition: jsregexp.cc:918
OutSet * Extend(unsigned value, Zone *zone)
Definition: jsregexp.cc:5541
static void DotPrint(const char *label, RegExpNode *node, bool ignore_case)
static const uchar kBadChar
Definition: unicode.h:162
void USE(T)
Definition: globals.h:341
struct v8::internal::ActionNode::@17::@18 u_store_register
StringCharacterStream *const stream_
ZoneList< GuardedAlternative > * alternatives_
Definition: jsregexp.h:1104
virtual void CheckCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_equal)=0
ContainedInLattice AddRange(ContainedInLattice containment, const int *ranges, int ranges_length, Interval new_range)
Definition: jsregexp.cc:114
virtual void IfRegisterEqPos(int reg, Label *if_eq)=0
virtual int GreedyLoopTextLength()
Definition: jsregexp.cc:3442
static ByteArray * IrregexpByteCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:506
static FixedArray * cast(Object *obj)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3169
void set_current_expansion_factor(int value)
Definition: jsregexp.cc:1058
static ActionNode * ClearCaptures(Interval range, RegExpNode *on_success)
Definition: jsregexp.cc:1527
void Set(int map_number, int character)
Definition: jsregexp.h:1293
static int IrregexpNumberOfRegisters(FixedArray *re)
Definition: jsregexp.cc:501
virtual void CheckBitInTable(Handle< ByteArray > table, Label *on_bit_set)=0
unibrow::Mapping< unibrow::CanonicalizationRange > * jsregexp_canonrange()
Definition: isolate.h:930
static const int kCompilationErrorValue
Definition: objects.h:7983
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
Definition: list-inl.h:39
Definition: jsregexp.h:348
bool Rationalize(bool ascii)
Definition: jsregexp.cc:2453
Object * get(int index)
Definition: objects-inl.h:2127
virtual void CheckGreedyLoop(Label *on_tos_equals_current_position)=0
void AddLoopAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3499
virtual void Accept(NodeVisitor *visitor)=0
RegExpNode * loop_node()
Definition: jsregexp.h:1178
static Handle< Object > Compile(Handle< JSRegExp > re, Handle< String > pattern, Handle< String > flags)
Definition: jsregexp.cc:171
ActionNode::ActionType action_type()
Definition: jsregexp.h:1364
ZoneList< Guard * > * guards()
Definition: jsregexp.h:1054
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3928
static Handle< Object > IrregexpExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:640
#define FOR_EACH_NODE_TYPE(VISIT)
Definition: jsregexp.h:401
Vector< const uint8_t > ToOneByteVector()
Definition: objects.h:8751
T Min(T a, T b)
Definition: utils.h:234
void Call(uc16 from, DispatchTable::Entry entry)
Definition: jsregexp.cc:5272
virtual int max_match()=0
void Flush(RegExpCompiler *compiler, RegExpNode *successor)
Definition: jsregexp.cc:1378
static const int kUninitializedValue
Definition: objects.h:7979
void set_quick_check_performed(QuickCheckDetails *d)
Definition: jsregexp.h:1478
struct v8::internal::ActionNode::@17::@23 u_clear_captures
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function not JSFunction itself flushes the cache of optimized code for closures on every GC functions with arguments object maximum number of escape analysis fix point iterations allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms concurrent on stack replacement do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes number of stack frames inspected by the profiler percentage of ICs that must have type info to allow optimization extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long expose natives in global object expose freeBuffer extension expose gc extension under the specified name expose externalize string extension number of stack frames to capture disable builtin natives files print name of functions for which code is generated use random jit cookie to mask large constants trace lazy optimization use adaptive optimizations always try to OSR functions trace optimize function deoptimization minimum length for automatic enable preparsing maximum number of optimization attempts before giving up cache prototype transitions trace debugging JSON request response trace out of bounds accesses to external arrays trace_js_array_abuse automatically set the debug break flag when debugger commands are in the queue abort by crashing maximum length of function source code printed in a stack trace max size of the new max size of the old max size of executable always perform global GCs print one trace line following each garbage collection do not print trace line after scavenger collection print statistics of the maximum memory committed for the heap in name
Definition: flags.cc:505
const int kSampleSize
Definition: process.cc:616
void InvalidateCurrentCharacter()
Definition: jsregexp.cc:3399
#define ARRAY_SIZE(a)
Definition: globals.h:333
virtual void CheckNotCharacter(unsigned c, Label *on_not_equal)=0
static void AddClassEscape(uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5199
OutSet * out_set()
Definition: jsregexp.h:359
static int saved_code_index(bool is_ascii)
Definition: objects.h:7908
static const int32_t kMaxOneByteCharCode
Definition: objects.h:8914
int Frequency(int in_character)
Definition: jsregexp.cc:988
virtual void CheckPosition(int cp_offset, Label *on_outside_input)
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:638
RecursionCheck(RegExpCompiler *compiler)
Definition: jsregexp.cc:1083
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)=0
static CharacterRange Singleton(uc16 value)
Definition: jsregexp.h:267
unsigned int uchar
Definition: unicode.h:40
static AssertionNode * AtEnd(RegExpNode *on_success)
Definition: jsregexp.h:906
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_not_equal)=0
virtual Handle< HeapObject > GetCode(Handle< String > source)=0
AddDispatchRange(DispatchTableConstructor *constructor)
Definition: jsregexp.cc:5908
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start)
Definition: jsregexp.cc:3154
virtual void IfRegisterGE(int reg, int comparand, Label *if_ge)=0
void set_stop_node(RegExpNode *node)
Definition: jsregexp.h:1473
virtual void PushBacktrack(Label *label)=0
static CompilationResult Compile(RegExpCompileData *input, bool ignore_case, bool global, bool multiline, Handle< String > pattern, Handle< String > sample_subject, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:6001
static void SetCapture(FixedArray *array, int index, int to)
Definition: jsregexp.h:214
static AssertionNode * AtBoundary(RegExpNode *on_success)
Definition: jsregexp.h:912
bool mentions_reg(int reg)
Definition: jsregexp.cc:1179
const int kNoRegister
Definition: constants-arm.h:66