v8  3.11.10(node0.8.26)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
jsregexp.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #include "v8.h"
29 
30 #include "ast.h"
31 #include "compiler.h"
32 #include "execution.h"
33 #include "factory.h"
34 #include "jsregexp.h"
35 #include "platform.h"
36 #include "string-search.h"
37 #include "runtime.h"
38 #include "compilation-cache.h"
39 #include "string-stream.h"
40 #include "parser.h"
41 #include "regexp-macro-assembler.h"
44 #include "regexp-stack.h"
45 
46 #ifndef V8_INTERPRETED_REGEXP
47 #if V8_TARGET_ARCH_IA32
49 #elif V8_TARGET_ARCH_X64
51 #elif V8_TARGET_ARCH_ARM
53 #elif V8_TARGET_ARCH_MIPS
55 #else
56 #error Unsupported target architecture.
57 #endif
58 #endif
59 
60 #include "interpreter-irregexp.h"
61 
62 
63 namespace v8 {
64 namespace internal {
65 
67  Handle<String> pattern,
69  bool* has_pending_exception) {
70  // Call the construct code with 2 arguments.
71  Handle<Object> argv[] = { pattern, flags };
72  return Execution::New(constructor, ARRAY_SIZE(argv), argv,
73  has_pending_exception);
74 }
75 
76 
77 static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
78  int flags = JSRegExp::NONE;
79  for (int i = 0; i < str->length(); i++) {
80  switch (str->Get(i)) {
81  case 'i':
82  flags |= JSRegExp::IGNORE_CASE;
83  break;
84  case 'g':
85  flags |= JSRegExp::GLOBAL;
86  break;
87  case 'm':
88  flags |= JSRegExp::MULTILINE;
89  break;
90  }
91  }
92  return JSRegExp::Flags(flags);
93 }
94 
95 
96 static inline void ThrowRegExpException(Handle<JSRegExp> re,
97  Handle<String> pattern,
98  Handle<String> error_text,
99  const char* message) {
100  Isolate* isolate = re->GetIsolate();
101  Factory* factory = isolate->factory();
102  Handle<FixedArray> elements = factory->NewFixedArray(2);
103  elements->set(0, *pattern);
104  elements->set(1, *error_text);
105  Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
106  Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
107  isolate->Throw(*regexp_err);
108 }
109 
110 
112  const int* ranges,
113  int ranges_length,
114  Interval new_range) {
115  ASSERT((ranges_length & 1) == 1);
116  ASSERT(ranges[ranges_length - 1] == String::kMaxUtf16CodeUnit + 1);
117  if (containment == kLatticeUnknown) return containment;
118  bool inside = false;
119  int last = 0;
120  for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
121  // Consider the range from last to ranges[i].
122  // We haven't got to the new range yet.
123  if (ranges[i] <= new_range.from()) continue;
124  // New range is wholly inside last-ranges[i]. Note that new_range.to() is
125  // inclusive, but the values in ranges are not.
126  if (last <= new_range.from() && new_range.to() < ranges[i]) {
127  return Combine(containment, inside ? kLatticeIn : kLatticeOut);
128  }
129  return kLatticeUnknown;
130  }
131  return containment;
132 }
133 
134 
135 // More makes code generation slower, less makes V8 benchmark score lower.
137 // In a 3-character pattern you can maximally step forwards 3 characters
138 // at a time, which is not always enough to pay for the extra logic.
140 
141 
142 // Identifies the sort of regexps where the regexp engine is faster
143 // than the code used for atom matches.
144 static bool HasFewDifferentCharacters(Handle<String> pattern) {
145  int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
146  if (length <= kPatternTooShortForBoyerMoore) return false;
147  const int kMod = 128;
148  bool character_found[kMod];
149  int different = 0;
150  memset(&character_found[0], 0, sizeof(character_found));
151  for (int i = 0; i < length; i++) {
152  int ch = (pattern->Get(i) & (kMod - 1));
153  if (!character_found[ch]) {
154  character_found[ch] = true;
155  different++;
156  // We declare a regexp low-alphabet if it has at least 3 times as many
157  // characters as it has different characters.
158  if (different * 3 > length) return false;
159  }
160  }
161  return true;
162 }
163 
164 
165 // Generic RegExp methods. Dispatches to implementation specific methods.
166 
167 
169  Handle<String> pattern,
170  Handle<String> flag_str) {
171  Isolate* isolate = re->GetIsolate();
172  JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
173  CompilationCache* compilation_cache = isolate->compilation_cache();
174  Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags);
175  bool in_cache = !cached.is_null();
176  LOG(isolate, RegExpCompileEvent(re, in_cache));
177 
178  Handle<Object> result;
179  if (in_cache) {
180  re->set_data(*cached);
181  return re;
182  }
183  pattern = FlattenGetString(pattern);
184  ZoneScope zone_scope(isolate, DELETE_ON_EXIT);
185  PostponeInterruptsScope postpone(isolate);
186  RegExpCompileData parse_result;
187  FlatStringReader reader(isolate, pattern);
188  if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
189  &parse_result)) {
190  // Throw an exception if we fail to parse the pattern.
191  ThrowRegExpException(re,
192  pattern,
193  parse_result.error,
194  "malformed_regexp");
195  return Handle<Object>::null();
196  }
197 
198  bool has_been_compiled = false;
199 
200  if (parse_result.simple &&
201  !flags.is_ignore_case() &&
202  !HasFewDifferentCharacters(pattern)) {
203  // Parse-tree is a single atom that is equal to the pattern.
204  AtomCompile(re, pattern, flags, pattern);
205  has_been_compiled = true;
206  } else if (parse_result.tree->IsAtom() &&
207  !flags.is_ignore_case() &&
208  parse_result.capture_count == 0) {
209  RegExpAtom* atom = parse_result.tree->AsAtom();
210  Vector<const uc16> atom_pattern = atom->data();
211  Handle<String> atom_string =
212  isolate->factory()->NewStringFromTwoByte(atom_pattern);
213  if (!HasFewDifferentCharacters(atom_string)) {
214  AtomCompile(re, pattern, flags, atom_string);
215  has_been_compiled = true;
216  }
217  }
218  if (!has_been_compiled) {
219  IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
220  }
221  ASSERT(re->data()->IsFixedArray());
222  // Compilation succeeded so the data is set on the regexp
223  // and we can store it in the cache.
224  Handle<FixedArray> data(FixedArray::cast(re->data()));
225  compilation_cache->PutRegExp(pattern, flags, data);
226 
227  return re;
228 }
229 
230 
232  Handle<String> subject,
233  int index,
234  Handle<JSArray> last_match_info,
235  Zone* zone) {
236  switch (regexp->TypeTag()) {
237  case JSRegExp::ATOM:
238  return AtomExec(regexp, subject, index, last_match_info);
239  case JSRegExp::IRREGEXP: {
240  Handle<Object> result =
241  IrregexpExec(regexp, subject, index, last_match_info, zone);
242  ASSERT(!result.is_null() ||
243  regexp->GetIsolate()->has_pending_exception());
244  return result;
245  }
246  default:
247  UNREACHABLE();
248  return Handle<Object>::null();
249  }
250 }
251 
252 
253 // RegExp Atom implementation: Simple string search using indexOf.
254 
255 
257  Handle<String> pattern,
258  JSRegExp::Flags flags,
259  Handle<String> match_pattern) {
260  re->GetIsolate()->factory()->SetRegExpAtomData(re,
262  pattern,
263  flags,
264  match_pattern);
265 }
266 
267 
268 static void SetAtomLastCapture(FixedArray* array,
269  String* subject,
270  int from,
271  int to) {
272  NoHandleAllocation no_handles;
274  RegExpImpl::SetLastSubject(array, subject);
275  RegExpImpl::SetLastInput(array, subject);
276  RegExpImpl::SetCapture(array, 0, from);
277  RegExpImpl::SetCapture(array, 1, to);
278 }
279 
280 
282  Handle<String> subject,
283  int index,
284  Handle<JSArray> last_match_info) {
285  Isolate* isolate = re->GetIsolate();
286 
287  ASSERT(0 <= index);
288  ASSERT(index <= subject->length());
289 
290  if (!subject->IsFlat()) FlattenString(subject);
291  AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
292 
293  String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
294  int needle_len = needle->length();
295  ASSERT(needle->IsFlat());
296 
297  if (needle_len != 0) {
298  if (index + needle_len > subject->length()) {
299  return isolate->factory()->null_value();
300  }
301 
302  String::FlatContent needle_content = needle->GetFlatContent();
303  String::FlatContent subject_content = subject->GetFlatContent();
304  ASSERT(needle_content.IsFlat());
305  ASSERT(subject_content.IsFlat());
306  // dispatch on type of strings
307  index = (needle_content.IsAscii()
308  ? (subject_content.IsAscii()
309  ? SearchString(isolate,
310  subject_content.ToAsciiVector(),
311  needle_content.ToAsciiVector(),
312  index)
313  : SearchString(isolate,
314  subject_content.ToUC16Vector(),
315  needle_content.ToAsciiVector(),
316  index))
317  : (subject_content.IsAscii()
318  ? SearchString(isolate,
319  subject_content.ToAsciiVector(),
320  needle_content.ToUC16Vector(),
321  index)
322  : SearchString(isolate,
323  subject_content.ToUC16Vector(),
324  needle_content.ToUC16Vector(),
325  index)));
326  if (index == -1) return isolate->factory()->null_value();
327  }
328  ASSERT(last_match_info->HasFastObjectElements());
329 
330  {
331  NoHandleAllocation no_handles;
332  FixedArray* array = FixedArray::cast(last_match_info->elements());
333  SetAtomLastCapture(array, *subject, index, index + needle_len);
334  }
335  return last_match_info;
336 }
337 
338 
339 // Irregexp implementation.
340 
341 // Ensures that the regexp object contains a compiled version of the
342 // source for either ASCII or non-ASCII strings.
343 // If the compiled version doesn't already exist, it is compiled
344 // from the source pattern.
345 // If compilation fails, an exception is thrown and this function
346 // returns false.
347 bool RegExpImpl::EnsureCompiledIrregexp(
348  Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii,
349  Zone* zone) {
350  Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
351 #ifdef V8_INTERPRETED_REGEXP
352  if (compiled_code->IsByteArray()) return true;
353 #else // V8_INTERPRETED_REGEXP (RegExp native code)
354  if (compiled_code->IsCode()) return true;
355 #endif
356  // We could potentially have marked this as flushable, but have kept
357  // a saved version if we did not flush it yet.
358  Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
359  if (saved_code->IsCode()) {
360  // Reinstate the code in the original place.
361  re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
362  ASSERT(compiled_code->IsSmi());
363  return true;
364  }
365  return CompileIrregexp(re, sample_subject, is_ascii, zone);
366 }
367 
368 
369 static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
370  bool is_ascii,
371  Handle<String> error_message,
372  Isolate* isolate) {
373  Factory* factory = isolate->factory();
374  Handle<FixedArray> elements = factory->NewFixedArray(2);
375  elements->set(0, re->Pattern());
376  elements->set(1, *error_message);
377  Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
378  Handle<Object> regexp_err =
379  factory->NewSyntaxError("malformed_regexp", array);
380  isolate->Throw(*regexp_err);
381  return false;
382 }
383 
384 
385 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
386  Handle<String> sample_subject,
387  bool is_ascii,
388  Zone* zone) {
389  // Compile the RegExp.
390  Isolate* isolate = re->GetIsolate();
391  ZoneScope zone_scope(isolate, DELETE_ON_EXIT);
392  PostponeInterruptsScope postpone(isolate);
393  // If we had a compilation error the last time this is saved at the
394  // saved code index.
395  Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
396  // When arriving here entry can only be a smi, either representing an
397  // uncompiled regexp, a previous compilation error, or code that has
398  // been flushed.
399  ASSERT(entry->IsSmi());
400  int entry_value = Smi::cast(entry)->value();
401  ASSERT(entry_value == JSRegExp::kUninitializedValue ||
402  entry_value == JSRegExp::kCompilationErrorValue ||
403  (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
404 
405  if (entry_value == JSRegExp::kCompilationErrorValue) {
406  // A previous compilation failed and threw an error which we store in
407  // the saved code index (we store the error message, not the actual
408  // error). Recreate the error object and throw it.
409  Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
410  ASSERT(error_string->IsString());
411  Handle<String> error_message(String::cast(error_string));
412  CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
413  return false;
414  }
415 
416  JSRegExp::Flags flags = re->GetFlags();
417 
418  Handle<String> pattern(re->Pattern());
419  if (!pattern->IsFlat()) FlattenString(pattern);
420  RegExpCompileData compile_data;
421  FlatStringReader reader(isolate, pattern);
422  if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
423  &compile_data)) {
424  // Throw an exception if we fail to parse the pattern.
425  // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
426  ThrowRegExpException(re,
427  pattern,
428  compile_data.error,
429  "malformed_regexp");
430  return false;
431  }
432  RegExpEngine::CompilationResult result =
433  RegExpEngine::Compile(&compile_data,
434  flags.is_ignore_case(),
435  flags.is_global(),
436  flags.is_multiline(),
437  pattern,
438  sample_subject,
439  is_ascii,
440  zone);
441  if (result.error_message != NULL) {
442  // Unable to compile regexp.
443  Handle<String> error_message =
444  isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));
445  CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
446  return false;
447  }
448 
449  Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
450  data->set(JSRegExp::code_index(is_ascii), result.code);
451  int register_max = IrregexpMaxRegisterCount(*data);
452  if (result.num_registers > register_max) {
453  SetIrregexpMaxRegisterCount(*data, result.num_registers);
454  }
455 
456  return true;
457 }
458 
459 
461  return Smi::cast(
463 }
464 
465 
468 }
469 
470 
472  return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
473 }
474 
475 
478 }
479 
480 
482  return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
483 }
484 
485 
487  return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
488 }
489 
490 
492  Handle<String> pattern,
493  JSRegExp::Flags flags,
494  int capture_count) {
495  // Initialize compiled code entries to null.
496  re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
498  pattern,
499  flags,
500  capture_count);
501 }
502 
503 
505  Handle<String> subject,
506  Zone* zone) {
507  if (!subject->IsFlat()) FlattenString(subject);
508 
509  // Check the asciiness of the underlying storage.
510  bool is_ascii = subject->IsAsciiRepresentationUnderneath();
511  if (!EnsureCompiledIrregexp(regexp, subject, is_ascii, zone)) return -1;
512 
513 #ifdef V8_INTERPRETED_REGEXP
514  // Byte-code regexp needs space allocated for all its registers.
515  return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
516 #else // V8_INTERPRETED_REGEXP
517  // Native regexp only needs room to output captures. Registers are handled
518  // internally.
519  return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
520 #endif // V8_INTERPRETED_REGEXP
521 }
522 
523 
525  int registers_per_match,
526  int* max_matches) {
527 #ifdef V8_INTERPRETED_REGEXP
528  // Global loop in interpreted regexp is not implemented. Therefore we choose
529  // the size of the offsets vector so that it can only store one match.
530  *max_matches = 1;
531  return registers_per_match;
532 #else // V8_INTERPRETED_REGEXP
533  int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize);
534  *max_matches = size / registers_per_match;
535  return size;
536 #endif // V8_INTERPRETED_REGEXP
537 }
538 
539 
541  Handle<JSRegExp> regexp,
542  Handle<String> subject,
543  int index,
544  Vector<int> output,
545  Zone* zone) {
546  Isolate* isolate = regexp->GetIsolate();
547 
548  Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
549 
550  ASSERT(index >= 0);
551  ASSERT(index <= subject->length());
552  ASSERT(subject->IsFlat());
553 
554  bool is_ascii = subject->IsAsciiRepresentationUnderneath();
555 
556 #ifndef V8_INTERPRETED_REGEXP
557  ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
558  do {
559  EnsureCompiledIrregexp(regexp, subject, is_ascii, zone);
560  Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
563  subject,
564  output.start(),
565  output.length(),
566  index,
567  isolate);
570  isolate->has_pending_exception());
572  static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
574  static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
576  == RE_EXCEPTION);
577  return static_cast<IrregexpResult>(res);
578  }
579  // If result is RETRY, the string has changed representation, and we
580  // must restart from scratch.
581  // In this case, it means we must make sure we are prepared to handle
582  // the, potentially, different subject (the string can switch between
583  // being internal and external, and even between being ASCII and UC16,
584  // but the characters are always the same).
585  IrregexpPrepare(regexp, subject, zone);
586  is_ascii = subject->IsAsciiRepresentationUnderneath();
587  } while (true);
588  UNREACHABLE();
589  return RE_EXCEPTION;
590 #else // V8_INTERPRETED_REGEXP
591 
592  ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
593  // We must have done EnsureCompiledIrregexp, so we can get the number of
594  // registers.
595  int* register_vector = output.start();
596  int number_of_capture_registers =
597  (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
598  for (int i = number_of_capture_registers - 1; i >= 0; i--) {
599  register_vector[i] = -1;
600  }
601  Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
602 
604  byte_codes,
605  subject,
606  register_vector,
607  index);
608  if (result == RE_EXCEPTION) {
609  ASSERT(!isolate->has_pending_exception());
610  isolate->StackOverflow();
611  }
612  return result;
613 #endif // V8_INTERPRETED_REGEXP
614 }
615 
616 
618  Handle<String> subject,
619  int previous_index,
620  Handle<JSArray> last_match_info,
621  Zone* zone) {
622  Isolate* isolate = jsregexp->GetIsolate();
623  ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
624 
625  // Prepare space for the return values.
626 #ifdef V8_INTERPRETED_REGEXP
627 #ifdef DEBUG
628  if (FLAG_trace_regexp_bytecodes) {
629  String* pattern = jsregexp->Pattern();
630  PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
631  PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
632  }
633 #endif
634 #endif
635  int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject, zone);
636  if (required_registers < 0) {
637  // Compiling failed with an exception.
638  ASSERT(isolate->has_pending_exception());
639  return Handle<Object>::null();
640  }
641 
642  OffsetsVector registers(required_registers, isolate);
643 
644  int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index,
645  Vector<int>(registers.vector(),
646  registers.length()),
647  zone);
648  if (res == RE_SUCCESS) {
649  int capture_register_count =
650  (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
651  last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
652  AssertNoAllocation no_gc;
653  int* register_vector = registers.vector();
654  FixedArray* array = FixedArray::cast(last_match_info->elements());
655  for (int i = 0; i < capture_register_count; i += 2) {
656  SetCapture(array, i, register_vector[i]);
657  SetCapture(array, i + 1, register_vector[i + 1]);
658  }
659  SetLastCaptureCount(array, capture_register_count);
660  SetLastSubject(array, *subject);
661  SetLastInput(array, *subject);
662  return last_match_info;
663  }
664  if (res == RE_EXCEPTION) {
665  ASSERT(isolate->has_pending_exception());
666  return Handle<Object>::null();
667  }
668  ASSERT(res == RE_FAILURE);
669  return isolate->factory()->null_value();
670 }
671 
672 
673 // -------------------------------------------------------------------
674 // Implementation of the Irregexp regular expression engine.
675 //
676 // The Irregexp regular expression engine is intended to be a complete
677 // implementation of ECMAScript regular expressions. It generates either
678 // bytecodes or native code.
679 
680 // The Irregexp regexp engine is structured in three steps.
681 // 1) The parser generates an abstract syntax tree. See ast.cc.
682 // 2) From the AST a node network is created. The nodes are all
683 // subclasses of RegExpNode. The nodes represent states when
684 // executing a regular expression. Several optimizations are
685 // performed on the node network.
686 // 3) From the nodes we generate either byte codes or native code
687 // that can actually execute the regular expression (perform
688 // the search). The code generation step is described in more
689 // detail below.
690 
691 // Code generation.
692 //
693 // The nodes are divided into four main categories.
694 // * Choice nodes
695 // These represent places where the regular expression can
696 // match in more than one way. For example on entry to an
697 // alternation (foo|bar) or a repetition (*, +, ? or {}).
698 // * Action nodes
699 // These represent places where some action should be
700 // performed. Examples include recording the current position
701 // in the input string to a register (in order to implement
702 // captures) or other actions on register for example in order
703 // to implement the counters needed for {} repetitions.
704 // * Matching nodes
705 // These attempt to match some element part of the input string.
706 // Examples of elements include character classes, plain strings
707 // or back references.
708 // * End nodes
709 // These are used to implement the actions required on finding
710 // a successful match or failing to find a match.
711 //
712 // The code generated (whether as byte codes or native code) maintains
713 // some state as it runs. This consists of the following elements:
714 //
715 // * The capture registers. Used for string captures.
716 // * Other registers. Used for counters etc.
717 // * The current position.
718 // * The stack of backtracking information. Used when a matching node
719 // fails to find a match and needs to try an alternative.
720 //
721 // Conceptual regular expression execution model:
722 //
723 // There is a simple conceptual model of regular expression execution
724 // which will be presented first. The actual code generated is a more
725 // efficient simulation of the simple conceptual model:
726 //
727 // * Choice nodes are implemented as follows:
728 // For each choice except the last {
729 // push current position
730 // push backtrack code location
731 // <generate code to test for choice>
732 // backtrack code location:
733 // pop current position
734 // }
735 // <generate code to test for last choice>
736 //
737 // * Actions nodes are generated as follows
738 // <push affected registers on backtrack stack>
739 // <generate code to perform action>
740 // push backtrack code location
741 // <generate code to test for following nodes>
742 // backtrack code location:
743 // <pop affected registers to restore their state>
744 // <pop backtrack location from stack and go to it>
745 //
746 // * Matching nodes are generated as follows:
747 // if input string matches at current position
748 // update current position
749 // <generate code to test for following nodes>
750 // else
751 // <pop backtrack location from stack and go to it>
752 //
753 // Thus it can be seen that the current position is saved and restored
754 // by the choice nodes, whereas the registers are saved and restored by
755 // by the action nodes that manipulate them.
756 //
757 // The other interesting aspect of this model is that nodes are generated
758 // at the point where they are needed by a recursive call to Emit(). If
759 // the node has already been code generated then the Emit() call will
760 // generate a jump to the previously generated code instead. In order to
761 // limit recursion it is possible for the Emit() function to put the node
762 // on a work list for later generation and instead generate a jump. The
763 // destination of the jump is resolved later when the code is generated.
764 //
765 // Actual regular expression code generation.
766 //
767 // Code generation is actually more complicated than the above. In order
768 // to improve the efficiency of the generated code some optimizations are
769 // performed
770 //
771 // * Choice nodes have 1-character lookahead.
772 // A choice node looks at the following character and eliminates some of
773 // the choices immediately based on that character. This is not yet
774 // implemented.
775 // * Simple greedy loops store reduced backtracking information.
776 // A quantifier like /.*foo/m will greedily match the whole input. It will
777 // then need to backtrack to a point where it can match "foo". The naive
778 // implementation of this would push each character position onto the
779 // backtracking stack, then pop them off one by one. This would use space
780 // proportional to the length of the input string. However since the "."
781 // can only match in one way and always has a constant length (in this case
782 // of 1) it suffices to store the current position on the top of the stack
783 // once. Matching now becomes merely incrementing the current position and
784 // backtracking becomes decrementing the current position and checking the
785 // result against the stored current position. This is faster and saves
786 // space.
787 // * The current state is virtualized.
788 // This is used to defer expensive operations until it is clear that they
789 // are needed and to generate code for a node more than once, allowing
790 // specialized an efficient versions of the code to be created. This is
791 // explained in the section below.
792 //
793 // Execution state virtualization.
794 //
795 // Instead of emitting code, nodes that manipulate the state can record their
796 // manipulation in an object called the Trace. The Trace object can record a
797 // current position offset, an optional backtrack code location on the top of
798 // the virtualized backtrack stack and some register changes. When a node is
799 // to be emitted it can flush the Trace or update it. Flushing the Trace
800 // will emit code to bring the actual state into line with the virtual state.
801 // Avoiding flushing the state can postpone some work (e.g. updates of capture
802 // registers). Postponing work can save time when executing the regular
803 // expression since it may be found that the work never has to be done as a
804 // failure to match can occur. In addition it is much faster to jump to a
805 // known backtrack code location than it is to pop an unknown backtrack
806 // location from the stack and jump there.
807 //
808 // The virtual state found in the Trace affects code generation. For example
809 // the virtual state contains the difference between the actual current
810 // position and the virtual current position, and matching code needs to use
811 // this offset to attempt a match in the correct location of the input
812 // string. Therefore code generated for a non-trivial trace is specialized
813 // to that trace. The code generator therefore has the ability to generate
814 // code for each node several times. In order to limit the size of the
815 // generated code there is an arbitrary limit on how many specialized sets of
816 // code may be generated for a given node. If the limit is reached, the
817 // trace is flushed and a generic version of the code for a node is emitted.
818 // This is subsequently used for that node. The code emitted for non-generic
819 // trace is not recorded in the node and so it cannot currently be reused in
820 // the event that code generation is requested for an identical trace.
821 
822 
824  UNREACHABLE();
825 }
826 
827 
829  text->AddElement(TextElement::Atom(this), zone);
830 }
831 
832 
834  text->AddElement(TextElement::CharClass(this), zone);
835 }
836 
837 
839  for (int i = 0; i < elements()->length(); i++)
840  text->AddElement(elements()->at(i), zone);
841 }
842 
843 
845  TextElement result = TextElement(ATOM);
846  result.data.u_atom = atom;
847  return result;
848 }
849 
850 
852  RegExpCharacterClass* char_class) {
854  result.data.u_char_class = char_class;
855  return result;
856 }
857 
858 
860  if (type == ATOM) {
861  return data.u_atom->length();
862  } else {
863  ASSERT(type == CHAR_CLASS);
864  return 1;
865  }
866 }
867 
868 
870  if (table_ == NULL) {
871  table_ = new(zone()) DispatchTable(zone());
872  DispatchTableConstructor cons(table_, ignore_case, zone());
873  cons.BuildTable(this);
874  }
875  return table_;
876 }
877 
878 
880  public:
881  FrequencyCollator() : total_samples_(0) {
882  for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
883  frequencies_[i] = CharacterFrequency(i);
884  }
885  }
886 
887  void CountCharacter(int character) {
888  int index = (character & RegExpMacroAssembler::kTableMask);
889  frequencies_[index].Increment();
890  total_samples_++;
891  }
892 
893  // Does not measure in percent, but rather per-128 (the table size from the
894  // regexp macro assembler).
895  int Frequency(int in_character) {
896  ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);
897  if (total_samples_ < 1) return 1; // Division by zero.
898  int freq_in_per128 =
899  (frequencies_[in_character].counter() * 128) / total_samples_;
900  return freq_in_per128;
901  }
902 
903  private:
904  class CharacterFrequency {
905  public:
906  CharacterFrequency() : counter_(0), character_(-1) { }
907  explicit CharacterFrequency(int character)
908  : counter_(0), character_(character) { }
909 
910  void Increment() { counter_++; }
911  int counter() { return counter_; }
912  int character() { return character_; }
913 
914  private:
915  int counter_;
916  int character_;
917  };
918 
919 
920  private:
921  CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
922  int total_samples_;
923 };
924 
925 
927  public:
928  RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii,
929  Zone* zone);
930 
932  if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
933  reg_exp_too_big_ = true;
934  return next_register_;
935  }
936  return next_register_++;
937  }
938 
940  RegExpNode* start,
941  int capture_count,
942  Handle<String> pattern);
943 
944  inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
945 
946  static const int kImplementationOffset = 0;
947  static const int kNumberOfRegistersOffset = 0;
948  static const int kCodeOffset = 1;
949 
950  RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
951  EndNode* accept() { return accept_; }
952 
953  static const int kMaxRecursion = 100;
954  inline int recursion_depth() { return recursion_depth_; }
955  inline void IncrementRecursionDepth() { recursion_depth_++; }
956  inline void DecrementRecursionDepth() { recursion_depth_--; }
957 
958  void SetRegExpTooBig() { reg_exp_too_big_ = true; }
959 
960  inline bool ignore_case() { return ignore_case_; }
961  inline bool ascii() { return ascii_; }
962  FrequencyCollator* frequency_collator() { return &frequency_collator_; }
963 
964  int current_expansion_factor() { return current_expansion_factor_; }
966  current_expansion_factor_ = value;
967  }
968 
969  Zone* zone() const { return zone_; }
970 
971  static const int kNoRegister = -1;
972 
973  private:
974  EndNode* accept_;
975  int next_register_;
976  List<RegExpNode*>* work_list_;
977  int recursion_depth_;
978  RegExpMacroAssembler* macro_assembler_;
979  bool ignore_case_;
980  bool ascii_;
981  bool reg_exp_too_big_;
982  int current_expansion_factor_;
983  FrequencyCollator frequency_collator_;
984  Zone* zone_;
985 };
986 
987 
989  public:
990  explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
991  compiler->IncrementRecursionDepth();
992  }
994  private:
995  RegExpCompiler* compiler_;
996 };
997 
998 
999 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
1000  return RegExpEngine::CompilationResult("RegExp too big");
1001 }
1002 
1003 
1004 // Attempts to compile the regexp using an Irregexp code generator. Returns
1005 // a fixed array or a null handle depending on whether it succeeded.
1006 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii,
1007  Zone* zone)
1008  : next_register_(2 * (capture_count + 1)),
1009  work_list_(NULL),
1010  recursion_depth_(0),
1011  ignore_case_(ignore_case),
1012  ascii_(ascii),
1013  reg_exp_too_big_(false),
1014  current_expansion_factor_(1),
1015  frequency_collator_(),
1016  zone_(zone) {
1017  accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
1018  ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
1019 }
1020 
1021 
1023  RegExpMacroAssembler* macro_assembler,
1024  RegExpNode* start,
1025  int capture_count,
1026  Handle<String> pattern) {
1027  Heap* heap = pattern->GetHeap();
1028 
1029  bool use_slow_safe_regexp_compiler = false;
1030  if (heap->total_regexp_code_generated() >
1032  heap->isolate()->memory_allocator()->SizeExecutable() >
1034  use_slow_safe_regexp_compiler = true;
1035  }
1036 
1037  macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
1038 
1039 #ifdef DEBUG
1040  if (FLAG_trace_regexp_assembler)
1041  macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
1042  else
1043 #endif
1044  macro_assembler_ = macro_assembler;
1045 
1046  List <RegExpNode*> work_list(0);
1047  work_list_ = &work_list;
1048  Label fail;
1049  macro_assembler_->PushBacktrack(&fail);
1050  Trace new_trace;
1051  start->Emit(this, &new_trace);
1052  macro_assembler_->Bind(&fail);
1053  macro_assembler_->Fail();
1054  while (!work_list.is_empty()) {
1055  work_list.RemoveLast()->Emit(this, &new_trace);
1056  }
1057  if (reg_exp_too_big_) return IrregexpRegExpTooBig();
1058 
1059  Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
1060  heap->IncreaseTotalRegexpCodeGenerated(code->Size());
1061  work_list_ = NULL;
1062 #ifdef DEBUG
1063  if (FLAG_print_code) {
1064  Handle<Code>::cast(code)->Disassemble(*pattern->ToCString());
1065  }
1066  if (FLAG_trace_regexp_assembler) {
1067  delete macro_assembler_;
1068  }
1069 #endif
1070  return RegExpEngine::CompilationResult(*code, next_register_);
1071 }
1072 
1073 
1075  if (type() == ActionNode::CLEAR_CAPTURES) {
1076  Interval range = static_cast<DeferredClearCaptures*>(this)->range();
1077  return range.Contains(that);
1078  } else {
1079  return reg() == that;
1080  }
1081 }
1082 
1083 
1084 bool Trace::mentions_reg(int reg) {
1085  for (DeferredAction* action = actions_;
1086  action != NULL;
1087  action = action->next()) {
1088  if (action->Mentions(reg))
1089  return true;
1090  }
1091  return false;
1092 }
1093 
1094 
1096  ASSERT_EQ(0, *cp_offset);
1097  for (DeferredAction* action = actions_;
1098  action != NULL;
1099  action = action->next()) {
1100  if (action->Mentions(reg)) {
1101  if (action->type() == ActionNode::STORE_POSITION) {
1102  *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
1103  return true;
1104  } else {
1105  return false;
1106  }
1107  }
1108  }
1109  return false;
1110 }
1111 
1112 
1113 int Trace::FindAffectedRegisters(OutSet* affected_registers,
1114  Zone* zone) {
1115  int max_register = RegExpCompiler::kNoRegister;
1116  for (DeferredAction* action = actions_;
1117  action != NULL;
1118  action = action->next()) {
1119  if (action->type() == ActionNode::CLEAR_CAPTURES) {
1120  Interval range = static_cast<DeferredClearCaptures*>(action)->range();
1121  for (int i = range.from(); i <= range.to(); i++)
1122  affected_registers->Set(i, zone);
1123  if (range.to() > max_register) max_register = range.to();
1124  } else {
1125  affected_registers->Set(action->reg(), zone);
1126  if (action->reg() > max_register) max_register = action->reg();
1127  }
1128  }
1129  return max_register;
1130 }
1131 
1132 
1133 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
1134  int max_register,
1135  OutSet& registers_to_pop,
1136  OutSet& registers_to_clear) {
1137  for (int reg = max_register; reg >= 0; reg--) {
1138  if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
1139  else if (registers_to_clear.Get(reg)) {
1140  int clear_to = reg;
1141  while (reg > 0 && registers_to_clear.Get(reg - 1)) {
1142  reg--;
1143  }
1144  assembler->ClearRegisters(reg, clear_to);
1145  }
1146  }
1147 }
1148 
1149 
1150 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
1151  int max_register,
1152  OutSet& affected_registers,
1153  OutSet* registers_to_pop,
1154  OutSet* registers_to_clear,
1155  Zone* zone) {
1156  // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
1157  const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
1158 
1159  // Count pushes performed to force a stack limit check occasionally.
1160  int pushes = 0;
1161 
1162  for (int reg = 0; reg <= max_register; reg++) {
1163  if (!affected_registers.Get(reg)) {
1164  continue;
1165  }
1166 
1167  // The chronologically first deferred action in the trace
1168  // is used to infer the action needed to restore a register
1169  // to its previous state (or not, if it's safe to ignore it).
1170  enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
1171  DeferredActionUndoType undo_action = IGNORE;
1172 
1173  int value = 0;
1174  bool absolute = false;
1175  bool clear = false;
1176  int store_position = -1;
1177  // This is a little tricky because we are scanning the actions in reverse
1178  // historical order (newest first).
1179  for (DeferredAction* action = actions_;
1180  action != NULL;
1181  action = action->next()) {
1182  if (action->Mentions(reg)) {
1183  switch (action->type()) {
1184  case ActionNode::SET_REGISTER: {
1185  Trace::DeferredSetRegister* psr =
1186  static_cast<Trace::DeferredSetRegister*>(action);
1187  if (!absolute) {
1188  value += psr->value();
1189  absolute = true;
1190  }
1191  // SET_REGISTER is currently only used for newly introduced loop
1192  // counters. They can have a significant previous value if they
1193  // occour in a loop. TODO(lrn): Propagate this information, so
1194  // we can set undo_action to IGNORE if we know there is no value to
1195  // restore.
1196  undo_action = RESTORE;
1197  ASSERT_EQ(store_position, -1);
1198  ASSERT(!clear);
1199  break;
1200  }
1202  if (!absolute) {
1203  value++;
1204  }
1205  ASSERT_EQ(store_position, -1);
1206  ASSERT(!clear);
1207  undo_action = RESTORE;
1208  break;
1210  Trace::DeferredCapture* pc =
1211  static_cast<Trace::DeferredCapture*>(action);
1212  if (!clear && store_position == -1) {
1213  store_position = pc->cp_offset();
1214  }
1215 
1216  // For captures we know that stores and clears alternate.
1217  // Other register, are never cleared, and if the occur
1218  // inside a loop, they might be assigned more than once.
1219  if (reg <= 1) {
1220  // Registers zero and one, aka "capture zero", is
1221  // always set correctly if we succeed. There is no
1222  // need to undo a setting on backtrack, because we
1223  // will set it again or fail.
1224  undo_action = IGNORE;
1225  } else {
1226  undo_action = pc->is_capture() ? CLEAR : RESTORE;
1227  }
1228  ASSERT(!absolute);
1229  ASSERT_EQ(value, 0);
1230  break;
1231  }
1233  // Since we're scanning in reverse order, if we've already
1234  // set the position we have to ignore historically earlier
1235  // clearing operations.
1236  if (store_position == -1) {
1237  clear = true;
1238  }
1239  undo_action = RESTORE;
1240  ASSERT(!absolute);
1241  ASSERT_EQ(value, 0);
1242  break;
1243  }
1244  default:
1245  UNREACHABLE();
1246  break;
1247  }
1248  }
1249  }
1250  // Prepare for the undo-action (e.g., push if it's going to be popped).
1251  if (undo_action == RESTORE) {
1252  pushes++;
1255  if (pushes == push_limit) {
1257  pushes = 0;
1258  }
1259 
1260  assembler->PushRegister(reg, stack_check);
1261  registers_to_pop->Set(reg, zone);
1262  } else if (undo_action == CLEAR) {
1263  registers_to_clear->Set(reg, zone);
1264  }
1265  // Perform the chronologically last action (or accumulated increment)
1266  // for the register.
1267  if (store_position != -1) {
1268  assembler->WriteCurrentPositionToRegister(reg, store_position);
1269  } else if (clear) {
1270  assembler->ClearRegisters(reg, reg);
1271  } else if (absolute) {
1272  assembler->SetRegister(reg, value);
1273  } else if (value != 0) {
1274  assembler->AdvanceRegister(reg, value);
1275  }
1276  }
1277 }
1278 
1279 
1280 // This is called as we come into a loop choice node and some other tricky
1281 // nodes. It normalizes the state of the code generator to ensure we can
1282 // generate generic code.
1283 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
1284  RegExpMacroAssembler* assembler = compiler->macro_assembler();
1285 
1286  ASSERT(!is_trivial());
1287 
1288  if (actions_ == NULL && backtrack() == NULL) {
1289  // Here we just have some deferred cp advances to fix and we are back to
1290  // a normal situation. We may also have to forget some information gained
1291  // through a quick check that was already performed.
1292  if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
1293  // Create a new trivial state and generate the node with that.
1294  Trace new_state;
1295  successor->Emit(compiler, &new_state);
1296  return;
1297  }
1298 
1299  // Generate deferred actions here along with code to undo them again.
1300  OutSet affected_registers;
1301 
1302  if (backtrack() != NULL) {
1303  // Here we have a concrete backtrack location. These are set up by choice
1304  // nodes and so they indicate that we have a deferred save of the current
1305  // position which we may need to emit here.
1306  assembler->PushCurrentPosition();
1307  }
1308 
1309  int max_register = FindAffectedRegisters(&affected_registers,
1310  compiler->zone());
1311  OutSet registers_to_pop;
1312  OutSet registers_to_clear;
1313  PerformDeferredActions(assembler,
1314  max_register,
1315  affected_registers,
1316  &registers_to_pop,
1317  &registers_to_clear,
1318  compiler->zone());
1319  if (cp_offset_ != 0) {
1320  assembler->AdvanceCurrentPosition(cp_offset_);
1321  }
1322 
1323  // Create a new trivial state and generate the node with that.
1324  Label undo;
1325  assembler->PushBacktrack(&undo);
1326  Trace new_state;
1327  successor->Emit(compiler, &new_state);
1328 
1329  // On backtrack we need to restore state.
1330  assembler->Bind(&undo);
1331  RestoreAffectedRegisters(assembler,
1332  max_register,
1333  registers_to_pop,
1334  registers_to_clear);
1335  if (backtrack() == NULL) {
1336  assembler->Backtrack();
1337  } else {
1338  assembler->PopCurrentPosition();
1339  assembler->GoTo(backtrack());
1340  }
1341 }
1342 
1343 
1345  RegExpMacroAssembler* assembler = compiler->macro_assembler();
1346 
1347  // Omit flushing the trace. We discard the entire stack frame anyway.
1348 
1349  if (!label()->is_bound()) {
1350  // We are completely independent of the trace, since we ignore it,
1351  // so this code can be used as the generic version.
1352  assembler->Bind(label());
1353  }
1354 
1355  // Throw away everything on the backtrack stack since the start
1356  // of the negative submatch and restore the character position.
1357  assembler->ReadCurrentPositionFromRegister(current_position_register_);
1358  assembler->ReadStackPointerFromRegister(stack_pointer_register_);
1359  if (clear_capture_count_ > 0) {
1360  // Clear any captures that might have been performed during the success
1361  // of the body of the negative look-ahead.
1362  int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
1363  assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
1364  }
1365  // Now that we have unwound the stack we find at the top of the stack the
1366  // backtrack that the BeginSubmatch node got.
1367  assembler->Backtrack();
1368 }
1369 
1370 
1371 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1372  if (!trace->is_trivial()) {
1373  trace->Flush(compiler, this);
1374  return;
1375  }
1376  RegExpMacroAssembler* assembler = compiler->macro_assembler();
1377  if (!label()->is_bound()) {
1378  assembler->Bind(label());
1379  }
1380  switch (action_) {
1381  case ACCEPT:
1382  assembler->Succeed();
1383  return;
1384  case BACKTRACK:
1385  assembler->GoTo(trace->backtrack());
1386  return;
1387  case NEGATIVE_SUBMATCH_SUCCESS:
1388  // This case is handled in a different virtual method.
1389  UNREACHABLE();
1390  }
1391  UNIMPLEMENTED();
1392 }
1393 
1394 
1396  if (guards_ == NULL)
1397  guards_ = new(zone) ZoneList<Guard*>(1, zone);
1398  guards_->Add(guard, zone);
1399 }
1400 
1401 
1403  int val,
1404  RegExpNode* on_success) {
1405  ActionNode* result =
1406  new(on_success->zone()) ActionNode(SET_REGISTER, on_success);
1407  result->data_.u_store_register.reg = reg;
1408  result->data_.u_store_register.value = val;
1409  return result;
1410 }
1411 
1412 
1414  ActionNode* result =
1415  new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);
1416  result->data_.u_increment_register.reg = reg;
1417  return result;
1418 }
1419 
1420 
1422  bool is_capture,
1423  RegExpNode* on_success) {
1424  ActionNode* result =
1425  new(on_success->zone()) ActionNode(STORE_POSITION, on_success);
1426  result->data_.u_position_register.reg = reg;
1427  result->data_.u_position_register.is_capture = is_capture;
1428  return result;
1429 }
1430 
1431 
1433  RegExpNode* on_success) {
1434  ActionNode* result =
1435  new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);
1436  result->data_.u_clear_captures.range_from = range.from();
1437  result->data_.u_clear_captures.range_to = range.to();
1438  return result;
1439 }
1440 
1441 
1443  int position_reg,
1444  RegExpNode* on_success) {
1445  ActionNode* result =
1446  new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);
1447  result->data_.u_submatch.stack_pointer_register = stack_reg;
1448  result->data_.u_submatch.current_position_register = position_reg;
1449  return result;
1450 }
1451 
1452 
1454  int position_reg,
1455  int clear_register_count,
1456  int clear_register_from,
1457  RegExpNode* on_success) {
1458  ActionNode* result =
1459  new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
1460  result->data_.u_submatch.stack_pointer_register = stack_reg;
1461  result->data_.u_submatch.current_position_register = position_reg;
1462  result->data_.u_submatch.clear_register_count = clear_register_count;
1463  result->data_.u_submatch.clear_register_from = clear_register_from;
1464  return result;
1465 }
1466 
1467 
1469  int repetition_register,
1470  int repetition_limit,
1471  RegExpNode* on_success) {
1472  ActionNode* result =
1473  new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);
1474  result->data_.u_empty_match_check.start_register = start_register;
1475  result->data_.u_empty_match_check.repetition_register = repetition_register;
1476  result->data_.u_empty_match_check.repetition_limit = repetition_limit;
1477  return result;
1478 }
1479 
1480 
1481 #define DEFINE_ACCEPT(Type) \
1482  void Type##Node::Accept(NodeVisitor* visitor) { \
1483  visitor->Visit##Type(this); \
1484  }
1486 #undef DEFINE_ACCEPT
1487 
1488 
1490  visitor->VisitLoopChoice(this);
1491 }
1492 
1493 
1494 // -------------------------------------------------------------------
1495 // Emit code.
1496 
1497 
1498 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
1499  Guard* guard,
1500  Trace* trace) {
1501  switch (guard->op()) {
1502  case Guard::LT:
1503  ASSERT(!trace->mentions_reg(guard->reg()));
1504  macro_assembler->IfRegisterGE(guard->reg(),
1505  guard->value(),
1506  trace->backtrack());
1507  break;
1508  case Guard::GEQ:
1509  ASSERT(!trace->mentions_reg(guard->reg()));
1510  macro_assembler->IfRegisterLT(guard->reg(),
1511  guard->value(),
1512  trace->backtrack());
1513  break;
1514  }
1515 }
1516 
1517 
1518 // Returns the number of characters in the equivalence class, omitting those
1519 // that cannot occur in the source string because it is ASCII.
1520 static int GetCaseIndependentLetters(Isolate* isolate,
1521  uc16 character,
1522  bool ascii_subject,
1523  unibrow::uchar* letters) {
1524  int length =
1525  isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
1526  // Unibrow returns 0 or 1 for characters where case independence is
1527  // trivial.
1528  if (length == 0) {
1529  letters[0] = character;
1530  length = 1;
1531  }
1532  if (!ascii_subject || character <= String::kMaxAsciiCharCode) {
1533  return length;
1534  }
1535  // The standard requires that non-ASCII characters cannot have ASCII
1536  // character codes in their equivalence class.
1537  return 0;
1538 }
1539 
1540 
1541 static inline bool EmitSimpleCharacter(Isolate* isolate,
1542  RegExpCompiler* compiler,
1543  uc16 c,
1544  Label* on_failure,
1545  int cp_offset,
1546  bool check,
1547  bool preloaded) {
1548  RegExpMacroAssembler* assembler = compiler->macro_assembler();
1549  bool bound_checked = false;
1550  if (!preloaded) {
1551  assembler->LoadCurrentCharacter(
1552  cp_offset,
1553  on_failure,
1554  check);
1555  bound_checked = true;
1556  }
1557  assembler->CheckNotCharacter(c, on_failure);
1558  return bound_checked;
1559 }
1560 
1561 
1562 // Only emits non-letters (things that don't have case). Only used for case
1563 // independent matches.
1564 static inline bool EmitAtomNonLetter(Isolate* isolate,
1565  RegExpCompiler* compiler,
1566  uc16 c,
1567  Label* on_failure,
1568  int cp_offset,
1569  bool check,
1570  bool preloaded) {
1571  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1572  bool ascii = compiler->ascii();
1574  int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1575  if (length < 1) {
1576  // This can't match. Must be an ASCII subject and a non-ASCII character.
1577  // We do not need to do anything since the ASCII pass already handled this.
1578  return false; // Bounds not checked.
1579  }
1580  bool checked = false;
1581  // We handle the length > 1 case in a later pass.
1582  if (length == 1) {
1583  if (ascii && c > String::kMaxAsciiCharCodeU) {
1584  // Can't match - see above.
1585  return false; // Bounds not checked.
1586  }
1587  if (!preloaded) {
1588  macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1589  checked = check;
1590  }
1591  macro_assembler->CheckNotCharacter(c, on_failure);
1592  }
1593  return checked;
1594 }
1595 
1596 
1597 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1598  bool ascii,
1599  uc16 c1,
1600  uc16 c2,
1601  Label* on_failure) {
1602  uc16 char_mask;
1603  if (ascii) {
1604  char_mask = String::kMaxAsciiCharCode;
1605  } else {
1606  char_mask = String::kMaxUtf16CodeUnit;
1607  }
1608  uc16 exor = c1 ^ c2;
1609  // Check whether exor has only one bit set.
1610  if (((exor - 1) & exor) == 0) {
1611  // If c1 and c2 differ only by one bit.
1612  // Ecma262UnCanonicalize always gives the highest number last.
1613  ASSERT(c2 > c1);
1614  uc16 mask = char_mask ^ exor;
1615  macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
1616  return true;
1617  }
1618  ASSERT(c2 > c1);
1619  uc16 diff = c2 - c1;
1620  if (((diff - 1) & diff) == 0 && c1 >= diff) {
1621  // If the characters differ by 2^n but don't differ by one bit then
1622  // subtract the difference from the found character, then do the or
1623  // trick. We avoid the theoretical case where negative numbers are
1624  // involved in order to simplify code generation.
1625  uc16 mask = char_mask ^ diff;
1626  macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1627  diff,
1628  mask,
1629  on_failure);
1630  return true;
1631  }
1632  return false;
1633 }
1634 
1635 
1636 typedef bool EmitCharacterFunction(Isolate* isolate,
1637  RegExpCompiler* compiler,
1638  uc16 c,
1639  Label* on_failure,
1640  int cp_offset,
1641  bool check,
1642  bool preloaded);
1643 
1644 // Only emits letters (things that have case). Only used for case independent
1645 // matches.
1646 static inline bool EmitAtomLetter(Isolate* isolate,
1647  RegExpCompiler* compiler,
1648  uc16 c,
1649  Label* on_failure,
1650  int cp_offset,
1651  bool check,
1652  bool preloaded) {
1653  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1654  bool ascii = compiler->ascii();
1656  int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1657  if (length <= 1) return false;
1658  // We may not need to check against the end of the input string
1659  // if this character lies before a character that matched.
1660  if (!preloaded) {
1661  macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1662  }
1663  Label ok;
1665  switch (length) {
1666  case 2: {
1667  if (ShortCutEmitCharacterPair(macro_assembler,
1668  ascii,
1669  chars[0],
1670  chars[1],
1671  on_failure)) {
1672  } else {
1673  macro_assembler->CheckCharacter(chars[0], &ok);
1674  macro_assembler->CheckNotCharacter(chars[1], on_failure);
1675  macro_assembler->Bind(&ok);
1676  }
1677  break;
1678  }
1679  case 4:
1680  macro_assembler->CheckCharacter(chars[3], &ok);
1681  // Fall through!
1682  case 3:
1683  macro_assembler->CheckCharacter(chars[0], &ok);
1684  macro_assembler->CheckCharacter(chars[1], &ok);
1685  macro_assembler->CheckNotCharacter(chars[2], on_failure);
1686  macro_assembler->Bind(&ok);
1687  break;
1688  default:
1689  UNREACHABLE();
1690  break;
1691  }
1692  return true;
1693 }
1694 
1695 
1696 static void EmitBoundaryTest(RegExpMacroAssembler* masm,
1697  int border,
1698  Label* fall_through,
1699  Label* above_or_equal,
1700  Label* below) {
1701  if (below != fall_through) {
1702  masm->CheckCharacterLT(border, below);
1703  if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
1704  } else {
1705  masm->CheckCharacterGT(border - 1, above_or_equal);
1706  }
1707 }
1708 
1709 
1710 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
1711  int first,
1712  int last,
1713  Label* fall_through,
1714  Label* in_range,
1715  Label* out_of_range) {
1716  if (in_range == fall_through) {
1717  if (first == last) {
1718  masm->CheckNotCharacter(first, out_of_range);
1719  } else {
1720  masm->CheckCharacterNotInRange(first, last, out_of_range);
1721  }
1722  } else {
1723  if (first == last) {
1724  masm->CheckCharacter(first, in_range);
1725  } else {
1726  masm->CheckCharacterInRange(first, last, in_range);
1727  }
1728  if (out_of_range != fall_through) masm->GoTo(out_of_range);
1729  }
1730 }
1731 
1732 
1733 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
1734 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
1735 static void EmitUseLookupTable(
1736  RegExpMacroAssembler* masm,
1737  ZoneList<int>* ranges,
1738  int start_index,
1739  int end_index,
1740  int min_char,
1741  Label* fall_through,
1742  Label* even_label,
1743  Label* odd_label) {
1744  static const int kSize = RegExpMacroAssembler::kTableSize;
1745  static const int kMask = RegExpMacroAssembler::kTableMask;
1746 
1747  int base = (min_char & ~kMask);
1748  USE(base);
1749 
1750  // Assert that everything is on one kTableSize page.
1751  for (int i = start_index; i <= end_index; i++) {
1752  ASSERT_EQ(ranges->at(i) & ~kMask, base);
1753  }
1754  ASSERT(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
1755 
1756  char templ[kSize];
1757  Label* on_bit_set;
1758  Label* on_bit_clear;
1759  int bit;
1760  if (even_label == fall_through) {
1761  on_bit_set = odd_label;
1762  on_bit_clear = even_label;
1763  bit = 1;
1764  } else {
1765  on_bit_set = even_label;
1766  on_bit_clear = odd_label;
1767  bit = 0;
1768  }
1769  for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) {
1770  templ[i] = bit;
1771  }
1772  int j = 0;
1773  bit ^= 1;
1774  for (int i = start_index; i < end_index; i++) {
1775  for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
1776  templ[j] = bit;
1777  }
1778  bit ^= 1;
1779  }
1780  for (int i = j; i < kSize; i++) {
1781  templ[i] = bit;
1782  }
1783  // TODO(erikcorry): Cache these.
1784  Handle<ByteArray> ba = FACTORY->NewByteArray(kSize, TENURED);
1785  for (int i = 0; i < kSize; i++) {
1786  ba->set(i, templ[i]);
1787  }
1788  masm->CheckBitInTable(ba, on_bit_set);
1789  if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1790 }
1791 
1792 
1793 static void CutOutRange(RegExpMacroAssembler* masm,
1794  ZoneList<int>* ranges,
1795  int start_index,
1796  int end_index,
1797  int cut_index,
1798  Label* even_label,
1799  Label* odd_label) {
1800  bool odd = (((cut_index - start_index) & 1) == 1);
1801  Label* in_range_label = odd ? odd_label : even_label;
1802  Label dummy;
1803  EmitDoubleBoundaryTest(masm,
1804  ranges->at(cut_index),
1805  ranges->at(cut_index + 1) - 1,
1806  &dummy,
1807  in_range_label,
1808  &dummy);
1809  ASSERT(!dummy.is_linked());
1810  // Cut out the single range by rewriting the array. This creates a new
1811  // range that is a merger of the two ranges on either side of the one we
1812  // are cutting out. The oddity of the labels is preserved.
1813  for (int j = cut_index; j > start_index; j--) {
1814  ranges->at(j) = ranges->at(j - 1);
1815  }
1816  for (int j = cut_index + 1; j < end_index; j++) {
1817  ranges->at(j) = ranges->at(j + 1);
1818  }
1819 }
1820 
1821 
1822 // Unicode case. Split the search space into kSize spaces that are handled
1823 // with recursion.
1824 static void SplitSearchSpace(ZoneList<int>* ranges,
1825  int start_index,
1826  int end_index,
1827  int* new_start_index,
1828  int* new_end_index,
1829  int* border) {
1830  static const int kSize = RegExpMacroAssembler::kTableSize;
1831  static const int kMask = RegExpMacroAssembler::kTableMask;
1832 
1833  int first = ranges->at(start_index);
1834  int last = ranges->at(end_index) - 1;
1835 
1836  *new_start_index = start_index;
1837  *border = (ranges->at(start_index) & ~kMask) + kSize;
1838  while (*new_start_index < end_index) {
1839  if (ranges->at(*new_start_index) > *border) break;
1840  (*new_start_index)++;
1841  }
1842  // new_start_index is the index of the first edge that is beyond the
1843  // current kSize space.
1844 
1845  // For very large search spaces we do a binary chop search of the non-ASCII
1846  // space instead of just going to the end of the current kSize space. The
1847  // heuristics are complicated a little by the fact that any 128-character
1848  // encoding space can be quickly tested with a table lookup, so we don't
1849  // wish to do binary chop search at a smaller granularity than that. A
1850  // 128-character space can take up a lot of space in the ranges array if,
1851  // for example, we only want to match every second character (eg. the lower
1852  // case characters on some Unicode pages).
1853  int binary_chop_index = (end_index + start_index) / 2;
1854  // The first test ensures that we get to the code that handles the ASCII
1855  // range with a single not-taken branch, speeding up this important
1856  // character range (even non-ASCII charset-based text has spaces and
1857  // punctuation).
1858  if (*border - 1 > String::kMaxAsciiCharCode && // ASCII case.
1859  end_index - start_index > (*new_start_index - start_index) * 2 &&
1860  last - first > kSize * 2 &&
1861  binary_chop_index > *new_start_index &&
1862  ranges->at(binary_chop_index) >= first + 2 * kSize) {
1863  int scan_forward_for_section_border = binary_chop_index;;
1864  int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1865 
1866  while (scan_forward_for_section_border < end_index) {
1867  if (ranges->at(scan_forward_for_section_border) > new_border) {
1868  *new_start_index = scan_forward_for_section_border;
1869  *border = new_border;
1870  break;
1871  }
1872  scan_forward_for_section_border++;
1873  }
1874  }
1875 
1876  ASSERT(*new_start_index > start_index);
1877  *new_end_index = *new_start_index - 1;
1878  if (ranges->at(*new_end_index) == *border) {
1879  (*new_end_index)--;
1880  }
1881  if (*border >= ranges->at(end_index)) {
1882  *border = ranges->at(end_index);
1883  *new_start_index = end_index; // Won't be used.
1884  *new_end_index = end_index - 1;
1885  }
1886 }
1887 
1888 
1889 // Gets a series of segment boundaries representing a character class. If the
1890 // character is in the range between an even and an odd boundary (counting from
1891 // start_index) then go to even_label, otherwise go to odd_label. We already
1892 // know that the character is in the range of min_char to max_char inclusive.
1893 // Either label can be NULL indicating backtracking. Either label can also be
1894 // equal to the fall_through label.
1895 static void GenerateBranches(RegExpMacroAssembler* masm,
1896  ZoneList<int>* ranges,
1897  int start_index,
1898  int end_index,
1899  uc16 min_char,
1900  uc16 max_char,
1901  Label* fall_through,
1902  Label* even_label,
1903  Label* odd_label) {
1904  int first = ranges->at(start_index);
1905  int last = ranges->at(end_index) - 1;
1906 
1907  ASSERT_LT(min_char, first);
1908 
1909  // Just need to test if the character is before or on-or-after
1910  // a particular character.
1911  if (start_index == end_index) {
1912  EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1913  return;
1914  }
1915 
1916  // Another almost trivial case: There is one interval in the middle that is
1917  // different from the end intervals.
1918  if (start_index + 1 == end_index) {
1919  EmitDoubleBoundaryTest(
1920  masm, first, last, fall_through, even_label, odd_label);
1921  return;
1922  }
1923 
1924  // It's not worth using table lookup if there are very few intervals in the
1925  // character class.
1926  if (end_index - start_index <= 6) {
1927  // It is faster to test for individual characters, so we look for those
1928  // first, then try arbitrary ranges in the second round.
1929  static int kNoCutIndex = -1;
1930  int cut = kNoCutIndex;
1931  for (int i = start_index; i < end_index; i++) {
1932  if (ranges->at(i) == ranges->at(i + 1) - 1) {
1933  cut = i;
1934  break;
1935  }
1936  }
1937  if (cut == kNoCutIndex) cut = start_index;
1938  CutOutRange(
1939  masm, ranges, start_index, end_index, cut, even_label, odd_label);
1940  ASSERT_GE(end_index - start_index, 2);
1941  GenerateBranches(masm,
1942  ranges,
1943  start_index + 1,
1944  end_index - 1,
1945  min_char,
1946  max_char,
1947  fall_through,
1948  even_label,
1949  odd_label);
1950  return;
1951  }
1952 
1953  // If there are a lot of intervals in the regexp, then we will use tables to
1954  // determine whether the character is inside or outside the character class.
1955  static const int kBits = RegExpMacroAssembler::kTableSizeBits;
1956 
1957  if ((max_char >> kBits) == (min_char >> kBits)) {
1958  EmitUseLookupTable(masm,
1959  ranges,
1960  start_index,
1961  end_index,
1962  min_char,
1963  fall_through,
1964  even_label,
1965  odd_label);
1966  return;
1967  }
1968 
1969  if ((min_char >> kBits) != (first >> kBits)) {
1970  masm->CheckCharacterLT(first, odd_label);
1971  GenerateBranches(masm,
1972  ranges,
1973  start_index + 1,
1974  end_index,
1975  first,
1976  max_char,
1977  fall_through,
1978  odd_label,
1979  even_label);
1980  return;
1981  }
1982 
1983  int new_start_index = 0;
1984  int new_end_index = 0;
1985  int border = 0;
1986 
1987  SplitSearchSpace(ranges,
1988  start_index,
1989  end_index,
1990  &new_start_index,
1991  &new_end_index,
1992  &border);
1993 
1994  Label handle_rest;
1995  Label* above = &handle_rest;
1996  if (border == last + 1) {
1997  // We didn't find any section that started after the limit, so everything
1998  // above the border is one of the terminal labels.
1999  above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
2000  ASSERT(new_end_index == end_index - 1);
2001  }
2002 
2003  ASSERT_LE(start_index, new_end_index);
2004  ASSERT_LE(new_start_index, end_index);
2005  ASSERT_LT(start_index, new_start_index);
2006  ASSERT_LT(new_end_index, end_index);
2007  ASSERT(new_end_index + 1 == new_start_index ||
2008  (new_end_index + 2 == new_start_index &&
2009  border == ranges->at(new_end_index + 1)));
2010  ASSERT_LT(min_char, border - 1);
2011  ASSERT_LT(border, max_char);
2012  ASSERT_LT(ranges->at(new_end_index), border);
2013  ASSERT(border < ranges->at(new_start_index) ||
2014  (border == ranges->at(new_start_index) &&
2015  new_start_index == end_index &&
2016  new_end_index == end_index - 1 &&
2017  border == last + 1));
2018  ASSERT(new_start_index == 0 || border >= ranges->at(new_start_index - 1));
2019 
2020  masm->CheckCharacterGT(border - 1, above);
2021  Label dummy;
2022  GenerateBranches(masm,
2023  ranges,
2024  start_index,
2025  new_end_index,
2026  min_char,
2027  border - 1,
2028  &dummy,
2029  even_label,
2030  odd_label);
2031  if (handle_rest.is_linked()) {
2032  masm->Bind(&handle_rest);
2033  bool flip = (new_start_index & 1) != (start_index & 1);
2034  GenerateBranches(masm,
2035  ranges,
2036  new_start_index,
2037  end_index,
2038  border,
2039  max_char,
2040  &dummy,
2041  flip ? odd_label : even_label,
2042  flip ? even_label : odd_label);
2043  }
2044 }
2045 
2046 
2047 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
2048  RegExpCharacterClass* cc,
2049  bool ascii,
2050  Label* on_failure,
2051  int cp_offset,
2052  bool check_offset,
2053  bool preloaded,
2054  Zone* zone) {
2055  ZoneList<CharacterRange>* ranges = cc->ranges(zone);
2056  if (!CharacterRange::IsCanonical(ranges)) {
2058  }
2059 
2060  int max_char;
2061  if (ascii) {
2062  max_char = String::kMaxAsciiCharCode;
2063  } else {
2064  max_char = String::kMaxUtf16CodeUnit;
2065  }
2066 
2067  int range_count = ranges->length();
2068 
2069  int last_valid_range = range_count - 1;
2070  while (last_valid_range >= 0) {
2071  CharacterRange& range = ranges->at(last_valid_range);
2072  if (range.from() <= max_char) {
2073  break;
2074  }
2075  last_valid_range--;
2076  }
2077 
2078  if (last_valid_range < 0) {
2079  if (!cc->is_negated()) {
2080  macro_assembler->GoTo(on_failure);
2081  }
2082  if (check_offset) {
2083  macro_assembler->CheckPosition(cp_offset, on_failure);
2084  }
2085  return;
2086  }
2087 
2088  if (last_valid_range == 0 &&
2089  ranges->at(0).IsEverything(max_char)) {
2090  if (cc->is_negated()) {
2091  macro_assembler->GoTo(on_failure);
2092  } else {
2093  // This is a common case hit by non-anchored expressions.
2094  if (check_offset) {
2095  macro_assembler->CheckPosition(cp_offset, on_failure);
2096  }
2097  }
2098  return;
2099  }
2100  if (last_valid_range == 0 &&
2101  !cc->is_negated() &&
2102  ranges->at(0).IsEverything(max_char)) {
2103  // This is a common case hit by non-anchored expressions.
2104  if (check_offset) {
2105  macro_assembler->CheckPosition(cp_offset, on_failure);
2106  }
2107  return;
2108  }
2109 
2110  if (!preloaded) {
2111  macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
2112  }
2113 
2114  if (cc->is_standard(zone) &&
2115  macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
2116  on_failure)) {
2117  return;
2118  }
2119 
2120 
2121  // A new list with ascending entries. Each entry is a code unit
2122  // where there is a boundary between code units that are part of
2123  // the class and code units that are not. Normally we insert an
2124  // entry at zero which goes to the failure label, but if there
2125  // was already one there we fall through for success on that entry.
2126  // Subsequent entries have alternating meaning (success/failure).
2127  ZoneList<int>* range_boundaries =
2128  new(zone) ZoneList<int>(last_valid_range, zone);
2129 
2130  bool zeroth_entry_is_failure = !cc->is_negated();
2131 
2132  for (int i = 0; i <= last_valid_range; i++) {
2133  CharacterRange& range = ranges->at(i);
2134  if (range.from() == 0) {
2135  ASSERT_EQ(i, 0);
2136  zeroth_entry_is_failure = !zeroth_entry_is_failure;
2137  } else {
2138  range_boundaries->Add(range.from(), zone);
2139  }
2140  range_boundaries->Add(range.to() + 1, zone);
2141  }
2142  int end_index = range_boundaries->length() - 1;
2143  if (range_boundaries->at(end_index) > max_char) {
2144  end_index--;
2145  }
2146 
2147  Label fall_through;
2148  GenerateBranches(macro_assembler,
2149  range_boundaries,
2150  0, // start_index.
2151  end_index,
2152  0, // min_char.
2153  max_char,
2154  &fall_through,
2155  zeroth_entry_is_failure ? &fall_through : on_failure,
2156  zeroth_entry_is_failure ? on_failure : &fall_through);
2157  macro_assembler->Bind(&fall_through);
2158 }
2159 
2160 
2162 }
2163 
2164 
2166  Trace* trace) {
2167  // If we are generating a greedy loop then don't stop and don't reuse code.
2168  if (trace->stop_node() != NULL) {
2169  return CONTINUE;
2170  }
2171 
2172  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2173  if (trace->is_trivial()) {
2174  if (label_.is_bound()) {
2175  // We are being asked to generate a generic version, but that's already
2176  // been done so just go to it.
2177  macro_assembler->GoTo(&label_);
2178  return DONE;
2179  }
2180  if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
2181  // To avoid too deep recursion we push the node to the work queue and just
2182  // generate a goto here.
2183  compiler->AddWork(this);
2184  macro_assembler->GoTo(&label_);
2185  return DONE;
2186  }
2187  // Generate generic version of the node and bind the label for later use.
2188  macro_assembler->Bind(&label_);
2189  return CONTINUE;
2190  }
2191 
2192  // We are being asked to make a non-generic version. Keep track of how many
2193  // non-generic versions we generate so as not to overdo it.
2194  trace_count_++;
2195  if (FLAG_regexp_optimization &&
2196  trace_count_ < kMaxCopiesCodeGenerated &&
2198  return CONTINUE;
2199  }
2200 
2201  // If we get here code has been generated for this node too many times or
2202  // recursion is too deep. Time to switch to a generic version. The code for
2203  // generic versions above can handle deep recursion properly.
2204  trace->Flush(compiler, this);
2205  return DONE;
2206 }
2207 
2208 
2209 int ActionNode::EatsAtLeast(int still_to_find,
2210  int recursion_depth,
2211  bool not_at_start) {
2212  if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2213  if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
2214  return on_success()->EatsAtLeast(still_to_find,
2215  recursion_depth + 1,
2216  not_at_start);
2217 }
2218 
2219 
2221  int recursion_depth,
2222  int budget,
2223  BoyerMooreLookahead* bm,
2224  bool not_at_start) {
2225  if (type_ == BEGIN_SUBMATCH) {
2226  bm->SetRest(offset);
2227  } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
2228  on_success()->FillInBMInfo(
2229  offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2230  }
2231  SaveBMInfo(bm, not_at_start, offset);
2232 }
2233 
2234 
2235 int AssertionNode::EatsAtLeast(int still_to_find,
2236  int recursion_depth,
2237  bool not_at_start) {
2238  if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2239  // If we know we are not at the start and we are asked "how many characters
2240  // will you match if you succeed?" then we can answer anything since false
2241  // implies false. So lets just return the max answer (still_to_find) since
2242  // that won't prevent us from preloading a lot of characters for the other
2243  // branches in the node graph.
2244  if (type() == AT_START && not_at_start) return still_to_find;
2245  return on_success()->EatsAtLeast(still_to_find,
2246  recursion_depth + 1,
2247  not_at_start);
2248 }
2249 
2250 
2252  int recursion_depth,
2253  int budget,
2254  BoyerMooreLookahead* bm,
2255  bool not_at_start) {
2256  // Match the behaviour of EatsAtLeast on this node.
2257  if (type() == AT_START && not_at_start) return;
2258  on_success()->FillInBMInfo(
2259  offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2260  SaveBMInfo(bm, not_at_start, offset);
2261 }
2262 
2263 
2264 int BackReferenceNode::EatsAtLeast(int still_to_find,
2265  int recursion_depth,
2266  bool not_at_start) {
2267  if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2268  return on_success()->EatsAtLeast(still_to_find,
2269  recursion_depth + 1,
2270  not_at_start);
2271 }
2272 
2273 
2274 int TextNode::EatsAtLeast(int still_to_find,
2275  int recursion_depth,
2276  bool not_at_start) {
2277  int answer = Length();
2278  if (answer >= still_to_find) return answer;
2279  if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
2280  // We are not at start after this node so we set the last argument to 'true'.
2281  return answer + on_success()->EatsAtLeast(still_to_find - answer,
2282  recursion_depth + 1,
2283  true);
2284 }
2285 
2286 
2288  int recursion_depth,
2289  bool not_at_start) {
2290  if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2291  // Alternative 0 is the negative lookahead, alternative 1 is what comes
2292  // afterwards.
2293  RegExpNode* node = alternatives_->at(1).node();
2294  return node->EatsAtLeast(still_to_find, recursion_depth + 1, not_at_start);
2295 }
2296 
2297 
2299  QuickCheckDetails* details,
2300  RegExpCompiler* compiler,
2301  int filled_in,
2302  bool not_at_start) {
2303  // Alternative 0 is the negative lookahead, alternative 1 is what comes
2304  // afterwards.
2305  RegExpNode* node = alternatives_->at(1).node();
2306  return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
2307 }
2308 
2309 
2310 int ChoiceNode::EatsAtLeastHelper(int still_to_find,
2311  int recursion_depth,
2312  RegExpNode* ignore_this_node,
2313  bool not_at_start) {
2314  if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2315  int min = 100;
2316  int choice_count = alternatives_->length();
2317  for (int i = 0; i < choice_count; i++) {
2318  RegExpNode* node = alternatives_->at(i).node();
2319  if (node == ignore_this_node) continue;
2320  int node_eats_at_least = node->EatsAtLeast(still_to_find,
2321  recursion_depth + 1,
2322  not_at_start);
2323  if (node_eats_at_least < min) min = node_eats_at_least;
2324  }
2325  return min;
2326 }
2327 
2328 
2329 int LoopChoiceNode::EatsAtLeast(int still_to_find,
2330  int recursion_depth,
2331  bool not_at_start) {
2332  return EatsAtLeastHelper(still_to_find,
2333  recursion_depth,
2334  loop_node_,
2335  not_at_start);
2336 }
2337 
2338 
2339 int ChoiceNode::EatsAtLeast(int still_to_find,
2340  int recursion_depth,
2341  bool not_at_start) {
2342  return EatsAtLeastHelper(still_to_find,
2343  recursion_depth,
2344  NULL,
2345  not_at_start);
2346 }
2347 
2348 
2349 // Takes the left-most 1-bit and smears it out, setting all bits to its right.
2350 static inline uint32_t SmearBitsRight(uint32_t v) {
2351  v |= v >> 1;
2352  v |= v >> 2;
2353  v |= v >> 4;
2354  v |= v >> 8;
2355  v |= v >> 16;
2356  return v;
2357 }
2358 
2359 
2361  bool found_useful_op = false;
2362  uint32_t char_mask;
2363  if (asc) {
2364  char_mask = String::kMaxAsciiCharCode;
2365  } else {
2366  char_mask = String::kMaxUtf16CodeUnit;
2367  }
2368  mask_ = 0;
2369  value_ = 0;
2370  int char_shift = 0;
2371  for (int i = 0; i < characters_; i++) {
2372  Position* pos = &positions_[i];
2373  if ((pos->mask & String::kMaxAsciiCharCode) != 0) {
2374  found_useful_op = true;
2375  }
2376  mask_ |= (pos->mask & char_mask) << char_shift;
2377  value_ |= (pos->value & char_mask) << char_shift;
2378  char_shift += asc ? 8 : 16;
2379  }
2380  return found_useful_op;
2381 }
2382 
2383 
2385  Trace* trace,
2386  bool preload_has_checked_bounds,
2387  Label* on_possible_success,
2388  QuickCheckDetails* details,
2389  bool fall_through_on_failure) {
2390  if (details->characters() == 0) return false;
2391  GetQuickCheckDetails(details, compiler, 0, trace->at_start() == Trace::FALSE);
2392  if (details->cannot_match()) return false;
2393  if (!details->Rationalize(compiler->ascii())) return false;
2394  ASSERT(details->characters() == 1 ||
2395  compiler->macro_assembler()->CanReadUnaligned());
2396  uint32_t mask = details->mask();
2397  uint32_t value = details->value();
2398 
2399  RegExpMacroAssembler* assembler = compiler->macro_assembler();
2400 
2401  if (trace->characters_preloaded() != details->characters()) {
2402  assembler->LoadCurrentCharacter(trace->cp_offset(),
2403  trace->backtrack(),
2404  !preload_has_checked_bounds,
2405  details->characters());
2406  }
2407 
2408 
2409  bool need_mask = true;
2410 
2411  if (details->characters() == 1) {
2412  // If number of characters preloaded is 1 then we used a byte or 16 bit
2413  // load so the value is already masked down.
2414  uint32_t char_mask;
2415  if (compiler->ascii()) {
2416  char_mask = String::kMaxAsciiCharCode;
2417  } else {
2418  char_mask = String::kMaxUtf16CodeUnit;
2419  }
2420  if ((mask & char_mask) == char_mask) need_mask = false;
2421  mask &= char_mask;
2422  } else {
2423  // For 2-character preloads in ASCII mode or 1-character preloads in
2424  // TWO_BYTE mode we also use a 16 bit load with zero extend.
2425  if (details->characters() == 2 && compiler->ascii()) {
2426  if ((mask & 0x7f7f) == 0x7f7f) need_mask = false;
2427  } else if (details->characters() == 1 && !compiler->ascii()) {
2428  if ((mask & 0xffff) == 0xffff) need_mask = false;
2429  } else {
2430  if (mask == 0xffffffff) need_mask = false;
2431  }
2432  }
2433 
2434  if (fall_through_on_failure) {
2435  if (need_mask) {
2436  assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
2437  } else {
2438  assembler->CheckCharacter(value, on_possible_success);
2439  }
2440  } else {
2441  if (need_mask) {
2442  assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
2443  } else {
2444  assembler->CheckNotCharacter(value, trace->backtrack());
2445  }
2446  }
2447  return true;
2448 }
2449 
2450 
2451 // Here is the meat of GetQuickCheckDetails (see also the comment on the
2452 // super-class in the .h file).
2453 //
2454 // We iterate along the text object, building up for each character a
2455 // mask and value that can be used to test for a quick failure to match.
2456 // The masks and values for the positions will be combined into a single
2457 // machine word for the current character width in order to be used in
2458 // generating a quick check.
2460  RegExpCompiler* compiler,
2461  int characters_filled_in,
2462  bool not_at_start) {
2463  Isolate* isolate = Isolate::Current();
2464  ASSERT(characters_filled_in < details->characters());
2465  int characters = details->characters();
2466  int char_mask;
2467  if (compiler->ascii()) {
2468  char_mask = String::kMaxAsciiCharCode;
2469  } else {
2470  char_mask = String::kMaxUtf16CodeUnit;
2471  }
2472  for (int k = 0; k < elms_->length(); k++) {
2473  TextElement elm = elms_->at(k);
2474  if (elm.type == TextElement::ATOM) {
2475  Vector<const uc16> quarks = elm.data.u_atom->data();
2476  for (int i = 0; i < characters && i < quarks.length(); i++) {
2478  details->positions(characters_filled_in);
2479  uc16 c = quarks[i];
2480  if (c > char_mask) {
2481  // If we expect a non-ASCII character from an ASCII string,
2482  // there is no way we can match. Not even case independent
2483  // matching can turn an ASCII character into non-ASCII or
2484  // vice versa.
2485  details->set_cannot_match();
2486  pos->determines_perfectly = false;
2487  return;
2488  }
2489  if (compiler->ignore_case()) {
2491  int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
2492  chars);
2493  ASSERT(length != 0); // Can only happen if c > char_mask (see above).
2494  if (length == 1) {
2495  // This letter has no case equivalents, so it's nice and simple
2496  // and the mask-compare will determine definitely whether we have
2497  // a match at this character position.
2498  pos->mask = char_mask;
2499  pos->value = c;
2500  pos->determines_perfectly = true;
2501  } else {
2502  uint32_t common_bits = char_mask;
2503  uint32_t bits = chars[0];
2504  for (int j = 1; j < length; j++) {
2505  uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
2506  common_bits ^= differing_bits;
2507  bits &= common_bits;
2508  }
2509  // If length is 2 and common bits has only one zero in it then
2510  // our mask and compare instruction will determine definitely
2511  // whether we have a match at this character position. Otherwise
2512  // it can only be an approximate check.
2513  uint32_t one_zero = (common_bits | ~char_mask);
2514  if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
2515  pos->determines_perfectly = true;
2516  }
2517  pos->mask = common_bits;
2518  pos->value = bits;
2519  }
2520  } else {
2521  // Don't ignore case. Nice simple case where the mask-compare will
2522  // determine definitely whether we have a match at this character
2523  // position.
2524  pos->mask = char_mask;
2525  pos->value = c;
2526  pos->determines_perfectly = true;
2527  }
2528  characters_filled_in++;
2529  ASSERT(characters_filled_in <= details->characters());
2530  if (characters_filled_in == details->characters()) {
2531  return;
2532  }
2533  }
2534  } else {
2536  details->positions(characters_filled_in);
2538  ZoneList<CharacterRange>* ranges = tree->ranges(zone());
2539  if (tree->is_negated()) {
2540  // A quick check uses multi-character mask and compare. There is no
2541  // useful way to incorporate a negative char class into this scheme
2542  // so we just conservatively create a mask and value that will always
2543  // succeed.
2544  pos->mask = 0;
2545  pos->value = 0;
2546  } else {
2547  int first_range = 0;
2548  while (ranges->at(first_range).from() > char_mask) {
2549  first_range++;
2550  if (first_range == ranges->length()) {
2551  details->set_cannot_match();
2552  pos->determines_perfectly = false;
2553  return;
2554  }
2555  }
2556  CharacterRange range = ranges->at(first_range);
2557  uc16 from = range.from();
2558  uc16 to = range.to();
2559  if (to > char_mask) {
2560  to = char_mask;
2561  }
2562  uint32_t differing_bits = (from ^ to);
2563  // A mask and compare is only perfect if the differing bits form a
2564  // number like 00011111 with one single block of trailing 1s.
2565  if ((differing_bits & (differing_bits + 1)) == 0 &&
2566  from + differing_bits == to) {
2567  pos->determines_perfectly = true;
2568  }
2569  uint32_t common_bits = ~SmearBitsRight(differing_bits);
2570  uint32_t bits = (from & common_bits);
2571  for (int i = first_range + 1; i < ranges->length(); i++) {
2572  CharacterRange range = ranges->at(i);
2573  uc16 from = range.from();
2574  uc16 to = range.to();
2575  if (from > char_mask) continue;
2576  if (to > char_mask) to = char_mask;
2577  // Here we are combining more ranges into the mask and compare
2578  // value. With each new range the mask becomes more sparse and
2579  // so the chances of a false positive rise. A character class
2580  // with multiple ranges is assumed never to be equivalent to a
2581  // mask and compare operation.
2582  pos->determines_perfectly = false;
2583  uint32_t new_common_bits = (from ^ to);
2584  new_common_bits = ~SmearBitsRight(new_common_bits);
2585  common_bits &= new_common_bits;
2586  bits &= new_common_bits;
2587  uint32_t differing_bits = (from & common_bits) ^ bits;
2588  common_bits ^= differing_bits;
2589  bits &= common_bits;
2590  }
2591  pos->mask = common_bits;
2592  pos->value = bits;
2593  }
2594  characters_filled_in++;
2595  ASSERT(characters_filled_in <= details->characters());
2596  if (characters_filled_in == details->characters()) {
2597  return;
2598  }
2599  }
2600  }
2601  ASSERT(characters_filled_in != details->characters());
2602  if (!details->cannot_match()) {
2603  on_success()-> GetQuickCheckDetails(details,
2604  compiler,
2605  characters_filled_in,
2606  true);
2607  }
2608 }
2609 
2610 
2612  for (int i = 0; i < characters_; i++) {
2613  positions_[i].mask = 0;
2614  positions_[i].value = 0;
2615  positions_[i].determines_perfectly = false;
2616  }
2617  characters_ = 0;
2618 }
2619 
2620 
2621 void QuickCheckDetails::Advance(int by, bool ascii) {
2622  ASSERT(by >= 0);
2623  if (by >= characters_) {
2624  Clear();
2625  return;
2626  }
2627  for (int i = 0; i < characters_ - by; i++) {
2628  positions_[i] = positions_[by + i];
2629  }
2630  for (int i = characters_ - by; i < characters_; i++) {
2631  positions_[i].mask = 0;
2632  positions_[i].value = 0;
2633  positions_[i].determines_perfectly = false;
2634  }
2635  characters_ -= by;
2636  // We could change mask_ and value_ here but we would never advance unless
2637  // they had already been used in a check and they won't be used again because
2638  // it would gain us nothing. So there's no point.
2639 }
2640 
2641 
2642 void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
2643  ASSERT(characters_ == other->characters_);
2644  if (other->cannot_match_) {
2645  return;
2646  }
2647  if (cannot_match_) {
2648  *this = *other;
2649  return;
2650  }
2651  for (int i = from_index; i < characters_; i++) {
2652  QuickCheckDetails::Position* pos = positions(i);
2653  QuickCheckDetails::Position* other_pos = other->positions(i);
2654  if (pos->mask != other_pos->mask ||
2655  pos->value != other_pos->value ||
2656  !other_pos->determines_perfectly) {
2657  // Our mask-compare operation will be approximate unless we have the
2658  // exact same operation on both sides of the alternation.
2659  pos->determines_perfectly = false;
2660  }
2661  pos->mask &= other_pos->mask;
2662  pos->value &= pos->mask;
2663  other_pos->value &= pos->mask;
2664  uc16 differing_bits = (pos->value ^ other_pos->value);
2665  pos->mask &= ~differing_bits;
2666  pos->value &= pos->mask;
2667  }
2668 }
2669 
2670 
2672  public:
2673  explicit VisitMarker(NodeInfo* info) : info_(info) {
2674  ASSERT(!info->visited);
2675  info->visited = true;
2676  }
2678  info_->visited = false;
2679  }
2680  private:
2681  NodeInfo* info_;
2682 };
2683 
2684 
2686  if (info()->replacement_calculated) return replacement();
2687  if (depth < 0) return this;
2688  ASSERT(!info()->visited);
2689  VisitMarker marker(info());
2690  return FilterSuccessor(depth - 1);
2691 }
2692 
2693 
2695  RegExpNode* next = on_success_->FilterASCII(depth - 1);
2696  if (next == NULL) return set_replacement(NULL);
2697  on_success_ = next;
2698  return set_replacement(this);
2699 }
2700 
2701 
2703  if (info()->replacement_calculated) return replacement();
2704  if (depth < 0) return this;
2705  ASSERT(!info()->visited);
2706  VisitMarker marker(info());
2707  int element_count = elms_->length();
2708  for (int i = 0; i < element_count; i++) {
2709  TextElement elm = elms_->at(i);
2710  if (elm.type == TextElement::ATOM) {
2711  Vector<const uc16> quarks = elm.data.u_atom->data();
2712  for (int j = 0; j < quarks.length(); j++) {
2713  // We don't need special handling for case independence
2714  // because of the rule that case independence cannot make
2715  // a non-ASCII character match an ASCII character.
2716  if (quarks[j] > String::kMaxAsciiCharCode) {
2717  return set_replacement(NULL);
2718  }
2719  }
2720  } else {
2723  ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2724  if (!CharacterRange::IsCanonical(ranges)) {
2726  }
2727  // Now they are in order so we only need to look at the first.
2728  int range_count = ranges->length();
2729  if (cc->is_negated()) {
2730  if (range_count != 0 &&
2731  ranges->at(0).from() == 0 &&
2732  ranges->at(0).to() >= String::kMaxAsciiCharCode) {
2733  return set_replacement(NULL);
2734  }
2735  } else {
2736  if (range_count == 0 ||
2737  ranges->at(0).from() > String::kMaxAsciiCharCode) {
2738  return set_replacement(NULL);
2739  }
2740  }
2741  }
2742  }
2743  return FilterSuccessor(depth - 1);
2744 }
2745 
2746 
2748  if (info()->replacement_calculated) return replacement();
2749  if (depth < 0) return this;
2750  if (info()->visited) return this;
2751  {
2752  VisitMarker marker(info());
2753 
2754  RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);
2755  // If we can't continue after the loop then there is no sense in doing the
2756  // loop.
2757  if (continue_replacement == NULL) return set_replacement(NULL);
2758  }
2759 
2760  return ChoiceNode::FilterASCII(depth - 1);
2761 }
2762 
2763 
2765  if (info()->replacement_calculated) return replacement();
2766  if (depth < 0) return this;
2767  if (info()->visited) return this;
2768  VisitMarker marker(info());
2769  int choice_count = alternatives_->length();
2770 
2771  for (int i = 0; i < choice_count; i++) {
2772  GuardedAlternative alternative = alternatives_->at(i);
2773  if (alternative.guards() != NULL && alternative.guards()->length() != 0) {
2774  set_replacement(this);
2775  return this;
2776  }
2777  }
2778 
2779  int surviving = 0;
2780  RegExpNode* survivor = NULL;
2781  for (int i = 0; i < choice_count; i++) {
2782  GuardedAlternative alternative = alternatives_->at(i);
2783  RegExpNode* replacement = alternative.node()->FilterASCII(depth - 1);
2784  ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK.
2785  if (replacement != NULL) {
2786  alternatives_->at(i).set_node(replacement);
2787  surviving++;
2788  survivor = replacement;
2789  }
2790  }
2791  if (surviving < 2) return set_replacement(survivor);
2792 
2793  set_replacement(this);
2794  if (surviving == choice_count) {
2795  return this;
2796  }
2797  // Only some of the nodes survived the filtering. We need to rebuild the
2798  // alternatives list.
2799  ZoneList<GuardedAlternative>* new_alternatives =
2800  new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
2801  for (int i = 0; i < choice_count; i++) {
2802  RegExpNode* replacement =
2803  alternatives_->at(i).node()->FilterASCII(depth - 1);
2804  if (replacement != NULL) {
2805  alternatives_->at(i).set_node(replacement);
2806  new_alternatives->Add(alternatives_->at(i), zone());
2807  }
2808  }
2809  alternatives_ = new_alternatives;
2810  return this;
2811 }
2812 
2813 
2815  if (info()->replacement_calculated) return replacement();
2816  if (depth < 0) return this;
2817  if (info()->visited) return this;
2818  VisitMarker marker(info());
2819  // Alternative 0 is the negative lookahead, alternative 1 is what comes
2820  // afterwards.
2821  RegExpNode* node = alternatives_->at(1).node();
2822  RegExpNode* replacement = node->FilterASCII(depth - 1);
2823  if (replacement == NULL) return set_replacement(NULL);
2824  alternatives_->at(1).set_node(replacement);
2825 
2826  RegExpNode* neg_node = alternatives_->at(0).node();
2827  RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1);
2828  // If the negative lookahead is always going to fail then
2829  // we don't need to check it.
2830  if (neg_replacement == NULL) return set_replacement(replacement);
2831  alternatives_->at(0).set_node(neg_replacement);
2832  return set_replacement(this);
2833 }
2834 
2835 
2837  RegExpCompiler* compiler,
2838  int characters_filled_in,
2839  bool not_at_start) {
2840  if (body_can_be_zero_length_ || info()->visited) return;
2841  VisitMarker marker(info());
2842  return ChoiceNode::GetQuickCheckDetails(details,
2843  compiler,
2844  characters_filled_in,
2845  not_at_start);
2846 }
2847 
2848 
2850  int recursion_depth,
2851  int budget,
2852  BoyerMooreLookahead* bm,
2853  bool not_at_start) {
2854  if (body_can_be_zero_length_ ||
2855  recursion_depth > RegExpCompiler::kMaxRecursion ||
2856  budget <= 0) {
2857  bm->SetRest(offset);
2858  SaveBMInfo(bm, not_at_start, offset);
2859  return;
2860  }
2862  offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2863  SaveBMInfo(bm, not_at_start, offset);
2864 }
2865 
2866 
2868  RegExpCompiler* compiler,
2869  int characters_filled_in,
2870  bool not_at_start) {
2871  not_at_start = (not_at_start || not_at_start_);
2872  int choice_count = alternatives_->length();
2873  ASSERT(choice_count > 0);
2874  alternatives_->at(0).node()->GetQuickCheckDetails(details,
2875  compiler,
2876  characters_filled_in,
2877  not_at_start);
2878  for (int i = 1; i < choice_count; i++) {
2879  QuickCheckDetails new_details(details->characters());
2880  RegExpNode* node = alternatives_->at(i).node();
2881  node->GetQuickCheckDetails(&new_details, compiler,
2882  characters_filled_in,
2883  not_at_start);
2884  // Here we merge the quick match details of the two branches.
2885  details->Merge(&new_details, characters_filled_in);
2886  }
2887 }
2888 
2889 
2890 // Check for [0-9A-Z_a-z].
2891 static void EmitWordCheck(RegExpMacroAssembler* assembler,
2892  Label* word,
2893  Label* non_word,
2894  bool fall_through_on_word) {
2895  if (assembler->CheckSpecialCharacterClass(
2896  fall_through_on_word ? 'w' : 'W',
2897  fall_through_on_word ? non_word : word)) {
2898  // Optimized implementation available.
2899  return;
2900  }
2901  assembler->CheckCharacterGT('z', non_word);
2902  assembler->CheckCharacterLT('0', non_word);
2903  assembler->CheckCharacterGT('a' - 1, word);
2904  assembler->CheckCharacterLT('9' + 1, word);
2905  assembler->CheckCharacterLT('A', non_word);
2906  assembler->CheckCharacterLT('Z' + 1, word);
2907  if (fall_through_on_word) {
2908  assembler->CheckNotCharacter('_', non_word);
2909  } else {
2910  assembler->CheckCharacter('_', word);
2911  }
2912 }
2913 
2914 
2915 // Emit the code to check for a ^ in multiline mode (1-character lookbehind
2916 // that matches newline or the start of input).
2917 static void EmitHat(RegExpCompiler* compiler,
2918  RegExpNode* on_success,
2919  Trace* trace) {
2920  RegExpMacroAssembler* assembler = compiler->macro_assembler();
2921  // We will be loading the previous character into the current character
2922  // register.
2923  Trace new_trace(*trace);
2924  new_trace.InvalidateCurrentCharacter();
2925 
2926  Label ok;
2927  if (new_trace.cp_offset() == 0) {
2928  // The start of input counts as a newline in this context, so skip to
2929  // ok if we are at the start.
2930  assembler->CheckAtStart(&ok);
2931  }
2932  // We already checked that we are not at the start of input so it must be
2933  // OK to load the previous character.
2934  assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
2935  new_trace.backtrack(),
2936  false);
2937  if (!assembler->CheckSpecialCharacterClass('n',
2938  new_trace.backtrack())) {
2939  // Newline means \n, \r, 0x2028 or 0x2029.
2940  if (!compiler->ascii()) {
2941  assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
2942  }
2943  assembler->CheckCharacter('\n', &ok);
2944  assembler->CheckNotCharacter('\r', new_trace.backtrack());
2945  }
2946  assembler->Bind(&ok);
2947  on_success->Emit(compiler, &new_trace);
2948 }
2949 
2950 
2951 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2952 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2953  RegExpMacroAssembler* assembler = compiler->macro_assembler();
2954  Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2955  bool not_at_start = (trace->at_start() == Trace::FALSE);
2956  BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2957  if (lookahead == NULL) {
2958  int eats_at_least =
2960  EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
2961  if (eats_at_least >= 1) {
2962  BoyerMooreLookahead* bm =
2963  new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
2964  FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
2965  if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2966  if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2967  }
2968  } else {
2969  if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2970  if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2971  }
2972  bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY);
2973  if (next_is_word_character == Trace::UNKNOWN) {
2974  Label before_non_word;
2975  Label before_word;
2976  if (trace->characters_preloaded() != 1) {
2977  assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2978  }
2979  // Fall through on non-word.
2980  EmitWordCheck(assembler, &before_word, &before_non_word, false);
2981  // Next character is not a word character.
2982  assembler->Bind(&before_non_word);
2983  Label ok;
2984  BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2985  assembler->GoTo(&ok);
2986 
2987  assembler->Bind(&before_word);
2988  BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2989  assembler->Bind(&ok);
2990  } else if (next_is_word_character == Trace::TRUE) {
2991  BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2992  } else {
2993  ASSERT(next_is_word_character == Trace::FALSE);
2994  BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2995  }
2996 }
2997 
2998 
2999 void AssertionNode::BacktrackIfPrevious(
3000  RegExpCompiler* compiler,
3001  Trace* trace,
3002  AssertionNode::IfPrevious backtrack_if_previous) {
3003  RegExpMacroAssembler* assembler = compiler->macro_assembler();
3004  Trace new_trace(*trace);
3005  new_trace.InvalidateCurrentCharacter();
3006 
3007  Label fall_through, dummy;
3008 
3009  Label* non_word = backtrack_if_previous == kIsNonWord ?
3010  new_trace.backtrack() :
3011  &fall_through;
3012  Label* word = backtrack_if_previous == kIsNonWord ?
3013  &fall_through :
3014  new_trace.backtrack();
3015 
3016  if (new_trace.cp_offset() == 0) {
3017  // The start of input counts as a non-word character, so the question is
3018  // decided if we are at the start.
3019  assembler->CheckAtStart(non_word);
3020  }
3021  // We already checked that we are not at the start of input so it must be
3022  // OK to load the previous character.
3023  assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
3024  EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
3025 
3026  assembler->Bind(&fall_through);
3027  on_success()->Emit(compiler, &new_trace);
3028 }
3029 
3030 
3032  RegExpCompiler* compiler,
3033  int filled_in,
3034  bool not_at_start) {
3035  if (type_ == AT_START && not_at_start) {
3036  details->set_cannot_match();
3037  return;
3038  }
3039  return on_success()->GetQuickCheckDetails(details,
3040  compiler,
3041  filled_in,
3042  not_at_start);
3043 }
3044 
3045 
3046 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3047  RegExpMacroAssembler* assembler = compiler->macro_assembler();
3048  switch (type_) {
3049  case AT_END: {
3050  Label ok;
3051  assembler->CheckPosition(trace->cp_offset(), &ok);
3052  assembler->GoTo(trace->backtrack());
3053  assembler->Bind(&ok);
3054  break;
3055  }
3056  case AT_START: {
3057  if (trace->at_start() == Trace::FALSE) {
3058  assembler->GoTo(trace->backtrack());
3059  return;
3060  }
3061  if (trace->at_start() == Trace::UNKNOWN) {
3062  assembler->CheckNotAtStart(trace->backtrack());
3063  Trace at_start_trace = *trace;
3064  at_start_trace.set_at_start(true);
3065  on_success()->Emit(compiler, &at_start_trace);
3066  return;
3067  }
3068  }
3069  break;
3070  case AFTER_NEWLINE:
3071  EmitHat(compiler, on_success(), trace);
3072  return;
3073  case AT_BOUNDARY:
3074  case AT_NON_BOUNDARY: {
3075  EmitBoundaryCheck(compiler, trace);
3076  return;
3077  }
3078  }
3079  on_success()->Emit(compiler, trace);
3080 }
3081 
3082 
3083 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
3084  if (quick_check == NULL) return false;
3085  if (offset >= quick_check->characters()) return false;
3086  return quick_check->positions(offset)->determines_perfectly;
3087 }
3088 
3089 
3090 static void UpdateBoundsCheck(int index, int* checked_up_to) {
3091  if (index > *checked_up_to) {
3092  *checked_up_to = index;
3093  }
3094 }
3095 
3096 
3097 // We call this repeatedly to generate code for each pass over the text node.
3098 // The passes are in increasing order of difficulty because we hope one
3099 // of the first passes will fail in which case we are saved the work of the
3100 // later passes. for example for the case independent regexp /%[asdfghjkl]a/
3101 // we will check the '%' in the first pass, the case independent 'a' in the
3102 // second pass and the character class in the last pass.
3103 //
3104 // The passes are done from right to left, so for example to test for /bar/
3105 // we will first test for an 'r' with offset 2, then an 'a' with offset 1
3106 // and then a 'b' with offset 0. This means we can avoid the end-of-input
3107 // bounds check most of the time. In the example we only need to check for
3108 // end-of-input when loading the putative 'r'.
3109 //
3110 // A slight complication involves the fact that the first character may already
3111 // be fetched into a register by the previous node. In this case we want to
3112 // do the test for that character first. We do this in separate passes. The
3113 // 'preloaded' argument indicates that we are doing such a 'pass'. If such a
3114 // pass has been performed then subsequent passes will have true in
3115 // first_element_checked to indicate that that character does not need to be
3116 // checked again.
3117 //
3118 // In addition to all this we are passed a Trace, which can
3119 // contain an AlternativeGeneration object. In this AlternativeGeneration
3120 // object we can see details of any quick check that was already passed in
3121 // order to get to the code we are now generating. The quick check can involve
3122 // loading characters, which means we do not need to recheck the bounds
3123 // up to the limit the quick check already checked. In addition the quick
3124 // check can have involved a mask and compare operation which may simplify
3125 // or obviate the need for further checks at some character positions.
3126 void TextNode::TextEmitPass(RegExpCompiler* compiler,
3127  TextEmitPassType pass,
3128  bool preloaded,
3129  Trace* trace,
3130  bool first_element_checked,
3131  int* checked_up_to) {
3132  Isolate* isolate = Isolate::Current();
3133  RegExpMacroAssembler* assembler = compiler->macro_assembler();
3134  bool ascii = compiler->ascii();
3135  Label* backtrack = trace->backtrack();
3136  QuickCheckDetails* quick_check = trace->quick_check_performed();
3137  int element_count = elms_->length();
3138  for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
3139  TextElement elm = elms_->at(i);
3140  int cp_offset = trace->cp_offset() + elm.cp_offset;
3141  if (elm.type == TextElement::ATOM) {
3142  Vector<const uc16> quarks = elm.data.u_atom->data();
3143  for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3144  if (first_element_checked && i == 0 && j == 0) continue;
3145  if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;
3146  EmitCharacterFunction* emit_function = NULL;
3147  switch (pass) {
3148  case NON_ASCII_MATCH:
3149  ASSERT(ascii);
3150  if (quarks[j] > String::kMaxAsciiCharCode) {
3151  assembler->GoTo(backtrack);
3152  return;
3153  }
3154  break;
3155  case NON_LETTER_CHARACTER_MATCH:
3156  emit_function = &EmitAtomNonLetter;
3157  break;
3158  case SIMPLE_CHARACTER_MATCH:
3159  emit_function = &EmitSimpleCharacter;
3160  break;
3161  case CASE_CHARACTER_MATCH:
3162  emit_function = &EmitAtomLetter;
3163  break;
3164  default:
3165  break;
3166  }
3167  if (emit_function != NULL) {
3168  bool bound_checked = emit_function(isolate,
3169  compiler,
3170  quarks[j],
3171  backtrack,
3172  cp_offset + j,
3173  *checked_up_to < cp_offset + j,
3174  preloaded);
3175  if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
3176  }
3177  }
3178  } else {
3180  if (pass == CHARACTER_CLASS_MATCH) {
3181  if (first_element_checked && i == 0) continue;
3182  if (DeterminedAlready(quick_check, elm.cp_offset)) continue;
3183  RegExpCharacterClass* cc = elm.data.u_char_class;
3184  EmitCharClass(assembler,
3185  cc,
3186  ascii,
3187  backtrack,
3188  cp_offset,
3189  *checked_up_to < cp_offset,
3190  preloaded,
3191  zone());
3192  UpdateBoundsCheck(cp_offset, checked_up_to);
3193  }
3194  }
3195  }
3196 }
3197 
3198 
3199 int TextNode::Length() {
3200  TextElement elm = elms_->last();
3201  ASSERT(elm.cp_offset >= 0);
3202  if (elm.type == TextElement::ATOM) {
3203  return elm.cp_offset + elm.data.u_atom->data().length();
3204  } else {
3205  return elm.cp_offset + 1;
3206  }
3207 }
3208 
3209 
3210 bool TextNode::SkipPass(int int_pass, bool ignore_case) {
3211  TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
3212  if (ignore_case) {
3213  return pass == SIMPLE_CHARACTER_MATCH;
3214  } else {
3215  return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
3216  }
3217 }
3218 
3219 
3220 // This generates the code to match a text node. A text node can contain
3221 // straight character sequences (possibly to be matched in a case-independent
3222 // way) and character classes. For efficiency we do not do this in a single
3223 // pass from left to right. Instead we pass over the text node several times,
3224 // emitting code for some character positions every time. See the comment on
3225 // TextEmitPass for details.
3226 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3227  LimitResult limit_result = LimitVersions(compiler, trace);
3228  if (limit_result == DONE) return;
3229  ASSERT(limit_result == CONTINUE);
3230 
3231  if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
3232  compiler->SetRegExpTooBig();
3233  return;
3234  }
3235 
3236  if (compiler->ascii()) {
3237  int dummy = 0;
3238  TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
3239  }
3240 
3241  bool first_elt_done = false;
3242  int bound_checked_to = trace->cp_offset() - 1;
3243  bound_checked_to += trace->bound_checked_up_to();
3244 
3245  // If a character is preloaded into the current character register then
3246  // check that now.
3247  if (trace->characters_preloaded() == 1) {
3248  for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3249  if (!SkipPass(pass, compiler->ignore_case())) {
3250  TextEmitPass(compiler,
3251  static_cast<TextEmitPassType>(pass),
3252  true,
3253  trace,
3254  false,
3255  &bound_checked_to);
3256  }
3257  }
3258  first_elt_done = true;
3259  }
3260 
3261  for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3262  if (!SkipPass(pass, compiler->ignore_case())) {
3263  TextEmitPass(compiler,
3264  static_cast<TextEmitPassType>(pass),
3265  false,
3266  trace,
3267  first_elt_done,
3268  &bound_checked_to);
3269  }
3270  }
3271 
3272  Trace successor_trace(*trace);
3273  successor_trace.set_at_start(false);
3274  successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
3275  RecursionCheck rc(compiler);
3276  on_success()->Emit(compiler, &successor_trace);
3277 }
3278 
3279 
3281  characters_preloaded_ = 0;
3282 }
3283 
3284 
3286  ASSERT(by > 0);
3287  // We don't have an instruction for shifting the current character register
3288  // down or for using a shifted value for anything so lets just forget that
3289  // we preloaded any characters into it.
3290  characters_preloaded_ = 0;
3291  // Adjust the offsets of the quick check performed information. This
3292  // information is used to find out what we already determined about the
3293  // characters by means of mask and compare.
3294  quick_check_performed_.Advance(by, compiler->ascii());
3295  cp_offset_ += by;
3296  if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
3297  compiler->SetRegExpTooBig();
3298  cp_offset_ = 0;
3299  }
3300  bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
3301 }
3302 
3303 
3304 void TextNode::MakeCaseIndependent(bool is_ascii) {
3305  int element_count = elms_->length();
3306  for (int i = 0; i < element_count; i++) {
3307  TextElement elm = elms_->at(i);
3308  if (elm.type == TextElement::CHAR_CLASS) {
3310  // None of the standard character classes is different in the case
3311  // independent case and it slows us down if we don't know that.
3312  if (cc->is_standard(zone())) continue;
3313  ZoneList<CharacterRange>* ranges = cc->ranges(zone());
3314  int range_count = ranges->length();
3315  for (int j = 0; j < range_count; j++) {
3316  ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());
3317  }
3318  }
3319  }
3320 }
3321 
3322 
3324  TextElement elm = elms_->at(elms_->length() - 1);
3325  if (elm.type == TextElement::CHAR_CLASS) {
3326  return elm.cp_offset + 1;
3327  } else {
3328  return elm.cp_offset + elm.data.u_atom->data().length();
3329  }
3330 }
3331 
3332 
3334  RegExpCompiler* compiler) {
3335  if (elms_->length() != 1) return NULL;
3336  TextElement elm = elms_->at(0);
3337  if (elm.type != TextElement::CHAR_CLASS) return NULL;
3339  ZoneList<CharacterRange>* ranges = node->ranges(zone());
3340  if (!CharacterRange::IsCanonical(ranges)) {
3342  }
3343  if (node->is_negated()) {
3344  return ranges->length() == 0 ? on_success() : NULL;
3345  }
3346  if (ranges->length() != 1) return NULL;
3347  uint32_t max_char;
3348  if (compiler->ascii()) {
3349  max_char = String::kMaxAsciiCharCode;
3350  } else {
3351  max_char = String::kMaxUtf16CodeUnit;
3352  }
3353  return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;
3354 }
3355 
3356 
3357 // Finds the fixed match length of a sequence of nodes that goes from
3358 // this alternative and back to this choice node. If there are variable
3359 // length nodes or other complications in the way then return a sentinel
3360 // value indicating that a greedy loop cannot be constructed.
3362  GuardedAlternative* alternative) {
3363  int length = 0;
3364  RegExpNode* node = alternative->node();
3365  // Later we will generate code for all these text nodes using recursion
3366  // so we have to limit the max number.
3367  int recursion_depth = 0;
3368  while (node != this) {
3369  if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
3370  return kNodeIsTooComplexForGreedyLoops;
3371  }
3372  int node_length = node->GreedyLoopTextLength();
3373  if (node_length == kNodeIsTooComplexForGreedyLoops) {
3374  return kNodeIsTooComplexForGreedyLoops;
3375  }
3376  length += node_length;
3377  SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
3378  node = seq_node->on_success();
3379  }
3380  return length;
3381 }
3382 
3383 
3385  ASSERT_EQ(loop_node_, NULL);
3386  AddAlternative(alt);
3387  loop_node_ = alt.node();
3388 }
3389 
3390 
3392  ASSERT_EQ(continue_node_, NULL);
3393  AddAlternative(alt);
3394  continue_node_ = alt.node();
3395 }
3396 
3397 
3398 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3399  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3400  if (trace->stop_node() == this) {
3401  int text_length =
3402  GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
3403  ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
3404  // Update the counter-based backtracking info on the stack. This is an
3405  // optimization for greedy loops (see below).
3406  ASSERT(trace->cp_offset() == text_length);
3407  macro_assembler->AdvanceCurrentPosition(text_length);
3408  macro_assembler->GoTo(trace->loop_label());
3409  return;
3410  }
3411  ASSERT(trace->stop_node() == NULL);
3412  if (!trace->is_trivial()) {
3413  trace->Flush(compiler, this);
3414  return;
3415  }
3416  ChoiceNode::Emit(compiler, trace);
3417 }
3418 
3419 
3420 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
3421  int eats_at_least) {
3422  int preload_characters = Min(4, eats_at_least);
3423  if (compiler->macro_assembler()->CanReadUnaligned()) {
3424  bool ascii = compiler->ascii();
3425  if (ascii) {
3426  if (preload_characters > 4) preload_characters = 4;
3427  // We can't preload 3 characters because there is no machine instruction
3428  // to do that. We can't just load 4 because we could be reading
3429  // beyond the end of the string, which could cause a memory fault.
3430  if (preload_characters == 3) preload_characters = 2;
3431  } else {
3432  if (preload_characters > 2) preload_characters = 2;
3433  }
3434  } else {
3435  if (preload_characters > 1) preload_characters = 1;
3436  }
3437  return preload_characters;
3438 }
3439 
3440 
3441 // This class is used when generating the alternatives in a choice node. It
3442 // records the way the alternative is being code generated.
3444  public:
3446  : possible_success(),
3447  expects_preload(false),
3448  after(),
3449  quick_check_details() { }
3452  Label after;
3454 };
3455 
3456 
3457 // Creates a list of AlternativeGenerations. If the list has a reasonable
3458 // size then it is on the stack, otherwise the excess is on the heap.
3460  public:
3462  : alt_gens_(count, zone) {
3463  for (int i = 0; i < count && i < kAFew; i++) {
3464  alt_gens_.Add(a_few_alt_gens_ + i, zone);
3465  }
3466  for (int i = kAFew; i < count; i++) {
3467  alt_gens_.Add(new AlternativeGeneration(), zone);
3468  }
3469  }
3471  for (int i = kAFew; i < alt_gens_.length(); i++) {
3472  delete alt_gens_[i];
3473  alt_gens_[i] = NULL;
3474  }
3475  }
3476 
3478  return alt_gens_[i];
3479  }
3480 
3481  private:
3482  static const int kAFew = 10;
3484  AlternativeGeneration a_few_alt_gens_[kAFew];
3485 };
3486 
3487 
3488 // The '2' variant is has inclusive from and exclusive to.
3489 static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0,
3490  0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A,
3491  0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 };
3492 static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
3493 
3494 static const int kWordRanges[] = {
3495  '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
3496 static const int kWordRangeCount = ARRAY_SIZE(kWordRanges);
3497 static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };
3498 static const int kDigitRangeCount = ARRAY_SIZE(kDigitRanges);
3499 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };
3500 static const int kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);
3501 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
3502  0x2028, 0x202A, 0x10000 };
3503 static const int kLineTerminatorRangeCount = ARRAY_SIZE(kLineTerminatorRanges);
3504 
3505 
3506 void BoyerMoorePositionInfo::Set(int character) {
3507  SetInterval(Interval(character, character));
3508 }
3509 
3510 
3512  s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
3513  w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
3514  d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
3515  surrogate_ =
3516  AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
3517  if (interval.to() - interval.from() >= kMapSize - 1) {
3518  if (map_count_ != kMapSize) {
3519  map_count_ = kMapSize;
3520  for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
3521  }
3522  return;
3523  }
3524  for (int i = interval.from(); i <= interval.to(); i++) {
3525  int mod_character = (i & kMask);
3526  if (!map_->at(mod_character)) {
3527  map_count_++;
3528  map_->at(mod_character) = true;
3529  }
3530  if (map_count_ == kMapSize) return;
3531  }
3532 }
3533 
3534 
3536  s_ = w_ = d_ = kLatticeUnknown;
3537  if (map_count_ != kMapSize) {
3538  map_count_ = kMapSize;
3539  for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
3540  }
3541 }
3542 
3543 
3545  int length, RegExpCompiler* compiler, Zone* zone)
3546  : length_(length),
3547  compiler_(compiler) {
3548  if (compiler->ascii()) {
3549  max_char_ = String::kMaxAsciiCharCode;
3550  } else {
3551  max_char_ = String::kMaxUtf16CodeUnit;
3552  }
3553  bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
3554  for (int i = 0; i < length; i++) {
3555  bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);
3556  }
3557 }
3558 
3559 
3560 // Find the longest range of lookahead that has the fewest number of different
3561 // characters that can occur at a given position. Since we are optimizing two
3562 // different parameters at once this is a tradeoff.
3563 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
3564  int biggest_points = 0;
3565  // If more than 32 characters out of 128 can occur it is unlikely that we can
3566  // be lucky enough to step forwards much of the time.
3567  const int kMaxMax = 32;
3568  for (int max_number_of_chars = 4;
3569  max_number_of_chars < kMaxMax;
3570  max_number_of_chars *= 2) {
3571  biggest_points =
3572  FindBestInterval(max_number_of_chars, biggest_points, from, to);
3573  }
3574  if (biggest_points == 0) return false;
3575  return true;
3576 }
3577 
3578 
3579 // Find the highest-points range between 0 and length_ where the character
3580 // information is not too vague. 'Too vague' means that there are more than
3581 // max_number_of_chars that can occur at this position. Calculates the number
3582 // of points as the product of width-of-the-range and
3583 // probability-of-finding-one-of-the-characters, where the probability is
3584 // calculated using the frequency distribution of the sample subject string.
3585 int BoyerMooreLookahead::FindBestInterval(
3586  int max_number_of_chars, int old_biggest_points, int* from, int* to) {
3587  int biggest_points = old_biggest_points;
3588  static const int kSize = RegExpMacroAssembler::kTableSize;
3589  for (int i = 0; i < length_; ) {
3590  while (i < length_ && Count(i) > max_number_of_chars) i++;
3591  if (i == length_) break;
3592  int remembered_from = i;
3593  bool union_map[kSize];
3594  for (int j = 0; j < kSize; j++) union_map[j] = false;
3595  while (i < length_ && Count(i) <= max_number_of_chars) {
3596  BoyerMoorePositionInfo* map = bitmaps_->at(i);
3597  for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j);
3598  i++;
3599  }
3600  int frequency = 0;
3601  for (int j = 0; j < kSize; j++) {
3602  if (union_map[j]) {
3603  // Add 1 to the frequency to give a small per-character boost for
3604  // the cases where our sampling is not good enough and many
3605  // characters have a frequency of zero. This means the frequency
3606  // can theoretically be up to 2*kSize though we treat it mostly as
3607  // a fraction of kSize.
3608  frequency += compiler_->frequency_collator()->Frequency(j) + 1;
3609  }
3610  }
3611  // We use the probability of skipping times the distance we are skipping to
3612  // judge the effectiveness of this. Actually we have a cut-off: By
3613  // dividing by 2 we switch off the skipping if the probability of skipping
3614  // is less than 50%. This is because the multibyte mask-and-compare
3615  // skipping in quickcheck is more likely to do well on this case.
3616  bool in_quickcheck_range = ((i - remembered_from < 4) ||
3617  (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));
3618  // Called 'probability' but it is only a rough estimate and can actually
3619  // be outside the 0-kSize range.
3620  int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
3621  int points = (i - remembered_from) * probability;
3622  if (points > biggest_points) {
3623  *from = remembered_from;
3624  *to = i - 1;
3625  biggest_points = points;
3626  }
3627  }
3628  return biggest_points;
3629 }
3630 
3631 
3632 // Take all the characters that will not prevent a successful match if they
3633 // occur in the subject string in the range between min_lookahead and
3634 // max_lookahead (inclusive) measured from the current position. If the
3635 // character at max_lookahead offset is not one of these characters, then we
3636 // can safely skip forwards by the number of characters in the range.
3637 int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
3638  int max_lookahead,
3639  Handle<ByteArray> boolean_skip_table) {
3640  const int kSize = RegExpMacroAssembler::kTableSize;
3641 
3642  const int kSkipArrayEntry = 0;
3643  const int kDontSkipArrayEntry = 1;
3644 
3645  for (int i = 0; i < kSize; i++) {
3646  boolean_skip_table->set(i, kSkipArrayEntry);
3647  }
3648  int skip = max_lookahead + 1 - min_lookahead;
3649 
3650  for (int i = max_lookahead; i >= min_lookahead; i--) {
3651  BoyerMoorePositionInfo* map = bitmaps_->at(i);
3652  for (int j = 0; j < kSize; j++) {
3653  if (map->at(j)) {
3654  boolean_skip_table->set(j, kDontSkipArrayEntry);
3655  }
3656  }
3657  }
3658 
3659  return skip;
3660 }
3661 
3662 
3663 // See comment above on the implementation of GetSkipTable.
3665  const int kSize = RegExpMacroAssembler::kTableSize;
3666 
3667  int min_lookahead = 0;
3668  int max_lookahead = 0;
3669 
3670  if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;
3671 
3672  bool found_single_character = false;
3673  int single_character = 0;
3674  for (int i = max_lookahead; i >= min_lookahead; i--) {
3675  BoyerMoorePositionInfo* map = bitmaps_->at(i);
3676  if (map->map_count() > 1 ||
3677  (found_single_character && map->map_count() != 0)) {
3678  found_single_character = false;
3679  break;
3680  }
3681  for (int j = 0; j < kSize; j++) {
3682  if (map->at(j)) {
3683  found_single_character = true;
3684  single_character = j;
3685  break;
3686  }
3687  }
3688  }
3689 
3690  int lookahead_width = max_lookahead + 1 - min_lookahead;
3691 
3692  if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
3693  // The mask-compare can probably handle this better.
3694  return false;
3695  }
3696 
3697  if (found_single_character) {
3698  Label cont, again;
3699  masm->Bind(&again);
3700  masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3701  if (max_char_ > kSize) {
3702  masm->CheckCharacterAfterAnd(single_character,
3704  &cont);
3705  } else {
3706  masm->CheckCharacter(single_character, &cont);
3707  }
3708  masm->AdvanceCurrentPosition(lookahead_width);
3709  masm->GoTo(&again);
3710  masm->Bind(&cont);
3711  return true;
3712  }
3713 
3714  Handle<ByteArray> boolean_skip_table =
3715  FACTORY->NewByteArray(kSize, TENURED);
3716  int skip_distance = GetSkipTable(
3717  min_lookahead, max_lookahead, boolean_skip_table);
3718  ASSERT(skip_distance != 0);
3719 
3720  Label cont, again;
3721  masm->Bind(&again);
3722  masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3723  masm->CheckBitInTable(boolean_skip_table, &cont);
3724  masm->AdvanceCurrentPosition(skip_distance);
3725  masm->GoTo(&again);
3726  masm->Bind(&cont);
3727 
3728  return true;
3729 }
3730 
3731 
3732 /* Code generation for choice nodes.
3733  *
3734  * We generate quick checks that do a mask and compare to eliminate a
3735  * choice. If the quick check succeeds then it jumps to the continuation to
3736  * do slow checks and check subsequent nodes. If it fails (the common case)
3737  * it falls through to the next choice.
3738  *
3739  * Here is the desired flow graph. Nodes directly below each other imply
3740  * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
3741  * 3 doesn't have a quick check so we have to call the slow check.
3742  * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
3743  * regexp continuation is generated directly after the Sn node, up to the
3744  * next GoTo if we decide to reuse some already generated code. Some
3745  * nodes expect preload_characters to be preloaded into the current
3746  * character register. R nodes do this preloading. Vertices are marked
3747  * F for failures and S for success (possible success in the case of quick
3748  * nodes). L, V, < and > are used as arrow heads.
3749  *
3750  * ----------> R
3751  * |
3752  * V
3753  * Q1 -----> S1
3754  * | S /
3755  * F| /
3756  * | F/
3757  * | /
3758  * | R
3759  * | /
3760  * V L
3761  * Q2 -----> S2
3762  * | S /
3763  * F| /
3764  * | F/
3765  * | /
3766  * | R
3767  * | /
3768  * V L
3769  * S3
3770  * |
3771  * F|
3772  * |
3773  * R
3774  * |
3775  * backtrack V
3776  * <----------Q4
3777  * \ F |
3778  * \ |S
3779  * \ F V
3780  * \-----S4
3781  *
3782  * For greedy loops we reverse our expectation and expect to match rather
3783  * than fail. Therefore we want the loop code to look like this (U is the
3784  * unwind code that steps back in the greedy loop). The following alternatives
3785  * look the same as above.
3786  * _____
3787  * / \
3788  * V |
3789  * ----------> S1 |
3790  * /| |
3791  * / |S |
3792  * F/ \_____/
3793  * /
3794  * |<-----------
3795  * | \
3796  * V \
3797  * Q2 ---> S2 \
3798  * | S / |
3799  * F| / |
3800  * | F/ |
3801  * | / |
3802  * | R |
3803  * | / |
3804  * F VL |
3805  * <------U |
3806  * back |S |
3807  * \______________/
3808  */
3809 
3810 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3811  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3812  int choice_count = alternatives_->length();
3813 #ifdef DEBUG
3814  for (int i = 0; i < choice_count - 1; i++) {
3815  GuardedAlternative alternative = alternatives_->at(i);
3816  ZoneList<Guard*>* guards = alternative.guards();
3817  int guard_count = (guards == NULL) ? 0 : guards->length();
3818  for (int j = 0; j < guard_count; j++) {
3819  ASSERT(!trace->mentions_reg(guards->at(j)->reg()));
3820  }
3821  }
3822 #endif
3823 
3824  LimitResult limit_result = LimitVersions(compiler, trace);
3825  if (limit_result == DONE) return;
3826  ASSERT(limit_result == CONTINUE);
3827 
3828  int new_flush_budget = trace->flush_budget() / choice_count;
3829  if (trace->flush_budget() == 0 && trace->actions() != NULL) {
3830  trace->Flush(compiler, this);
3831  return;
3832  }
3833 
3834  RecursionCheck rc(compiler);
3835 
3836  Trace* current_trace = trace;
3837 
3838  int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
3839  bool greedy_loop = false;
3840  Label greedy_loop_label;
3841  Trace counter_backtrack_trace;
3842  counter_backtrack_trace.set_backtrack(&greedy_loop_label);
3843  if (not_at_start()) counter_backtrack_trace.set_at_start(false);
3844 
3845  if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
3846  // Here we have special handling for greedy loops containing only text nodes
3847  // and other simple nodes. These are handled by pushing the current
3848  // position on the stack and then incrementing the current position each
3849  // time around the switch. On backtrack we decrement the current position
3850  // and check it against the pushed value. This avoids pushing backtrack
3851  // information for each iteration of the loop, which could take up a lot of
3852  // space.
3853  greedy_loop = true;
3854  ASSERT(trace->stop_node() == NULL);
3855  macro_assembler->PushCurrentPosition();
3856  current_trace = &counter_backtrack_trace;
3857  Label greedy_match_failed;
3858  Trace greedy_match_trace;
3859  if (not_at_start()) greedy_match_trace.set_at_start(false);
3860  greedy_match_trace.set_backtrack(&greedy_match_failed);
3861  Label loop_label;
3862  macro_assembler->Bind(&loop_label);
3863  greedy_match_trace.set_stop_node(this);
3864  greedy_match_trace.set_loop_label(&loop_label);
3865  alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
3866  macro_assembler->Bind(&greedy_match_failed);
3867  }
3868 
3869  Label second_choice; // For use in greedy matches.
3870  macro_assembler->Bind(&second_choice);
3871 
3872  int first_normal_choice = greedy_loop ? 1 : 0;
3873 
3874  bool not_at_start = current_trace->at_start() == Trace::FALSE;
3875  const int kEatsAtLeastNotYetInitialized = -1;
3876  int eats_at_least = kEatsAtLeastNotYetInitialized;
3877 
3878  bool skip_was_emitted = false;
3879 
3880  if (!greedy_loop && choice_count == 2) {
3881  GuardedAlternative alt1 = alternatives_->at(1);
3882  if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
3883  RegExpNode* eats_anything_node = alt1.node();
3884  if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==
3885  this) {
3886  // At this point we know that we are at a non-greedy loop that will eat
3887  // any character one at a time. Any non-anchored regexp has such a
3888  // loop prepended to it in order to find where it starts. We look for
3889  // a pattern of the form ...abc... where we can look 6 characters ahead
3890  // and step forwards 3 if the character is not one of abc. Abc need
3891  // not be atoms, they can be any reasonably limited character class or
3892  // small alternation.
3893  ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.
3894  BoyerMooreLookahead* lookahead = bm_info(not_at_start);
3895  if (lookahead == NULL) {
3896  eats_at_least =
3898  EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
3899  if (eats_at_least >= 1) {
3900  BoyerMooreLookahead* bm =
3901  new(zone()) BoyerMooreLookahead(eats_at_least,
3902  compiler,
3903  zone());
3904  GuardedAlternative alt0 = alternatives_->at(0);
3905  alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
3906  skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
3907  }
3908  } else {
3909  skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
3910  }
3911  }
3912  }
3913  }
3914 
3915  if (eats_at_least == kEatsAtLeastNotYetInitialized) {
3916  // Save some time by looking at most one machine word ahead.
3917  eats_at_least = EatsAtLeast(compiler->ascii() ? 4 : 2, 0, not_at_start);
3918  }
3919  int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
3920 
3921  bool preload_is_current = !skip_was_emitted &&
3922  (current_trace->characters_preloaded() == preload_characters);
3923  bool preload_has_checked_bounds = preload_is_current;
3924 
3925  AlternativeGenerationList alt_gens(choice_count, zone());
3926 
3927  // For now we just call all choices one after the other. The idea ultimately
3928  // is to use the Dispatch table to try only the relevant ones.
3929  for (int i = first_normal_choice; i < choice_count; i++) {
3930  GuardedAlternative alternative = alternatives_->at(i);
3931  AlternativeGeneration* alt_gen = alt_gens.at(i);
3932  alt_gen->quick_check_details.set_characters(preload_characters);
3933  ZoneList<Guard*>* guards = alternative.guards();
3934  int guard_count = (guards == NULL) ? 0 : guards->length();
3935  Trace new_trace(*current_trace);
3936  new_trace.set_characters_preloaded(preload_is_current ?
3937  preload_characters :
3938  0);
3939  if (preload_has_checked_bounds) {
3940  new_trace.set_bound_checked_up_to(preload_characters);
3941  }
3942  new_trace.quick_check_performed()->Clear();
3943  if (not_at_start_) new_trace.set_at_start(Trace::FALSE);
3944  alt_gen->expects_preload = preload_is_current;
3945  bool generate_full_check_inline = false;
3946  if (FLAG_regexp_optimization &&
3948  alternative.node()->EmitQuickCheck(compiler,
3949  &new_trace,
3950  preload_has_checked_bounds,
3951  &alt_gen->possible_success,
3952  &alt_gen->quick_check_details,
3953  i < choice_count - 1)) {
3954  // Quick check was generated for this choice.
3955  preload_is_current = true;
3956  preload_has_checked_bounds = true;
3957  // On the last choice in the ChoiceNode we generated the quick
3958  // check to fall through on possible success. So now we need to
3959  // generate the full check inline.
3960  if (i == choice_count - 1) {
3961  macro_assembler->Bind(&alt_gen->possible_success);
3962  new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3963  new_trace.set_characters_preloaded(preload_characters);
3964  new_trace.set_bound_checked_up_to(preload_characters);
3965  generate_full_check_inline = true;
3966  }
3967  } else if (alt_gen->quick_check_details.cannot_match()) {
3968  if (i == choice_count - 1 && !greedy_loop) {
3969  macro_assembler->GoTo(trace->backtrack());
3970  }
3971  continue;
3972  } else {
3973  // No quick check was generated. Put the full code here.
3974  // If this is not the first choice then there could be slow checks from
3975  // previous cases that go here when they fail. There's no reason to
3976  // insist that they preload characters since the slow check we are about
3977  // to generate probably can't use it.
3978  if (i != first_normal_choice) {
3979  alt_gen->expects_preload = false;
3980  new_trace.InvalidateCurrentCharacter();
3981  }
3982  if (i < choice_count - 1) {
3983  new_trace.set_backtrack(&alt_gen->after);
3984  }
3985  generate_full_check_inline = true;
3986  }
3987  if (generate_full_check_inline) {
3988  if (new_trace.actions() != NULL) {
3989  new_trace.set_flush_budget(new_flush_budget);
3990  }
3991  for (int j = 0; j < guard_count; j++) {
3992  GenerateGuard(macro_assembler, guards->at(j), &new_trace);
3993  }
3994  alternative.node()->Emit(compiler, &new_trace);
3995  preload_is_current = false;
3996  }
3997  macro_assembler->Bind(&alt_gen->after);
3998  }
3999  if (greedy_loop) {
4000  macro_assembler->Bind(&greedy_loop_label);
4001  // If we have unwound to the bottom then backtrack.
4002  macro_assembler->CheckGreedyLoop(trace->backtrack());
4003  // Otherwise try the second priority at an earlier position.
4004  macro_assembler->AdvanceCurrentPosition(-text_length);
4005  macro_assembler->GoTo(&second_choice);
4006  }
4007 
4008  // At this point we need to generate slow checks for the alternatives where
4009  // the quick check was inlined. We can recognize these because the associated
4010  // label was bound.
4011  for (int i = first_normal_choice; i < choice_count - 1; i++) {
4012  AlternativeGeneration* alt_gen = alt_gens.at(i);
4013  Trace new_trace(*current_trace);
4014  // If there are actions to be flushed we have to limit how many times
4015  // they are flushed. Take the budget of the parent trace and distribute
4016  // it fairly amongst the children.
4017  if (new_trace.actions() != NULL) {
4018  new_trace.set_flush_budget(new_flush_budget);
4019  }
4020  EmitOutOfLineContinuation(compiler,
4021  &new_trace,
4022  alternatives_->at(i),
4023  alt_gen,
4024  preload_characters,
4025  alt_gens.at(i + 1)->expects_preload);
4026  }
4027 }
4028 
4029 
4030 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
4031  Trace* trace,
4032  GuardedAlternative alternative,
4033  AlternativeGeneration* alt_gen,
4034  int preload_characters,
4035  bool next_expects_preload) {
4036  if (!alt_gen->possible_success.is_linked()) return;
4037 
4038  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
4039  macro_assembler->Bind(&alt_gen->possible_success);
4040  Trace out_of_line_trace(*trace);
4041  out_of_line_trace.set_characters_preloaded(preload_characters);
4042  out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
4043  if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE);
4044  ZoneList<Guard*>* guards = alternative.guards();
4045  int guard_count = (guards == NULL) ? 0 : guards->length();
4046  if (next_expects_preload) {
4047  Label reload_current_char;
4048  out_of_line_trace.set_backtrack(&reload_current_char);
4049  for (int j = 0; j < guard_count; j++) {
4050  GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
4051  }
4052  alternative.node()->Emit(compiler, &out_of_line_trace);
4053  macro_assembler->Bind(&reload_current_char);
4054  // Reload the current character, since the next quick check expects that.
4055  // We don't need to check bounds here because we only get into this
4056  // code through a quick check which already did the checked load.
4057  macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
4058  NULL,
4059  false,
4060  preload_characters);
4061  macro_assembler->GoTo(&(alt_gen->after));
4062  } else {
4063  out_of_line_trace.set_backtrack(&(alt_gen->after));
4064  for (int j = 0; j < guard_count; j++) {
4065  GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
4066  }
4067  alternative.node()->Emit(compiler, &out_of_line_trace);
4068  }
4069 }
4070 
4071 
4072 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
4073  RegExpMacroAssembler* assembler = compiler->macro_assembler();
4074  LimitResult limit_result = LimitVersions(compiler, trace);
4075  if (limit_result == DONE) return;
4076  ASSERT(limit_result == CONTINUE);
4077 
4078  RecursionCheck rc(compiler);
4079 
4080  switch (type_) {
4081  case STORE_POSITION: {
4083  new_capture(data_.u_position_register.reg,
4084  data_.u_position_register.is_capture,
4085  trace);
4086  Trace new_trace = *trace;
4087  new_trace.add_action(&new_capture);
4088  on_success()->Emit(compiler, &new_trace);
4089  break;
4090  }
4091  case INCREMENT_REGISTER: {
4093  new_increment(data_.u_increment_register.reg);
4094  Trace new_trace = *trace;
4095  new_trace.add_action(&new_increment);
4096  on_success()->Emit(compiler, &new_trace);
4097  break;
4098  }
4099  case SET_REGISTER: {
4101  new_set(data_.u_store_register.reg, data_.u_store_register.value);
4102  Trace new_trace = *trace;
4103  new_trace.add_action(&new_set);
4104  on_success()->Emit(compiler, &new_trace);
4105  break;
4106  }
4107  case CLEAR_CAPTURES: {
4109  new_capture(Interval(data_.u_clear_captures.range_from,
4110  data_.u_clear_captures.range_to));
4111  Trace new_trace = *trace;
4112  new_trace.add_action(&new_capture);
4113  on_success()->Emit(compiler, &new_trace);
4114  break;
4115  }
4116  case BEGIN_SUBMATCH:
4117  if (!trace->is_trivial()) {
4118  trace->Flush(compiler, this);
4119  } else {
4120  assembler->WriteCurrentPositionToRegister(
4121  data_.u_submatch.current_position_register, 0);
4122  assembler->WriteStackPointerToRegister(
4123  data_.u_submatch.stack_pointer_register);
4124  on_success()->Emit(compiler, trace);
4125  }
4126  break;
4127  case EMPTY_MATCH_CHECK: {
4128  int start_pos_reg = data_.u_empty_match_check.start_register;
4129  int stored_pos = 0;
4130  int rep_reg = data_.u_empty_match_check.repetition_register;
4131  bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
4132  bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
4133  if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
4134  // If we know we haven't advanced and there is no minimum we
4135  // can just backtrack immediately.
4136  assembler->GoTo(trace->backtrack());
4137  } else if (know_dist && stored_pos < trace->cp_offset()) {
4138  // If we know we've advanced we can generate the continuation
4139  // immediately.
4140  on_success()->Emit(compiler, trace);
4141  } else if (!trace->is_trivial()) {
4142  trace->Flush(compiler, this);
4143  } else {
4144  Label skip_empty_check;
4145  // If we have a minimum number of repetitions we check the current
4146  // number first and skip the empty check if it's not enough.
4147  if (has_minimum) {
4148  int limit = data_.u_empty_match_check.repetition_limit;
4149  assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
4150  }
4151  // If the match is empty we bail out, otherwise we fall through
4152  // to the on-success continuation.
4153  assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
4154  trace->backtrack());
4155  assembler->Bind(&skip_empty_check);
4156  on_success()->Emit(compiler, trace);
4157  }
4158  break;
4159  }
4161  if (!trace->is_trivial()) {
4162  trace->Flush(compiler, this);
4163  return;
4164  }
4166  data_.u_submatch.current_position_register);
4167  assembler->ReadStackPointerFromRegister(
4168  data_.u_submatch.stack_pointer_register);
4169  int clear_register_count = data_.u_submatch.clear_register_count;
4170  if (clear_register_count == 0) {
4171  on_success()->Emit(compiler, trace);
4172  return;
4173  }
4174  int clear_registers_from = data_.u_submatch.clear_register_from;
4175  Label clear_registers_backtrack;
4176  Trace new_trace = *trace;
4177  new_trace.set_backtrack(&clear_registers_backtrack);
4178  on_success()->Emit(compiler, &new_trace);
4179 
4180  assembler->Bind(&clear_registers_backtrack);
4181  int clear_registers_to = clear_registers_from + clear_register_count - 1;
4182  assembler->ClearRegisters(clear_registers_from, clear_registers_to);
4183 
4184  ASSERT(trace->backtrack() == NULL);
4185  assembler->Backtrack();
4186  return;
4187  }
4188  default:
4189  UNREACHABLE();
4190  }
4191 }
4192 
4193 
4195  RegExpMacroAssembler* assembler = compiler->macro_assembler();
4196  if (!trace->is_trivial()) {
4197  trace->Flush(compiler, this);
4198  return;
4199  }
4200 
4201  LimitResult limit_result = LimitVersions(compiler, trace);
4202  if (limit_result == DONE) return;
4203  ASSERT(limit_result == CONTINUE);
4204 
4205  RecursionCheck rc(compiler);
4206 
4207  ASSERT_EQ(start_reg_ + 1, end_reg_);
4208  if (compiler->ignore_case()) {
4209  assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
4210  trace->backtrack());
4211  } else {
4212  assembler->CheckNotBackReference(start_reg_, trace->backtrack());
4213  }
4214  on_success()->Emit(compiler, trace);
4215 }
4216 
4217 
4218 // -------------------------------------------------------------------
4219 // Dot/dotty output
4220 
4221 
4222 #ifdef DEBUG
4223 
4224 
4225 class DotPrinter: public NodeVisitor {
4226  public:
4227  explicit DotPrinter(bool ignore_case)
4228  : ignore_case_(ignore_case),
4229  stream_(&alloc_) { }
4230  void PrintNode(const char* label, RegExpNode* node);
4231  void Visit(RegExpNode* node);
4232  void PrintAttributes(RegExpNode* from);
4233  StringStream* stream() { return &stream_; }
4234  void PrintOnFailure(RegExpNode* from, RegExpNode* to);
4235 #define DECLARE_VISIT(Type) \
4236  virtual void Visit##Type(Type##Node* that);
4238 #undef DECLARE_VISIT
4239  private:
4240  bool ignore_case_;
4241  HeapStringAllocator alloc_;
4242  StringStream stream_;
4243 };
4244 
4245 
4246 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
4247  stream()->Add("digraph G {\n graph [label=\"");
4248  for (int i = 0; label[i]; i++) {
4249  switch (label[i]) {
4250  case '\\':
4251  stream()->Add("\\\\");
4252  break;
4253  case '"':
4254  stream()->Add("\"");
4255  break;
4256  default:
4257  stream()->Put(label[i]);
4258  break;
4259  }
4260  }
4261  stream()->Add("\"];\n");
4262  Visit(node);
4263  stream()->Add("}\n");
4264  printf("%s", *(stream()->ToCString()));
4265 }
4266 
4267 
4268 void DotPrinter::Visit(RegExpNode* node) {
4269  if (node->info()->visited) return;
4270  node->info()->visited = true;
4271  node->Accept(this);
4272 }
4273 
4274 
4275 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
4276  stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);
4277  Visit(on_failure);
4278 }
4279 
4280 
4281 class TableEntryBodyPrinter {
4282  public:
4283  TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice)
4284  : stream_(stream), choice_(choice) { }
4285  void Call(uc16 from, DispatchTable::Entry entry) {
4286  OutSet* out_set = entry.out_set();
4287  for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
4288  if (out_set->Get(i)) {
4289  stream()->Add(" n%p:s%io%i -> n%p;\n",
4290  choice(),
4291  from,
4292  i,
4293  choice()->alternatives()->at(i).node());
4294  }
4295  }
4296  }
4297  private:
4298  StringStream* stream() { return stream_; }
4299  ChoiceNode* choice() { return choice_; }
4300  StringStream* stream_;
4301  ChoiceNode* choice_;
4302 };
4303 
4304 
4305 class TableEntryHeaderPrinter {
4306  public:
4307  explicit TableEntryHeaderPrinter(StringStream* stream)
4308  : first_(true), stream_(stream) { }
4309  void Call(uc16 from, DispatchTable::Entry entry) {
4310  if (first_) {
4311  first_ = false;
4312  } else {
4313  stream()->Add("|");
4314  }
4315  stream()->Add("{\\%k-\\%k|{", from, entry.to());
4316  OutSet* out_set = entry.out_set();
4317  int priority = 0;
4318  for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
4319  if (out_set->Get(i)) {
4320  if (priority > 0) stream()->Add("|");
4321  stream()->Add("<s%io%i> %i", from, i, priority);
4322  priority++;
4323  }
4324  }
4325  stream()->Add("}}");
4326  }
4327 
4328  private:
4329  bool first_;
4330  StringStream* stream() { return stream_; }
4331  StringStream* stream_;
4332 };
4333 
4334 
4335 class AttributePrinter {
4336  public:
4337  explicit AttributePrinter(DotPrinter* out)
4338  : out_(out), first_(true) { }
4339  void PrintSeparator() {
4340  if (first_) {
4341  first_ = false;
4342  } else {
4343  out_->stream()->Add("|");
4344  }
4345  }
4346  void PrintBit(const char* name, bool value) {
4347  if (!value) return;
4348  PrintSeparator();
4349  out_->stream()->Add("{%s}", name);
4350  }
4351  void PrintPositive(const char* name, int value) {
4352  if (value < 0) return;
4353  PrintSeparator();
4354  out_->stream()->Add("{%s|%x}", name, value);
4355  }
4356  private:
4357  DotPrinter* out_;
4358  bool first_;
4359 };
4360 
4361 
4362 void DotPrinter::PrintAttributes(RegExpNode* that) {
4363  stream()->Add(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "
4364  "margin=0.1, fontsize=10, label=\"{",
4365  that);
4366  AttributePrinter printer(this);
4367  NodeInfo* info = that->info();
4368  printer.PrintBit("NI", info->follows_newline_interest);
4369  printer.PrintBit("WI", info->follows_word_interest);
4370  printer.PrintBit("SI", info->follows_start_interest);
4371  Label* label = that->label();
4372  if (label->is_bound())
4373  printer.PrintPositive("@", label->pos());
4374  stream()->Add("}\"];\n");
4375  stream()->Add(" a%p -> n%p [style=dashed, color=grey, "
4376  "arrowhead=none];\n", that, that);
4377 }
4378 
4379 
4380 static const bool kPrintDispatchTable = false;
4381 void DotPrinter::VisitChoice(ChoiceNode* that) {
4382  if (kPrintDispatchTable) {
4383  stream()->Add(" n%p [shape=Mrecord, label=\"", that);
4384  TableEntryHeaderPrinter header_printer(stream());
4385  that->GetTable(ignore_case_)->ForEach(&header_printer);
4386  stream()->Add("\"]\n", that);
4387  PrintAttributes(that);
4388  TableEntryBodyPrinter body_printer(stream(), that);
4389  that->GetTable(ignore_case_)->ForEach(&body_printer);
4390  } else {
4391  stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that);
4392  for (int i = 0; i < that->alternatives()->length(); i++) {
4393  GuardedAlternative alt = that->alternatives()->at(i);
4394  stream()->Add(" n%p -> n%p;\n", that, alt.node());
4395  }
4396  }
4397  for (int i = 0; i < that->alternatives()->length(); i++) {
4398  GuardedAlternative alt = that->alternatives()->at(i);
4399  alt.node()->Accept(this);
4400  }
4401 }
4402 
4403 
4404 void DotPrinter::VisitText(TextNode* that) {
4405  Zone* zone = that->zone();
4406  stream()->Add(" n%p [label=\"", that);
4407  for (int i = 0; i < that->elements()->length(); i++) {
4408  if (i > 0) stream()->Add(" ");
4409  TextElement elm = that->elements()->at(i);
4410  switch (elm.type) {
4411  case TextElement::ATOM: {
4412  stream()->Add("'%w'", elm.data.u_atom->data());
4413  break;
4414  }
4415  case TextElement::CHAR_CLASS: {
4416  RegExpCharacterClass* node = elm.data.u_char_class;
4417  stream()->Add("[");
4418  if (node->is_negated())
4419  stream()->Add("^");
4420  for (int j = 0; j < node->ranges(zone)->length(); j++) {
4421  CharacterRange range = node->ranges(zone)->at(j);
4422  stream()->Add("%k-%k", range.from(), range.to());
4423  }
4424  stream()->Add("]");
4425  break;
4426  }
4427  default:
4428  UNREACHABLE();
4429  }
4430  }
4431  stream()->Add("\", shape=box, peripheries=2];\n");
4432  PrintAttributes(that);
4433  stream()->Add(" n%p -> n%p;\n", that, that->on_success());
4434  Visit(that->on_success());
4435 }
4436 
4437 
4438 void DotPrinter::VisitBackReference(BackReferenceNode* that) {
4439  stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
4440  that,
4441  that->start_register(),
4442  that->end_register());
4443  PrintAttributes(that);
4444  stream()->Add(" n%p -> n%p;\n", that, that->on_success());
4445  Visit(that->on_success());
4446 }
4447 
4448 
4449 void DotPrinter::VisitEnd(EndNode* that) {
4450  stream()->Add(" n%p [style=bold, shape=point];\n", that);
4451  PrintAttributes(that);
4452 }
4453 
4454 
4455 void DotPrinter::VisitAssertion(AssertionNode* that) {
4456  stream()->Add(" n%p [", that);
4457  switch (that->type()) {
4458  case AssertionNode::AT_END:
4459  stream()->Add("label=\"$\", shape=septagon");
4460  break;
4462  stream()->Add("label=\"^\", shape=septagon");
4463  break;
4465  stream()->Add("label=\"\\b\", shape=septagon");
4466  break;
4468  stream()->Add("label=\"\\B\", shape=septagon");
4469  break;
4471  stream()->Add("label=\"(?<=\\n)\", shape=septagon");
4472  break;
4473  }
4474  stream()->Add("];\n");
4475  PrintAttributes(that);
4476  RegExpNode* successor = that->on_success();
4477  stream()->Add(" n%p -> n%p;\n", that, successor);
4478  Visit(successor);
4479 }
4480 
4481 
4482 void DotPrinter::VisitAction(ActionNode* that) {
4483  stream()->Add(" n%p [", that);
4484  switch (that->type_) {
4486  stream()->Add("label=\"$%i:=%i\", shape=octagon",
4487  that->data_.u_store_register.reg,
4488  that->data_.u_store_register.value);
4489  break;
4491  stream()->Add("label=\"$%i++\", shape=octagon",
4492  that->data_.u_increment_register.reg);
4493  break;
4495  stream()->Add("label=\"$%i:=$pos\", shape=octagon",
4496  that->data_.u_position_register.reg);
4497  break;
4499  stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
4500  that->data_.u_submatch.current_position_register);
4501  break;
4503  stream()->Add("label=\"escape\", shape=septagon");
4504  break;
4506  stream()->Add("label=\"$%i=$pos?,$%i<%i?\", shape=septagon",
4507  that->data_.u_empty_match_check.start_register,
4508  that->data_.u_empty_match_check.repetition_register,
4509  that->data_.u_empty_match_check.repetition_limit);
4510  break;
4512  stream()->Add("label=\"clear $%i to $%i\", shape=septagon",
4513  that->data_.u_clear_captures.range_from,
4514  that->data_.u_clear_captures.range_to);
4515  break;
4516  }
4517  }
4518  stream()->Add("];\n");
4519  PrintAttributes(that);
4520  RegExpNode* successor = that->on_success();
4521  stream()->Add(" n%p -> n%p;\n", that, successor);
4522  Visit(successor);
4523 }
4524 
4525 
4526 class DispatchTableDumper {
4527  public:
4528  explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }
4529  void Call(uc16 key, DispatchTable::Entry entry);
4530  StringStream* stream() { return stream_; }
4531  private:
4532  StringStream* stream_;
4533 };
4534 
4535 
4536 void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
4537  stream()->Add("[%k-%k]: {", key, entry.to());
4538  OutSet* set = entry.out_set();
4539  bool first = true;
4540  for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
4541  if (set->Get(i)) {
4542  if (first) {
4543  first = false;
4544  } else {
4545  stream()->Add(", ");
4546  }
4547  stream()->Add("%i", i);
4548  }
4549  }
4550  stream()->Add("}\n");
4551 }
4552 
4553 
4554 void DispatchTable::Dump() {
4555  HeapStringAllocator alloc;
4556  StringStream stream(&alloc);
4557  DispatchTableDumper dumper(&stream);
4558  tree()->ForEach(&dumper);
4559  OS::PrintError("%s", *stream.ToCString());
4560 }
4561 
4562 
4563 void RegExpEngine::DotPrint(const char* label,
4564  RegExpNode* node,
4565  bool ignore_case) {
4566  DotPrinter printer(ignore_case);
4567  printer.PrintNode(label, node);
4568 }
4569 
4570 
4571 #endif // DEBUG
4572 
4573 
4574 // -------------------------------------------------------------------
4575 // Tree to graph conversion
4576 
4578  RegExpNode* on_success) {
4579  ZoneList<TextElement>* elms =
4580  new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
4581  elms->Add(TextElement::Atom(this), compiler->zone());
4582  return new(compiler->zone()) TextNode(elms, on_success);
4583 }
4584 
4585 
4587  RegExpNode* on_success) {
4588  return new(compiler->zone()) TextNode(elements(), on_success);
4589 }
4590 
4591 
4592 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
4593  const int* special_class,
4594  int length) {
4595  length--; // Remove final 0x10000.
4596  ASSERT(special_class[length] == 0x10000);
4597  ASSERT(ranges->length() != 0);
4598  ASSERT(length != 0);
4599  ASSERT(special_class[0] != 0);
4600  if (ranges->length() != (length >> 1) + 1) {
4601  return false;
4602  }
4603  CharacterRange range = ranges->at(0);
4604  if (range.from() != 0) {
4605  return false;
4606  }
4607  for (int i = 0; i < length; i += 2) {
4608  if (special_class[i] != (range.to() + 1)) {
4609  return false;
4610  }
4611  range = ranges->at((i >> 1) + 1);
4612  if (special_class[i+1] != range.from()) {
4613  return false;
4614  }
4615  }
4616  if (range.to() != 0xffff) {
4617  return false;
4618  }
4619  return true;
4620 }
4621 
4622 
4623 static bool CompareRanges(ZoneList<CharacterRange>* ranges,
4624  const int* special_class,
4625  int length) {
4626  length--; // Remove final 0x10000.
4627  ASSERT(special_class[length] == 0x10000);
4628  if (ranges->length() * 2 != length) {
4629  return false;
4630  }
4631  for (int i = 0; i < length; i += 2) {
4632  CharacterRange range = ranges->at(i >> 1);
4633  if (range.from() != special_class[i] ||
4634  range.to() != special_class[i + 1] - 1) {
4635  return false;
4636  }
4637  }
4638  return true;
4639 }
4640 
4641 
4643  // TODO(lrn): Remove need for this function, by not throwing away information
4644  // along the way.
4645  if (is_negated_) {
4646  return false;
4647  }
4648  if (set_.is_standard()) {
4649  return true;
4650  }
4651  if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4652  set_.set_standard_set_type('s');
4653  return true;
4654  }
4655  if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4656  set_.set_standard_set_type('S');
4657  return true;
4658  }
4659  if (CompareInverseRanges(set_.ranges(zone),
4660  kLineTerminatorRanges,
4661  kLineTerminatorRangeCount)) {
4662  set_.set_standard_set_type('.');
4663  return true;
4664  }
4665  if (CompareRanges(set_.ranges(zone),
4666  kLineTerminatorRanges,
4667  kLineTerminatorRangeCount)) {
4668  set_.set_standard_set_type('n');
4669  return true;
4670  }
4671  if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4672  set_.set_standard_set_type('w');
4673  return true;
4674  }
4675  if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4676  set_.set_standard_set_type('W');
4677  return true;
4678  }
4679  return false;
4680 }
4681 
4682 
4684  RegExpNode* on_success) {
4685  return new(compiler->zone()) TextNode(this, on_success);
4686 }
4687 
4688 
4690  RegExpNode* on_success) {
4692  int length = alternatives->length();
4693  ChoiceNode* result =
4694  new(compiler->zone()) ChoiceNode(length, compiler->zone());
4695  for (int i = 0; i < length; i++) {
4696  GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
4697  on_success));
4698  result->AddAlternative(alternative);
4699  }
4700  return result;
4701 }
4702 
4703 
4705  RegExpNode* on_success) {
4706  return ToNode(min(),
4707  max(),
4708  is_greedy(),
4709  body(),
4710  compiler,
4711  on_success);
4712 }
4713 
4714 
4715 // Scoped object to keep track of how much we unroll quantifier loops in the
4716 // regexp graph generator.
4718  public:
4719  static const int kMaxExpansionFactor = 6;
4721  : compiler_(compiler),
4722  saved_expansion_factor_(compiler->current_expansion_factor()),
4723  ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
4724  ASSERT(factor > 0);
4725  if (ok_to_expand_) {
4726  if (factor > kMaxExpansionFactor) {
4727  // Avoid integer overflow of the current expansion factor.
4728  ok_to_expand_ = false;
4730  } else {
4731  int new_factor = saved_expansion_factor_ * factor;
4732  ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
4733  compiler->set_current_expansion_factor(new_factor);
4734  }
4735  }
4736  }
4737 
4739  compiler_->set_current_expansion_factor(saved_expansion_factor_);
4740  }
4741 
4742  bool ok_to_expand() { return ok_to_expand_; }
4743 
4744  private:
4745  RegExpCompiler* compiler_;
4746  int saved_expansion_factor_;
4747  bool ok_to_expand_;
4748 
4749  DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
4750 };
4751 
4752 
4754  int max,
4755  bool is_greedy,
4756  RegExpTree* body,
4757  RegExpCompiler* compiler,
4758  RegExpNode* on_success,
4759  bool not_at_start) {
4760  // x{f, t} becomes this:
4761  //
4762  // (r++)<-.
4763  // | `
4764  // | (x)
4765  // v ^
4766  // (r=0)-->(?)---/ [if r < t]
4767  // |
4768  // [if r >= f] \----> ...
4769  //
4770 
4771  // 15.10.2.5 RepeatMatcher algorithm.
4772  // The parser has already eliminated the case where max is 0. In the case
4773  // where max_match is zero the parser has removed the quantifier if min was
4774  // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
4775 
4776  // If we know that we cannot match zero length then things are a little
4777  // simpler since we don't need to make the special zero length match check
4778  // from step 2.1. If the min and max are small we can unroll a little in
4779  // this case.
4780  static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
4781  static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
4782  if (max == 0) return on_success; // This can happen due to recursion.
4783  bool body_can_be_empty = (body->min_match() == 0);
4784  int body_start_reg = RegExpCompiler::kNoRegister;
4785  Interval capture_registers = body->CaptureRegisters();
4786  bool needs_capture_clearing = !capture_registers.is_empty();
4787  Zone* zone = compiler->zone();
4788 
4789  if (body_can_be_empty) {
4790  body_start_reg = compiler->AllocateRegister();
4791  } else if (FLAG_regexp_optimization && !needs_capture_clearing) {
4792  // Only unroll if there are no captures and the body can't be
4793  // empty.
4794  {
4795  RegExpExpansionLimiter limiter(
4796  compiler, min + ((max != min) ? 1 : 0));
4797  if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
4798  int new_max = (max == kInfinity) ? max : max - min;
4799  // Recurse once to get the loop or optional matches after the fixed
4800  // ones.
4801  RegExpNode* answer = ToNode(
4802  0, new_max, is_greedy, body, compiler, on_success, true);
4803  // Unroll the forced matches from 0 to min. This can cause chains of
4804  // TextNodes (which the parser does not generate). These should be
4805  // combined if it turns out they hinder good code generation.
4806  for (int i = 0; i < min; i++) {
4807  answer = body->ToNode(compiler, answer);
4808  }
4809  return answer;
4810  }
4811  }
4812  if (max <= kMaxUnrolledMaxMatches && min == 0) {
4813  ASSERT(max > 0); // Due to the 'if' above.
4814  RegExpExpansionLimiter limiter(compiler, max);
4815  if (limiter.ok_to_expand()) {
4816  // Unroll the optional matches up to max.
4817  RegExpNode* answer = on_success;
4818  for (int i = 0; i < max; i++) {
4819  ChoiceNode* alternation = new(zone) ChoiceNode(2, zone);
4820  if (is_greedy) {
4821  alternation->AddAlternative(
4822  GuardedAlternative(body->ToNode(compiler, answer)));
4823  alternation->AddAlternative(GuardedAlternative(on_success));
4824  } else {
4825  alternation->AddAlternative(GuardedAlternative(on_success));
4826  alternation->AddAlternative(
4827  GuardedAlternative(body->ToNode(compiler, answer)));
4828  }
4829  answer = alternation;
4830  if (not_at_start) alternation->set_not_at_start();
4831  }
4832  return answer;
4833  }
4834  }
4835  }
4836  bool has_min = min > 0;
4837  bool has_max = max < RegExpTree::kInfinity;
4838  bool needs_counter = has_min || has_max;
4839  int reg_ctr = needs_counter
4840  ? compiler->AllocateRegister()
4842  LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0,
4843  zone);
4844  if (not_at_start) center->set_not_at_start();
4845  RegExpNode* loop_return = needs_counter
4846  ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
4847  : static_cast<RegExpNode*>(center);
4848  if (body_can_be_empty) {
4849  // If the body can be empty we need to check if it was and then
4850  // backtrack.
4851  loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
4852  reg_ctr,
4853  min,
4854  loop_return);
4855  }
4856  RegExpNode* body_node = body->ToNode(compiler, loop_return);
4857  if (body_can_be_empty) {
4858  // If the body can be empty we need to store the start position
4859  // so we can bail out if it was empty.
4860  body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
4861  }
4862  if (needs_capture_clearing) {
4863  // Before entering the body of this loop we need to clear captures.
4864  body_node = ActionNode::ClearCaptures(capture_registers, body_node);
4865  }
4866  GuardedAlternative body_alt(body_node);
4867  if (has_max) {
4868  Guard* body_guard =
4869  new(zone) Guard(reg_ctr, Guard::LT, max);
4870  body_alt.AddGuard(body_guard, zone);
4871  }
4872  GuardedAlternative rest_alt(on_success);
4873  if (has_min) {
4874  Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
4875  rest_alt.AddGuard(rest_guard, zone);
4876  }
4877  if (is_greedy) {
4878  center->AddLoopAlternative(body_alt);
4879  center->AddContinueAlternative(rest_alt);
4880  } else {
4881  center->AddContinueAlternative(rest_alt);
4882  center->AddLoopAlternative(body_alt);
4883  }
4884  if (needs_counter) {
4885  return ActionNode::SetRegister(reg_ctr, 0, center);
4886  } else {
4887  return center;
4888  }
4889 }
4890 
4891 
4893  RegExpNode* on_success) {
4894  NodeInfo info;
4895  Zone* zone = compiler->zone();
4896 
4897  switch (type()) {
4898  case START_OF_LINE:
4899  return AssertionNode::AfterNewline(on_success);
4900  case START_OF_INPUT:
4901  return AssertionNode::AtStart(on_success);
4902  case BOUNDARY:
4903  return AssertionNode::AtBoundary(on_success);
4904  case NON_BOUNDARY:
4905  return AssertionNode::AtNonBoundary(on_success);
4906  case END_OF_INPUT:
4907  return AssertionNode::AtEnd(on_success);
4908  case END_OF_LINE: {
4909  // Compile $ in multiline regexps as an alternation with a positive
4910  // lookahead in one side and an end-of-input on the other side.
4911  // We need two registers for the lookahead.
4912  int stack_pointer_register = compiler->AllocateRegister();
4913  int position_register = compiler->AllocateRegister();
4914  // The ChoiceNode to distinguish between a newline and end-of-input.
4915  ChoiceNode* result = new(zone) ChoiceNode(2, zone);
4916  // Create a newline atom.
4917  ZoneList<CharacterRange>* newline_ranges =
4918  new(zone) ZoneList<CharacterRange>(3, zone);
4919  CharacterRange::AddClassEscape('n', newline_ranges, zone);
4920  RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n');
4921  TextNode* newline_matcher = new(zone) TextNode(
4922  newline_atom,
4923  ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
4924  position_register,
4925  0, // No captures inside.
4926  -1, // Ignored if no captures.
4927  on_success));
4928  // Create an end-of-input matcher.
4929  RegExpNode* end_of_line = ActionNode::BeginSubmatch(
4930  stack_pointer_register,
4931  position_register,
4932  newline_matcher);
4933  // Add the two alternatives to the ChoiceNode.
4934  GuardedAlternative eol_alternative(end_of_line);
4935  result->AddAlternative(eol_alternative);
4936  GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
4937  result->AddAlternative(end_alternative);
4938  return result;
4939  }
4940  default:
4941  UNREACHABLE();
4942  }
4943  return on_success;
4944 }
4945 
4946 
4948  RegExpNode* on_success) {
4949  return new(compiler->zone())
4952  on_success);
4953 }
4954 
4955 
4957  RegExpNode* on_success) {
4958  return on_success;
4959 }
4960 
4961 
4963  RegExpNode* on_success) {
4964  int stack_pointer_register = compiler->AllocateRegister();
4965  int position_register = compiler->AllocateRegister();
4966 
4967  const int registers_per_capture = 2;
4968  const int register_of_first_capture = 2;
4969  int register_count = capture_count_ * registers_per_capture;
4970  int register_start =
4971  register_of_first_capture + capture_from_ * registers_per_capture;
4972 
4973  RegExpNode* success;
4974  if (is_positive()) {
4976  stack_pointer_register,
4977  position_register,
4978  body()->ToNode(
4979  compiler,
4980  ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
4981  position_register,
4982  register_count,
4983  register_start,
4984  on_success)));
4985  return node;
4986  } else {
4987  // We use a ChoiceNode for a negative lookahead because it has most of
4988  // the characteristics we need. It has the body of the lookahead as its
4989  // first alternative and the expression after the lookahead of the second
4990  // alternative. If the first alternative succeeds then the
4991  // NegativeSubmatchSuccess will unwind the stack including everything the
4992  // choice node set up and backtrack. If the first alternative fails then
4993  // the second alternative is tried, which is exactly the desired result
4994  // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
4995  // ChoiceNode that knows to ignore the first exit when calculating quick
4996  // checks.
4997  Zone* zone = compiler->zone();
4998 
4999  GuardedAlternative body_alt(
5000  body()->ToNode(
5001  compiler,
5002  success = new(zone) NegativeSubmatchSuccess(stack_pointer_register,
5003  position_register,
5004  register_count,
5005  register_start,
5006  zone)));
5007  ChoiceNode* choice_node =
5008  new(zone) NegativeLookaheadChoiceNode(body_alt,
5009  GuardedAlternative(on_success),
5010  zone);
5011  return ActionNode::BeginSubmatch(stack_pointer_register,
5012  position_register,
5013  choice_node);
5014  }
5015 }
5016 
5017 
5019  RegExpNode* on_success) {
5020  return ToNode(body(), index(), compiler, on_success);
5021 }
5022 
5023 
5025  int index,
5026  RegExpCompiler* compiler,
5027  RegExpNode* on_success) {
5028  int start_reg = RegExpCapture::StartRegister(index);
5029  int end_reg = RegExpCapture::EndRegister(index);
5030  RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
5031  RegExpNode* body_node = body->ToNode(compiler, store_end);
5032  return ActionNode::StorePosition(start_reg, true, body_node);
5033 }
5034 
5035 
5037  RegExpNode* on_success) {
5038  ZoneList<RegExpTree*>* children = nodes();
5039  RegExpNode* current = on_success;
5040  for (int i = children->length() - 1; i >= 0; i--) {
5041  current = children->at(i)->ToNode(compiler, current);
5042  }
5043  return current;
5044 }
5045 
5046 
5047 static void AddClass(const int* elmv,
5048  int elmc,
5049  ZoneList<CharacterRange>* ranges,
5050  Zone* zone) {
5051  elmc--;
5052  ASSERT(elmv[elmc] == 0x10000);
5053  for (int i = 0; i < elmc; i += 2) {
5054  ASSERT(elmv[i] < elmv[i + 1]);
5055  ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone);
5056  }
5057 }
5058 
5059 
5060 static void AddClassNegated(const int *elmv,
5061  int elmc,
5062  ZoneList<CharacterRange>* ranges,
5063  Zone* zone) {
5064  elmc--;
5065  ASSERT(elmv[elmc] == 0x10000);
5066  ASSERT(elmv[0] != 0x0000);
5067  ASSERT(elmv[elmc-1] != String::kMaxUtf16CodeUnit);
5068  uc16 last = 0x0000;
5069  for (int i = 0; i < elmc; i += 2) {
5070  ASSERT(last <= elmv[i] - 1);
5071  ASSERT(elmv[i] < elmv[i + 1]);
5072  ranges->Add(CharacterRange(last, elmv[i] - 1), zone);
5073  last = elmv[i + 1];
5074  }
5075  ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone);
5076 }
5077 
5078 
5080  ZoneList<CharacterRange>* ranges,
5081  Zone* zone) {
5082  switch (type) {
5083  case 's':
5084  AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5085  break;
5086  case 'S':
5087  AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5088  break;
5089  case 'w':
5090  AddClass(kWordRanges, kWordRangeCount, ranges, zone);
5091  break;
5092  case 'W':
5093  AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
5094  break;
5095  case 'd':
5096  AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
5097  break;
5098  case 'D':
5099  AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
5100  break;
5101  case '.':
5102  AddClassNegated(kLineTerminatorRanges,
5103  kLineTerminatorRangeCount,
5104  ranges,
5105  zone);
5106  break;
5107  // This is not a character range as defined by the spec but a
5108  // convenient shorthand for a character class that matches any
5109  // character.
5110  case '*':
5111  ranges->Add(CharacterRange::Everything(), zone);
5112  break;
5113  // This is the set of characters matched by the $ and ^ symbols
5114  // in multiline mode.
5115  case 'n':
5116  AddClass(kLineTerminatorRanges,
5117  kLineTerminatorRangeCount,
5118  ranges,
5119  zone);
5120  break;
5121  default:
5122  UNREACHABLE();
5123  }
5124 }
5125 
5126 
5128  return Vector<const int>(kWordRanges, kWordRangeCount - 1);
5129 }
5130 
5131 
5133  public:
5135  ZoneList<CharacterRange>** excluded,
5136  Zone* zone)
5137  : included_(included),
5138  excluded_(excluded),
5139  zone_(zone) { }
5140  void Call(uc16 from, DispatchTable::Entry entry);
5141 
5142  static const int kInBase = 0;
5143  static const int kInOverlay = 1;
5144 
5145  private:
5146  ZoneList<CharacterRange>** included_;
5147  ZoneList<CharacterRange>** excluded_;
5148  Zone* zone_;
5149 };
5150 
5151 
5153  if (!entry.out_set()->Get(kInBase)) return;
5154  ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
5155  ? included_
5156  : excluded_;
5157  if (*target == NULL) *target = new(zone_) ZoneList<CharacterRange>(2, zone_);
5158  (*target)->Add(CharacterRange(entry.from(), entry.to()), zone_);
5159 }
5160 
5161 
5163  Vector<const int> overlay,
5164  ZoneList<CharacterRange>** included,
5165  ZoneList<CharacterRange>** excluded,
5166  Zone* zone) {
5167  ASSERT_EQ(NULL, *included);
5168  ASSERT_EQ(NULL, *excluded);
5169  DispatchTable table(zone);
5170  for (int i = 0; i < base->length(); i++)
5171  table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone);
5172  for (int i = 0; i < overlay.length(); i += 2) {
5173  table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),
5175  }
5176  CharacterRangeSplitter callback(included, excluded, zone);
5177  table.ForEach(&callback);
5178 }
5179 
5180 
5182  bool is_ascii,
5183  Zone* zone) {
5184  Isolate* isolate = Isolate::Current();
5185  uc16 bottom = from();
5186  uc16 top = to();
5187  if (is_ascii) {
5188  if (bottom > String::kMaxAsciiCharCode) return;
5190  }
5192  if (top == bottom) {
5193  // If this is a singleton we just expand the one character.
5194  int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
5195  for (int i = 0; i < length; i++) {
5196  uc32 chr = chars[i];
5197  if (chr != bottom) {
5198  ranges->Add(CharacterRange::Singleton(chars[i]), zone);
5199  }
5200  }
5201  } else {
5202  // If this is a range we expand the characters block by block,
5203  // expanding contiguous subranges (blocks) one at a time.
5204  // The approach is as follows. For a given start character we
5205  // look up the remainder of the block that contains it (represented
5206  // by the end point), for instance we find 'z' if the character
5207  // is 'c'. A block is characterized by the property
5208  // that all characters uncanonicalize in the same way, except that
5209  // each entry in the result is incremented by the distance from the first
5210  // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
5211  // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
5212  // Once we've found the end point we look up its uncanonicalization
5213  // and produce a range for each element. For instance for [c-f]
5214  // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
5215  // add a range if it is not already contained in the input, so [c-f]
5216  // will be skipped but [C-F] will be added. If this range is not
5217  // completely contained in a block we do this for all the blocks
5218  // covered by the range (handling characters that is not in a block
5219  // as a "singleton block").
5221  int pos = bottom;
5222  while (pos <= top) {
5223  int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
5224  uc16 block_end;
5225  if (length == 0) {
5226  block_end = pos;
5227  } else {
5228  ASSERT_EQ(1, length);
5229  block_end = range[0];
5230  }
5231  int end = (block_end > top) ? top : block_end;
5232  length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
5233  for (int i = 0; i < length; i++) {
5234  uc32 c = range[i];
5235  uc16 range_from = c - (block_end - pos);
5236  uc16 range_to = c - (block_end - end);
5237  if (!(bottom <= range_from && range_to <= top)) {
5238  ranges->Add(CharacterRange(range_from, range_to), zone);
5239  }
5240  }
5241  pos = end + 1;
5242  }
5243  }
5244 }
5245 
5246 
5248  ASSERT_NOT_NULL(ranges);
5249  int n = ranges->length();
5250  if (n <= 1) return true;
5251  int max = ranges->at(0).to();
5252  for (int i = 1; i < n; i++) {
5253  CharacterRange next_range = ranges->at(i);
5254  if (next_range.from() <= max + 1) return false;
5255  max = next_range.to();
5256  }
5257  return true;
5258 }
5259 
5260 
5261 ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
5262  if (ranges_ == NULL) {
5263  ranges_ = new(zone) ZoneList<CharacterRange>(2, zone);
5264  CharacterRange::AddClassEscape(standard_set_type_, ranges_, zone);
5265  }
5266  return ranges_;
5267 }
5268 
5269 
5270 // Move a number of elements in a zonelist to another position
5271 // in the same list. Handles overlapping source and target areas.
5272 static void MoveRanges(ZoneList<CharacterRange>* list,
5273  int from,
5274  int to,
5275  int count) {
5276  // Ranges are potentially overlapping.
5277  if (from < to) {
5278  for (int i = count - 1; i >= 0; i--) {
5279  list->at(to + i) = list->at(from + i);
5280  }
5281  } else {
5282  for (int i = 0; i < count; i++) {
5283  list->at(to + i) = list->at(from + i);
5284  }
5285  }
5286 }
5287 
5288 
5289 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
5290  int count,
5291  CharacterRange insert) {
5292  // Inserts a range into list[0..count[, which must be sorted
5293  // by from value and non-overlapping and non-adjacent, using at most
5294  // list[0..count] for the result. Returns the number of resulting
5295  // canonicalized ranges. Inserting a range may collapse existing ranges into
5296  // fewer ranges, so the return value can be anything in the range 1..count+1.
5297  uc16 from = insert.from();
5298  uc16 to = insert.to();
5299  int start_pos = 0;
5300  int end_pos = count;
5301  for (int i = count - 1; i >= 0; i--) {
5302  CharacterRange current = list->at(i);
5303  if (current.from() > to + 1) {
5304  end_pos = i;
5305  } else if (current.to() + 1 < from) {
5306  start_pos = i + 1;
5307  break;
5308  }
5309  }
5310 
5311  // Inserted range overlaps, or is adjacent to, ranges at positions
5312  // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
5313  // not affected by the insertion.
5314  // If start_pos == end_pos, the range must be inserted before start_pos.
5315  // if start_pos < end_pos, the entire range from start_pos to end_pos
5316  // must be merged with the insert range.
5317 
5318  if (start_pos == end_pos) {
5319  // Insert between existing ranges at position start_pos.
5320  if (start_pos < count) {
5321  MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
5322  }
5323  list->at(start_pos) = insert;
5324  return count + 1;
5325  }
5326  if (start_pos + 1 == end_pos) {
5327  // Replace single existing range at position start_pos.
5328  CharacterRange to_replace = list->at(start_pos);
5329  int new_from = Min(to_replace.from(), from);
5330  int new_to = Max(to_replace.to(), to);
5331  list->at(start_pos) = CharacterRange(new_from, new_to);
5332  return count;
5333  }
5334  // Replace a number of existing ranges from start_pos to end_pos - 1.
5335  // Move the remaining ranges down.
5336 
5337  int new_from = Min(list->at(start_pos).from(), from);
5338  int new_to = Max(list->at(end_pos - 1).to(), to);
5339  if (end_pos < count) {
5340  MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
5341  }
5342  list->at(start_pos) = CharacterRange(new_from, new_to);
5343  return count - (end_pos - start_pos) + 1;
5344 }
5345 
5346 
5348  // Special/default classes are always considered canonical. The result
5349  // of calling ranges() will be sorted.
5350  if (ranges_ == NULL) return;
5352 }
5353 
5354 
5356  if (character_ranges->length() <= 1) return;
5357  // Check whether ranges are already canonical (increasing, non-overlapping,
5358  // non-adjacent).
5359  int n = character_ranges->length();
5360  int max = character_ranges->at(0).to();
5361  int i = 1;
5362  while (i < n) {
5363  CharacterRange current = character_ranges->at(i);
5364  if (current.from() <= max + 1) {
5365  break;
5366  }
5367  max = current.to();
5368  i++;
5369  }
5370  // Canonical until the i'th range. If that's all of them, we are done.
5371  if (i == n) return;
5372 
5373  // The ranges at index i and forward are not canonicalized. Make them so by
5374  // doing the equivalent of insertion sort (inserting each into the previous
5375  // list, in order).
5376  // Notice that inserting a range can reduce the number of ranges in the
5377  // result due to combining of adjacent and overlapping ranges.
5378  int read = i; // Range to insert.
5379  int num_canonical = i; // Length of canonicalized part of list.
5380  do {
5381  num_canonical = InsertRangeInCanonicalList(character_ranges,
5382  num_canonical,
5383  character_ranges->at(read));
5384  read++;
5385  } while (read < n);
5386  character_ranges->Rewind(num_canonical);
5387 
5388  ASSERT(CharacterRange::IsCanonical(character_ranges));
5389 }
5390 
5391 
5393  ZoneList<CharacterRange>* negated_ranges,
5394  Zone* zone) {
5396  ASSERT_EQ(0, negated_ranges->length());
5397  int range_count = ranges->length();
5398  uc16 from = 0;
5399  int i = 0;
5400  if (range_count > 0 && ranges->at(0).from() == 0) {
5401  from = ranges->at(0).to();
5402  i = 1;
5403  }
5404  while (i < range_count) {
5405  CharacterRange range = ranges->at(i);
5406  negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);
5407  from = range.to();
5408  i++;
5409  }
5410  if (from < String::kMaxUtf16CodeUnit) {
5411  negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit),
5412  zone);
5413  }
5414 }
5415 
5416 
5417 // -------------------------------------------------------------------
5418 // Splay tree
5419 
5420 
5421 OutSet* OutSet::Extend(unsigned value, Zone* zone) {
5422  if (Get(value))
5423  return this;
5424  if (successors(zone) != NULL) {
5425  for (int i = 0; i < successors(zone)->length(); i++) {
5426  OutSet* successor = successors(zone)->at(i);
5427  if (successor->Get(value))
5428  return successor;
5429  }
5430  } else {
5431  successors_ = new(zone) ZoneList<OutSet*>(2, zone);
5432  }
5433  OutSet* result = new(zone) OutSet(first_, remaining_);
5434  result->Set(value, zone);
5435  successors(zone)->Add(result, zone);
5436  return result;
5437 }
5438 
5439 
5440 void OutSet::Set(unsigned value, Zone *zone) {
5441  if (value < kFirstLimit) {
5442  first_ |= (1 << value);
5443  } else {
5444  if (remaining_ == NULL)
5445  remaining_ = new(zone) ZoneList<unsigned>(1, zone);
5446  if (remaining_->is_empty() || !remaining_->Contains(value))
5447  remaining_->Add(value, zone);
5448  }
5449 }
5450 
5451 
5452 bool OutSet::Get(unsigned value) {
5453  if (value < kFirstLimit) {
5454  return (first_ & (1 << value)) != 0;
5455  } else if (remaining_ == NULL) {
5456  return false;
5457  } else {
5458  return remaining_->Contains(value);
5459  }
5460 }
5461 
5462 
5464 
5465 
5466 void DispatchTable::AddRange(CharacterRange full_range, int value,
5467  Zone* zone) {
5468  CharacterRange current = full_range;
5469  if (tree()->is_empty()) {
5470  // If this is the first range we just insert into the table.
5472  ASSERT_RESULT(tree()->Insert(current.from(), &loc));
5473  loc.set_value(Entry(current.from(), current.to(),
5474  empty()->Extend(value, zone)));
5475  return;
5476  }
5477  // First see if there is a range to the left of this one that
5478  // overlaps.
5480  if (tree()->FindGreatestLessThan(current.from(), &loc)) {
5481  Entry* entry = &loc.value();
5482  // If we've found a range that overlaps with this one, and it
5483  // starts strictly to the left of this one, we have to fix it
5484  // because the following code only handles ranges that start on
5485  // or after the start point of the range we're adding.
5486  if (entry->from() < current.from() && entry->to() >= current.from()) {
5487  // Snap the overlapping range in half around the start point of
5488  // the range we're adding.
5489  CharacterRange left(entry->from(), current.from() - 1);
5490  CharacterRange right(current.from(), entry->to());
5491  // The left part of the overlapping range doesn't overlap.
5492  // Truncate the whole entry to be just the left part.
5493  entry->set_to(left.to());
5494  // The right part is the one that overlaps. We add this part
5495  // to the map and let the next step deal with merging it with
5496  // the range we're adding.
5498  ASSERT_RESULT(tree()->Insert(right.from(), &loc));
5499  loc.set_value(Entry(right.from(),
5500  right.to(),
5501  entry->out_set()));
5502  }
5503  }
5504  while (current.is_valid()) {
5505  if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
5506  (loc.value().from() <= current.to()) &&
5507  (loc.value().to() >= current.from())) {
5508  Entry* entry = &loc.value();
5509  // We have overlap. If there is space between the start point of
5510  // the range we're adding and where the overlapping range starts
5511  // then we have to add a range covering just that space.
5512  if (current.from() < entry->from()) {
5514  ASSERT_RESULT(tree()->Insert(current.from(), &ins));
5515  ins.set_value(Entry(current.from(),
5516  entry->from() - 1,
5517  empty()->Extend(value, zone)));
5518  current.set_from(entry->from());
5519  }
5520  ASSERT_EQ(current.from(), entry->from());
5521  // If the overlapping range extends beyond the one we want to add
5522  // we have to snap the right part off and add it separately.
5523  if (entry->to() > current.to()) {
5525  ASSERT_RESULT(tree()->Insert(current.to() + 1, &ins));
5526  ins.set_value(Entry(current.to() + 1,
5527  entry->to(),
5528  entry->out_set()));
5529  entry->set_to(current.to());
5530  }
5531  ASSERT(entry->to() <= current.to());
5532  // The overlapping range is now completely contained by the range
5533  // we're adding so we can just update it and move the start point
5534  // of the range we're adding just past it.
5535  entry->AddValue(value, zone);
5536  // Bail out if the last interval ended at 0xFFFF since otherwise
5537  // adding 1 will wrap around to 0.
5538  if (entry->to() == String::kMaxUtf16CodeUnit)
5539  break;
5540  ASSERT(entry->to() + 1 > current.from());
5541  current.set_from(entry->to() + 1);
5542  } else {
5543  // There is no overlap so we can just add the range
5545  ASSERT_RESULT(tree()->Insert(current.from(), &ins));
5546  ins.set_value(Entry(current.from(),
5547  current.to(),
5548  empty()->Extend(value, zone)));
5549  break;
5550  }
5551  }
5552 }
5553 
5554 
5557  if (!tree()->FindGreatestLessThan(value, &loc))
5558  return empty();
5559  Entry* entry = &loc.value();
5560  if (value <= entry->to())
5561  return entry->out_set();
5562  else
5563  return empty();
5564 }
5565 
5566 
5567 // -------------------------------------------------------------------
5568 // Analysis
5569 
5570 
5572  StackLimitCheck check(Isolate::Current());
5573  if (check.HasOverflowed()) {
5574  fail("Stack overflow");
5575  return;
5576  }
5577  if (that->info()->been_analyzed || that->info()->being_analyzed)
5578  return;
5579  that->info()->being_analyzed = true;
5580  that->Accept(this);
5581  that->info()->being_analyzed = false;
5582  that->info()->been_analyzed = true;
5583 }
5584 
5585 
5586 void Analysis::VisitEnd(EndNode* that) {
5587  // nothing to do
5588 }
5589 
5590 
5592  int element_count = elements()->length();
5593  // Set up the offsets of the elements relative to the start. This is a fixed
5594  // quantity since a TextNode can only contain fixed-width things.
5595  int cp_offset = 0;
5596  for (int i = 0; i < element_count; i++) {
5597  TextElement& elm = elements()->at(i);
5598  elm.cp_offset = cp_offset;
5599  if (elm.type == TextElement::ATOM) {
5600  cp_offset += elm.data.u_atom->data().length();
5601  } else {
5602  cp_offset++;
5603  }
5604  }
5605 }
5606 
5607 
5608 void Analysis::VisitText(TextNode* that) {
5609  if (ignore_case_) {
5610  that->MakeCaseIndependent(is_ascii_);
5611  }
5612  EnsureAnalyzed(that->on_success());
5613  if (!has_failed()) {
5614  that->CalculateOffsets();
5615  }
5616 }
5617 
5618 
5619 void Analysis::VisitAction(ActionNode* that) {
5620  RegExpNode* target = that->on_success();
5621  EnsureAnalyzed(target);
5622  if (!has_failed()) {
5623  // If the next node is interested in what it follows then this node
5624  // has to be interested too so it can pass the information on.
5625  that->info()->AddFromFollowing(target->info());
5626  }
5627 }
5628 
5629 
5630 void Analysis::VisitChoice(ChoiceNode* that) {
5631  NodeInfo* info = that->info();
5632  for (int i = 0; i < that->alternatives()->length(); i++) {
5633  RegExpNode* node = that->alternatives()->at(i).node();
5634  EnsureAnalyzed(node);
5635  if (has_failed()) return;
5636  // Anything the following nodes need to know has to be known by
5637  // this node also, so it can pass it on.
5638  info->AddFromFollowing(node->info());
5639  }
5640 }
5641 
5642 
5644  NodeInfo* info = that->info();
5645  for (int i = 0; i < that->alternatives()->length(); i++) {
5646  RegExpNode* node = that->alternatives()->at(i).node();
5647  if (node != that->loop_node()) {
5648  EnsureAnalyzed(node);
5649  if (has_failed()) return;
5650  info->AddFromFollowing(node->info());
5651  }
5652  }
5653  // Check the loop last since it may need the value of this node
5654  // to get a correct result.
5655  EnsureAnalyzed(that->loop_node());
5656  if (!has_failed()) {
5657  info->AddFromFollowing(that->loop_node()->info());
5658  }
5659 }
5660 
5661 
5662 void Analysis::VisitBackReference(BackReferenceNode* that) {
5663  EnsureAnalyzed(that->on_success());
5664 }
5665 
5666 
5667 void Analysis::VisitAssertion(AssertionNode* that) {
5668  EnsureAnalyzed(that->on_success());
5669 }
5670 
5671 
5673  int recursion_depth,
5674  int budget,
5675  BoyerMooreLookahead* bm,
5676  bool not_at_start) {
5677  // Working out the set of characters that a backreference can match is too
5678  // hard, so we just say that any character can match.
5679  bm->SetRest(offset);
5680  SaveBMInfo(bm, not_at_start, offset);
5681 }
5682 
5683 
5686 
5687 
5689  int recursion_depth,
5690  int budget,
5691  BoyerMooreLookahead* bm,
5692  bool not_at_start) {
5694  budget = (budget - 1) / alts->length();
5695  for (int i = 0; i < alts->length(); i++) {
5696  GuardedAlternative& alt = alts->at(i);
5697  if (alt.guards() != NULL && alt.guards()->length() != 0) {
5698  bm->SetRest(offset); // Give up trying to fill in info.
5699  SaveBMInfo(bm, not_at_start, offset);
5700  return;
5701  }
5702  alt.node()->FillInBMInfo(
5703  offset, recursion_depth + 1, budget, bm, not_at_start);
5704  }
5705  SaveBMInfo(bm, not_at_start, offset);
5706 }
5707 
5708 
5709 void TextNode::FillInBMInfo(int initial_offset,
5710  int recursion_depth,
5711  int budget,
5712  BoyerMooreLookahead* bm,
5713  bool not_at_start) {
5714  if (initial_offset >= bm->length()) return;
5715  int offset = initial_offset;
5716  int max_char = bm->max_char();
5717  for (int i = 0; i < elements()->length(); i++) {
5718  if (offset >= bm->length()) {
5719  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5720  return;
5721  }
5722  TextElement text = elements()->at(i);
5723  if (text.type == TextElement::ATOM) {
5724  RegExpAtom* atom = text.data.u_atom;
5725  for (int j = 0; j < atom->length(); j++, offset++) {
5726  if (offset >= bm->length()) {
5727  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5728  return;
5729  }
5730  uc16 character = atom->data()[j];
5731  if (bm->compiler()->ignore_case()) {
5733  int length = GetCaseIndependentLetters(
5734  ISOLATE,
5735  character,
5737  chars);
5738  for (int j = 0; j < length; j++) {
5739  bm->Set(offset, chars[j]);
5740  }
5741  } else {
5742  if (character <= max_char) bm->Set(offset, character);
5743  }
5744  }
5745  } else {
5747  RegExpCharacterClass* char_class = text.data.u_char_class;
5748  ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
5749  if (char_class->is_negated()) {
5750  bm->SetAll(offset);
5751  } else {
5752  for (int k = 0; k < ranges->length(); k++) {
5753  CharacterRange& range = ranges->at(k);
5754  if (range.from() > max_char) continue;
5755  int to = Min(max_char, static_cast<int>(range.to()));
5756  bm->SetInterval(offset, Interval(range.from(), to));
5757  }
5758  }
5759  offset++;
5760  }
5761  }
5762  if (offset >= bm->length()) {
5763  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5764  return;
5765  }
5766  on_success()->FillInBMInfo(offset,
5767  recursion_depth + 1,
5768  budget - 1,
5769  bm,
5770  true); // Not at start after a text node.
5771  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5772 }
5773 
5774 
5775 // -------------------------------------------------------------------
5776 // Dispatch table construction
5777 
5778 
5779 void DispatchTableConstructor::VisitEnd(EndNode* that) {
5781 }
5782 
5783 
5785  node->set_being_calculated(true);
5786  ZoneList<GuardedAlternative>* alternatives = node->alternatives();
5787  for (int i = 0; i < alternatives->length(); i++) {
5788  set_choice_index(i);
5789  alternatives->at(i).node()->Accept(this);
5790  }
5791  node->set_being_calculated(false);
5792 }
5793 
5794 
5796  public:
5798  : constructor_(constructor) { }
5799  void Call(uc32 from, DispatchTable::Entry entry);
5800  private:
5801  DispatchTableConstructor* constructor_;
5802 };
5803 
5804 
5806  CharacterRange range(from, entry.to());
5807  constructor_->AddRange(range);
5808 }
5809 
5810 
5811 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
5812  if (node->being_calculated())
5813  return;
5815  AddDispatchRange adder(this);
5816  table->ForEach(&adder);
5817 }
5818 
5819 
5820 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
5821  // TODO(160): Find the node that we refer back to and propagate its start
5822  // set back to here. For now we just accept anything.
5824 }
5825 
5826 
5827 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
5828  RegExpNode* target = that->on_success();
5829  target->Accept(this);
5830 }
5831 
5832 
5833 static int CompareRangeByFrom(const CharacterRange* a,
5834  const CharacterRange* b) {
5835  return Compare<uc16>(a->from(), b->from());
5836 }
5837 
5838 
5840  ranges->Sort(CompareRangeByFrom);
5841  uc16 last = 0;
5842  for (int i = 0; i < ranges->length(); i++) {
5843  CharacterRange range = ranges->at(i);
5844  if (last < range.from())
5845  AddRange(CharacterRange(last, range.from() - 1));
5846  if (range.to() >= last) {
5847  if (range.to() == String::kMaxUtf16CodeUnit) {
5848  return;
5849  } else {
5850  last = range.to() + 1;
5851  }
5852  }
5853  }
5855 }
5856 
5857 
5858 void DispatchTableConstructor::VisitText(TextNode* that) {
5859  TextElement elm = that->elements()->at(0);
5860  switch (elm.type) {
5861  case TextElement::ATOM: {
5862  uc16 c = elm.data.u_atom->data()[0];
5863  AddRange(CharacterRange(c, c));
5864  break;
5865  }
5866  case TextElement::CHAR_CLASS: {
5867  RegExpCharacterClass* tree = elm.data.u_char_class;
5868  ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
5869  if (tree->is_negated()) {
5870  AddInverse(ranges);
5871  } else {
5872  for (int i = 0; i < ranges->length(); i++)
5873  AddRange(ranges->at(i));
5874  }
5875  break;
5876  }
5877  default: {
5878  UNIMPLEMENTED();
5879  }
5880  }
5881 }
5882 
5883 
5884 void DispatchTableConstructor::VisitAction(ActionNode* that) {
5885  RegExpNode* target = that->on_success();
5886  target->Accept(this);
5887 }
5888 
5889 
5891  RegExpCompileData* data,
5892  bool ignore_case,
5893  bool is_global,
5894  bool is_multiline,
5895  Handle<String> pattern,
5896  Handle<String> sample_subject,
5897  bool is_ascii,
5898  Zone* zone) {
5899  if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
5900  return IrregexpRegExpTooBig();
5901  }
5902  RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone);
5903 
5904  // Sample some characters from the middle of the string.
5905  static const int kSampleSize = 128;
5906 
5907  FlattenString(sample_subject);
5908  int chars_sampled = 0;
5909  int half_way = (sample_subject->length() - kSampleSize) / 2;
5910  for (int i = Max(0, half_way);
5911  i < sample_subject->length() && chars_sampled < kSampleSize;
5912  i++, chars_sampled++) {
5913  compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
5914  }
5915 
5916  // Wrap the body of the regexp in capture #0.
5917  RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
5918  0,
5919  &compiler,
5920  compiler.accept());
5921  RegExpNode* node = captured_body;
5922  bool is_end_anchored = data->tree->IsAnchoredAtEnd();
5923  bool is_start_anchored = data->tree->IsAnchoredAtStart();
5924  int max_length = data->tree->max_match();
5925  if (!is_start_anchored) {
5926  // Add a .*? at the beginning, outside the body capture, unless
5927  // this expression is anchored at the beginning.
5928  RegExpNode* loop_node =
5931  false,
5932  new(zone) RegExpCharacterClass('*'),
5933  &compiler,
5934  captured_body,
5935  data->contains_anchor);
5936 
5937  if (data->contains_anchor) {
5938  // Unroll loop once, to take care of the case that might start
5939  // at the start of input.
5940  ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
5941  first_step_node->AddAlternative(GuardedAlternative(captured_body));
5942  first_step_node->AddAlternative(GuardedAlternative(
5943  new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));
5944  node = first_step_node;
5945  } else {
5946  node = loop_node;
5947  }
5948  }
5949  if (is_ascii) {
5951  // Do it again to propagate the new nodes to places where they were not
5952  // put because they had not been calculated yet.
5953  if (node != NULL) node = node->FilterASCII(RegExpCompiler::kMaxRecursion);
5954  }
5955 
5956  if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
5957  data->node = node;
5958  Analysis analysis(ignore_case, is_ascii);
5959  analysis.EnsureAnalyzed(node);
5960  if (analysis.has_failed()) {
5961  const char* error_message = analysis.error_message();
5962  return CompilationResult(error_message);
5963  }
5964 
5965  // Create the correct assembler for the architecture.
5966 #ifndef V8_INTERPRETED_REGEXP
5967  // Native regexp implementation.
5968 
5972 
5973 #if V8_TARGET_ARCH_IA32
5974  RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,
5975  zone);
5976 #elif V8_TARGET_ARCH_X64
5977  RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2,
5978  zone);
5979 #elif V8_TARGET_ARCH_ARM
5980  RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2,
5981  zone);
5982 #elif V8_TARGET_ARCH_MIPS
5983  RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2,
5984  zone);
5985 #endif
5986 
5987 #else // V8_INTERPRETED_REGEXP
5988  // Interpreted regexp implementation.
5990  RegExpMacroAssemblerIrregexp macro_assembler(codes);
5991 #endif // V8_INTERPRETED_REGEXP
5992 
5993  // Inserted here, instead of in Assembler, because it depends on information
5994  // in the AST that isn't replicated in the Node structure.
5995  static const int kMaxBacksearchLimit = 1024;
5996  if (is_end_anchored &&
5997  !is_start_anchored &&
5998  max_length < kMaxBacksearchLimit) {
5999  macro_assembler.SetCurrentPositionFromEnd(max_length);
6000  }
6001 
6002  if (is_global) {
6003  macro_assembler.set_global_mode(
6004  (data->tree->min_match() > 0)
6007  }
6008 
6009  return compiler.Assemble(&macro_assembler,
6010  node,
6011  data->capture_count,
6012  pattern);
6013 }
6014 
6015 
6016 }} // namespace v8::internal
void SetInterval(const Interval &interval)
Definition: jsregexp.cc:3511
ZoneList< TextElement > * elements()
Definition: ast.h:2397
AlternativeGenerationList(int count, Zone *zone)
Definition: jsregexp.cc:3461
static bool ParseRegExp(FlatStringReader *input, bool multiline, RegExpCompileData *result)
Definition: parser.cc:6004
virtual void WriteStackPointerToRegister(int reg)=0
OutSet * Get(uc16 value)
Definition: jsregexp.cc:5555
static Handle< Object > New(Handle< JSFunction > func, int argc, Handle< Object > argv[], bool *pending_exception)
Definition: execution.cc:177
bool EmitCharacterFunction(Isolate *isolate, RegExpCompiler *compiler, uc16 c, Label *on_failure, int cp_offset, bool check, bool preloaded)
Definition: jsregexp.cc:1636
Failure * StackOverflow()
Definition: isolate.cc:897
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5672
void FlattenString(Handle< String > string)
Definition: handles.cc:211
RegExpNode * stop_node()
Definition: jsregexp.h:1442
static const int kNumberOfRegistersOffset
Definition: jsregexp.cc:947
int GreedyLoopTextLengthForAlternative(GuardedAlternative *alternative)
Definition: jsregexp.cc:3361
static const int kIrregexpMaxRegisterCountIndex
Definition: objects.h:6465
double total_regexp_code_generated()
Definition: heap.h:1498
static const int kMaxAsciiCharCode
Definition: objects.h:7107
bool Contains(const T &elm) const
Definition: list-inl.h:178
#define DECLARE_VISIT(type)
Definition: full-codegen.h:573
static Vector< const int > GetWordBounds()
Definition: jsregexp.cc:5127
#define ASSERT_RESULT(expr)
Definition: checks.h:269
virtual void GoTo(Label *label)=0
static AssertionNode * AtStart(RegExpNode *on_success)
Definition: jsregexp.h:883
const char * ToCString(const v8::String::Utf8Value &value)
void SetInterval(int map_number, const Interval &interval)
Definition: jsregexp.h:1288
static ActionNode * SetRegister(int reg, int val, RegExpNode *on_success)
Definition: jsregexp.cc:1402
RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler *assembler, RegExpNode *start, int capture_count, Handle< String > pattern)
Definition: jsregexp.cc:1022
static Code * IrregexpNativeCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:486
void set(int index, Object *value)
Definition: objects-inl.h:1695
CompilationCache * compilation_cache()
Definition: isolate.h:812
CharacterRangeSplitter(ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5134
static const int kStaticOffsetsVectorSize
Definition: jsregexp.h:1649
void PrintF(const char *format,...)
Definition: v8utils.cc:40
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1344
static String * cast(Object *obj)
static ActionNode * BeginSubmatch(int stack_pointer_reg, int position_reg, RegExpNode *on_success)
Definition: jsregexp.cc:1442
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2264
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3226
static const int kMaxUtf16CodeUnit
Definition: objects.h:7109
void set_characters(int characters)
Definition: jsregexp.h:525
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2274
unibrow::Mapping< unibrow::Ecma262UnCanonicalize > * jsregexp_uncanonicalize()
Definition: isolate.h:877
VisitMarker(NodeInfo *info)
Definition: jsregexp.cc:2673
Isolate * isolate()
Definition: heap-inl.h:494
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:4586
virtual void AppendToText(RegExpText *text, Zone *zone)
Definition: jsregexp.cc:838
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.h:592
static Smi * FromInt(int value)
Definition: objects-inl.h:973
#define LOG(isolate, Call)
Definition: log.h:81
bool Contains(int value)
Definition: jsregexp.h:689
int to() const
Definition: jsregexp.h:694
ZoneList< CharacterRange > * ranges(Zone *zone)
Definition: ast.h:2353
void set_at_start(bool at_start)
Definition: jsregexp.h:1439
virtual void CheckNotBackReference(int start_reg, Label *on_no_match)=0
#define ASSERT_NOT_NULL(p)
Definition: checks.h:285
value format" "after each garbage collection") DEFINE_bool(print_cumulative_gc_stat, false, "print cumulative GC statistics in name=value format on exit") DEFINE_bool(trace_gc_verbose, false, "print more details following each garbage collection") DEFINE_bool(trace_fragmentation, false, "report fragmentation for old pointer and data pages") DEFINE_bool(collect_maps, true, "garbage collect maps from which no objects can be reached") DEFINE_bool(flush_code, true, "flush code that we expect not to use again before full gc") DEFINE_bool(incremental_marking, true, "use incremental marking") DEFINE_bool(incremental_marking_steps, true, "do incremental marking steps") DEFINE_bool(trace_incremental_marking, false, "trace progress of the incremental marking") DEFINE_bool(use_idle_notification, true, "Use idle notification to reduce memory footprint.") DEFINE_bool(send_idle_notification, false, "Send idle notifcation between stress runs.") DEFINE_bool(use_ic, true, "use inline caching") DEFINE_bool(native_code_counters, false, "generate extra code for manipulating stats counters") DEFINE_bool(always_compact, false, "Perform compaction on every full GC") DEFINE_bool(lazy_sweeping, true, "Use lazy sweeping for old pointer and data spaces") DEFINE_bool(never_compact, false, "Never perform compaction on full GC-testing only") DEFINE_bool(compact_code_space, true, "Compact code space on full non-incremental collections") DEFINE_bool(cleanup_code_caches_at_gc, true, "Flush inline caches prior to mark compact collection and" "flush code caches in maps during mark compact cycle.") DEFINE_int(random_seed, 0, "Default seed for initializing random generator" "(0, the default, means to use system random).") DEFINE_bool(use_verbose_printer, true, "allows verbose printing") DEFINE_bool(allow_natives_syntax, false, "allow natives syntax") DEFINE_bool(trace_sim, false, "Trace simulator execution") DEFINE_bool(check_icache, false, "Check icache flushes in ARM and MIPS simulator") DEFINE_int(stop_sim_at, 0, "Simulator stop after x number of instructions") DEFINE_int(sim_stack_alignment, 8, "Stack alingment in bytes in simulator(4 or 8, 8 is default)") DEFINE_bool(trace_exception, false, "print stack trace when throwing exceptions") DEFINE_bool(preallocate_message_memory, false, "preallocate some memory to build stack traces.") DEFINE_bool(randomize_hashes, true, "randomize hashes to avoid predictable hash collisions" "(with snapshots this option cannot override the baked-in seed)") DEFINE_int(hash_seed, 0, "Fixed seed to use to hash property keys(0 means random)" "(with snapshots this option cannot override the baked-in seed)") DEFINE_bool(preemption, false, "activate a 100ms timer that switches between V8 threads") DEFINE_bool(regexp_optimization, true, "generate optimized regexp code") DEFINE_bool(testing_bool_flag, true, "testing_bool_flag") DEFINE_int(testing_int_flag, 13, "testing_int_flag") DEFINE_float(testing_float_flag, 2.5, "float-flag") DEFINE_string(testing_string_flag, "Hello, world!", "string-flag") DEFINE_int(testing_prng_seed, 42, "Seed used for threading test randomness") DEFINE_string(testing_serialization_file, "/tmp/serdes", "file in which to serialize heap") DEFINE_bool(help, false, "Print usage message, including flags, on console") DEFINE_bool(dump_counters, false, "Dump counters on exit") DEFINE_string(map_counters, "", "Map counters to a file") DEFINE_args(js_arguments, JSARGUMENTS_INIT, "Pass all remaining arguments to the script.Alias for\"--\".") DEFINE_bool(debug_compile_events, true,"Enable debugger compile events") DEFINE_bool(debug_script_collected_events, true,"Enable debugger script collected events") DEFINE_bool(gdbjit, false,"enable GDBJIT interface (disables compacting GC)") DEFINE_bool(gdbjit_full, false,"enable GDBJIT interface for all code objects") DEFINE_bool(gdbjit_dump, false,"dump elf objects with debug info to disk") DEFINE_string(gdbjit_dump_filter,"","dump only objects containing this substring") DEFINE_bool(force_marking_deque_overflows, false,"force overflows of marking deque by reducing it's size ""to 64 words") DEFINE_bool(stress_compaction, false,"stress the GC compactor to flush out bugs (implies ""--force_marking_deque_overflows)")#define FLAG DEFINE_bool(enable_slow_asserts, false,"enable asserts that are slow to execute") DEFINE_bool(trace_codegen, false,"print name of functions for which code is generated") DEFINE_bool(print_source, false,"pretty print source code") DEFINE_bool(print_builtin_source, false,"pretty print source code for builtins") DEFINE_bool(print_ast, false,"print source AST") DEFINE_bool(print_builtin_ast, false,"print source AST for builtins") DEFINE_string(stop_at,"","function name where to insert a breakpoint") DEFINE_bool(print_builtin_scopes, false,"print scopes for builtins") DEFINE_bool(print_scopes, false,"print scopes") DEFINE_bool(trace_contexts, false,"trace contexts operations") DEFINE_bool(gc_greedy, false,"perform GC prior to some allocations") DEFINE_bool(gc_verbose, false,"print stuff during garbage collection") DEFINE_bool(heap_stats, false,"report heap statistics before and after GC") DEFINE_bool(code_stats, false,"report code statistics after GC") DEFINE_bool(verify_heap, false,"verify heap pointers before and after GC") DEFINE_bool(print_handles, false,"report handles after GC") DEFINE_bool(print_global_handles, false,"report global handles after GC") DEFINE_bool(trace_ic, false,"trace inline cache state transitions") DEFINE_bool(print_interfaces, false,"print interfaces") DEFINE_bool(print_interface_details, false,"print interface inference details") DEFINE_int(print_interface_depth, 5,"depth for printing interfaces") DEFINE_bool(trace_normalization, false,"prints when objects are turned into dictionaries.") DEFINE_bool(trace_lazy, false,"trace lazy compilation") DEFINE_bool(collect_heap_spill_statistics, false,"report heap spill statistics along with heap_stats ""(requires heap_stats)") DEFINE_bool(trace_isolates, false,"trace isolate state changes") DEFINE_bool(log_state_changes, false,"Log state changes.") DEFINE_bool(regexp_possessive_quantifier, false,"enable possessive quantifier syntax for testing") DEFINE_bool(trace_regexp_bytecodes, false,"trace regexp bytecode execution") DEFINE_bool(trace_regexp_assembler, false,"trace regexp macro assembler calls.")#define FLAG DEFINE_bool(log, false,"Minimal logging (no API, code, GC, suspect, or handles samples).") DEFINE_bool(log_all, false,"Log all events to the log file.") DEFINE_bool(log_runtime, false,"Activate runtime system %Log call.") DEFINE_bool(log_api, false,"Log API events to the log file.") DEFINE_bool(log_code, false,"Log code events to the log file without profiling.") DEFINE_bool(log_gc, false,"Log heap samples on garbage collection for the hp2ps tool.") DEFINE_bool(log_handles, false,"Log global handle events.") DEFINE_bool(log_snapshot_positions, false,"log positions of (de)serialized objects in the snapshot.") DEFINE_bool(log_suspect, false,"Log suspect operations.") DEFINE_bool(prof, false,"Log statistical profiling information (implies --log-code).") DEFINE_bool(prof_auto, true,"Used with --prof, starts profiling automatically") DEFINE_bool(prof_lazy, false,"Used with --prof, only does sampling and logging"" when profiler is active (implies --noprof_auto).") DEFINE_bool(prof_browser_mode, true,"Used with --prof, turns on browser-compatible mode for profiling.") DEFINE_bool(log_regexp, false,"Log regular expression execution.") DEFINE_bool(sliding_state_window, false,"Update sliding state window counters.") DEFINE_string(logfile,"v8.log","Specify the name of the log file.") DEFINE_bool(ll_prof, false,"Enable low-level linux profiler.")#define FLAG DEFINE_bool(trace_elements_transitions, false,"trace elements transitions") DEFINE_bool(print_code_stubs, false,"print code stubs") DEFINE_bool(test_secondary_stub_cache, false,"test secondary stub cache by disabling the primary one") DEFINE_bool(test_primary_stub_cache, false,"test primary stub cache by disabling the secondary one") DEFINE_bool(print_code, false,"print generated code") DEFINE_bool(print_opt_code, false,"print optimized code") DEFINE_bool(print_unopt_code, false,"print unoptimized code before ""printing optimized code based on it") DEFINE_bool(print_code_verbose, false,"print more information for code") DEFINE_bool(print_builtin_code, false,"print generated code for builtins")#43"/Users/thlorenz/dev/dx/v8-perf/build/v8/src/flags.cc"2#define FLAG_MODE_DEFINE_DEFAULTS#1"/Users/thlorenz/dev/dx/v8-perf/build/v8/src/flag-definitions.h"1#define FLAG_FULL(ftype, ctype, nam, def, cmt)#define FLAG_READONLY(ftype, ctype, nam, def, cmt)#define DEFINE_implication(whenflag, thenflag)#define DEFINE_bool(nam, def, cmt)#define DEFINE_int(nam, def, cmt)#define DEFINE_float(nam, def, cmt)#define DEFINE_string(nam, def, cmt)#define DEFINE_args(nam, def, cmt)#define FLAG DEFINE_bool(use_strict, false,"enforce strict mode") DEFINE_bool(es5_readonly, false,"activate correct semantics for inheriting readonliness") DEFINE_bool(es52_globals, false,"activate new semantics for global var declarations") DEFINE_bool(harmony_typeof, false,"enable harmony semantics for typeof") DEFINE_bool(harmony_scoping, false,"enable harmony block scoping") DEFINE_bool(harmony_modules, false,"enable harmony modules (implies block scoping)") DEFINE_bool(harmony_proxies, false,"enable harmony proxies") DEFINE_bool(harmony_collections, false,"enable harmony collections (sets, maps, and weak maps)") DEFINE_bool(harmony, false,"enable all harmony features (except typeof)") DEFINE_implication(harmony, harmony_scoping) DEFINE_implication(harmony, harmony_modules) DEFINE_implication(harmony, harmony_proxies) DEFINE_implication(harmony, harmony_collections) DEFINE_implication(harmony_modules, harmony_scoping) DEFINE_bool(packed_arrays, false,"optimizes arrays that have no holes") DEFINE_bool(smi_only_arrays, true,"tracks arrays with only smi values") DEFINE_bool(clever_optimizations, true,"Optimize object size, Array shift, DOM strings and string +") DEFINE_bool(unbox_double_arrays, true,"automatically unbox arrays of doubles") DEFINE_bool(string_slices, true,"use string slices") DEFINE_bool(crankshaft, true,"use crankshaft") DEFINE_string(hydrogen_filter,"","optimization filter") DEFINE_bool(use_range, true,"use hydrogen range analysis") DEFINE_bool(eliminate_dead_phis, true,"eliminate dead phis") DEFINE_bool(use_gvn, true,"use hydrogen global value numbering") DEFINE_bool(use_canonicalizing, true,"use hydrogen instruction canonicalizing") DEFINE_bool(use_inlining, true,"use function inlining") DEFINE_int(max_inlined_source_size, 600,"maximum source size in bytes considered for a single inlining") DEFINE_int(max_inlined_nodes, 196,"maximum number of AST nodes considered for a single inlining") DEFINE_int(max_inlined_nodes_cumulative, 196,"maximum cumulative number of AST nodes considered for inlining") DEFINE_bool(loop_invariant_code_motion, true,"loop invariant code motion") DEFINE_bool(collect_megamorphic_maps_from_stub_cache, true,"crankshaft harvests type feedback from stub cache") DEFINE_bool(hydrogen_stats, false,"print statistics for hydrogen") DEFINE_bool(trace_hydrogen, false,"trace generated hydrogen to file") DEFINE_string(trace_phase,"Z","trace generated IR for specified phases") DEFINE_bool(trace_inlining, false,"trace inlining decisions") DEFINE_bool(trace_alloc, false,"trace register allocator") DEFINE_bool(trace_all_uses, false,"trace all use positions") DEFINE_bool(trace_range, false,"trace range analysis") DEFINE_bool(trace_gvn, false,"trace global value numbering") DEFINE_bool(trace_representation, false,"trace representation types") DEFINE_bool(stress_pointer_maps, false,"pointer map for every instruction") DEFINE_bool(stress_environments, false,"environment for every instruction") DEFINE_int(deopt_every_n_times, 0,"deoptimize every n times a deopt point is passed") DEFINE_bool(trap_on_deopt, false,"put a break point before deoptimizing") DEFINE_bool(deoptimize_uncommon_cases, true,"deoptimize uncommon cases") DEFINE_bool(polymorphic_inlining, true,"polymorphic inlining") DEFINE_bool(use_osr, true,"use on-stack replacement") DEFINE_bool(array_bounds_checks_elimination, false,"perform array bounds checks elimination") DEFINE_bool(array_index_dehoisting, false,"perform array index dehoisting") DEFINE_bool(trace_osr, false,"trace on-stack replacement") DEFINE_int(stress_runs, 0,"number of stress runs") DEFINE_bool(optimize_closures, true,"optimize closures") DEFINE_bool(inline_construct, true,"inline constructor calls") DEFINE_bool(inline_arguments, true,"inline functions with arguments object") DEFINE_int(loop_weight, 1,"loop weight for representation inference") DEFINE_bool(optimize_for_in, true,"optimize functions containing for-in loops") DEFINE_bool(experimental_profiler, true,"enable all profiler experiments") DEFINE_bool(watch_ic_patching, false,"profiler considers IC stability") DEFINE_int(frame_count, 1,"number of stack frames inspected by the profiler") DEFINE_bool(self_optimization, false,"primitive functions trigger their own optimization") DEFINE_bool(direct_self_opt, false,"call recompile stub directly when self-optimizing") DEFINE_bool(retry_self_opt, false,"re-try self-optimization if it failed") DEFINE_bool(count_based_interrupts, false,"trigger profiler ticks based on counting instead of timing") DEFINE_bool(interrupt_at_exit, false,"insert an interrupt check at function exit") DEFINE_bool(weighted_back_edges, false,"weight back edges by jump distance for interrupt triggering") DEFINE_int(interrupt_budget, 5900,"execution budget before interrupt is triggered") DEFINE_int(type_info_threshold, 15,"percentage of ICs that must have type info to allow optimization") DEFINE_int(self_opt_count, 130,"call count before self-optimization") DEFINE_implication(experimental_profiler, watch_ic_patching) DEFINE_implication(experimental_profiler, self_optimization) DEFINE_implication(experimental_profiler, retry_self_opt) DEFINE_implication(experimental_profiler, count_based_interrupts) DEFINE_implication(experimental_profiler, interrupt_at_exit) DEFINE_implication(experimental_profiler, weighted_back_edges) DEFINE_bool(trace_opt_verbose, false,"extra verbose compilation tracing") DEFINE_implication(trace_opt_verbose, trace_opt) DEFINE_bool(debug_code, false,"generate extra code (assertions) for debugging") DEFINE_bool(code_comments, false,"emit comments in code disassembly") DEFINE_bool(enable_sse2, true,"enable use of SSE2 instructions if available") DEFINE_bool(enable_sse3, true,"enable use of SSE3 instructions if available") DEFINE_bool(enable_sse4_1, true,"enable use of SSE4.1 instructions if available") DEFINE_bool(enable_cmov, true,"enable use of CMOV instruction if available") DEFINE_bool(enable_rdtsc, true,"enable use of RDTSC instruction if available") DEFINE_bool(enable_sahf, true,"enable use of SAHF instruction if available (X64 only)") DEFINE_bool(enable_vfp3, true,"enable use of VFP3 instructions if available - this implies ""enabling ARMv7 instructions (ARM only)") DEFINE_bool(enable_armv7, true,"enable use of ARMv7 instructions if available (ARM only)") DEFINE_bool(enable_fpu, true,"enable use of MIPS FPU instructions if available (MIPS only)") DEFINE_string(expose_natives_as, NULL,"expose natives in global object") DEFINE_string(expose_debug_as, NULL,"expose debug in global object") DEFINE_bool(expose_gc, false,"expose gc extension") DEFINE_bool(expose_externalize_string, false,"expose externalize string extension") DEFINE_int(stack_trace_limit, 10,"number of stack frames to capture") DEFINE_bool(builtins_in_stack_traces, false,"show built-in functions in stack traces") DEFINE_bool(disable_native_files, false,"disable builtin natives files") DEFINE_bool(inline_new, true,"use fast inline allocation") DEFINE_bool(stack_trace_on_abort, true,"print a stack trace if an assertion failure occurs") DEFINE_bool(trace, false,"trace function calls") DEFINE_bool(mask_constants_with_cookie, true,"use random jit cookie to mask large constants") DEFINE_bool(lazy, true,"use lazy compilation") DEFINE_bool(trace_opt, false,"trace lazy optimization") DEFINE_bool(trace_opt_stats, false,"trace lazy optimization statistics") DEFINE_bool(opt, true,"use adaptive optimizations") DEFINE_bool(always_opt, false,"always try to optimize functions") DEFINE_bool(prepare_always_opt, false,"prepare for turning on always opt") DEFINE_bool(trace_deopt, false,"trace deoptimization") DEFINE_int(min_preparse_length, 1024,"minimum length for automatic enable preparsing") DEFINE_bool(always_full_compiler, false,"try to use the dedicated run-once backend for all code") DEFINE_bool(trace_bailout, false,"print reasons for falling back to using the classic V8 backend") DEFINE_bool(compilation_cache, true,"enable compilation cache") DEFINE_bool(cache_prototype_transitions, true,"cache prototype transitions") DEFINE_bool(trace_debug_json, false,"trace debugging JSON request/response") DEFINE_bool(debugger_auto_break, true,"automatically set the debug break flag when debugger commands are ""in the queue") DEFINE_bool(enable_liveedit, true,"enable liveedit experimental feature") DEFINE_bool(break_on_abort, true,"always cause a debug break before aborting") DEFINE_int(stack_size, kPointerSize *123,"default size of stack region v8 is allowed to use (in kBytes)") DEFINE_int(max_stack_trace_source_length, 300,"maximum length of function source code printed in a stack trace.") DEFINE_bool(always_inline_smi_code, false,"always inline smi code in non-opt code") DEFINE_int(max_new_space_size, 0,"max size of the new generation (in kBytes)") DEFINE_int(max_old_space_size, 0,"max size of the old generation (in Mbytes)") DEFINE_int(max_executable_size, 0,"max size of executable memory (in Mbytes)") DEFINE_bool(gc_global, false,"always perform global GCs") DEFINE_int(gc_interval,-1,"garbage collect after <n> allocations") DEFINE_bool(trace_gc, false,"print one trace line following each garbage collection") DEFINE_bool(trace_gc_nvp, false,"print one detailed trace line in name=value format ""after each garbage collection") DEFINE_bool(print_cumulative_gc_stat, false,"print cumulative GC statistics in name=value format on exit") DEFINE_bool(trace_gc_verbose, false,"print more details following each garbage collection") DEFINE_bool(trace_fragmentation, false,"report fragmentation for old pointer and data pages") DEFINE_bool(collect_maps, true,"garbage collect maps from which no objects can be reached") DEFINE_bool(flush_code, true,"flush code that we expect not to use again before full gc") DEFINE_bool(incremental_marking, true,"use incremental marking") DEFINE_bool(incremental_marking_steps, true,"do incremental marking steps") DEFINE_bool(trace_incremental_marking, false,"trace progress of the incremental marking") DEFINE_bool(use_idle_notification, true,"Use idle notification to reduce memory footprint.") DEFINE_bool(send_idle_notification, false,"Send idle notifcation between stress runs.") DEFINE_bool(use_ic, true,"use inline caching") DEFINE_bool(native_code_counters, false,"generate extra code for manipulating stats counters") DEFINE_bool(always_compact, false,"Perform compaction on every full GC") DEFINE_bool(lazy_sweeping, true,"Use lazy sweeping for old pointer and data spaces") DEFINE_bool(never_compact, false,"Never perform compaction on full GC - testing only") DEFINE_bool(compact_code_space, true,"Compact code space on full non-incremental collections") DEFINE_bool(cleanup_code_caches_at_gc, true,"Flush inline caches prior to mark compact collection and ""flush code caches in maps during mark compact cycle.") DEFINE_int(random_seed, 0,"Default seed for initializing random generator ""(0, the default, means to use system random).") DEFINE_bool(use_verbose_printer, true,"allows verbose printing") DEFINE_bool(allow_natives_syntax, false,"allow natives syntax") DEFINE_bool(trace_sim, false,"Trace simulator execution") DEFINE_bool(check_icache, false,"Check icache flushes in ARM and MIPS simulator") DEFINE_int(stop_sim_at, 0,"Simulator stop after x number of instructions") DEFINE_int(sim_stack_alignment, 8,"Stack alingment in bytes in simulator (4 or 8, 8 is default)") DEFINE_bool(trace_exception, false,"print stack trace when throwing exceptions") DEFINE_bool(preallocate_message_memory, false,"preallocate some memory to build stack traces.") DEFINE_bool(randomize_hashes, true,"randomize hashes to avoid predictable hash collisions ""(with snapshots this option cannot override the baked-in seed)") DEFINE_int(hash_seed, 0,"Fixed seed to use to hash property keys (0 means random)""(with snapshots this option cannot override the baked-in seed)") DEFINE_bool(preemption, false,"activate a 100ms timer that switches between V8 threads") DEFINE_bool(regexp_optimization, true,"generate optimized regexp code") DEFINE_bool(testing_bool_flag, true,"testing_bool_flag") DEFINE_int(testing_int_flag, 13,"testing_int_flag") DEFINE_float(testing_float_flag, 2.5,"float-flag") DEFINE_string(testing_string_flag,"Hello, world!","string-flag") DEFINE_int(testing_prng_seed, 42,"Seed used for threading test randomness") DEFINE_string(testing_serialization_file,"/tmp/serdes","file in which to serialize heap") DEFINE_bool(help, false,"Print usage message, including flags, on console") DEFINE_bool(dump_counters, false,"Dump counters on exit") DEFINE_string(map_counters,"","Map counters to a file") DEFINE_args(js_arguments, JSARGUMENTS_INIT,"Pass all remaining arguments to the script. Alias for \"--\".") DEFINE_bool(debug_compile_events, true,"Enable debugger compile events") DEFINE_bool(debug_script_collected_events, true,"Enable debugger script collected events") DEFINE_bool(gdbjit, false,"enable GDBJIT interface (disables compacting GC)") DEFINE_bool(gdbjit_full, false,"enable GDBJIT interface for all code objects") DEFINE_bool(gdbjit_dump, false,"dump elf objects with debug info to disk") DEFINE_string(gdbjit_dump_filter,"","dump only objects containing this substring") DEFINE_bool(force_marking_deque_overflows, false,"force overflows of marking deque by reducing it's size ""to 64 words") DEFINE_bool(stress_compaction, false,"stress the GC compactor to flush out bugs (implies ""--force_marking_deque_overflows)")#define FLAG DEFINE_bool(enable_slow_asserts, false,"enable asserts that are slow to execute") DEFINE_bool(trace_codegen, false,"print name of functions for which code is generated") DEFINE_bool(print_source, false,"pretty print source code") DEFINE_bool(print_builtin_source, false,"pretty print source code for builtins") DEFINE_bool(print_ast, false,"print source AST") DEFINE_bool(print_builtin_ast, false,"print source AST for builtins") DEFINE_string(stop_at,"","function name where to insert a breakpoint") DEFINE_bool(print_builtin_scopes, false,"print scopes for builtins") DEFINE_bool(print_scopes, false,"print scopes") DEFINE_bool(trace_contexts, false,"trace contexts operations") DEFINE_bool(gc_greedy, false,"perform GC prior to some allocations") DEFINE_bool(gc_verbose, false,"print stuff during garbage collection") DEFINE_bool(heap_stats, false,"report heap statistics before and after GC") DEFINE_bool(code_stats, false,"report code statistics after GC") DEFINE_bool(verify_heap, false,"verify heap pointers before and after GC") DEFINE_bool(print_handles, false,"report handles after GC") DEFINE_bool(print_global_handles, false,"report global handles after GC") DEFINE_bool(trace_ic, false,"trace inline cache state transitions") DEFINE_bool(print_interfaces, false,"print interfaces") DEFINE_bool(print_interface_details, false,"print interface inference details") DEFINE_int(print_interface_depth, 5,"depth for printing interfaces") DEFINE_bool(trace_normalization, false,"prints when objects are turned into dictionaries.") DEFINE_bool(trace_lazy, false,"trace lazy compilation") DEFINE_bool(collect_heap_spill_statistics, false,"report heap spill statistics along with heap_stats ""(requires heap_stats)") DEFINE_bool(trace_isolates, false,"trace isolate state changes") DEFINE_bool(log_state_changes, false,"Log state changes.") DEFINE_bool(regexp_possessive_quantifier, false,"enable possessive quantifier syntax for testing") DEFINE_bool(trace_regexp_bytecodes, false,"trace regexp bytecode execution") DEFINE_bool(trace_regexp_assembler, false,"trace regexp macro assembler calls.")#define FLAG DEFINE_bool(log, false,"Minimal logging (no API, code, GC, suspect, or handles samples).") DEFINE_bool(log_all, false,"Log all events to the log file.") DEFINE_bool(log_runtime, false,"Activate runtime system %Log call.") DEFINE_bool(log_api, false,"Log API events to the log file.") DEFINE_bool(log_code, false,"Log code events to the log file without profiling.") DEFINE_bool(log_gc, false,"Log heap samples on garbage collection for the hp2ps tool.") DEFINE_bool(log_handles, false,"Log global handle events.") DEFINE_bool(log_snapshot_positions, false,"log positions of (de)serialized objects in the snapshot.") DEFINE_bool(log_suspect, false,"Log suspect operations.") DEFINE_bool(prof, false,"Log statistical profiling information (implies --log-code).") DEFINE_bool(prof_auto, true,"Used with --prof, starts profiling automatically") DEFINE_bool(prof_lazy, false,"Used with --prof, only does sampling and logging"" when profiler is active (implies --noprof_auto).") DEFINE_bool(prof_browser_mode, true,"Used with --prof, turns on browser-compatible mode for profiling.") DEFINE_bool(log_regexp, false,"Log regular expression execution.") DEFINE_bool(sliding_state_window, false,"Update sliding state window counters.") DEFINE_string(logfile,"v8.log","Specify the name of the log file.") DEFINE_bool(ll_prof, false,"Enable low-level linux profiler.")#define FLAG DEFINE_bool(trace_elements_transitions, false,"trace elements transitions") DEFINE_bool(print_code_stubs, false,"print code stubs") DEFINE_bool(test_secondary_stub_cache, false,"test secondary stub cache by disabling the primary one") DEFINE_bool(test_primary_stub_cache, false,"test primary stub cache by disabling the secondary one") DEFINE_bool(print_code, false,"print generated code") DEFINE_bool(print_opt_code, false,"print optimized code") DEFINE_bool(print_unopt_code, false,"print unoptimized code before ""printing optimized code based on it") DEFINE_bool(print_code_verbose, false,"print more information for code") DEFINE_bool(print_builtin_code, false,"print generated code for builtins")#47"/Users/thlorenz/dev/dx/v8-perf/build/v8/src/flags.cc"2 namespace{struct Flag{enum FlagType{TYPE_BOOL, TYPE_INT, TYPE_FLOAT, TYPE_STRING, TYPE_ARGS} name
Definition: flags.cc:1349
bool EmitQuickCheck(RegExpCompiler *compiler, Trace *trace, bool preload_has_checked_bounds, Label *on_possible_success, QuickCheckDetails *details_return, bool fall_through_on_failure)
Definition: jsregexp.cc:2384
void set_loop_label(Label *label)
Definition: jsregexp.h:1461
static Handle< T > cast(Handle< S > that)
Definition: handles.h:81
static ActionNode * EmptyMatchCheck(int start_register, int repetition_register, int repetition_limit, RegExpNode *on_success)
Definition: jsregexp.cc:1468
RegExpTree * body()
Definition: ast.h:2500
T Max(T a, T b)
Definition: utils.h:222
virtual void ClearRegisters(int reg_from, int reg_to)=0
Position * positions(int index)
Definition: jsregexp.h:526
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:4577
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.cc:3333
int32_t uc32
Definition: globals.h:274
LimitResult LimitVersions(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:2165
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1371
static Handle< Object > CreateRegExpLiteral(Handle< JSFunction > constructor, Handle< String > pattern, Handle< String > flags, bool *has_pending_exception)
Definition: jsregexp.cc:66
Vector< const char > ToAsciiVector()
Definition: objects.h:6918
static TextElement CharClass(RegExpCharacterClass *char_class)
Definition: jsregexp.cc:851
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2702
void AddFromFollowing(NodeInfo *that)
Definition: jsregexp.h:470
static int IrregexpPrepare(Handle< JSRegExp > regexp, Handle< String > subject, Zone *zone)
Definition: jsregexp.cc:504
void AddRange(CharacterRange range, int value, Zone *zone)
Definition: jsregexp.cc:5466
static ActionNode * IncrementRegister(int reg, RegExpNode *on_success)
Definition: jsregexp.cc:1413
uc16 to()
Definition: jsregexp.h:338
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)=0
static ByteArray * cast(Object *obj)
static int IrregexpExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, Vector< int > registers, Zone *zone)
Definition: jsregexp.cc:540
Flag flags[]
Definition: flags.cc:1467
static int IrregexpNumberOfCaptures(FixedArray *re)
Definition: jsregexp.cc:471
void set_to(uc16 value)
Definition: jsregexp.h:339
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:4962
virtual void LoadCurrentCharacter(int cp_offset, Label *on_end_of_input, bool check_bounds=true, int characters=1)=0
static void Canonicalize(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5355
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2836
Handle< FixedArray > LookupRegExp(Handle< String > source, JSRegExp::Flags flags)
void EnsureAnalyzed(RegExpNode *node)
Definition: jsregexp.cc:5571
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2329
void set_flush_budget(int to)
Definition: jsregexp.h:1464
static CharacterRange Everything()
Definition: jsregexp.h:258
virtual bool IsAnchoredAtEnd()
Definition: ast.h:2213
RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:1006
virtual bool try_to_emit_quick_check_for_alternative(int i)
Definition: jsregexp.h:1082
void AddValue(int value, Zone *zone)
Definition: jsregexp.h:340
bool Get(unsigned value)
Definition: jsregexp.cc:5452
FlagType type_
Definition: flags.cc:1351
unibrow::Mapping< unibrow::Ecma262Canonicalize > Canonicalize
static int GlobalOffsetsVectorSize(Handle< JSRegExp > regexp, int registers_per_match, int *max_matches)
Definition: jsregexp.cc:524
#define ASSERT(condition)
Definition: checks.h:270
static ActionNode * StorePosition(int reg, bool is_capture, RegExpNode *on_success)
Definition: jsregexp.cc:1421
#define ASSERT_GE(v1, v2)
Definition: checks.h:273
void set_characters_preloaded(int count)
Definition: jsregexp.h:1462
const int kPatternTooShortForBoyerMoore
Definition: jsregexp.cc:139
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2209
struct v8::internal::ActionNode::@6::@8 u_increment_register
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler *compiler)
Definition: jsregexp.cc:3285
const char * error_message()
Definition: jsregexp.h:1569
RegExpCompiler * compiler()
Definition: jsregexp.h:1274
void BuildTable(ChoiceNode *node)
Definition: jsregexp.cc:5784
virtual void ReadCurrentPositionFromRegister(int reg)=0
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2339
bool EmitSkipInstructions(RegExpMacroAssembler *masm)
Definition: jsregexp.cc:3664
#define DEFINE_ACCEPT(Type)
Definition: jsregexp.cc:1481
ZoneList< TextElement > * elements()
Definition: jsregexp.h:836
Factory * factory()
Definition: isolate.h:977
void SetAll(int map_number)
Definition: jsregexp.h:1298
struct v8::internal::ActionNode::@6::@12 u_clear_captures
static Code * cast(Object *obj)
static void SetLastSubject(FixedArray *array, String *to)
Definition: jsregexp.h:168
static void Negate(ZoneList< CharacterRange > *src, ZoneList< CharacterRange > *dst, Zone *zone)
Definition: jsregexp.cc:5392
static Handle< Object > AtomExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:281
static Smi * cast(Object *object)
Handle< String > FlattenGetString(Handle< String > string)
Definition: handles.cc:216
static const int kRegExpExecutableMemoryLimit
Definition: jsregexp.h:197
virtual void Emit(RegExpCompiler *compiler, Trace *trace)=0
static ActionNode * PositiveSubmatchSuccess(int stack_pointer_reg, int restore_reg, int clear_capture_count, int clear_capture_from, RegExpNode *on_success)
Definition: jsregexp.cc:1453
static const int kNoRegister
Definition: jsregexp.cc:971
virtual void AdvanceCurrentPosition(int by)=0
int get(uchar c, uchar n, uchar *result)
Definition: unicode-inl.h:48
RegExpMacroAssembler * macro_assembler()
Definition: jsregexp.cc:950
static void Split(ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5162
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:5036
RegExpCharacterClass * u_char_class
Definition: jsregexp.h:424
SmartArrayPointer< char > ToCString(AllowNullsFlag allow_nulls, RobustnessFlag robustness_flag, int offset, int length, int *length_output=0)
Definition: objects.cc:6161
RegExpNode * FilterSuccessor(int depth)
Definition: jsregexp.cc:2694
static void SetLastInput(FixedArray *array, String *to)
Definition: jsregexp.h:172
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5709
virtual Interval CaptureRegisters()
Definition: ast.h:2218
static const int kRegWxpCompiledLimit
Definition: jsregexp.h:198
void Advance(int by, bool ascii)
Definition: jsregexp.cc:2621
void Merge(QuickCheckDetails *other, int from_index)
Definition: jsregexp.cc:2642
Label * loop_label()
Definition: jsregexp.h:1441
struct v8::internal::ActionNode::@6::@11 u_empty_match_check
#define UNREACHABLE()
Definition: checks.h:50
RegExpExpansionLimiter(RegExpCompiler *compiler, int factor)
Definition: jsregexp.cc:4720
void IncreaseTotalRegexpCodeGenerated(int size)
Definition: heap.h:1499
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2235
T * start() const
Definition: utils.h:389
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4194
STATIC_ASSERT((FixedDoubleArray::kHeaderSize &kDoubleAlignmentMask)==0)
union v8::internal::TextElement::@5 data
static const unsigned kFirstLimit
Definition: jsregexp.h:304
void AddAlternative(GuardedAlternative node)
Definition: jsregexp.h:1055
static void SetLastCaptureCount(FixedArray *array, int to)
Definition: jsregexp.h:164
void fail(const char *error_message)
Definition: jsregexp.h:1573
AlternativeGeneration * at(int i)
Definition: jsregexp.cc:3477
virtual void CheckCharacterGT(uc16 limit, Label *on_greater)=0
void Call(uc32 from, DispatchTable::Entry entry)
Definition: jsregexp.cc:5805
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5688
void set_bound_checked_up_to(int to)
Definition: jsregexp.h:1463
void set_backtrack(Label *backtrack)
Definition: jsregexp.h:1459
static const int kInfinity
Definition: ast.h:2206
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2867
virtual void Accept(NodeVisitor *visitor)
Definition: jsregexp.cc:1489
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:4947
v8::Handle< v8::Object > bottom
Definition: test-api.cc:1625
MemoryAllocator * memory_allocator()
Definition: isolate.h:830
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2764
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2298
struct v8::internal::ActionNode::@6::@10 u_submatch
void AddWork(RegExpNode *node)
Definition: jsregexp.cc:944
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5247
QuickCheckDetails * quick_check_performed()
Definition: jsregexp.h:1446
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)=0
void AddGuard(Guard *guard, Zone *zone)
Definition: jsregexp.cc:1395
RegExpTree * body()
Definition: ast.h:2469
static int StartRegister(int index)
Definition: ast.h:2471
static Handle< Object > IrregexpExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo, Zone *zone)
Definition: jsregexp.cc:617
void set_bm_info(bool not_at_start, BoyerMooreLookahead *bm)
Definition: jsregexp.h:656
void AddInverse(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5839
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2287
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.h:1503
ZoneList< RegExpTree * > * alternatives()
Definition: ast.h:2242
virtual int min_match()=0
void AddCaseEquivalents(ZoneList< CharacterRange > *ranges, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5181
struct v8::internal::ActionNode::@6::@7 u_store_register
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4072
virtual void ReadStackPointerFromRegister(int reg)=0
static void IrregexpInitialize(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, int capture_register_count)
Definition: jsregexp.cc:491
virtual void IfRegisterLT(int reg, int comparand, Label *if_lt)=0
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, Label *on_no_match)=0
TriBool at_start()
Definition: jsregexp.h:1438
Label * backtrack()
Definition: jsregexp.h:1440
static RegExpImpl::IrregexpResult Match(Isolate *isolate, Handle< ByteArray > code, Handle< String > subject, int *captures, int start_position)
Handle< String > NewStringFromTwoByte(Vector< const uc16 > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:216
const Register pc
QuickCheckDetails quick_check_details
Definition: jsregexp.cc:3453
Vector< const uc16 > ToUC16Vector()
Definition: objects.h:6924
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.h:614
const int kMaxLookaheadForBoyerMoore
Definition: jsregexp.cc:136
Zone * zone() const
Definition: jsregexp.h:648
ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b)
Definition: jsregexp.h:1220
int length() const
Definition: utils.h:383
static Handle< Object > Exec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo, Zone *zone)
Definition: jsregexp.cc:231
#define ASSERT_LE(v1, v2)
Definition: checks.h:275
virtual void CheckNotAtStart(Label *on_not_at_start)=0
void add_action(DeferredAction *new_action)
Definition: jsregexp.h:1454
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3398
bool has_pending_exception()
Definition: isolate.h:554
BoyerMooreLookahead(int length, RegExpCompiler *compiler, Zone *zone)
Definition: jsregexp.cc:3544
virtual void CheckCharacter(unsigned c, Label *on_equal)=0
void SaveBMInfo(BoyerMooreLookahead *bm, bool not_at_start, int offset)
Definition: jsregexp.h:630
static const int kIrregexpCaptureCountIndex
Definition: objects.h:6467
void AddRange(CharacterRange range)
Definition: jsregexp.h:1520
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:603
void set_being_calculated(bool b)
Definition: jsregexp.h:1081
static void AtomCompile(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, Handle< String > match_pattern)
Definition: jsregexp.cc:256
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:4892
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:4683
int characters_preloaded()
Definition: jsregexp.h:1443
DeferredAction * actions()
Definition: jsregexp.h:1418
static const int kNodeIsTooComplexForGreedyLoops
Definition: jsregexp.h:588
uc16 from()
Definition: jsregexp.h:337
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2220
void PutRegExp(Handle< String > source, JSRegExp::Flags flags, Handle< FixedArray > data)
Vector< const char > CStrVector(const char *data)
Definition: utils.h:525
DispatchTable * GetTable(bool ignore_case)
Definition: jsregexp.cc:869
static int IrregexpMaxRegisterCount(FixedArray *re)
Definition: jsregexp.cc:460
void CountCharacter(int character)
Definition: jsregexp.cc:887
RegExpNode * on_success()
Definition: jsregexp.h:707
static void SetIrregexpMaxRegisterCount(FixedArray *re, int value)
Definition: jsregexp.cc:466
BoyerMooreLookahead * bm_info(bool not_at_start)
Definition: jsregexp.h:644
#define ASSERT_LT(v1, v2)
Definition: checks.h:274
static const int kLastMatchOverhead
Definition: jsregexp.h:147
void set_from(uc16 value)
Definition: jsregexp.h:263
FlatContent GetFlatContent()
Definition: objects.cc:6119
virtual void AppendToText(RegExpText *text, Zone *zone)
Definition: jsregexp.cc:833
#define ISOLATE
Definition: isolate.h:1410
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:4956
int bound_checked_up_to()
Definition: jsregexp.h:1444
virtual int GreedyLoopTextLength()
Definition: jsregexp.h:589
virtual bool CheckSpecialCharacterClass(uc16 type, Label *on_no_match)
static const int kMaxRecursion
Definition: jsregexp.cc:953
void AddContinueAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3391
struct v8::internal::ActionNode::@6::@9 u_position_register
bool GetStoredPosition(int reg, int *cp_offset)
Definition: jsregexp.cc:1095
static AssertionNode * AfterNewline(RegExpNode *on_success)
Definition: jsregexp.h:892
int from() const
Definition: jsregexp.h:693
FrequencyCollator * frequency_collator()
Definition: jsregexp.cc:962
void Sort(int(*cmp)(const T *x, const T *y))
static AssertionNode * AtNonBoundary(RegExpNode *on_success)
Definition: jsregexp.h:889
bool is_null() const
Definition: handles.h:87
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset)=0
static const int kMaxWidth
Definition: unicode.h:307
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.cc:5643
static const int kAtomPatternIndex
Definition: objects.h:6443
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2459
virtual void Bind(Label *label)=0
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)=0
int Count(int map_number)
Definition: jsregexp.h:1276
#define UNIMPLEMENTED()
Definition: checks.h:48
uint16_t uc16
Definition: globals.h:273
void MakeCaseIndependent(bool is_ascii)
Definition: jsregexp.cc:3304
virtual void CheckCharacterLT(uc16 limit, Label *on_less)=0
ZoneList< GuardedAlternative > * alternatives()
Definition: jsregexp.h:1058
virtual bool IsAnchoredAtStart()
Definition: ast.h:2212
static int code_index(bool is_ascii)
Definition: objects.h:6409
static void PrintError(const char *format,...)
static Handle< T > null()
Definition: handles.h:86
int SearchString(Isolate *isolate, Vector< const SubjectChar > subject, Vector< const PatternChar > pattern, int start_index)
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2685
void SetRest(int from_map)
Definition: jsregexp.h:1302
Vector< const uc16 > data()
Definition: ast.h:2374
static Result Match(Handle< Code > regexp, Handle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
#define ASSERT_EQ(v1, v2)
Definition: checks.h:271
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination trace on stack replacement optimize closures functions with arguments object optimize functions containing for in loops profiler considers IC stability primitive functions trigger their own optimization re try self optimization if it failed insert an interrupt check at function exit execution budget before interrupt is triggered call count before self optimization self_optimization count_based_interrupts weighted_back_edges trace_opt emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 enable use of ARMv7 instructions if enable use of MIPS FPU instructions if NULL
Definition: flags.cc:274
void ForEach(Callback *callback)
Definition: jsregexp.h:371
virtual void AppendToText(RegExpText *text, Zone *zone)
Definition: jsregexp.cc:823
OutSet * Extend(unsigned value, Zone *zone)
Definition: jsregexp.cc:5421
static void DotPrint(const char *label, RegExpNode *node, bool ignore_case)
static const uchar kBadChar
Definition: unicode.h:162
void USE(T)
Definition: globals.h:303
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:5018
ZoneList< GuardedAlternative > * alternatives_
Definition: jsregexp.h:1087
static const unsigned kMaxAsciiCharCodeU
Definition: objects.h:7108
virtual void CheckCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_equal)=0
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping true
Definition: flags.cc:157
ContainedInLattice AddRange(ContainedInLattice containment, const int *ranges, int ranges_length, Interval new_range)
Definition: jsregexp.cc:111
virtual void IfRegisterEqPos(int reg, Label *if_eq)=0
virtual int GreedyLoopTextLength()
Definition: jsregexp.cc:3323
static ByteArray * IrregexpByteCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:481
static FixedArray * cast(Object *obj)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3046
void set_current_expansion_factor(int value)
Definition: jsregexp.cc:965
static ActionNode * ClearCaptures(Interval range, RegExpNode *on_success)
Definition: jsregexp.cc:1432
static const int kCodeOffset
Definition: jsregexp.cc:948
void Set(int map_number, int character)
Definition: jsregexp.h:1282
static const int kImplementationOffset
Definition: jsregexp.cc:946
static int IrregexpNumberOfRegisters(FixedArray *re)
Definition: jsregexp.cc:476
virtual void CheckBitInTable(Handle< ByteArray > table, Label *on_bit_set)=0
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2814
unibrow::Mapping< unibrow::CanonicalizationRange > * jsregexp_canonrange()
Definition: isolate.h:881
static const int kCompilationErrorValue
Definition: objects.h:6494
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
Definition: list-inl.h:38
virtual void AppendToText(RegExpText *text, Zone *zone)
Definition: jsregexp.cc:828
#define FACTORY
Definition: isolate.h:1409
Definition: jsregexp.h:332
bool Rationalize(bool ascii)
Definition: jsregexp.cc:2360
Object * get(int index)
Definition: objects-inl.h:1675
virtual void CheckGreedyLoop(Label *on_tos_equals_current_position)=0
void AddLoopAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3384
virtual void Accept(NodeVisitor *visitor)=0
RegExpNode * loop_node()
Definition: jsregexp.h:1167
int EatsAtLeastHelper(int still_to_find, int recursion_depth, RegExpNode *ignore_this_node, bool not_at_start)
Definition: jsregexp.cc:2310
static Handle< Object > Compile(Handle< JSRegExp > re, Handle< String > pattern, Handle< String > flags)
Definition: jsregexp.cc:168
static TextElement Atom(RegExpAtom *atom)
Definition: jsregexp.cc:844
ZoneList< Guard * > * guards()
Definition: jsregexp.h:1034
static const int kFillInBMBudget
Definition: jsregexp.h:602
static int EndRegister(int index)
Definition: ast.h:2472
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3810
#define FOR_EACH_NODE_TYPE(VISIT)
Definition: jsregexp.h:385
T Min(T a, T b)
Definition: utils.h:229
void Call(uc16 from, DispatchTable::Entry entry)
Definition: jsregexp.cc:5152
virtual int max_match()=0
void Flush(RegExpCompiler *compiler, RegExpNode *successor)
Definition: jsregexp.cc:1283
static const int kUninitializedValue
Definition: objects.h:6490
ZoneList< RegExpTree * > * nodes()
Definition: ast.h:2263
void set_quick_check_performed(QuickCheckDetails *d)
Definition: jsregexp.h:1465
void check(i::Vector< const char > string)
const int kSampleSize
Definition: process.cc:570
void InvalidateCurrentCharacter()
Definition: jsregexp.cc:3280
#define ARRAY_SIZE(a)
Definition: globals.h:295
virtual void CheckNotCharacter(unsigned c, Label *on_not_equal)=0
RegExpTree * body()
Definition: ast.h:2439
static void AddClassEscape(uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5079
OutSet * out_set()
Definition: jsregexp.h:343
static int saved_code_index(bool is_ascii)
Definition: objects.h:6417
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:4704
int Frequency(int in_character)
Definition: jsregexp.cc:895
virtual void CheckPosition(int cp_offset, Label *on_outside_input)
RecursionCheck(RegExpCompiler *compiler)
Definition: jsregexp.cc:990
FlagType type() const
Definition: flags.cc:1358
static CharacterRange Singleton(uc16 value)
Definition: jsregexp.h:251
unsigned int uchar
Definition: unicode.h:40
static AssertionNode * AtEnd(RegExpNode *on_success)
Definition: jsregexp.h:880
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: jsregexp.cc:4689
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2251
void AddElement(TextElement elm, Zone *zone)
Definition: ast.h:2393
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_not_equal)=0
virtual Handle< HeapObject > GetCode(Handle< String > source)=0
AddDispatchRange(DispatchTableConstructor *constructor)
Definition: jsregexp.cc:5797
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start)
Definition: jsregexp.cc:3031
virtual void IfRegisterGE(int reg, int comparand, Label *if_ge)=0
void set_stop_node(RegExpNode *node)
Definition: jsregexp.h:1460
virtual void PushBacktrack(Label *label)=0
static CompilationResult Compile(RegExpCompileData *input, bool ignore_case, bool global, bool multiline, Handle< String > pattern, Handle< String > sample_subject, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5890
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2747
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2849
static void SetCapture(FixedArray *array, int index, int to)
Definition: jsregexp.h:176
static AssertionNode * AtBoundary(RegExpNode *on_success)
Definition: jsregexp.h:886
bool mentions_reg(int reg)
Definition: jsregexp.cc:1084