v8  3.11.10(node0.8.26)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
jsregexp.h
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_JSREGEXP_H_
29 #define V8_JSREGEXP_H_
30 
31 #include "allocation.h"
32 #include "assembler.h"
33 #include "zone-inl.h"
34 
35 namespace v8 {
36 namespace internal {
37 
38 class NodeVisitor;
39 class RegExpCompiler;
40 class RegExpMacroAssembler;
41 class RegExpNode;
42 class RegExpTree;
43 class BoyerMooreLookahead;
44 
45 class RegExpImpl {
46  public:
47  // Whether V8 is compiled with native regexp support or not.
48  static bool UsesNativeRegExp() {
49 #ifdef V8_INTERPRETED_REGEXP
50  return false;
51 #else
52  return true;
53 #endif
54  }
55 
56  // Creates a regular expression literal in the old space.
57  // This function calls the garbage collector if necessary.
59  Handle<String> pattern,
61  bool* has_pending_exception);
62 
63  // Returns a string representation of a regular expression.
64  // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
65  // This function calls the garbage collector if necessary.
67 
68  // Parses the RegExp pattern and prepares the JSRegExp object with
69  // generic data and choice of implementation - as well as what
70  // the implementation wants to store in the data field.
71  // Returns false if compilation fails.
73  Handle<String> pattern,
75 
76  // See ECMA-262 section 15.10.6.2.
77  // This function calls the garbage collector if necessary.
78  static Handle<Object> Exec(Handle<JSRegExp> regexp,
79  Handle<String> subject,
80  int index,
81  Handle<JSArray> lastMatchInfo,
82  Zone* zone);
83 
84  // Prepares a JSRegExp object with Irregexp-specific data.
85  static void IrregexpInitialize(Handle<JSRegExp> re,
86  Handle<String> pattern,
88  int capture_register_count);
89 
90 
91  static void AtomCompile(Handle<JSRegExp> re,
92  Handle<String> pattern,
94  Handle<String> match_pattern);
95 
97  Handle<String> subject,
98  int index,
99  Handle<JSArray> lastMatchInfo);
100 
102 
103  // Prepare a RegExp for being executed one or more times (using
104  // IrregexpExecOnce) on the subject.
105  // This ensures that the regexp is compiled for the subject, and that
106  // the subject is flat.
107  // Returns the number of integer spaces required by IrregexpExecOnce
108  // as its "registers" argument. If the regexp cannot be compiled,
109  // an exception is set as pending, and this function returns negative.
110  static int IrregexpPrepare(Handle<JSRegExp> regexp,
111  Handle<String> subject,
112  Zone* zone);
113 
114  // Calculate the size of offsets vector for the case of global regexp
115  // and the number of matches this vector is able to store.
116  static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
117  int registers_per_match,
118  int* max_matches);
119 
120  // Execute a regular expression on the subject, starting from index.
121  // If matching succeeds, return the number of matches. This can be larger
122  // than one in the case of global regular expressions.
123  // The captures and subcaptures are stored into the registers vector.
124  // If matching fails, returns RE_FAILURE.
125  // If execution fails, sets a pending exception and returns RE_EXCEPTION.
126  static int IrregexpExecRaw(Handle<JSRegExp> regexp,
127  Handle<String> subject,
128  int index,
129  Vector<int> registers,
130  Zone* zone);
131 
132  // Execute an Irregexp bytecode pattern.
133  // On a successful match, the result is a JSArray containing
134  // captured positions. On a failure, the result is the null value.
135  // Returns an empty handle in case of an exception.
137  Handle<String> subject,
138  int index,
139  Handle<JSArray> lastMatchInfo,
140  Zone* zone);
141 
142  // Array index in the lastMatchInfo array.
143  static const int kLastCaptureCount = 0;
144  static const int kLastSubject = 1;
145  static const int kLastInput = 2;
146  static const int kFirstCapture = 3;
147  static const int kLastMatchOverhead = 3;
148 
149  // Direct offset into the lastMatchInfo array.
150  static const int kLastCaptureCountOffset =
152  static const int kLastSubjectOffset =
154  static const int kLastInputOffset =
156  static const int kFirstCaptureOffset =
158 
159  // Used to access the lastMatchInfo array.
160  static int GetCapture(FixedArray* array, int index) {
161  return Smi::cast(array->get(index + kFirstCapture))->value();
162  }
163 
164  static void SetLastCaptureCount(FixedArray* array, int to) {
165  array->set(kLastCaptureCount, Smi::FromInt(to));
166  }
167 
168  static void SetLastSubject(FixedArray* array, String* to) {
169  array->set(kLastSubject, to);
170  }
171 
172  static void SetLastInput(FixedArray* array, String* to) {
173  array->set(kLastInput, to);
174  }
175 
176  static void SetCapture(FixedArray* array, int index, int to) {
177  array->set(index + kFirstCapture, Smi::FromInt(to));
178  }
179 
180  static int GetLastCaptureCount(FixedArray* array) {
181  return Smi::cast(array->get(kLastCaptureCount))->value();
182  }
183 
184  // For acting on the JSRegExp data FixedArray.
185  static int IrregexpMaxRegisterCount(FixedArray* re);
186  static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
187  static int IrregexpNumberOfCaptures(FixedArray* re);
188  static int IrregexpNumberOfRegisters(FixedArray* re);
189  static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii);
190  static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii);
191 
192  // Limit the space regexps take up on the heap. In order to limit this we
193  // would like to keep track of the amount of regexp code on the heap. This
194  // is not tracked, however. As a conservative approximation we track the
195  // total regexp code compiled including code that has subsequently been freed
196  // and the total executable memory at any point.
197  static const int kRegExpExecutableMemoryLimit = 16 * MB;
198  static const int kRegWxpCompiledLimit = 1 * MB;
199 
200  private:
201  static String* last_ascii_string_;
202  static String* two_byte_cached_string_;
203 
204  static bool CompileIrregexp(
205  Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii,
206  Zone* zone);
207  static inline bool EnsureCompiledIrregexp(
208  Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii,
209  Zone* zone);
210 
211 
212  // Set the subject cache. The previous string buffer is not deleted, so the
213  // caller should ensure that it doesn't leak.
214  static void SetSubjectCache(String* subject,
215  char* utf8_subject,
216  int uft8_length,
217  int character_position,
218  int utf8_position);
219 
220  // A one element cache of the last utf8_subject string and its length. The
221  // subject JS String object is cached in the heap. We also cache a
222  // translation between position and utf8 position.
223  static char* utf8_subject_cache_;
224  static int utf8_length_cache_;
225  static int utf8_position_;
226  static int character_position_;
227 };
228 
229 
230 // Represents the location of one element relative to the intersection of
231 // two sets. Corresponds to the four areas of a Venn diagram.
237 };
238 
239 
240 // Represents code units in the range from from_ to to_, both ends are
241 // inclusive.
243  public:
244  CharacterRange() : from_(0), to_(0) { }
245  // For compatibility with the CHECK_OK macro
246  CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
247  CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
248  static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
249  Zone* zone);
251  static inline CharacterRange Singleton(uc16 value) {
252  return CharacterRange(value, value);
253  }
254  static inline CharacterRange Range(uc16 from, uc16 to) {
255  ASSERT(from <= to);
256  return CharacterRange(from, to);
257  }
258  static inline CharacterRange Everything() {
259  return CharacterRange(0, 0xFFFF);
260  }
261  bool Contains(uc16 i) { return from_ <= i && i <= to_; }
262  uc16 from() const { return from_; }
263  void set_from(uc16 value) { from_ = value; }
264  uc16 to() const { return to_; }
265  void set_to(uc16 value) { to_ = value; }
266  bool is_valid() { return from_ <= to_; }
267  bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
268  bool IsSingleton() { return (from_ == to_); }
269  void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii,
270  Zone* zone);
271  static void Split(ZoneList<CharacterRange>* base,
272  Vector<const int> overlay,
273  ZoneList<CharacterRange>** included,
274  ZoneList<CharacterRange>** excluded,
275  Zone* zone);
276  // Whether a range list is in canonical form: Ranges ordered by from value,
277  // and ranges non-overlapping and non-adjacent.
278  static bool IsCanonical(ZoneList<CharacterRange>* ranges);
279  // Convert range list to canonical form. The characters covered by the ranges
280  // will still be the same, but no character is in more than one range, and
281  // adjacent ranges are merged. The resulting list may be shorter than the
282  // original, but cannot be longer.
283  static void Canonicalize(ZoneList<CharacterRange>* ranges);
284  // Negate the contents of a character range in canonical form.
285  static void Negate(ZoneList<CharacterRange>* src,
287  Zone* zone);
288  static const int kStartMarker = (1 << 24);
289  static const int kPayloadMask = (1 << 24) - 1;
290 
291  private:
292  uc16 from_;
293  uc16 to_;
294 };
295 
296 
297 // A set of unsigned integers that behaves especially well on small
298 // integers (< 32). May do zone-allocation.
299 class OutSet: public ZoneObject {
300  public:
301  OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
302  OutSet* Extend(unsigned value, Zone* zone);
303  bool Get(unsigned value);
304  static const unsigned kFirstLimit = 32;
305 
306  private:
307  // Destructively set a value in this set. In most cases you want
308  // to use Extend instead to ensure that only one instance exists
309  // that contains the same values.
310  void Set(unsigned value, Zone* zone);
311 
312  // The successors are a list of sets that contain the same values
313  // as this set and the one more value that is not present in this
314  // set.
315  ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
316 
317  OutSet(uint32_t first, ZoneList<unsigned>* remaining)
318  : first_(first), remaining_(remaining), successors_(NULL) { }
319  uint32_t first_;
320  ZoneList<unsigned>* remaining_;
321  ZoneList<OutSet*>* successors_;
322  friend class Trace;
323 };
324 
325 
326 // A mapping from integers, specified as ranges, to a set of integers.
327 // Used for mapping character ranges to choices.
328 class DispatchTable : public ZoneObject {
329  public:
330  explicit DispatchTable(Zone* zone) : tree_(zone) { }
331 
332  class Entry {
333  public:
334  Entry() : from_(0), to_(0), out_set_(NULL) { }
336  : from_(from), to_(to), out_set_(out_set) { }
337  uc16 from() { return from_; }
338  uc16 to() { return to_; }
339  void set_to(uc16 value) { to_ = value; }
340  void AddValue(int value, Zone* zone) {
341  out_set_ = out_set_->Extend(value, zone);
342  }
343  OutSet* out_set() { return out_set_; }
344  private:
345  uc16 from_;
346  uc16 to_;
347  OutSet* out_set_;
348  };
349 
350  class Config {
351  public:
352  typedef uc16 Key;
353  typedef Entry Value;
354  static const uc16 kNoKey;
355  static const Entry NoValue() { return Value(); }
356  static inline int Compare(uc16 a, uc16 b) {
357  if (a == b)
358  return 0;
359  else if (a < b)
360  return -1;
361  else
362  return 1;
363  }
364  };
365 
366  void AddRange(CharacterRange range, int value, Zone* zone);
367  OutSet* Get(uc16 value);
368  void Dump();
369 
370  template <typename Callback>
371  void ForEach(Callback* callback) {
372  return tree()->ForEach(callback);
373  }
374 
375  private:
376  // There can't be a static empty set since it allocates its
377  // successors in a zone and caches them.
378  OutSet* empty() { return &empty_; }
379  OutSet empty_;
380  ZoneSplayTree<Config>* tree() { return &tree_; }
381  ZoneSplayTree<Config> tree_;
382 };
383 
384 
385 #define FOR_EACH_NODE_TYPE(VISIT) \
386  VISIT(End) \
387  VISIT(Action) \
388  VISIT(Choice) \
389  VISIT(BackReference) \
390  VISIT(Assertion) \
391  VISIT(Text)
392 
393 
394 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
395  VISIT(Disjunction) \
396  VISIT(Alternative) \
397  VISIT(Assertion) \
398  VISIT(CharacterClass) \
399  VISIT(Atom) \
400  VISIT(Quantifier) \
401  VISIT(Capture) \
402  VISIT(Lookahead) \
403  VISIT(BackReference) \
404  VISIT(Empty) \
405  VISIT(Text)
406 
407 
408 #define FORWARD_DECLARE(Name) class RegExp##Name;
410 #undef FORWARD_DECLARE
411 
412 
413 class TextElement {
414  public:
417  explicit TextElement(Type t) : type(t), cp_offset(-1) { }
418  static TextElement Atom(RegExpAtom* atom);
419  static TextElement CharClass(RegExpCharacterClass* char_class);
420  int length();
422  union {
425  } data;
427 };
428 
429 
430 class Trace;
431 
432 
433 struct NodeInfo {
440  at_end(false),
441  visited(false),
443 
444  // Returns true if the interests and assumptions of this node
445  // matches the given one.
446  bool Matches(NodeInfo* that) {
447  return (at_end == that->at_end) &&
451  }
452 
453  // Updates the interests of this node given the interests of the
454  // node preceding it.
456  at_end |= that->at_end;
460  }
461 
462  bool HasLookbehind() {
463  return follows_word_interest ||
466  }
467 
468  // Sets the interests of this node to include the interests of the
469  // following node.
474  }
475 
477  being_analyzed = false;
478  been_analyzed = false;
479  }
480 
481  bool being_analyzed: 1;
482  bool been_analyzed: 1;
483 
484  // These bits are set of this node has to know what the preceding
485  // character was.
489 
490  bool at_end: 1;
491  bool visited: 1;
493 };
494 
495 
496 // Details of a quick mask-compare check that can look ahead in the
497 // input stream.
499  public:
501  : characters_(0),
502  mask_(0),
503  value_(0),
504  cannot_match_(false) { }
506  : characters_(characters),
507  mask_(0),
508  value_(0),
509  cannot_match_(false) { }
510  bool Rationalize(bool ascii);
511  // Merge in the information from another branch of an alternation.
512  void Merge(QuickCheckDetails* other, int from_index);
513  // Advance the current position by some amount.
514  void Advance(int by, bool ascii);
515  void Clear();
516  bool cannot_match() { return cannot_match_; }
517  void set_cannot_match() { cannot_match_ = true; }
518  struct Position {
523  };
524  int characters() { return characters_; }
525  void set_characters(int characters) { characters_ = characters; }
526  Position* positions(int index) {
527  ASSERT(index >= 0);
528  ASSERT(index < characters_);
529  return positions_ + index;
530  }
531  uint32_t mask() { return mask_; }
532  uint32_t value() { return value_; }
533 
534  private:
535  // How many characters do we have quick check information from. This is
536  // the same for all branches of a choice node.
537  int characters_;
538  Position positions_[4];
539  // These values are the condensate of the above array after Rationalize().
540  uint32_t mask_;
541  uint32_t value_;
542  // If set to true, there is no way this quick check can match at all.
543  // E.g., if it requires to be at the start of the input, and isn't.
544  bool cannot_match_;
545 };
546 
547 
549 
550 
551 class RegExpNode: public ZoneObject {
552  public:
553  explicit RegExpNode(Zone* zone)
554  : replacement_(NULL), trace_count_(0), zone_(zone) {
555  bm_info_[0] = bm_info_[1] = NULL;
556  }
557  virtual ~RegExpNode();
558  virtual void Accept(NodeVisitor* visitor) = 0;
559  // Generates a goto to this node or actually generates the code at this point.
560  virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
561  // How many characters must this node consume at a minimum in order to
562  // succeed. If we have found at least 'still_to_find' characters that
563  // must be consumed there is no need to ask any following nodes whether
564  // they are sure to eat any more characters. The not_at_start argument is
565  // used to indicate that we know we are not at the start of the input. In
566  // this case anchored branches will always fail and can be ignored when
567  // determining how many characters are consumed on success.
568  virtual int EatsAtLeast(int still_to_find,
569  int recursion_depth,
570  bool not_at_start) = 0;
571  // Emits some quick code that checks whether the preloaded characters match.
572  // Falls through on certain failure, jumps to the label on possible success.
573  // If the node cannot make a quick check it does nothing and returns false.
574  bool EmitQuickCheck(RegExpCompiler* compiler,
575  Trace* trace,
576  bool preload_has_checked_bounds,
577  Label* on_possible_success,
578  QuickCheckDetails* details_return,
579  bool fall_through_on_failure);
580  // For a given number of characters this returns a mask and a value. The
581  // next n characters are anded with the mask and compared with the value.
582  // A comparison failure indicates the node cannot match the next n characters.
583  // A comparison success indicates the node may match.
584  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
585  RegExpCompiler* compiler,
586  int characters_filled_in,
587  bool not_at_start) = 0;
588  static const int kNodeIsTooComplexForGreedyLoops = -1;
590  // Only returns the successor for a text node of length 1 that matches any
591  // character and that has no guards on it.
593  RegExpCompiler* compiler) {
594  return NULL;
595  }
596 
597  // Collects information on the possible code units (mod 128) that can match if
598  // we look forward. This is used for a Boyer-Moore-like string searching
599  // implementation. TODO(erikcorry): This should share more code with
600  // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
601  // the number of nodes we are willing to look at in order to create this data.
602  static const int kFillInBMBudget = 200;
603  virtual void FillInBMInfo(int offset,
604  int recursion_depth,
605  int budget,
607  bool not_at_start) {
608  UNREACHABLE();
609  }
610 
611  // If we know that the input is ASCII then there are some nodes that can
612  // never match. This method returns a node that can be substituted for
613  // itself, or NULL if the node can never match.
614  virtual RegExpNode* FilterASCII(int depth) { return this; }
615  // Helper for FilterASCII.
617  ASSERT(info()->replacement_calculated);
618  return replacement_;
619  }
621  info()->replacement_calculated = true;
623  return replacement; // For convenience.
624  }
625 
626  // We want to avoid recalculating the lookahead info, so we store it on the
627  // node. Only info that is for this node is stored. We can tell that the
628  // info is for this node when offset == 0, so the information is calculated
629  // relative to this node.
630  void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
631  if (offset == 0) set_bm_info(not_at_start, bm);
632  }
633 
634  Label* label() { return &label_; }
635  // If non-generic code is generated for a node (i.e. the node is not at the
636  // start of the trace) then it cannot be reused. This variable sets a limit
637  // on how often we allow that to happen before we insist on starting a new
638  // trace and generating generic code for a node that can be reused by flushing
639  // the deferred actions in the current trace and generating a goto.
640  static const int kMaxCopiesCodeGenerated = 10;
641 
642  NodeInfo* info() { return &info_; }
643 
644  BoyerMooreLookahead* bm_info(bool not_at_start) {
645  return bm_info_[not_at_start ? 1 : 0];
646  }
647 
648  Zone* zone() const { return zone_; }
649 
650  protected:
653 
654  LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
655 
656  void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
657  bm_info_[not_at_start ? 1 : 0] = bm;
658  }
659 
660  private:
661  static const int kFirstCharBudget = 10;
662  Label label_;
663  NodeInfo info_;
664  // This variable keeps track of how many times code has been generated for
665  // this node (in different traces). We don't keep track of where the
666  // generated code is located unless the code is generated at the start of
667  // a trace, in which case it is generic and can be reused by flushing the
668  // deferred operations in the current trace and generating a goto.
669  int trace_count_;
670  BoyerMooreLookahead* bm_info_[2];
671 
672  Zone* zone_;
673 };
674 
675 
676 // A simple closed interval.
677 class Interval {
678  public:
679  Interval() : from_(kNone), to_(kNone) { }
680  Interval(int from, int to) : from_(from), to_(to) { }
682  if (that.from_ == kNone)
683  return *this;
684  else if (from_ == kNone)
685  return that;
686  else
687  return Interval(Min(from_, that.from_), Max(to_, that.to_));
688  }
689  bool Contains(int value) {
690  return (from_ <= value) && (value <= to_);
691  }
692  bool is_empty() { return from_ == kNone; }
693  int from() const { return from_; }
694  int to() const { return to_; }
695  static Interval Empty() { return Interval(); }
696  static const int kNone = -1;
697  private:
698  int from_;
699  int to_;
700 };
701 
702 
703 class SeqRegExpNode: public RegExpNode {
704  public:
706  : RegExpNode(on_success->zone()), on_success_(on_success) { }
707  RegExpNode* on_success() { return on_success_; }
708  void set_on_success(RegExpNode* node) { on_success_ = node; }
709  virtual RegExpNode* FilterASCII(int depth);
710  virtual void FillInBMInfo(int offset,
711  int recursion_depth,
712  int budget,
714  bool not_at_start) {
715  on_success_->FillInBMInfo(
716  offset, recursion_depth + 1, budget - 1, bm, not_at_start);
717  if (offset == 0) set_bm_info(not_at_start, bm);
718  }
719 
720  protected:
721  RegExpNode* FilterSuccessor(int depth);
722 
723  private:
724  RegExpNode* on_success_;
725 };
726 
727 
728 class ActionNode: public SeqRegExpNode {
729  public:
730  enum Type {
738  };
739  static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
741  static ActionNode* StorePosition(int reg,
742  bool is_capture,
745  static ActionNode* BeginSubmatch(int stack_pointer_reg,
746  int position_reg,
748  static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
749  int restore_reg,
750  int clear_capture_count,
751  int clear_capture_from,
755  int repetition_limit,
757  virtual void Accept(NodeVisitor* visitor);
758  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
759  virtual int EatsAtLeast(int still_to_find,
760  int recursion_depth,
761  bool not_at_start);
763  RegExpCompiler* compiler,
764  int filled_in,
765  bool not_at_start) {
767  details, compiler, filled_in, not_at_start);
768  }
769  virtual void FillInBMInfo(int offset,
770  int recursion_depth,
771  int budget,
773  bool not_at_start);
774  Type type() { return type_; }
775  // TODO(erikcorry): We should allow some action nodes in greedy loops.
777 
778  private:
779  union {
780  struct {
781  int reg;
782  int value;
784  struct {
785  int reg;
787  struct {
788  int reg;
791  struct {
796  } u_submatch;
797  struct {
802  struct {
804  int range_to;
806  } data_;
807  ActionNode(Type type, RegExpNode* on_success)
808  : SeqRegExpNode(on_success),
809  type_(type) { }
810  Type type_;
811  friend class DotPrinter;
812 };
813 
814 
815 class TextNode: public SeqRegExpNode {
816  public:
819  : SeqRegExpNode(on_success),
820  elms_(elms) { }
823  : SeqRegExpNode(on_success),
824  elms_(new(zone()) ZoneList<TextElement>(1, zone())) {
825  elms_->Add(TextElement::CharClass(that), zone());
826  }
827  virtual void Accept(NodeVisitor* visitor);
828  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
829  virtual int EatsAtLeast(int still_to_find,
830  int recursion_depth,
831  bool not_at_start);
832  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
833  RegExpCompiler* compiler,
834  int characters_filled_in,
835  bool not_at_start);
836  ZoneList<TextElement>* elements() { return elms_; }
837  void MakeCaseIndependent(bool is_ascii);
838  virtual int GreedyLoopTextLength();
840  RegExpCompiler* compiler);
841  virtual void FillInBMInfo(int offset,
842  int recursion_depth,
843  int budget,
845  bool not_at_start);
846  void CalculateOffsets();
847  virtual RegExpNode* FilterASCII(int depth);
848 
849  private:
850  enum TextEmitPassType {
851  NON_ASCII_MATCH, // Check for characters that can't match.
852  SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
853  NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
854  CASE_CHARACTER_MATCH, // Case-independent single character check.
855  CHARACTER_CLASS_MATCH // Character class.
856  };
857  static bool SkipPass(int pass, bool ignore_case);
858  static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
859  static const int kLastPass = CHARACTER_CLASS_MATCH;
860  void TextEmitPass(RegExpCompiler* compiler,
861  TextEmitPassType pass,
862  bool preloaded,
863  Trace* trace,
864  bool first_element_checked,
865  int* checked_up_to);
866  int Length();
867  ZoneList<TextElement>* elms_;
868 };
869 
870 
872  public:
879  };
881  return new(on_success->zone()) AssertionNode(AT_END, on_success);
882  }
884  return new(on_success->zone()) AssertionNode(AT_START, on_success);
885  }
887  return new(on_success->zone()) AssertionNode(AT_BOUNDARY, on_success);
888  }
890  return new(on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success);
891  }
893  return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success);
894  }
895  virtual void Accept(NodeVisitor* visitor);
896  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
897  virtual int EatsAtLeast(int still_to_find,
898  int recursion_depth,
899  bool not_at_start);
900  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
901  RegExpCompiler* compiler,
902  int filled_in,
903  bool not_at_start);
904  virtual void FillInBMInfo(int offset,
905  int recursion_depth,
906  int budget,
908  bool not_at_start);
909  AssertionNodeType type() { return type_; }
910  void set_type(AssertionNodeType type) { type_ = type; }
911 
912  private:
913  void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
914  enum IfPrevious { kIsNonWord, kIsWord };
915  void BacktrackIfPrevious(RegExpCompiler* compiler,
916  Trace* trace,
917  IfPrevious backtrack_if_previous);
918  AssertionNode(AssertionNodeType t, RegExpNode* on_success)
919  : SeqRegExpNode(on_success), type_(t) { }
920  AssertionNodeType type_;
921 };
922 
923 
925  public:
926  BackReferenceNode(int start_reg,
927  int end_reg,
929  : SeqRegExpNode(on_success),
930  start_reg_(start_reg),
931  end_reg_(end_reg) { }
932  virtual void Accept(NodeVisitor* visitor);
933  int start_register() { return start_reg_; }
934  int end_register() { return end_reg_; }
935  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
936  virtual int EatsAtLeast(int still_to_find,
937  int recursion_depth,
938  bool not_at_start);
940  RegExpCompiler* compiler,
941  int characters_filled_in,
942  bool not_at_start) {
943  return;
944  }
945  virtual void FillInBMInfo(int offset,
946  int recursion_depth,
947  int budget,
949  bool not_at_start);
950 
951  private:
952  int start_reg_;
953  int end_reg_;
954 };
955 
956 
957 class EndNode: public RegExpNode {
958  public:
960  explicit EndNode(Action action, Zone* zone)
961  : RegExpNode(zone), action_(action) { }
962  virtual void Accept(NodeVisitor* visitor);
963  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
964  virtual int EatsAtLeast(int still_to_find,
965  int recursion_depth,
966  bool not_at_start) { return 0; }
968  RegExpCompiler* compiler,
969  int characters_filled_in,
970  bool not_at_start) {
971  // Returning 0 from EatsAtLeast should ensure we never get here.
972  UNREACHABLE();
973  }
974  virtual void FillInBMInfo(int offset,
975  int recursion_depth,
976  int budget,
978  bool not_at_start) {
979  // Returning 0 from EatsAtLeast should ensure we never get here.
980  UNREACHABLE();
981  }
982 
983  private:
984  Action action_;
985 };
986 
987 
989  public:
990  NegativeSubmatchSuccess(int stack_pointer_reg,
991  int position_reg,
992  int clear_capture_count,
993  int clear_capture_start,
994  Zone* zone)
996  stack_pointer_register_(stack_pointer_reg),
997  current_position_register_(position_reg),
998  clear_capture_count_(clear_capture_count),
999  clear_capture_start_(clear_capture_start) { }
1000  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1001 
1002  private:
1003  int stack_pointer_register_;
1004  int current_position_register_;
1005  int clear_capture_count_;
1006  int clear_capture_start_;
1007 };
1008 
1009 
1010 class Guard: public ZoneObject {
1011  public:
1012  enum Relation { LT, GEQ };
1014  : reg_(reg),
1015  op_(op),
1016  value_(value) { }
1017  int reg() { return reg_; }
1018  Relation op() { return op_; }
1019  int value() { return value_; }
1020 
1021  private:
1022  int reg_;
1023  Relation op_;
1024  int value_;
1025 };
1026 
1027 
1029  public:
1030  explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
1031  void AddGuard(Guard* guard, Zone* zone);
1032  RegExpNode* node() { return node_; }
1033  void set_node(RegExpNode* node) { node_ = node; }
1034  ZoneList<Guard*>* guards() { return guards_; }
1035 
1036  private:
1037  RegExpNode* node_;
1038  ZoneList<Guard*>* guards_;
1039 };
1040 
1041 
1042 class AlternativeGeneration;
1043 
1044 
1045 class ChoiceNode: public RegExpNode {
1046  public:
1047  explicit ChoiceNode(int expected_size, Zone* zone)
1048  : RegExpNode(zone),
1049  alternatives_(new(zone)
1050  ZoneList<GuardedAlternative>(expected_size, zone)),
1051  table_(NULL),
1052  not_at_start_(false),
1053  being_calculated_(false) { }
1054  virtual void Accept(NodeVisitor* visitor);
1056  alternatives()->Add(node, zone());
1057  }
1059  DispatchTable* GetTable(bool ignore_case);
1060  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1061  virtual int EatsAtLeast(int still_to_find,
1062  int recursion_depth,
1063  bool not_at_start);
1064  int EatsAtLeastHelper(int still_to_find,
1065  int recursion_depth,
1066  RegExpNode* ignore_this_node,
1067  bool not_at_start);
1068  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1069  RegExpCompiler* compiler,
1070  int characters_filled_in,
1071  bool not_at_start);
1072  virtual void FillInBMInfo(int offset,
1073  int recursion_depth,
1074  int budget,
1075  BoyerMooreLookahead* bm,
1076  bool not_at_start);
1077 
1078  bool being_calculated() { return being_calculated_; }
1079  bool not_at_start() { return not_at_start_; }
1080  void set_not_at_start() { not_at_start_ = true; }
1081  void set_being_calculated(bool b) { being_calculated_ = b; }
1082  virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
1083  virtual RegExpNode* FilterASCII(int depth);
1084 
1085  protected:
1088 
1089  private:
1091  friend class Analysis;
1092  void GenerateGuard(RegExpMacroAssembler* macro_assembler,
1093  Guard* guard,
1094  Trace* trace);
1095  int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
1096  void EmitOutOfLineContinuation(RegExpCompiler* compiler,
1097  Trace* trace,
1098  GuardedAlternative alternative,
1099  AlternativeGeneration* alt_gen,
1100  int preload_characters,
1101  bool next_expects_preload);
1102  DispatchTable* table_;
1103  // If true, this node is never checked at the start of the input.
1104  // Allows a new trace to start with at_start() set to false.
1105  bool not_at_start_;
1106  bool being_calculated_;
1107 };
1108 
1109 
1111  public:
1113  GuardedAlternative then_do_this,
1114  Zone* zone)
1115  : ChoiceNode(2, zone) {
1116  AddAlternative(this_must_fail);
1117  AddAlternative(then_do_this);
1118  }
1119  virtual int EatsAtLeast(int still_to_find,
1120  int recursion_depth,
1121  bool not_at_start);
1122  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1123  RegExpCompiler* compiler,
1124  int characters_filled_in,
1125  bool not_at_start);
1126  virtual void FillInBMInfo(int offset,
1127  int recursion_depth,
1128  int budget,
1129  BoyerMooreLookahead* bm,
1130  bool not_at_start) {
1131  alternatives_->at(1).node()->FillInBMInfo(
1132  offset, recursion_depth + 1, budget - 1, bm, not_at_start);
1133  if (offset == 0) set_bm_info(not_at_start, bm);
1134  }
1135  // For a negative lookahead we don't emit the quick check for the
1136  // alternative that is expected to fail. This is because quick check code
1137  // starts by loading enough characters for the alternative that takes fewest
1138  // characters, but on a negative lookahead the negative branch did not take
1139  // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1140  virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1141  virtual RegExpNode* FilterASCII(int depth);
1142 };
1143 
1144 
1146  public:
1148  : ChoiceNode(2, zone),
1149  loop_node_(NULL),
1150  continue_node_(NULL),
1151  body_can_be_zero_length_(body_can_be_zero_length) { }
1154  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1155  virtual int EatsAtLeast(int still_to_find,
1156  int recursion_depth,
1157  bool not_at_start);
1158  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1159  RegExpCompiler* compiler,
1160  int characters_filled_in,
1161  bool not_at_start);
1162  virtual void FillInBMInfo(int offset,
1163  int recursion_depth,
1164  int budget,
1165  BoyerMooreLookahead* bm,
1166  bool not_at_start);
1167  RegExpNode* loop_node() { return loop_node_; }
1168  RegExpNode* continue_node() { return continue_node_; }
1169  bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1170  virtual void Accept(NodeVisitor* visitor);
1171  virtual RegExpNode* FilterASCII(int depth);
1172 
1173  private:
1174  // AddAlternative is made private for loop nodes because alternatives
1175  // should not be added freely, we need to keep track of which node
1176  // goes back to the node itself.
1177  void AddAlternative(GuardedAlternative node) {
1179  }
1180 
1181  RegExpNode* loop_node_;
1182  RegExpNode* continue_node_;
1183  bool body_can_be_zero_length_;
1184 };
1185 
1186 
1187 // Improve the speed that we scan for an initial point where a non-anchored
1188 // regexp can match by using a Boyer-Moore-like table. This is done by
1189 // identifying non-greedy non-capturing loops in the nodes that eat any
1190 // character one at a time. For example in the middle of the regexp
1191 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly
1192 // inserted at the start of any non-anchored regexp.
1193 //
1194 // When we have found such a loop we look ahead in the nodes to find the set of
1195 // characters that can come at given distances. For example for the regexp
1196 // /.?foo/ we know that there are at least 3 characters ahead of us, and the
1197 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in
1198 // the lookahead info where the set of characters is reasonably constrained. In
1199 // our example this is from index 1 to 2 (0 is not constrained). We can now
1200 // look 3 characters ahead and if we don't find one of [f, o] (the union of
1201 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
1202 //
1203 // For Unicode input strings we do the same, but modulo 128.
1204 //
1205 // We also look at the first string fed to the regexp and use that to get a hint
1206 // of the character frequencies in the inputs. This affects the assessment of
1207 // whether the set of characters is 'reasonably constrained'.
1208 //
1209 // We also have another lookahead mechanism (called quick check in the code),
1210 // which uses a wide load of multiple characters followed by a mask and compare
1211 // to determine whether a match is possible at this point.
1213  kNotYet = 0,
1216  kLatticeUnknown = 3 // Can also mean both in and out.
1217 };
1218 
1219 
1221  return static_cast<ContainedInLattice>(a | b);
1222 }
1223 
1224 
1226  const int* ranges,
1227  int ranges_size,
1228  Interval new_range);
1229 
1230 
1232  public:
1234  : map_(new(zone) ZoneList<bool>(kMapSize, zone)),
1235  map_count_(0),
1236  w_(kNotYet),
1237  s_(kNotYet),
1238  d_(kNotYet),
1239  surrogate_(kNotYet) {
1240  for (int i = 0; i < kMapSize; i++) {
1241  map_->Add(false, zone);
1242  }
1243  }
1244 
1245  bool& at(int i) { return map_->at(i); }
1246 
1247  static const int kMapSize = 128;
1248  static const int kMask = kMapSize - 1;
1249 
1250  int map_count() const { return map_count_; }
1251 
1252  void Set(int character);
1253  void SetInterval(const Interval& interval);
1254  void SetAll();
1255  bool is_non_word() { return w_ == kLatticeOut; }
1256  bool is_word() { return w_ == kLatticeIn; }
1257 
1258  private:
1259  ZoneList<bool>* map_;
1260  int map_count_; // Number of set bits in the map.
1261  ContainedInLattice w_; // The \w character class.
1262  ContainedInLattice s_; // The \s character class.
1263  ContainedInLattice d_; // The \d character class.
1264  ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
1265 };
1266 
1267 
1269  public:
1271 
1272  int length() { return length_; }
1273  int max_char() { return max_char_; }
1274  RegExpCompiler* compiler() { return compiler_; }
1275 
1276  int Count(int map_number) {
1277  return bitmaps_->at(map_number)->map_count();
1278  }
1279 
1280  BoyerMoorePositionInfo* at(int i) { return bitmaps_->at(i); }
1281 
1282  void Set(int map_number, int character) {
1283  if (character > max_char_) return;
1284  BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1285  info->Set(character);
1286  }
1287 
1288  void SetInterval(int map_number, const Interval& interval) {
1289  if (interval.from() > max_char_) return;
1290  BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1291  if (interval.to() > max_char_) {
1292  info->SetInterval(Interval(interval.from(), max_char_));
1293  } else {
1294  info->SetInterval(interval);
1295  }
1296  }
1297 
1298  void SetAll(int map_number) {
1299  bitmaps_->at(map_number)->SetAll();
1300  }
1301 
1302  void SetRest(int from_map) {
1303  for (int i = from_map; i < length_; i++) SetAll(i);
1304  }
1306 
1307  private:
1308  // This is the value obtained by EatsAtLeast. If we do not have at least this
1309  // many characters left in the sample string then the match is bound to fail.
1310  // Therefore it is OK to read a character this far ahead of the current match
1311  // point.
1312  int length_;
1313  RegExpCompiler* compiler_;
1314  // 0x7f for ASCII, 0xffff for UTF-16.
1315  int max_char_;
1317 
1318  int GetSkipTable(int min_lookahead,
1319  int max_lookahead,
1320  Handle<ByteArray> boolean_skip_table);
1321  bool FindWorthwhileInterval(int* from, int* to);
1322  int FindBestInterval(
1323  int max_number_of_chars, int old_biggest_points, int* from, int* to);
1324 };
1325 
1326 
1327 // There are many ways to generate code for a node. This class encapsulates
1328 // the current way we should be generating. In other words it encapsulates
1329 // the current state of the code generator. The effect of this is that we
1330 // generate code for paths that the matcher can take through the regular
1331 // expression. A given node in the regexp can be code-generated several times
1332 // as it can be part of several traces. For example for the regexp:
1333 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
1334 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code
1335 // to match foo is generated only once (the traces have a common prefix). The
1336 // code to store the capture is deferred and generated (twice) after the places
1337 // where baz has been matched.
1338 class Trace {
1339  public:
1340  // A value for a property that is either known to be true, know to be false,
1341  // or not known.
1342  enum TriBool {
1343  UNKNOWN = -1, FALSE = 0, TRUE = 1
1344  };
1345 
1347  public:
1349  : type_(type), reg_(reg), next_(NULL) { }
1350  DeferredAction* next() { return next_; }
1351  bool Mentions(int reg);
1352  int reg() { return reg_; }
1353  ActionNode::Type type() { return type_; }
1354  private:
1355  ActionNode::Type type_;
1356  int reg_;
1357  DeferredAction* next_;
1358  friend class Trace;
1359  };
1360 
1362  public:
1364  : DeferredAction(ActionNode::STORE_POSITION, reg),
1365  cp_offset_(trace->cp_offset()),
1366  is_capture_(is_capture) { }
1367  int cp_offset() { return cp_offset_; }
1368  bool is_capture() { return is_capture_; }
1369  private:
1370  int cp_offset_;
1371  bool is_capture_;
1372  void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
1373  };
1374 
1376  public:
1378  : DeferredAction(ActionNode::SET_REGISTER, reg),
1379  value_(value) { }
1380  int value() { return value_; }
1381  private:
1382  int value_;
1383  };
1384 
1386  public:
1388  : DeferredAction(ActionNode::CLEAR_CAPTURES, -1),
1389  range_(range) { }
1390  Interval range() { return range_; }
1391  private:
1392  Interval range_;
1393  };
1394 
1396  public:
1398  : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
1399  };
1400 
1402  : cp_offset_(0),
1403  actions_(NULL),
1404  backtrack_(NULL),
1405  stop_node_(NULL),
1406  loop_label_(NULL),
1407  characters_preloaded_(0),
1408  bound_checked_up_to_(0),
1409  flush_budget_(100),
1410  at_start_(UNKNOWN) { }
1411 
1412  // End the trace. This involves flushing the deferred actions in the trace
1413  // and pushing a backtrack location onto the backtrack stack. Once this is
1414  // done we can start a new trace or go to one that has already been
1415  // generated.
1416  void Flush(RegExpCompiler* compiler, RegExpNode* successor);
1417  int cp_offset() { return cp_offset_; }
1418  DeferredAction* actions() { return actions_; }
1419  // A trivial trace is one that has no deferred actions or other state that
1420  // affects the assumptions used when generating code. There is no recorded
1421  // backtrack location in a trivial trace, so with a trivial trace we will
1422  // generate code that, on a failure to match, gets the backtrack location
1423  // from the backtrack stack rather than using a direct jump instruction. We
1424  // always start code generation with a trivial trace and non-trivial traces
1425  // are created as we emit code for nodes or add to the list of deferred
1426  // actions in the trace. The location of the code generated for a node using
1427  // a trivial trace is recorded in a label in the node so that gotos can be
1428  // generated to that code.
1429  bool is_trivial() {
1430  return backtrack_ == NULL &&
1431  actions_ == NULL &&
1432  cp_offset_ == 0 &&
1433  characters_preloaded_ == 0 &&
1434  bound_checked_up_to_ == 0 &&
1435  quick_check_performed_.characters() == 0 &&
1436  at_start_ == UNKNOWN;
1437  }
1438  TriBool at_start() { return at_start_; }
1439  void set_at_start(bool at_start) { at_start_ = at_start ? TRUE : FALSE; }
1440  Label* backtrack() { return backtrack_; }
1441  Label* loop_label() { return loop_label_; }
1442  RegExpNode* stop_node() { return stop_node_; }
1443  int characters_preloaded() { return characters_preloaded_; }
1444  int bound_checked_up_to() { return bound_checked_up_to_; }
1445  int flush_budget() { return flush_budget_; }
1446  QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
1447  bool mentions_reg(int reg);
1448  // Returns true if a deferred position store exists to the specified
1449  // register and stores the offset in the out-parameter. Otherwise
1450  // returns false.
1451  bool GetStoredPosition(int reg, int* cp_offset);
1452  // These set methods and AdvanceCurrentPositionInTrace should be used only on
1453  // new traces - the intention is that traces are immutable after creation.
1454  void add_action(DeferredAction* new_action) {
1455  ASSERT(new_action->next_ == NULL);
1456  new_action->next_ = actions_;
1457  actions_ = new_action;
1458  }
1459  void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
1460  void set_stop_node(RegExpNode* node) { stop_node_ = node; }
1461  void set_loop_label(Label* label) { loop_label_ = label; }
1462  void set_characters_preloaded(int count) { characters_preloaded_ = count; }
1463  void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
1464  void set_flush_budget(int to) { flush_budget_ = to; }
1466  quick_check_performed_ = *d;
1467  }
1469  void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
1470 
1471  private:
1472  int FindAffectedRegisters(OutSet* affected_registers, Zone* zone);
1473  void PerformDeferredActions(RegExpMacroAssembler* macro,
1474  int max_register,
1475  OutSet& affected_registers,
1476  OutSet* registers_to_pop,
1477  OutSet* registers_to_clear,
1478  Zone* zone);
1479  void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
1480  int max_register,
1481  OutSet& registers_to_pop,
1482  OutSet& registers_to_clear);
1483  int cp_offset_;
1484  DeferredAction* actions_;
1485  Label* backtrack_;
1486  RegExpNode* stop_node_;
1487  Label* loop_label_;
1488  int characters_preloaded_;
1489  int bound_checked_up_to_;
1490  QuickCheckDetails quick_check_performed_;
1491  int flush_budget_;
1492  TriBool at_start_;
1493 };
1494 
1495 
1497  public:
1498  virtual ~NodeVisitor() { }
1499 #define DECLARE_VISIT(Type) \
1500  virtual void Visit##Type(Type##Node* that) = 0;
1502 #undef DECLARE_VISIT
1503  virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
1504 };
1505 
1506 
1507 // Node visitor used to add the start set of the alternatives to the
1508 // dispatch table of a choice node.
1510  public:
1512  Zone* zone)
1513  : table_(table),
1514  choice_index_(-1),
1515  ignore_case_(ignore_case),
1516  zone_(zone) { }
1517 
1518  void BuildTable(ChoiceNode* node);
1519 
1520  void AddRange(CharacterRange range) {
1521  table()->AddRange(range, choice_index_, zone_);
1522  }
1523 
1524  void AddInverse(ZoneList<CharacterRange>* ranges);
1525 
1526 #define DECLARE_VISIT(Type) \
1527  virtual void Visit##Type(Type##Node* that);
1529 #undef DECLARE_VISIT
1530 
1531  DispatchTable* table() { return table_; }
1532  void set_choice_index(int value) { choice_index_ = value; }
1533 
1534  protected:
1539 };
1540 
1541 
1542 // Assertion propagation moves information about assertions such as
1543 // \b to the affected nodes. For instance, in /.\b./ information must
1544 // be propagated to the first '.' that whatever follows needs to know
1545 // if it matched a word or a non-word, and to the second '.' that it
1546 // has to check if it succeeds a word or non-word. In this case the
1547 // result will be something like:
1548 //
1549 // +-------+ +------------+
1550 // | . | | . |
1551 // +-------+ ---> +------------+
1552 // | word? | | check word |
1553 // +-------+ +------------+
1554 class Analysis: public NodeVisitor {
1555  public:
1556  Analysis(bool ignore_case, bool is_ascii)
1557  : ignore_case_(ignore_case),
1558  is_ascii_(is_ascii),
1559  error_message_(NULL) { }
1560  void EnsureAnalyzed(RegExpNode* node);
1561 
1562 #define DECLARE_VISIT(Type) \
1563  virtual void Visit##Type(Type##Node* that);
1565 #undef DECLARE_VISIT
1566  virtual void VisitLoopChoice(LoopChoiceNode* that);
1567 
1568  bool has_failed() { return error_message_ != NULL; }
1569  const char* error_message() {
1570  ASSERT(error_message_ != NULL);
1571  return error_message_;
1572  }
1573  void fail(const char* error_message) {
1574  error_message_ = error_message;
1575  }
1576 
1577  private:
1578  bool ignore_case_;
1579  bool is_ascii_;
1580  const char* error_message_;
1581 
1582  DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
1583 };
1584 
1585 
1588  : tree(NULL),
1589  node(NULL),
1590  simple(true),
1592  capture_count(0) { }
1595  bool simple;
1599 };
1600 
1601 
1602 class RegExpEngine: public AllStatic {
1603  public:
1605  explicit CompilationResult(const char* error_message)
1606  : error_message(error_message),
1607  code(HEAP->the_hole_value()),
1608  num_registers(0) {}
1609  CompilationResult(Object* code, int registers)
1610  : error_message(NULL),
1611  code(code),
1612  num_registers(registers) {}
1613  const char* error_message;
1616  };
1617 
1619  bool ignore_case,
1620  bool global,
1621  bool multiline,
1622  Handle<String> pattern,
1623  Handle<String> sample_subject,
1624  bool is_ascii, Zone* zone);
1625 
1626  static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1627 };
1628 
1629 
1631  public:
1632  inline OffsetsVector(int num_registers, Isolate* isolate)
1633  : offsets_vector_length_(num_registers) {
1634  if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
1635  vector_ = NewArray<int>(offsets_vector_length_);
1636  } else {
1637  vector_ = isolate->jsregexp_static_offsets_vector();
1638  }
1639  }
1640  inline ~OffsetsVector() {
1641  if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
1642  DeleteArray(vector_);
1643  vector_ = NULL;
1644  }
1645  }
1646  inline int* vector() { return vector_; }
1647  inline int length() { return offsets_vector_length_; }
1648 
1649  static const int kStaticOffsetsVectorSize =
1651 
1652  private:
1653  static Address static_offsets_vector_address(Isolate* isolate) {
1654  return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector());
1655  }
1656 
1657  int* vector_;
1658  int offsets_vector_length_;
1659 
1660  friend class ExternalReference;
1661 };
1662 
1663 
1664 } } // namespace v8::internal
1665 
1666 #endif // V8_JSREGEXP_H_
byte * Address
Definition: globals.h:172
#define FORWARD_DECLARE(Name)
Definition: jsregexp.h:408
void SetInterval(const Interval &interval)
Definition: jsregexp.cc:3511
OutSet * Get(uc16 value)
Definition: jsregexp.cc:5555
AssertionNodeType type()
Definition: jsregexp.h:909
Analysis(bool ignore_case, bool is_ascii)
Definition: jsregexp.h:1556
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5672
RegExpNode * stop_node()
Definition: jsregexp.h:1442
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT)
Definition: jsregexp.h:394
int GreedyLoopTextLengthForAlternative(GuardedAlternative *alternative)
Definition: jsregexp.cc:3361
virtual void Accept(NodeVisitor *visitor)
static Vector< const int > GetWordBounds()
Definition: jsregexp.cc:5127
virtual bool try_to_emit_quick_check_for_alternative(int i)
Definition: jsregexp.h:1140
static AssertionNode * AtStart(RegExpNode *on_success)
Definition: jsregexp.h:883
void SetInterval(int map_number, const Interval &interval)
Definition: jsregexp.h:1288
static ActionNode * SetRegister(int reg, int val, RegExpNode *on_success)
Definition: jsregexp.cc:1402
static Code * IrregexpNativeCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:486
void set(int index, Object *value)
Definition: objects-inl.h:1695
RegExpNode * continue_node()
Definition: jsregexp.h:1168
TextNode(RegExpCharacterClass *that, RegExpNode *on_success)
Definition: jsregexp.h:821
static const int kStaticOffsetsVectorSize
Definition: jsregexp.h:1649
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1344
DeferredCapture(int reg, bool is_capture, Trace *trace)
Definition: jsregexp.h:1363
static ActionNode * BeginSubmatch(int stack_pointer_reg, int position_reg, RegExpNode *on_success)
Definition: jsregexp.cc:1442
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2264
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3226
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:1126
void set_characters(int characters)
Definition: jsregexp.h:525
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2274
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.h:592
static Smi * FromInt(int value)
Definition: objects-inl.h:973
bool Contains(int value)
Definition: jsregexp.h:689
int to() const
Definition: jsregexp.h:694
void set_at_start(bool at_start)
Definition: jsregexp.h:1439
static const int kLastCaptureCount
Definition: jsregexp.h:143
bool EmitQuickCheck(RegExpCompiler *compiler, Trace *trace, bool preload_has_checked_bounds, Label *on_possible_success, QuickCheckDetails *details_return, bool fall_through_on_failure)
Definition: jsregexp.cc:2384
void set_loop_label(Label *label)
Definition: jsregexp.h:1461
static const int kJSRegexpStaticOffsetsVectorSize
Definition: isolate.h:982
static ActionNode * EmptyMatchCheck(int start_register, int repetition_register, int repetition_limit, RegExpNode *on_success)
Definition: jsregexp.cc:1468
T Max(T a, T b)
Definition: utils.h:222
Position * positions(int index)
Definition: jsregexp.h:526
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.cc:3333
OffsetsVector(int num_registers, Isolate *isolate)
Definition: jsregexp.h:1632
RegExpNode(Zone *zone)
Definition: jsregexp.h:553
LimitResult LimitVersions(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:2165
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1371
static Handle< Object > CreateRegExpLiteral(Handle< JSFunction > constructor, Handle< String > pattern, Handle< String > flags, bool *has_pending_exception)
Definition: jsregexp.cc:66
static TextElement CharClass(RegExpCharacterClass *char_class)
Definition: jsregexp.cc:851
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2702
virtual int GreedyLoopTextLength()
Definition: jsregexp.h:776
void AddFromFollowing(NodeInfo *that)
Definition: jsregexp.h:470
static int IrregexpPrepare(Handle< JSRegExp > regexp, Handle< String > subject, Zone *zone)
Definition: jsregexp.cc:504
static ActionNode * IncrementRegister(int reg, RegExpNode *on_success)
Definition: jsregexp.cc:1413
void AddRange(CharacterRange range, int value, Zone *zone)
Definition: jsregexp.cc:5466
uc16 to()
Definition: jsregexp.h:338
T & at(int i) const
Definition: list.h:85
static int IrregexpExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, Vector< int > registers, Zone *zone)
Definition: jsregexp.cc:540
Flag flags[]
Definition: flags.cc:1467
static int IrregexpNumberOfCaptures(FixedArray *re)
Definition: jsregexp.cc:471
void set_to(uc16 value)
Definition: jsregexp.h:339
RegExpNode * replacement_
Definition: jsregexp.h:652
static void Canonicalize(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5355
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2836
void EnsureAnalyzed(RegExpNode *node)
Definition: jsregexp.cc:5571
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2329
void set_flush_budget(int to)
Definition: jsregexp.h:1464
CompilationResult(Object *code, int registers)
Definition: jsregexp.h:1609
static CharacterRange Everything()
Definition: jsregexp.h:258
DispatchTableConstructor(DispatchTable *table, bool ignore_case, Zone *zone)
Definition: jsregexp.h:1511
#define DECLARE_VISIT(Type)
Definition: jsregexp.h:1562
virtual bool try_to_emit_quick_check_for_alternative(int i)
Definition: jsregexp.h:1082
void AddValue(int value, Zone *zone)
Definition: jsregexp.h:340
bool Get(unsigned value)
Definition: jsregexp.cc:5452
static int GlobalOffsetsVectorSize(Handle< JSRegExp > regexp, int registers_per_match, int *max_matches)
Definition: jsregexp.cc:524
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:710
#define ASSERT(condition)
Definition: checks.h:270
static ActionNode * StorePosition(int reg, bool is_capture, RegExpNode *on_success)
Definition: jsregexp.cc:1421
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:974
void set_characters_preloaded(int count)
Definition: jsregexp.h:1462
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2209
struct v8::internal::ActionNode::@6::@8 u_increment_register
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler *compiler)
Definition: jsregexp.cc:3285
const char * error_message()
Definition: jsregexp.h:1569
friend class ExternalReference
Definition: jsregexp.h:1660
void BuildTable(ChoiceNode *node)
Definition: jsregexp.cc:5784
RegExpCompiler * compiler()
Definition: jsregexp.h:1274
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2339
Interval(int from, int to)
Definition: jsregexp.h:680
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.h:939
bool EmitSkipInstructions(RegExpMacroAssembler *masm)
Definition: jsregexp.cc:3664
ZoneList< TextElement > * elements()
Definition: jsregexp.h:836
DeferredSetRegister(int reg, int value)
Definition: jsregexp.h:1377
void set_type(AssertionNodeType type)
Definition: jsregexp.h:910
void SetAll(int map_number)
Definition: jsregexp.h:1298
struct v8::internal::ActionNode::@6::@12 u_clear_captures
BoyerMoorePositionInfo * at(int i)
Definition: jsregexp.h:1280
static Interval Empty()
Definition: jsregexp.h:695
static void SetLastSubject(FixedArray *array, String *to)
Definition: jsregexp.h:168
static void Negate(ZoneList< CharacterRange > *src, ZoneList< CharacterRange > *dst, Zone *zone)
Definition: jsregexp.cc:5392
static const int kLastSubjectOffset
Definition: jsregexp.h:152
static Handle< Object > AtomExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:281
static Smi * cast(Object *object)
static const int kRegExpExecutableMemoryLimit
Definition: jsregexp.h:197
virtual void Emit(RegExpCompiler *compiler, Trace *trace)=0
static ActionNode * PositiveSubmatchSuccess(int stack_pointer_reg, int restore_reg, int clear_capture_count, int clear_capture_from, RegExpNode *on_success)
Definition: jsregexp.cc:1453
static void Split(ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5162
static const int kLastCaptureCountOffset
Definition: jsregexp.h:150
RegExpCharacterClass * u_char_class
Definition: jsregexp.h:424
RegExpNode * FilterSuccessor(int depth)
Definition: jsregexp.cc:2694
static void SetLastInput(FixedArray *array, String *to)
Definition: jsregexp.h:172
GuardedAlternative(RegExpNode *node)
Definition: jsregexp.h:1030
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5709
static const int kRegWxpCompiledLimit
Definition: jsregexp.h:198
void Advance(int by, bool ascii)
Definition: jsregexp.cc:2621
void Merge(QuickCheckDetails *other, int from_index)
Definition: jsregexp.cc:2642
Label * loop_label()
Definition: jsregexp.h:1441
struct v8::internal::ActionNode::@6::@11 u_empty_match_check
#define UNREACHABLE()
Definition: checks.h:50
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2235
NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, GuardedAlternative then_do_this, Zone *zone)
Definition: jsregexp.h:1112
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4194
union v8::internal::TextElement::@5 data
static const unsigned kFirstLimit
Definition: jsregexp.h:304
void AddAlternative(GuardedAlternative node)
Definition: jsregexp.h:1055
static const int kLastSubject
Definition: jsregexp.h:144
static void SetLastCaptureCount(FixedArray *array, int to)
Definition: jsregexp.h:164
void fail(const char *error_message)
Definition: jsregexp.h:1573
static CharacterRange Range(uc16 from, uc16 to)
Definition: jsregexp.h:254
RegExpNode * replacement()
Definition: jsregexp.h:616
CharacterRange(uc16 from, uc16 to)
Definition: jsregexp.h:247
Interval Union(Interval that)
Definition: jsregexp.h:681
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5688
void set_bound_checked_up_to(int to)
Definition: jsregexp.h:1463
void set_backtrack(Label *backtrack)
Definition: jsregexp.h:1459
virtual void Accept(NodeVisitor *visitor)
Definition: jsregexp.cc:1489
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2867
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start)
Definition: jsregexp.h:762
ChoiceNode(int expected_size, Zone *zone)
Definition: jsregexp.h:1047
const int kPointerSize
Definition: globals.h:234
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2764
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2298
struct v8::internal::ActionNode::@6::@10 u_submatch
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5247
virtual void Accept(NodeVisitor *visitor)
QuickCheckDetails * quick_check_performed()
Definition: jsregexp.h:1446
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)=0
void AddGuard(Guard *guard, Zone *zone)
Definition: jsregexp.cc:1395
static int GetCapture(FixedArray *array, int index)
Definition: jsregexp.h:160
static Handle< Object > IrregexpExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo, Zone *zone)
Definition: jsregexp.cc:617
void set_bm_info(bool not_at_start, BoyerMooreLookahead *bm)
Definition: jsregexp.h:656
LoopChoiceNode(bool body_can_be_zero_length, Zone *zone)
Definition: jsregexp.h:1147
void AddInverse(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5839
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2287
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.h:1503
void AddCaseEquivalents(ZoneList< CharacterRange > *ranges, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5181
struct v8::internal::ActionNode::@6::@7 u_store_register
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4072
static void IrregexpInitialize(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, int capture_register_count)
Definition: jsregexp.cc:491
static const int kFirstCapture
Definition: jsregexp.h:146
TriBool at_start()
Definition: jsregexp.h:1438
Label * backtrack()
Definition: jsregexp.h:1440
Entry(uc16 from, uc16 to, OutSet *out_set)
Definition: jsregexp.h:335
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.h:964
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.h:614
Zone * zone() const
Definition: jsregexp.h:648
ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b)
Definition: jsregexp.h:1220
static Handle< Object > Exec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo, Zone *zone)
Definition: jsregexp.cc:231
EndNode(Action action, Zone *zone)
Definition: jsregexp.h:960
void add_action(DeferredAction *new_action)
Definition: jsregexp.h:1454
static int Compare(uc16 a, uc16 b)
Definition: jsregexp.h:356
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3398
static int GetLastCaptureCount(FixedArray *array)
Definition: jsregexp.h:180
virtual void Accept(NodeVisitor *visitor)
BoyerMooreLookahead(int length, RegExpCompiler *compiler, Zone *zone)
Definition: jsregexp.cc:3544
NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg, int clear_capture_count, int clear_capture_start, Zone *zone)
Definition: jsregexp.h:990
void SaveBMInfo(BoyerMooreLookahead *bm, bool not_at_start, int offset)
Definition: jsregexp.h:630
static const int kLastInput
Definition: jsregexp.h:145
void AddRange(CharacterRange range)
Definition: jsregexp.h:1520
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:603
void set_being_calculated(bool b)
Definition: jsregexp.h:1081
static void AtomCompile(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, Handle< String > match_pattern)
Definition: jsregexp.cc:256
void ResetCompilationState()
Definition: jsregexp.h:476
int characters_preloaded()
Definition: jsregexp.h:1443
DeferredAction * actions()
Definition: jsregexp.h:1418
static const int kNodeIsTooComplexForGreedyLoops
Definition: jsregexp.h:588
uc16 from()
Definition: jsregexp.h:337
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2220
DispatchTable * GetTable(bool ignore_case)
Definition: jsregexp.cc:869
static int IrregexpMaxRegisterCount(FixedArray *re)
Definition: jsregexp.cc:460
RegExpNode * on_success()
Definition: jsregexp.h:707
static void SetIrregexpMaxRegisterCount(FixedArray *re, int value)
Definition: jsregexp.cc:466
BoyerMooreLookahead * bm_info(bool not_at_start)
Definition: jsregexp.h:644
static Handle< String > ToString(Handle< Object > value)
static const int kLastMatchOverhead
Definition: jsregexp.h:147
Entry()
Definition: jsregexp.h:334
static const int kHeaderSize
Definition: objects.h:2233
void set_from(uc16 value)
Definition: jsregexp.h:263
static const int kNone
Definition: jsregexp.h:696
int bound_checked_up_to()
Definition: jsregexp.h:1444
virtual int GreedyLoopTextLength()
Definition: jsregexp.h:589
void AddContinueAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3391
Guard(int reg, Relation op, int value)
Definition: jsregexp.h:1013
struct v8::internal::ActionNode::@6::@9 u_position_register
void set_to(uc16 value)
Definition: jsregexp.h:265
bool GetStoredPosition(int reg, int *cp_offset)
Definition: jsregexp.cc:1095
static AssertionNode * AfterNewline(RegExpNode *on_success)
Definition: jsregexp.h:892
int from() const
Definition: jsregexp.h:693
static AssertionNode * AtNonBoundary(RegExpNode *on_success)
Definition: jsregexp.h:889
BackReferenceNode(int start_reg, int end_reg, RegExpNode *on_success)
Definition: jsregexp.h:926
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.cc:5643
static bool UsesNativeRegExp()
Definition: jsregexp.h:48
static const int kMaxCopiesCodeGenerated
Definition: jsregexp.h:640
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2459
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)=0
static const int kFirstCaptureOffset
Definition: jsregexp.h:156
int Count(int map_number)
Definition: jsregexp.h:1276
uint16_t uc16
Definition: globals.h:273
static const int kLastInputOffset
Definition: jsregexp.h:154
void MakeCaseIndependent(bool is_ascii)
Definition: jsregexp.cc:3304
virtual void Accept(NodeVisitor *visitor)
ZoneList< GuardedAlternative > * alternatives()
Definition: jsregexp.h:1058
static const int kPayloadMask
Definition: jsregexp.h:289
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2685
void SetRest(int from_map)
Definition: jsregexp.h:1302
#define HEAP
Definition: isolate.h:1408
static const int kStartMarker
Definition: jsregexp.h:288
#define ASSERT_EQ(v1, v2)
Definition: checks.h:271
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination trace on stack replacement optimize closures functions with arguments object optimize functions containing for in loops profiler considers IC stability primitive functions trigger their own optimization re try self optimization if it failed insert an interrupt check at function exit execution budget before interrupt is triggered call count before self optimization self_optimization count_based_interrupts weighted_back_edges trace_opt emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 enable use of ARMv7 instructions if enable use of MIPS FPU instructions if NULL
Definition: flags.cc:274
void ForEach(Callback *callback)
Definition: jsregexp.h:371
OutSet * Extend(unsigned value, Zone *zone)
Definition: jsregexp.cc:5421
static void DotPrint(const char *label, RegExpNode *node, bool ignore_case)
ZoneList< GuardedAlternative > * alternatives_
Definition: jsregexp.h:1087
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping true
Definition: flags.cc:157
ContainedInLattice AddRange(ContainedInLattice containment, const int *ranges, int ranges_length, Interval new_range)
Definition: jsregexp.cc:111
virtual int GreedyLoopTextLength()
Definition: jsregexp.cc:3323
static ByteArray * IrregexpByteCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:481
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3046
RegExpNode * set_replacement(RegExpNode *replacement)
Definition: jsregexp.h:620
static ActionNode * ClearCaptures(Interval range, RegExpNode *on_success)
Definition: jsregexp.cc:1432
void Set(int map_number, int character)
Definition: jsregexp.h:1282
static int IrregexpNumberOfRegisters(FixedArray *re)
Definition: jsregexp.cc:476
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2814
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
Definition: list-inl.h:38
Definition: jsregexp.h:332
bool Rationalize(bool ascii)
Definition: jsregexp.cc:2360
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.h:967
TextNode(ZoneList< TextElement > *elms, RegExpNode *on_success)
Definition: jsregexp.h:817
Object * get(int index)
Definition: objects-inl.h:1675
void AddLoopAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3384
CompilationResult(const char *error_message)
Definition: jsregexp.h:1605
SeqRegExpNode(RegExpNode *on_success)
Definition: jsregexp.h:705
virtual void Accept(NodeVisitor *visitor)=0
RegExpNode * loop_node()
Definition: jsregexp.h:1167
int EatsAtLeastHelper(int still_to_find, int recursion_depth, RegExpNode *ignore_this_node, bool not_at_start)
Definition: jsregexp.cc:2310
static Handle< Object > Compile(Handle< JSRegExp > re, Handle< String > pattern, Handle< String > flags)
Definition: jsregexp.cc:168
static TextElement Atom(RegExpAtom *atom)
Definition: jsregexp.cc:844
ZoneList< Guard * > * guards()
Definition: jsregexp.h:1034
virtual void Accept(NodeVisitor *visitor)
ElementInSetsRelation
Definition: jsregexp.h:232
static const int kFillInBMBudget
Definition: jsregexp.h:602
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3810
bool Matches(NodeInfo *that)
Definition: jsregexp.h:446
#define FOR_EACH_NODE_TYPE(VISIT)
Definition: jsregexp.h:385
void DeleteArray(T *array)
Definition: allocation.h:91
T Min(T a, T b)
Definition: utils.h:229
void set_on_success(RegExpNode *node)
Definition: jsregexp.h:708
virtual void Accept(NodeVisitor *visitor)
void Flush(RegExpCompiler *compiler, RegExpNode *successor)
Definition: jsregexp.cc:1283
int kUninitializedRegExpNodePlaceHolder
void set_quick_check_performed(QuickCheckDetails *d)
Definition: jsregexp.h:1465
DeferredAction(ActionNode::Type type, int reg)
Definition: jsregexp.h:1348
void InvalidateCurrentCharacter()
Definition: jsregexp.cc:3280
bool IsEverything(uc16 max)
Definition: jsregexp.h:267
static const Entry NoValue()
Definition: jsregexp.h:355
static void AddClassEscape(uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5079
OutSet * out_set()
Definition: jsregexp.h:343
FlagType type() const
Definition: flags.cc:1358
static CharacterRange Singleton(uc16 value)
Definition: jsregexp.h:251
static AssertionNode * AtEnd(RegExpNode *on_success)
Definition: jsregexp.h:880
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2251
friend class DotPrinter
Definition: jsregexp.h:811
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start)
Definition: jsregexp.cc:3031
QuickCheckDetails(int characters)
Definition: jsregexp.h:505
void AddFromPreceding(NodeInfo *that)
Definition: jsregexp.h:455
void set_stop_node(RegExpNode *node)
Definition: jsregexp.h:1460
static CompilationResult Compile(RegExpCompileData *input, bool ignore_case, bool global, bool multiline, Handle< String > pattern, Handle< String > sample_subject, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5890
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2747
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2849
static void SetCapture(FixedArray *array, int index, int to)
Definition: jsregexp.h:176
static AssertionNode * AtBoundary(RegExpNode *on_success)
Definition: jsregexp.h:886
bool mentions_reg(int reg)
Definition: jsregexp.cc:1084
void set_node(RegExpNode *node)
Definition: jsregexp.h:1033
const int MB
Definition: globals.h:222