v8  3.14.5(node0.10.28)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
jsregexp.h
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_JSREGEXP_H_
29 #define V8_JSREGEXP_H_
30 
31 #include "allocation.h"
32 #include "assembler.h"
33 #include "zone-inl.h"
34 
35 namespace v8 {
36 namespace internal {
37 
38 class NodeVisitor;
39 class RegExpCompiler;
40 class RegExpMacroAssembler;
41 class RegExpNode;
42 class RegExpTree;
43 class BoyerMooreLookahead;
44 
45 class RegExpImpl {
46  public:
47  // Whether V8 is compiled with native regexp support or not.
48  static bool UsesNativeRegExp() {
49 #ifdef V8_INTERPRETED_REGEXP
50  return false;
51 #else
52  return true;
53 #endif
54  }
55 
56  // Creates a regular expression literal in the old space.
57  // This function calls the garbage collector if necessary.
59  Handle<String> pattern,
61  bool* has_pending_exception);
62 
63  // Returns a string representation of a regular expression.
64  // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
65  // This function calls the garbage collector if necessary.
67 
68  // Parses the RegExp pattern and prepares the JSRegExp object with
69  // generic data and choice of implementation - as well as what
70  // the implementation wants to store in the data field.
71  // Returns false if compilation fails.
73  Handle<String> pattern,
75  Zone* zone);
76 
77  // See ECMA-262 section 15.10.6.2.
78  // This function calls the garbage collector if necessary.
79  static Handle<Object> Exec(Handle<JSRegExp> regexp,
80  Handle<String> subject,
81  int index,
82  Handle<JSArray> lastMatchInfo);
83 
84  // Prepares a JSRegExp object with Irregexp-specific data.
85  static void IrregexpInitialize(Handle<JSRegExp> re,
86  Handle<String> pattern,
88  int capture_register_count);
89 
90 
91  static void AtomCompile(Handle<JSRegExp> re,
92  Handle<String> pattern,
94  Handle<String> match_pattern);
95 
96 
97  static int AtomExecRaw(Handle<JSRegExp> regexp,
98  Handle<String> subject,
99  int index,
100  int32_t* output,
101  int output_size);
102 
103 
105  Handle<String> subject,
106  int index,
107  Handle<JSArray> lastMatchInfo);
108 
110 
111  // Prepare a RegExp for being executed one or more times (using
112  // IrregexpExecOnce) on the subject.
113  // This ensures that the regexp is compiled for the subject, and that
114  // the subject is flat.
115  // Returns the number of integer spaces required by IrregexpExecOnce
116  // as its "registers" argument. If the regexp cannot be compiled,
117  // an exception is set as pending, and this function returns negative.
118  static int IrregexpPrepare(Handle<JSRegExp> regexp,
119  Handle<String> subject);
120 
121  // Execute a regular expression on the subject, starting from index.
122  // If matching succeeds, return the number of matches. This can be larger
123  // than one in the case of global regular expressions.
124  // The captures and subcaptures are stored into the registers vector.
125  // If matching fails, returns RE_FAILURE.
126  // If execution fails, sets a pending exception and returns RE_EXCEPTION.
127  static int IrregexpExecRaw(Handle<JSRegExp> regexp,
128  Handle<String> subject,
129  int index,
130  int32_t* output,
131  int output_size);
132 
133  // Execute an Irregexp bytecode pattern.
134  // On a successful match, the result is a JSArray containing
135  // captured positions. On a failure, the result is the null value.
136  // Returns an empty handle in case of an exception.
138  Handle<String> subject,
139  int index,
140  Handle<JSArray> lastMatchInfo);
141 
142  // Set last match info. If match is NULL, then setting captures is omitted.
143  static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info,
144  Handle<String> subject,
145  int capture_count,
146  int32_t* match);
147 
148 
149  class GlobalCache {
150  public:
152  Handle<String> subject,
153  bool is_global,
154  Isolate* isolate);
155 
156  ~GlobalCache();
157 
158  // Fetch the next entry in the cache for global regexp match results.
159  // This does not set the last match info. Upon failure, NULL is returned.
160  // The cause can be checked with Result(). The previous
161  // result is still in available in memory when a failure happens.
162  int32_t* FetchNext();
163 
165 
166  inline bool HasException() { return num_matches_ < 0; }
167 
168  private:
169  int num_matches_;
170  int max_matches_;
171  int current_match_index_;
172  int registers_per_match_;
173  // Pointer to the last set of captures.
174  int32_t* register_array_;
175  int register_array_size_;
176  Handle<JSRegExp> regexp_;
177  Handle<String> subject_;
178  };
179 
180 
181  // Array index in the lastMatchInfo array.
182  static const int kLastCaptureCount = 0;
183  static const int kLastSubject = 1;
184  static const int kLastInput = 2;
185  static const int kFirstCapture = 3;
186  static const int kLastMatchOverhead = 3;
187 
188  // Direct offset into the lastMatchInfo array.
189  static const int kLastCaptureCountOffset =
191  static const int kLastSubjectOffset =
193  static const int kLastInputOffset =
195  static const int kFirstCaptureOffset =
197 
198  // Used to access the lastMatchInfo array.
199  static int GetCapture(FixedArray* array, int index) {
200  return Smi::cast(array->get(index + kFirstCapture))->value();
201  }
202 
203  static void SetLastCaptureCount(FixedArray* array, int to) {
204  array->set(kLastCaptureCount, Smi::FromInt(to));
205  }
206 
207  static void SetLastSubject(FixedArray* array, String* to) {
208  array->set(kLastSubject, to);
209  }
210 
211  static void SetLastInput(FixedArray* array, String* to) {
212  array->set(kLastInput, to);
213  }
214 
215  static void SetCapture(FixedArray* array, int index, int to) {
216  array->set(index + kFirstCapture, Smi::FromInt(to));
217  }
218 
219  static int GetLastCaptureCount(FixedArray* array) {
220  return Smi::cast(array->get(kLastCaptureCount))->value();
221  }
222 
223  // For acting on the JSRegExp data FixedArray.
224  static int IrregexpMaxRegisterCount(FixedArray* re);
225  static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
226  static int IrregexpNumberOfCaptures(FixedArray* re);
227  static int IrregexpNumberOfRegisters(FixedArray* re);
228  static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii);
229  static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii);
230 
231  // Limit the space regexps take up on the heap. In order to limit this we
232  // would like to keep track of the amount of regexp code on the heap. This
233  // is not tracked, however. As a conservative approximation we track the
234  // total regexp code compiled including code that has subsequently been freed
235  // and the total executable memory at any point.
236  static const int kRegExpExecutableMemoryLimit = 16 * MB;
237  static const int kRegWxpCompiledLimit = 1 * MB;
238 
239  private:
240  static bool CompileIrregexp(
241  Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
242  static inline bool EnsureCompiledIrregexp(
243  Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
244 };
245 
246 
247 // Represents the location of one element relative to the intersection of
248 // two sets. Corresponds to the four areas of a Venn diagram.
254 };
255 
256 
257 // Represents code units in the range from from_ to to_, both ends are
258 // inclusive.
260  public:
261  CharacterRange() : from_(0), to_(0) { }
262  // For compatibility with the CHECK_OK macro
263  CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
264  CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
265  static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
266  Zone* zone);
268  static inline CharacterRange Singleton(uc16 value) {
269  return CharacterRange(value, value);
270  }
271  static inline CharacterRange Range(uc16 from, uc16 to) {
272  ASSERT(from <= to);
273  return CharacterRange(from, to);
274  }
275  static inline CharacterRange Everything() {
276  return CharacterRange(0, 0xFFFF);
277  }
278  bool Contains(uc16 i) { return from_ <= i && i <= to_; }
279  uc16 from() const { return from_; }
280  void set_from(uc16 value) { from_ = value; }
281  uc16 to() const { return to_; }
282  void set_to(uc16 value) { to_ = value; }
283  bool is_valid() { return from_ <= to_; }
284  bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
285  bool IsSingleton() { return (from_ == to_); }
286  void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii,
287  Zone* zone);
288  static void Split(ZoneList<CharacterRange>* base,
289  Vector<const int> overlay,
290  ZoneList<CharacterRange>** included,
291  ZoneList<CharacterRange>** excluded,
292  Zone* zone);
293  // Whether a range list is in canonical form: Ranges ordered by from value,
294  // and ranges non-overlapping and non-adjacent.
295  static bool IsCanonical(ZoneList<CharacterRange>* ranges);
296  // Convert range list to canonical form. The characters covered by the ranges
297  // will still be the same, but no character is in more than one range, and
298  // adjacent ranges are merged. The resulting list may be shorter than the
299  // original, but cannot be longer.
300  static void Canonicalize(ZoneList<CharacterRange>* ranges);
301  // Negate the contents of a character range in canonical form.
302  static void Negate(ZoneList<CharacterRange>* src,
304  Zone* zone);
305  static const int kStartMarker = (1 << 24);
306  static const int kPayloadMask = (1 << 24) - 1;
307 
308  private:
309  uc16 from_;
310  uc16 to_;
311 };
312 
313 
314 // A set of unsigned integers that behaves especially well on small
315 // integers (< 32). May do zone-allocation.
316 class OutSet: public ZoneObject {
317  public:
318  OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
319  OutSet* Extend(unsigned value, Zone* zone);
320  bool Get(unsigned value);
321  static const unsigned kFirstLimit = 32;
322 
323  private:
324  // Destructively set a value in this set. In most cases you want
325  // to use Extend instead to ensure that only one instance exists
326  // that contains the same values.
327  void Set(unsigned value, Zone* zone);
328 
329  // The successors are a list of sets that contain the same values
330  // as this set and the one more value that is not present in this
331  // set.
332  ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
333 
334  OutSet(uint32_t first, ZoneList<unsigned>* remaining)
335  : first_(first), remaining_(remaining), successors_(NULL) { }
336  uint32_t first_;
337  ZoneList<unsigned>* remaining_;
338  ZoneList<OutSet*>* successors_;
339  friend class Trace;
340 };
341 
342 
343 // A mapping from integers, specified as ranges, to a set of integers.
344 // Used for mapping character ranges to choices.
345 class DispatchTable : public ZoneObject {
346  public:
347  explicit DispatchTable(Zone* zone) : tree_(zone) { }
348 
349  class Entry {
350  public:
351  Entry() : from_(0), to_(0), out_set_(NULL) { }
353  : from_(from), to_(to), out_set_(out_set) { }
354  uc16 from() { return from_; }
355  uc16 to() { return to_; }
356  void set_to(uc16 value) { to_ = value; }
357  void AddValue(int value, Zone* zone) {
358  out_set_ = out_set_->Extend(value, zone);
359  }
360  OutSet* out_set() { return out_set_; }
361  private:
362  uc16 from_;
363  uc16 to_;
364  OutSet* out_set_;
365  };
366 
367  class Config {
368  public:
369  typedef uc16 Key;
370  typedef Entry Value;
371  static const uc16 kNoKey;
372  static const Entry NoValue() { return Value(); }
373  static inline int Compare(uc16 a, uc16 b) {
374  if (a == b)
375  return 0;
376  else if (a < b)
377  return -1;
378  else
379  return 1;
380  }
381  };
382 
383  void AddRange(CharacterRange range, int value, Zone* zone);
384  OutSet* Get(uc16 value);
385  void Dump();
386 
387  template <typename Callback>
388  void ForEach(Callback* callback) {
389  return tree()->ForEach(callback);
390  }
391 
392  private:
393  // There can't be a static empty set since it allocates its
394  // successors in a zone and caches them.
395  OutSet* empty() { return &empty_; }
396  OutSet empty_;
397  ZoneSplayTree<Config>* tree() { return &tree_; }
398  ZoneSplayTree<Config> tree_;
399 };
400 
401 
402 #define FOR_EACH_NODE_TYPE(VISIT) \
403  VISIT(End) \
404  VISIT(Action) \
405  VISIT(Choice) \
406  VISIT(BackReference) \
407  VISIT(Assertion) \
408  VISIT(Text)
409 
410 
411 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
412  VISIT(Disjunction) \
413  VISIT(Alternative) \
414  VISIT(Assertion) \
415  VISIT(CharacterClass) \
416  VISIT(Atom) \
417  VISIT(Quantifier) \
418  VISIT(Capture) \
419  VISIT(Lookahead) \
420  VISIT(BackReference) \
421  VISIT(Empty) \
422  VISIT(Text)
423 
424 
425 #define FORWARD_DECLARE(Name) class RegExp##Name;
427 #undef FORWARD_DECLARE
428 
429 
430 class TextElement {
431  public:
434  explicit TextElement(Type t) : type(t), cp_offset(-1) { }
435  static TextElement Atom(RegExpAtom* atom);
436  static TextElement CharClass(RegExpCharacterClass* char_class);
437  int length();
439  union {
442  } data;
444 };
445 
446 
447 class Trace;
448 
449 
450 struct NodeInfo {
457  at_end(false),
458  visited(false),
460 
461  // Returns true if the interests and assumptions of this node
462  // matches the given one.
463  bool Matches(NodeInfo* that) {
464  return (at_end == that->at_end) &&
468  }
469 
470  // Updates the interests of this node given the interests of the
471  // node preceding it.
473  at_end |= that->at_end;
477  }
478 
479  bool HasLookbehind() {
480  return follows_word_interest ||
483  }
484 
485  // Sets the interests of this node to include the interests of the
486  // following node.
491  }
492 
494  being_analyzed = false;
495  been_analyzed = false;
496  }
497 
498  bool being_analyzed: 1;
499  bool been_analyzed: 1;
500 
501  // These bits are set of this node has to know what the preceding
502  // character was.
506 
507  bool at_end: 1;
508  bool visited: 1;
510 };
511 
512 
513 // Details of a quick mask-compare check that can look ahead in the
514 // input stream.
516  public:
518  : characters_(0),
519  mask_(0),
520  value_(0),
521  cannot_match_(false) { }
523  : characters_(characters),
524  mask_(0),
525  value_(0),
526  cannot_match_(false) { }
527  bool Rationalize(bool ascii);
528  // Merge in the information from another branch of an alternation.
529  void Merge(QuickCheckDetails* other, int from_index);
530  // Advance the current position by some amount.
531  void Advance(int by, bool ascii);
532  void Clear();
533  bool cannot_match() { return cannot_match_; }
534  void set_cannot_match() { cannot_match_ = true; }
535  struct Position {
540  };
541  int characters() { return characters_; }
542  void set_characters(int characters) { characters_ = characters; }
543  Position* positions(int index) {
544  ASSERT(index >= 0);
545  ASSERT(index < characters_);
546  return positions_ + index;
547  }
548  uint32_t mask() { return mask_; }
549  uint32_t value() { return value_; }
550 
551  private:
552  // How many characters do we have quick check information from. This is
553  // the same for all branches of a choice node.
554  int characters_;
555  Position positions_[4];
556  // These values are the condensate of the above array after Rationalize().
557  uint32_t mask_;
558  uint32_t value_;
559  // If set to true, there is no way this quick check can match at all.
560  // E.g., if it requires to be at the start of the input, and isn't.
561  bool cannot_match_;
562 };
563 
564 
566 
567 
568 class RegExpNode: public ZoneObject {
569  public:
570  explicit RegExpNode(Zone* zone)
571  : replacement_(NULL), trace_count_(0), zone_(zone) {
572  bm_info_[0] = bm_info_[1] = NULL;
573  }
574  virtual ~RegExpNode();
575  virtual void Accept(NodeVisitor* visitor) = 0;
576  // Generates a goto to this node or actually generates the code at this point.
577  virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
578  // How many characters must this node consume at a minimum in order to
579  // succeed. If we have found at least 'still_to_find' characters that
580  // must be consumed there is no need to ask any following nodes whether
581  // they are sure to eat any more characters. The not_at_start argument is
582  // used to indicate that we know we are not at the start of the input. In
583  // this case anchored branches will always fail and can be ignored when
584  // determining how many characters are consumed on success.
585  virtual int EatsAtLeast(int still_to_find,
586  int recursion_depth,
587  bool not_at_start) = 0;
588  // Emits some quick code that checks whether the preloaded characters match.
589  // Falls through on certain failure, jumps to the label on possible success.
590  // If the node cannot make a quick check it does nothing and returns false.
591  bool EmitQuickCheck(RegExpCompiler* compiler,
592  Trace* trace,
593  bool preload_has_checked_bounds,
594  Label* on_possible_success,
595  QuickCheckDetails* details_return,
596  bool fall_through_on_failure);
597  // For a given number of characters this returns a mask and a value. The
598  // next n characters are anded with the mask and compared with the value.
599  // A comparison failure indicates the node cannot match the next n characters.
600  // A comparison success indicates the node may match.
601  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
602  RegExpCompiler* compiler,
603  int characters_filled_in,
604  bool not_at_start) = 0;
605  static const int kNodeIsTooComplexForGreedyLoops = -1;
607  // Only returns the successor for a text node of length 1 that matches any
608  // character and that has no guards on it.
610  RegExpCompiler* compiler) {
611  return NULL;
612  }
613 
614  // Collects information on the possible code units (mod 128) that can match if
615  // we look forward. This is used for a Boyer-Moore-like string searching
616  // implementation. TODO(erikcorry): This should share more code with
617  // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
618  // the number of nodes we are willing to look at in order to create this data.
619  static const int kFillInBMBudget = 200;
620  virtual void FillInBMInfo(int offset,
621  int recursion_depth,
622  int budget,
624  bool not_at_start) {
625  UNREACHABLE();
626  }
627 
628  // If we know that the input is ASCII then there are some nodes that can
629  // never match. This method returns a node that can be substituted for
630  // itself, or NULL if the node can never match.
631  virtual RegExpNode* FilterASCII(int depth) { return this; }
632  // Helper for FilterASCII.
634  ASSERT(info()->replacement_calculated);
635  return replacement_;
636  }
638  info()->replacement_calculated = true;
640  return replacement; // For convenience.
641  }
642 
643  // We want to avoid recalculating the lookahead info, so we store it on the
644  // node. Only info that is for this node is stored. We can tell that the
645  // info is for this node when offset == 0, so the information is calculated
646  // relative to this node.
647  void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
648  if (offset == 0) set_bm_info(not_at_start, bm);
649  }
650 
651  Label* label() { return &label_; }
652  // If non-generic code is generated for a node (i.e. the node is not at the
653  // start of the trace) then it cannot be reused. This variable sets a limit
654  // on how often we allow that to happen before we insist on starting a new
655  // trace and generating generic code for a node that can be reused by flushing
656  // the deferred actions in the current trace and generating a goto.
657  static const int kMaxCopiesCodeGenerated = 10;
658 
659  NodeInfo* info() { return &info_; }
660 
661  BoyerMooreLookahead* bm_info(bool not_at_start) {
662  return bm_info_[not_at_start ? 1 : 0];
663  }
664 
665  Zone* zone() const { return zone_; }
666 
667  protected:
670 
671  LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
672 
673  void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
674  bm_info_[not_at_start ? 1 : 0] = bm;
675  }
676 
677  private:
678  static const int kFirstCharBudget = 10;
679  Label label_;
680  NodeInfo info_;
681  // This variable keeps track of how many times code has been generated for
682  // this node (in different traces). We don't keep track of where the
683  // generated code is located unless the code is generated at the start of
684  // a trace, in which case it is generic and can be reused by flushing the
685  // deferred operations in the current trace and generating a goto.
686  int trace_count_;
687  BoyerMooreLookahead* bm_info_[2];
688 
689  Zone* zone_;
690 };
691 
692 
693 // A simple closed interval.
694 class Interval {
695  public:
696  Interval() : from_(kNone), to_(kNone) { }
697  Interval(int from, int to) : from_(from), to_(to) { }
699  if (that.from_ == kNone)
700  return *this;
701  else if (from_ == kNone)
702  return that;
703  else
704  return Interval(Min(from_, that.from_), Max(to_, that.to_));
705  }
706  bool Contains(int value) {
707  return (from_ <= value) && (value <= to_);
708  }
709  bool is_empty() { return from_ == kNone; }
710  int from() const { return from_; }
711  int to() const { return to_; }
712  static Interval Empty() { return Interval(); }
713  static const int kNone = -1;
714  private:
715  int from_;
716  int to_;
717 };
718 
719 
720 class SeqRegExpNode: public RegExpNode {
721  public:
723  : RegExpNode(on_success->zone()), on_success_(on_success) { }
724  RegExpNode* on_success() { return on_success_; }
725  void set_on_success(RegExpNode* node) { on_success_ = node; }
726  virtual RegExpNode* FilterASCII(int depth);
727  virtual void FillInBMInfo(int offset,
728  int recursion_depth,
729  int budget,
731  bool not_at_start) {
732  on_success_->FillInBMInfo(
733  offset, recursion_depth + 1, budget - 1, bm, not_at_start);
734  if (offset == 0) set_bm_info(not_at_start, bm);
735  }
736 
737  protected:
738  RegExpNode* FilterSuccessor(int depth);
739 
740  private:
741  RegExpNode* on_success_;
742 };
743 
744 
745 class ActionNode: public SeqRegExpNode {
746  public:
747  enum Type {
755  };
756  static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
758  static ActionNode* StorePosition(int reg,
759  bool is_capture,
762  static ActionNode* BeginSubmatch(int stack_pointer_reg,
763  int position_reg,
765  static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
766  int restore_reg,
767  int clear_capture_count,
768  int clear_capture_from,
772  int repetition_limit,
774  virtual void Accept(NodeVisitor* visitor);
775  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
776  virtual int EatsAtLeast(int still_to_find,
777  int recursion_depth,
778  bool not_at_start);
780  RegExpCompiler* compiler,
781  int filled_in,
782  bool not_at_start) {
784  details, compiler, filled_in, not_at_start);
785  }
786  virtual void FillInBMInfo(int offset,
787  int recursion_depth,
788  int budget,
790  bool not_at_start);
791  Type type() { return type_; }
792  // TODO(erikcorry): We should allow some action nodes in greedy loops.
794 
795  private:
796  union {
797  struct {
798  int reg;
799  int value;
801  struct {
802  int reg;
804  struct {
805  int reg;
808  struct {
813  } u_submatch;
814  struct {
819  struct {
821  int range_to;
823  } data_;
824  ActionNode(Type type, RegExpNode* on_success)
825  : SeqRegExpNode(on_success),
826  type_(type) { }
827  Type type_;
828  friend class DotPrinter;
829 };
830 
831 
832 class TextNode: public SeqRegExpNode {
833  public:
836  : SeqRegExpNode(on_success),
837  elms_(elms) { }
840  : SeqRegExpNode(on_success),
841  elms_(new(zone()) ZoneList<TextElement>(1, zone())) {
842  elms_->Add(TextElement::CharClass(that), zone());
843  }
844  virtual void Accept(NodeVisitor* visitor);
845  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
846  virtual int EatsAtLeast(int still_to_find,
847  int recursion_depth,
848  bool not_at_start);
849  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
850  RegExpCompiler* compiler,
851  int characters_filled_in,
852  bool not_at_start);
853  ZoneList<TextElement>* elements() { return elms_; }
854  void MakeCaseIndependent(bool is_ascii);
855  virtual int GreedyLoopTextLength();
857  RegExpCompiler* compiler);
858  virtual void FillInBMInfo(int offset,
859  int recursion_depth,
860  int budget,
862  bool not_at_start);
863  void CalculateOffsets();
864  virtual RegExpNode* FilterASCII(int depth);
865 
866  private:
867  enum TextEmitPassType {
868  NON_ASCII_MATCH, // Check for characters that can't match.
869  SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
870  NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
871  CASE_CHARACTER_MATCH, // Case-independent single character check.
872  CHARACTER_CLASS_MATCH // Character class.
873  };
874  static bool SkipPass(int pass, bool ignore_case);
875  static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
876  static const int kLastPass = CHARACTER_CLASS_MATCH;
877  void TextEmitPass(RegExpCompiler* compiler,
878  TextEmitPassType pass,
879  bool preloaded,
880  Trace* trace,
881  bool first_element_checked,
882  int* checked_up_to);
883  int Length();
884  ZoneList<TextElement>* elms_;
885 };
886 
887 
889  public:
896  };
898  return new(on_success->zone()) AssertionNode(AT_END, on_success);
899  }
901  return new(on_success->zone()) AssertionNode(AT_START, on_success);
902  }
904  return new(on_success->zone()) AssertionNode(AT_BOUNDARY, on_success);
905  }
907  return new(on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success);
908  }
910  return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success);
911  }
912  virtual void Accept(NodeVisitor* visitor);
913  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
914  virtual int EatsAtLeast(int still_to_find,
915  int recursion_depth,
916  bool not_at_start);
917  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
918  RegExpCompiler* compiler,
919  int filled_in,
920  bool not_at_start);
921  virtual void FillInBMInfo(int offset,
922  int recursion_depth,
923  int budget,
925  bool not_at_start);
926  AssertionNodeType type() { return type_; }
927  void set_type(AssertionNodeType type) { type_ = type; }
928 
929  private:
930  void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
931  enum IfPrevious { kIsNonWord, kIsWord };
932  void BacktrackIfPrevious(RegExpCompiler* compiler,
933  Trace* trace,
934  IfPrevious backtrack_if_previous);
935  AssertionNode(AssertionNodeType t, RegExpNode* on_success)
936  : SeqRegExpNode(on_success), type_(t) { }
937  AssertionNodeType type_;
938 };
939 
940 
942  public:
943  BackReferenceNode(int start_reg,
944  int end_reg,
946  : SeqRegExpNode(on_success),
947  start_reg_(start_reg),
948  end_reg_(end_reg) { }
949  virtual void Accept(NodeVisitor* visitor);
950  int start_register() { return start_reg_; }
951  int end_register() { return end_reg_; }
952  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
953  virtual int EatsAtLeast(int still_to_find,
954  int recursion_depth,
955  bool not_at_start);
957  RegExpCompiler* compiler,
958  int characters_filled_in,
959  bool not_at_start) {
960  return;
961  }
962  virtual void FillInBMInfo(int offset,
963  int recursion_depth,
964  int budget,
966  bool not_at_start);
967 
968  private:
969  int start_reg_;
970  int end_reg_;
971 };
972 
973 
974 class EndNode: public RegExpNode {
975  public:
977  explicit EndNode(Action action, Zone* zone)
978  : RegExpNode(zone), action_(action) { }
979  virtual void Accept(NodeVisitor* visitor);
980  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
981  virtual int EatsAtLeast(int still_to_find,
982  int recursion_depth,
983  bool not_at_start) { return 0; }
985  RegExpCompiler* compiler,
986  int characters_filled_in,
987  bool not_at_start) {
988  // Returning 0 from EatsAtLeast should ensure we never get here.
989  UNREACHABLE();
990  }
991  virtual void FillInBMInfo(int offset,
992  int recursion_depth,
993  int budget,
995  bool not_at_start) {
996  // Returning 0 from EatsAtLeast should ensure we never get here.
997  UNREACHABLE();
998  }
999 
1000  private:
1001  Action action_;
1002 };
1003 
1004 
1006  public:
1007  NegativeSubmatchSuccess(int stack_pointer_reg,
1008  int position_reg,
1009  int clear_capture_count,
1010  int clear_capture_start,
1011  Zone* zone)
1013  stack_pointer_register_(stack_pointer_reg),
1014  current_position_register_(position_reg),
1015  clear_capture_count_(clear_capture_count),
1016  clear_capture_start_(clear_capture_start) { }
1017  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1018 
1019  private:
1020  int stack_pointer_register_;
1021  int current_position_register_;
1022  int clear_capture_count_;
1023  int clear_capture_start_;
1024 };
1025 
1026 
1027 class Guard: public ZoneObject {
1028  public:
1029  enum Relation { LT, GEQ };
1031  : reg_(reg),
1032  op_(op),
1033  value_(value) { }
1034  int reg() { return reg_; }
1035  Relation op() { return op_; }
1036  int value() { return value_; }
1037 
1038  private:
1039  int reg_;
1040  Relation op_;
1041  int value_;
1042 };
1043 
1044 
1046  public:
1047  explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
1048  void AddGuard(Guard* guard, Zone* zone);
1049  RegExpNode* node() { return node_; }
1050  void set_node(RegExpNode* node) { node_ = node; }
1051  ZoneList<Guard*>* guards() { return guards_; }
1052 
1053  private:
1054  RegExpNode* node_;
1055  ZoneList<Guard*>* guards_;
1056 };
1057 
1058 
1059 class AlternativeGeneration;
1060 
1061 
1062 class ChoiceNode: public RegExpNode {
1063  public:
1064  explicit ChoiceNode(int expected_size, Zone* zone)
1065  : RegExpNode(zone),
1066  alternatives_(new(zone)
1067  ZoneList<GuardedAlternative>(expected_size, zone)),
1068  table_(NULL),
1069  not_at_start_(false),
1070  being_calculated_(false) { }
1071  virtual void Accept(NodeVisitor* visitor);
1073  alternatives()->Add(node, zone());
1074  }
1076  DispatchTable* GetTable(bool ignore_case);
1077  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1078  virtual int EatsAtLeast(int still_to_find,
1079  int recursion_depth,
1080  bool not_at_start);
1081  int EatsAtLeastHelper(int still_to_find,
1082  int recursion_depth,
1083  RegExpNode* ignore_this_node,
1084  bool not_at_start);
1085  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1086  RegExpCompiler* compiler,
1087  int characters_filled_in,
1088  bool not_at_start);
1089  virtual void FillInBMInfo(int offset,
1090  int recursion_depth,
1091  int budget,
1092  BoyerMooreLookahead* bm,
1093  bool not_at_start);
1094 
1095  bool being_calculated() { return being_calculated_; }
1096  bool not_at_start() { return not_at_start_; }
1097  void set_not_at_start() { not_at_start_ = true; }
1098  void set_being_calculated(bool b) { being_calculated_ = b; }
1099  virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
1100  virtual RegExpNode* FilterASCII(int depth);
1101 
1102  protected:
1105 
1106  private:
1108  friend class Analysis;
1109  void GenerateGuard(RegExpMacroAssembler* macro_assembler,
1110  Guard* guard,
1111  Trace* trace);
1112  int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
1113  void EmitOutOfLineContinuation(RegExpCompiler* compiler,
1114  Trace* trace,
1115  GuardedAlternative alternative,
1116  AlternativeGeneration* alt_gen,
1117  int preload_characters,
1118  bool next_expects_preload);
1119  DispatchTable* table_;
1120  // If true, this node is never checked at the start of the input.
1121  // Allows a new trace to start with at_start() set to false.
1122  bool not_at_start_;
1123  bool being_calculated_;
1124 };
1125 
1126 
1128  public:
1130  GuardedAlternative then_do_this,
1131  Zone* zone)
1132  : ChoiceNode(2, zone) {
1133  AddAlternative(this_must_fail);
1134  AddAlternative(then_do_this);
1135  }
1136  virtual int EatsAtLeast(int still_to_find,
1137  int recursion_depth,
1138  bool not_at_start);
1139  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1140  RegExpCompiler* compiler,
1141  int characters_filled_in,
1142  bool not_at_start);
1143  virtual void FillInBMInfo(int offset,
1144  int recursion_depth,
1145  int budget,
1146  BoyerMooreLookahead* bm,
1147  bool not_at_start) {
1148  alternatives_->at(1).node()->FillInBMInfo(
1149  offset, recursion_depth + 1, budget - 1, bm, not_at_start);
1150  if (offset == 0) set_bm_info(not_at_start, bm);
1151  }
1152  // For a negative lookahead we don't emit the quick check for the
1153  // alternative that is expected to fail. This is because quick check code
1154  // starts by loading enough characters for the alternative that takes fewest
1155  // characters, but on a negative lookahead the negative branch did not take
1156  // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1157  virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1158  virtual RegExpNode* FilterASCII(int depth);
1159 };
1160 
1161 
1163  public:
1165  : ChoiceNode(2, zone),
1166  loop_node_(NULL),
1167  continue_node_(NULL),
1168  body_can_be_zero_length_(body_can_be_zero_length) { }
1171  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1172  virtual int EatsAtLeast(int still_to_find,
1173  int recursion_depth,
1174  bool not_at_start);
1175  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1176  RegExpCompiler* compiler,
1177  int characters_filled_in,
1178  bool not_at_start);
1179  virtual void FillInBMInfo(int offset,
1180  int recursion_depth,
1181  int budget,
1182  BoyerMooreLookahead* bm,
1183  bool not_at_start);
1184  RegExpNode* loop_node() { return loop_node_; }
1185  RegExpNode* continue_node() { return continue_node_; }
1186  bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1187  virtual void Accept(NodeVisitor* visitor);
1188  virtual RegExpNode* FilterASCII(int depth);
1189 
1190  private:
1191  // AddAlternative is made private for loop nodes because alternatives
1192  // should not be added freely, we need to keep track of which node
1193  // goes back to the node itself.
1194  void AddAlternative(GuardedAlternative node) {
1196  }
1197 
1198  RegExpNode* loop_node_;
1199  RegExpNode* continue_node_;
1200  bool body_can_be_zero_length_;
1201 };
1202 
1203 
1204 // Improve the speed that we scan for an initial point where a non-anchored
1205 // regexp can match by using a Boyer-Moore-like table. This is done by
1206 // identifying non-greedy non-capturing loops in the nodes that eat any
1207 // character one at a time. For example in the middle of the regexp
1208 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly
1209 // inserted at the start of any non-anchored regexp.
1210 //
1211 // When we have found such a loop we look ahead in the nodes to find the set of
1212 // characters that can come at given distances. For example for the regexp
1213 // /.?foo/ we know that there are at least 3 characters ahead of us, and the
1214 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in
1215 // the lookahead info where the set of characters is reasonably constrained. In
1216 // our example this is from index 1 to 2 (0 is not constrained). We can now
1217 // look 3 characters ahead and if we don't find one of [f, o] (the union of
1218 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
1219 //
1220 // For Unicode input strings we do the same, but modulo 128.
1221 //
1222 // We also look at the first string fed to the regexp and use that to get a hint
1223 // of the character frequencies in the inputs. This affects the assessment of
1224 // whether the set of characters is 'reasonably constrained'.
1225 //
1226 // We also have another lookahead mechanism (called quick check in the code),
1227 // which uses a wide load of multiple characters followed by a mask and compare
1228 // to determine whether a match is possible at this point.
1230  kNotYet = 0,
1233  kLatticeUnknown = 3 // Can also mean both in and out.
1234 };
1235 
1236 
1238  return static_cast<ContainedInLattice>(a | b);
1239 }
1240 
1241 
1243  const int* ranges,
1244  int ranges_size,
1245  Interval new_range);
1246 
1247 
1249  public:
1251  : map_(new(zone) ZoneList<bool>(kMapSize, zone)),
1252  map_count_(0),
1253  w_(kNotYet),
1254  s_(kNotYet),
1255  d_(kNotYet),
1256  surrogate_(kNotYet) {
1257  for (int i = 0; i < kMapSize; i++) {
1258  map_->Add(false, zone);
1259  }
1260  }
1261 
1262  bool& at(int i) { return map_->at(i); }
1263 
1264  static const int kMapSize = 128;
1265  static const int kMask = kMapSize - 1;
1266 
1267  int map_count() const { return map_count_; }
1268 
1269  void Set(int character);
1270  void SetInterval(const Interval& interval);
1271  void SetAll();
1272  bool is_non_word() { return w_ == kLatticeOut; }
1273  bool is_word() { return w_ == kLatticeIn; }
1274 
1275  private:
1276  ZoneList<bool>* map_;
1277  int map_count_; // Number of set bits in the map.
1278  ContainedInLattice w_; // The \w character class.
1279  ContainedInLattice s_; // The \s character class.
1280  ContainedInLattice d_; // The \d character class.
1281  ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
1282 };
1283 
1284 
1286  public:
1288 
1289  int length() { return length_; }
1290  int max_char() { return max_char_; }
1291  RegExpCompiler* compiler() { return compiler_; }
1292 
1293  int Count(int map_number) {
1294  return bitmaps_->at(map_number)->map_count();
1295  }
1296 
1297  BoyerMoorePositionInfo* at(int i) { return bitmaps_->at(i); }
1298 
1299  void Set(int map_number, int character) {
1300  if (character > max_char_) return;
1301  BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1302  info->Set(character);
1303  }
1304 
1305  void SetInterval(int map_number, const Interval& interval) {
1306  if (interval.from() > max_char_) return;
1307  BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1308  if (interval.to() > max_char_) {
1309  info->SetInterval(Interval(interval.from(), max_char_));
1310  } else {
1311  info->SetInterval(interval);
1312  }
1313  }
1314 
1315  void SetAll(int map_number) {
1316  bitmaps_->at(map_number)->SetAll();
1317  }
1318 
1319  void SetRest(int from_map) {
1320  for (int i = from_map; i < length_; i++) SetAll(i);
1321  }
1323 
1324  private:
1325  // This is the value obtained by EatsAtLeast. If we do not have at least this
1326  // many characters left in the sample string then the match is bound to fail.
1327  // Therefore it is OK to read a character this far ahead of the current match
1328  // point.
1329  int length_;
1330  RegExpCompiler* compiler_;
1331  // 0x7f for ASCII, 0xffff for UTF-16.
1332  int max_char_;
1334 
1335  int GetSkipTable(int min_lookahead,
1336  int max_lookahead,
1337  Handle<ByteArray> boolean_skip_table);
1338  bool FindWorthwhileInterval(int* from, int* to);
1339  int FindBestInterval(
1340  int max_number_of_chars, int old_biggest_points, int* from, int* to);
1341 };
1342 
1343 
1344 // There are many ways to generate code for a node. This class encapsulates
1345 // the current way we should be generating. In other words it encapsulates
1346 // the current state of the code generator. The effect of this is that we
1347 // generate code for paths that the matcher can take through the regular
1348 // expression. A given node in the regexp can be code-generated several times
1349 // as it can be part of several traces. For example for the regexp:
1350 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
1351 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code
1352 // to match foo is generated only once (the traces have a common prefix). The
1353 // code to store the capture is deferred and generated (twice) after the places
1354 // where baz has been matched.
1355 class Trace {
1356  public:
1357  // A value for a property that is either known to be true, know to be false,
1358  // or not known.
1359  enum TriBool {
1360  UNKNOWN = -1, FALSE = 0, TRUE = 1
1361  };
1362 
1364  public:
1366  : type_(type), reg_(reg), next_(NULL) { }
1367  DeferredAction* next() { return next_; }
1368  bool Mentions(int reg);
1369  int reg() { return reg_; }
1370  ActionNode::Type type() { return type_; }
1371  private:
1372  ActionNode::Type type_;
1373  int reg_;
1374  DeferredAction* next_;
1375  friend class Trace;
1376  };
1377 
1379  public:
1381  : DeferredAction(ActionNode::STORE_POSITION, reg),
1382  cp_offset_(trace->cp_offset()),
1383  is_capture_(is_capture) { }
1384  int cp_offset() { return cp_offset_; }
1385  bool is_capture() { return is_capture_; }
1386  private:
1387  int cp_offset_;
1388  bool is_capture_;
1389  void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
1390  };
1391 
1393  public:
1395  : DeferredAction(ActionNode::SET_REGISTER, reg),
1396  value_(value) { }
1397  int value() { return value_; }
1398  private:
1399  int value_;
1400  };
1401 
1403  public:
1405  : DeferredAction(ActionNode::CLEAR_CAPTURES, -1),
1406  range_(range) { }
1407  Interval range() { return range_; }
1408  private:
1409  Interval range_;
1410  };
1411 
1413  public:
1415  : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
1416  };
1417 
1419  : cp_offset_(0),
1420  actions_(NULL),
1421  backtrack_(NULL),
1422  stop_node_(NULL),
1423  loop_label_(NULL),
1424  characters_preloaded_(0),
1425  bound_checked_up_to_(0),
1426  flush_budget_(100),
1427  at_start_(UNKNOWN) { }
1428 
1429  // End the trace. This involves flushing the deferred actions in the trace
1430  // and pushing a backtrack location onto the backtrack stack. Once this is
1431  // done we can start a new trace or go to one that has already been
1432  // generated.
1433  void Flush(RegExpCompiler* compiler, RegExpNode* successor);
1434  int cp_offset() { return cp_offset_; }
1435  DeferredAction* actions() { return actions_; }
1436  // A trivial trace is one that has no deferred actions or other state that
1437  // affects the assumptions used when generating code. There is no recorded
1438  // backtrack location in a trivial trace, so with a trivial trace we will
1439  // generate code that, on a failure to match, gets the backtrack location
1440  // from the backtrack stack rather than using a direct jump instruction. We
1441  // always start code generation with a trivial trace and non-trivial traces
1442  // are created as we emit code for nodes or add to the list of deferred
1443  // actions in the trace. The location of the code generated for a node using
1444  // a trivial trace is recorded in a label in the node so that gotos can be
1445  // generated to that code.
1446  bool is_trivial() {
1447  return backtrack_ == NULL &&
1448  actions_ == NULL &&
1449  cp_offset_ == 0 &&
1450  characters_preloaded_ == 0 &&
1451  bound_checked_up_to_ == 0 &&
1452  quick_check_performed_.characters() == 0 &&
1453  at_start_ == UNKNOWN;
1454  }
1455  TriBool at_start() { return at_start_; }
1456  void set_at_start(bool at_start) { at_start_ = at_start ? TRUE : FALSE; }
1457  Label* backtrack() { return backtrack_; }
1458  Label* loop_label() { return loop_label_; }
1459  RegExpNode* stop_node() { return stop_node_; }
1460  int characters_preloaded() { return characters_preloaded_; }
1461  int bound_checked_up_to() { return bound_checked_up_to_; }
1462  int flush_budget() { return flush_budget_; }
1463  QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
1464  bool mentions_reg(int reg);
1465  // Returns true if a deferred position store exists to the specified
1466  // register and stores the offset in the out-parameter. Otherwise
1467  // returns false.
1468  bool GetStoredPosition(int reg, int* cp_offset);
1469  // These set methods and AdvanceCurrentPositionInTrace should be used only on
1470  // new traces - the intention is that traces are immutable after creation.
1471  void add_action(DeferredAction* new_action) {
1472  ASSERT(new_action->next_ == NULL);
1473  new_action->next_ = actions_;
1474  actions_ = new_action;
1475  }
1476  void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
1477  void set_stop_node(RegExpNode* node) { stop_node_ = node; }
1478  void set_loop_label(Label* label) { loop_label_ = label; }
1479  void set_characters_preloaded(int count) { characters_preloaded_ = count; }
1480  void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
1481  void set_flush_budget(int to) { flush_budget_ = to; }
1483  quick_check_performed_ = *d;
1484  }
1486  void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
1487 
1488  private:
1489  int FindAffectedRegisters(OutSet* affected_registers, Zone* zone);
1490  void PerformDeferredActions(RegExpMacroAssembler* macro,
1491  int max_register,
1492  OutSet& affected_registers,
1493  OutSet* registers_to_pop,
1494  OutSet* registers_to_clear,
1495  Zone* zone);
1496  void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
1497  int max_register,
1498  OutSet& registers_to_pop,
1499  OutSet& registers_to_clear);
1500  int cp_offset_;
1501  DeferredAction* actions_;
1502  Label* backtrack_;
1503  RegExpNode* stop_node_;
1504  Label* loop_label_;
1505  int characters_preloaded_;
1506  int bound_checked_up_to_;
1507  QuickCheckDetails quick_check_performed_;
1508  int flush_budget_;
1509  TriBool at_start_;
1510 };
1511 
1512 
1514  public:
1515  virtual ~NodeVisitor() { }
1516 #define DECLARE_VISIT(Type) \
1517  virtual void Visit##Type(Type##Node* that) = 0;
1519 #undef DECLARE_VISIT
1520  virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
1521 };
1522 
1523 
1524 // Node visitor used to add the start set of the alternatives to the
1525 // dispatch table of a choice node.
1527  public:
1529  Zone* zone)
1530  : table_(table),
1531  choice_index_(-1),
1532  ignore_case_(ignore_case),
1533  zone_(zone) { }
1534 
1535  void BuildTable(ChoiceNode* node);
1536 
1537  void AddRange(CharacterRange range) {
1538  table()->AddRange(range, choice_index_, zone_);
1539  }
1540 
1541  void AddInverse(ZoneList<CharacterRange>* ranges);
1542 
1543 #define DECLARE_VISIT(Type) \
1544  virtual void Visit##Type(Type##Node* that);
1546 #undef DECLARE_VISIT
1547 
1548  DispatchTable* table() { return table_; }
1549  void set_choice_index(int value) { choice_index_ = value; }
1550 
1551  protected:
1556 };
1557 
1558 
1559 // Assertion propagation moves information about assertions such as
1560 // \b to the affected nodes. For instance, in /.\b./ information must
1561 // be propagated to the first '.' that whatever follows needs to know
1562 // if it matched a word or a non-word, and to the second '.' that it
1563 // has to check if it succeeds a word or non-word. In this case the
1564 // result will be something like:
1565 //
1566 // +-------+ +------------+
1567 // | . | | . |
1568 // +-------+ ---> +------------+
1569 // | word? | | check word |
1570 // +-------+ +------------+
1571 class Analysis: public NodeVisitor {
1572  public:
1573  Analysis(bool ignore_case, bool is_ascii)
1574  : ignore_case_(ignore_case),
1575  is_ascii_(is_ascii),
1576  error_message_(NULL) { }
1577  void EnsureAnalyzed(RegExpNode* node);
1578 
1579 #define DECLARE_VISIT(Type) \
1580  virtual void Visit##Type(Type##Node* that);
1582 #undef DECLARE_VISIT
1583  virtual void VisitLoopChoice(LoopChoiceNode* that);
1584 
1585  bool has_failed() { return error_message_ != NULL; }
1586  const char* error_message() {
1587  ASSERT(error_message_ != NULL);
1588  return error_message_;
1589  }
1590  void fail(const char* error_message) {
1591  error_message_ = error_message;
1592  }
1593 
1594  private:
1595  bool ignore_case_;
1596  bool is_ascii_;
1597  const char* error_message_;
1598 
1599  DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
1600 };
1601 
1602 
1605  : tree(NULL),
1606  node(NULL),
1607  simple(true),
1609  capture_count(0) { }
1612  bool simple;
1616 };
1617 
1618 
1619 class RegExpEngine: public AllStatic {
1620  public:
1622  explicit CompilationResult(const char* error_message)
1623  : error_message(error_message),
1624  code(HEAP->the_hole_value()),
1625  num_registers(0) {}
1626  CompilationResult(Object* code, int registers)
1627  : error_message(NULL),
1628  code(code),
1629  num_registers(registers) {}
1630  const char* error_message;
1633  };
1634 
1636  bool ignore_case,
1637  bool global,
1638  bool multiline,
1639  Handle<String> pattern,
1640  Handle<String> sample_subject,
1641  bool is_ascii, Zone* zone);
1642 
1643  static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1644 };
1645 
1646 
1647 } } // namespace v8::internal
1648 
1649 #endif // V8_JSREGEXP_H_
#define FORWARD_DECLARE(Name)
Definition: jsregexp.h:425
void SetInterval(const Interval &interval)
Definition: jsregexp.cc:3663
OutSet * Get(uc16 value)
Definition: jsregexp.cc:5707
AssertionNodeType type()
Definition: jsregexp.h:926
Analysis(bool ignore_case, bool is_ascii)
Definition: jsregexp.h:1573
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5824
RegExpNode * stop_node()
Definition: jsregexp.h:1459
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT)
Definition: jsregexp.h:411
int GreedyLoopTextLengthForAlternative(GuardedAlternative *alternative)
Definition: jsregexp.cc:3513
virtual void Accept(NodeVisitor *visitor)
static Vector< const int > GetWordBounds()
Definition: jsregexp.cc:5279
virtual bool try_to_emit_quick_check_for_alternative(int i)
Definition: jsregexp.h:1157
static AssertionNode * AtStart(RegExpNode *on_success)
Definition: jsregexp.h:900
void SetInterval(int map_number, const Interval &interval)
Definition: jsregexp.h:1305
static ActionNode * SetRegister(int reg, int val, RegExpNode *on_success)
Definition: jsregexp.cc:1554
static Code * IrregexpNativeCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:509
void set(int index, Object *value)
Definition: objects-inl.h:1757
RegExpNode * continue_node()
Definition: jsregexp.h:1185
TextNode(RegExpCharacterClass *that, RegExpNode *on_success)
Definition: jsregexp.h:838
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1496
DeferredCapture(int reg, bool is_capture, Trace *trace)
Definition: jsregexp.h:1380
static ActionNode * BeginSubmatch(int stack_pointer_reg, int position_reg, RegExpNode *on_success)
Definition: jsregexp.cc:1594
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2416
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3378
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:1143
void set_characters(int characters)
Definition: jsregexp.h:542
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2426
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.h:609
static Smi * FromInt(int value)
Definition: objects-inl.h:981
struct v8::internal::ActionNode::@10::@12 u_increment_register
bool Contains(int value)
Definition: jsregexp.h:706
int to() const
Definition: jsregexp.h:711
void set_at_start(bool at_start)
Definition: jsregexp.h:1456
static const int kLastCaptureCount
Definition: jsregexp.h:182
GlobalCache(Handle< JSRegExp > regexp, Handle< String > subject, bool is_global, Isolate *isolate)
Definition: jsregexp.cc:706
bool EmitQuickCheck(RegExpCompiler *compiler, Trace *trace, bool preload_has_checked_bounds, Label *on_possible_success, QuickCheckDetails *details_return, bool fall_through_on_failure)
Definition: jsregexp.cc:2536
void set_loop_label(Label *label)
Definition: jsregexp.h:1478
static ActionNode * EmptyMatchCheck(int start_register, int repetition_register, int repetition_limit, RegExpNode *on_success)
Definition: jsregexp.cc:1620
T Max(T a, T b)
Definition: utils.h:222
Position * positions(int index)
Definition: jsregexp.h:543
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.cc:3485
RegExpNode(Zone *zone)
Definition: jsregexp.h:570
LimitResult LimitVersions(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:2317
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1523
static Handle< Object > CreateRegExpLiteral(Handle< JSFunction > constructor, Handle< String > pattern, Handle< String > flags, bool *has_pending_exception)
Definition: jsregexp.cc:66
static TextElement CharClass(RegExpCharacterClass *char_class)
Definition: jsregexp.cc:1003
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2854
virtual int GreedyLoopTextLength()
Definition: jsregexp.h:793
void AddFromFollowing(NodeInfo *that)
Definition: jsregexp.h:487
static ActionNode * IncrementRegister(int reg, RegExpNode *on_success)
Definition: jsregexp.cc:1565
void AddRange(CharacterRange range, int value, Zone *zone)
Definition: jsregexp.cc:5618
uc16 to()
Definition: jsregexp.h:355
T & at(int i) const
Definition: list.h:90
static int IrregexpNumberOfCaptures(FixedArray *re)
Definition: jsregexp.cc:494
void set_to(uc16 value)
Definition: jsregexp.h:356
int int32_t
Definition: unicode.cc:47
RegExpNode * replacement_
Definition: jsregexp.h:669
static void Canonicalize(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5507
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2988
void EnsureAnalyzed(RegExpNode *node)
Definition: jsregexp.cc:5723
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2481
void set_flush_budget(int to)
Definition: jsregexp.h:1481
CompilationResult(Object *code, int registers)
Definition: jsregexp.h:1626
static CharacterRange Everything()
Definition: jsregexp.h:275
DispatchTableConstructor(DispatchTable *table, bool ignore_case, Zone *zone)
Definition: jsregexp.h:1528
#define DECLARE_VISIT(Type)
Definition: jsregexp.h:1579
virtual bool try_to_emit_quick_check_for_alternative(int i)
Definition: jsregexp.h:1099
void AddValue(int value, Zone *zone)
Definition: jsregexp.h:357
bool Get(unsigned value)
Definition: jsregexp.cc:5604
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:727
#define ASSERT(condition)
Definition: checks.h:270
static ActionNode * StorePosition(int reg, bool is_capture, RegExpNode *on_success)
Definition: jsregexp.cc:1573
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:991
void set_characters_preloaded(int count)
Definition: jsregexp.h:1479
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2361
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler *compiler)
Definition: jsregexp.cc:3437
const char * error_message()
Definition: jsregexp.h:1586
void BuildTable(ChoiceNode *node)
Definition: jsregexp.cc:5936
RegExpCompiler * compiler()
Definition: jsregexp.h:1291
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2491
Interval(int from, int to)
Definition: jsregexp.h:697
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.h:956
bool EmitSkipInstructions(RegExpMacroAssembler *masm)
Definition: jsregexp.cc:3816
ZoneList< TextElement > * elements()
Definition: jsregexp.h:853
DeferredSetRegister(int reg, int value)
Definition: jsregexp.h:1394
void set_type(AssertionNodeType type)
Definition: jsregexp.h:927
void SetAll(int map_number)
Definition: jsregexp.h:1315
BoyerMoorePositionInfo * at(int i)
Definition: jsregexp.h:1297
static Interval Empty()
Definition: jsregexp.h:712
static void SetLastSubject(FixedArray *array, String *to)
Definition: jsregexp.h:207
static void Negate(ZoneList< CharacterRange > *src, ZoneList< CharacterRange > *dst, Zone *zone)
Definition: jsregexp.cc:5544
static const int kLastSubjectOffset
Definition: jsregexp.h:191
static Handle< Object > AtomExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:340
static Smi * cast(Object *object)
static const int kRegExpExecutableMemoryLimit
Definition: jsregexp.h:236
virtual void Emit(RegExpCompiler *compiler, Trace *trace)=0
static ActionNode * PositiveSubmatchSuccess(int stack_pointer_reg, int restore_reg, int clear_capture_count, int clear_capture_from, RegExpNode *on_success)
Definition: jsregexp.cc:1605
static void Split(ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5314
struct v8::internal::ActionNode::@10::@14 u_submatch
static const int kLastCaptureCountOffset
Definition: jsregexp.h:189
RegExpCharacterClass * u_char_class
Definition: jsregexp.h:441
RegExpNode * FilterSuccessor(int depth)
Definition: jsregexp.cc:2846
static void SetLastInput(FixedArray *array, String *to)
Definition: jsregexp.h:211
GuardedAlternative(RegExpNode *node)
Definition: jsregexp.h:1047
union v8::internal::TextElement::@9 data
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5861
static const int kRegWxpCompiledLimit
Definition: jsregexp.h:237
void Advance(int by, bool ascii)
Definition: jsregexp.cc:2773
void Merge(QuickCheckDetails *other, int from_index)
Definition: jsregexp.cc:2794
Label * loop_label()
Definition: jsregexp.h:1458
#define UNREACHABLE()
Definition: checks.h:50
static int IrregexpPrepare(Handle< JSRegExp > regexp, Handle< String > subject)
Definition: jsregexp.cc:527
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2387
NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, GuardedAlternative then_do_this, Zone *zone)
Definition: jsregexp.h:1129
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4346
static const unsigned kFirstLimit
Definition: jsregexp.h:321
void AddAlternative(GuardedAlternative node)
Definition: jsregexp.h:1072
static const int kLastSubject
Definition: jsregexp.h:183
static void SetLastCaptureCount(FixedArray *array, int to)
Definition: jsregexp.h:203
void fail(const char *error_message)
Definition: jsregexp.h:1590
static CharacterRange Range(uc16 from, uc16 to)
Definition: jsregexp.h:271
RegExpNode * replacement()
Definition: jsregexp.h:633
CharacterRange(uc16 from, uc16 to)
Definition: jsregexp.h:264
Interval Union(Interval that)
Definition: jsregexp.h:698
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5840
void set_bound_checked_up_to(int to)
Definition: jsregexp.h:1480
void set_backtrack(Label *backtrack)
Definition: jsregexp.h:1476
virtual void Accept(NodeVisitor *visitor)
Definition: jsregexp.cc:1641
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:3019
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start)
Definition: jsregexp.h:779
ChoiceNode(int expected_size, Zone *zone)
Definition: jsregexp.h:1064
const int kPointerSize
Definition: globals.h:220
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2916
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2450
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5399
virtual void Accept(NodeVisitor *visitor)
QuickCheckDetails * quick_check_performed()
Definition: jsregexp.h:1463
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)=0
static Handle< JSArray > SetLastMatchInfo(Handle< JSArray > last_match_info, Handle< String > subject, int capture_count, int32_t *match)
Definition: jsregexp.cc:685
void AddGuard(Guard *guard, Zone *zone)
Definition: jsregexp.cc:1547
static int GetCapture(FixedArray *array, int index)
Definition: jsregexp.h:199
void set_bm_info(bool not_at_start, BoyerMooreLookahead *bm)
Definition: jsregexp.h:673
LoopChoiceNode(bool body_can_be_zero_length, Zone *zone)
Definition: jsregexp.h:1164
void AddInverse(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5991
struct v8::internal::ActionNode::@10::@13 u_position_register
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2439
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.h:1520
void AddCaseEquivalents(ZoneList< CharacterRange > *ranges, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5333
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4224
static void IrregexpInitialize(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, int capture_register_count)
Definition: jsregexp.cc:514
static const int kFirstCapture
Definition: jsregexp.h:185
static Handle< Object > Compile(Handle< JSRegExp > re, Handle< String > pattern, Handle< String > flags, Zone *zone)
Definition: jsregexp.cc:168
struct v8::internal::ActionNode::@10::@15 u_empty_match_check
TriBool at_start()
Definition: jsregexp.h:1455
Label * backtrack()
Definition: jsregexp.h:1457
static Handle< Object > Exec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:232
Entry(uc16 from, uc16 to, OutSet *out_set)
Definition: jsregexp.h:352
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.h:981
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.h:631
static int IrregexpExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, int32_t *output, int output_size)
Definition: jsregexp.cc:550
Zone * zone() const
Definition: jsregexp.h:665
ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b)
Definition: jsregexp.h:1237
EndNode(Action action, Zone *zone)
Definition: jsregexp.h:977
void add_action(DeferredAction *new_action)
Definition: jsregexp.h:1471
static int Compare(uc16 a, uc16 b)
Definition: jsregexp.h:373
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3550
static int GetLastCaptureCount(FixedArray *array)
Definition: jsregexp.h:219
virtual void Accept(NodeVisitor *visitor)
BoyerMooreLookahead(int length, RegExpCompiler *compiler, Zone *zone)
Definition: jsregexp.cc:3696
NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg, int clear_capture_count, int clear_capture_start, Zone *zone)
Definition: jsregexp.h:1007
void SaveBMInfo(BoyerMooreLookahead *bm, bool not_at_start, int offset)
Definition: jsregexp.h:647
static const int kLastInput
Definition: jsregexp.h:184
void AddRange(CharacterRange range)
Definition: jsregexp.h:1537
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:620
void set_being_calculated(bool b)
Definition: jsregexp.h:1098
static void AtomCompile(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, Handle< String > match_pattern)
Definition: jsregexp.cc:256
void ResetCompilationState()
Definition: jsregexp.h:493
int characters_preloaded()
Definition: jsregexp.h:1460
DeferredAction * actions()
Definition: jsregexp.h:1435
activate correct semantics for inheriting readonliness false
Definition: flags.cc:141
static const int kNodeIsTooComplexForGreedyLoops
Definition: jsregexp.h:605
uc16 from()
Definition: jsregexp.h:354
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2372
DispatchTable * GetTable(bool ignore_case)
Definition: jsregexp.cc:1021
static int IrregexpMaxRegisterCount(FixedArray *re)
Definition: jsregexp.cc:483
RegExpNode * on_success()
Definition: jsregexp.h:724
static void SetIrregexpMaxRegisterCount(FixedArray *re, int value)
Definition: jsregexp.cc:489
BoyerMooreLookahead * bm_info(bool not_at_start)
Definition: jsregexp.h:661
static int AtomExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, int32_t *output, int output_size)
Definition: jsregexp.cc:281
static Handle< String > ToString(Handle< Object > value)
static const int kLastMatchOverhead
Definition: jsregexp.h:186
Entry()
Definition: jsregexp.h:351
static const int kHeaderSize
Definition: objects.h:2296
void set_from(uc16 value)
Definition: jsregexp.h:280
static const int kNone
Definition: jsregexp.h:713
int bound_checked_up_to()
Definition: jsregexp.h:1461
virtual int GreedyLoopTextLength()
Definition: jsregexp.h:606
void AddContinueAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3543
Guard(int reg, Relation op, int value)
Definition: jsregexp.h:1030
void set_to(uc16 value)
Definition: jsregexp.h:282
bool GetStoredPosition(int reg, int *cp_offset)
Definition: jsregexp.cc:1247
static AssertionNode * AfterNewline(RegExpNode *on_success)
Definition: jsregexp.h:909
int from() const
Definition: jsregexp.h:710
static AssertionNode * AtNonBoundary(RegExpNode *on_success)
Definition: jsregexp.h:906
BackReferenceNode(int start_reg, int end_reg, RegExpNode *on_success)
Definition: jsregexp.h:943
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.cc:5795
static bool UsesNativeRegExp()
Definition: jsregexp.h:48
static const int kMaxCopiesCodeGenerated
Definition: jsregexp.h:657
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2611
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)=0
static const int kFirstCaptureOffset
Definition: jsregexp.h:195
int Count(int map_number)
Definition: jsregexp.h:1293
uint16_t uc16
Definition: globals.h:259
static const int kLastInputOffset
Definition: jsregexp.h:193
void MakeCaseIndependent(bool is_ascii)
Definition: jsregexp.cc:3456
virtual void Accept(NodeVisitor *visitor)
ZoneList< GuardedAlternative > * alternatives()
Definition: jsregexp.h:1075
static const int kPayloadMask
Definition: jsregexp.h:306
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2837
void SetRest(int from_map)
Definition: jsregexp.h:1319
#define HEAP
Definition: isolate.h:1433
static const int kStartMarker
Definition: jsregexp.h:305
#define ASSERT_EQ(v1, v2)
Definition: checks.h:271
void ForEach(Callback *callback)
Definition: jsregexp.h:388
OutSet * Extend(unsigned value, Zone *zone)
Definition: jsregexp.cc:5573
static void DotPrint(const char *label, RegExpNode *node, bool ignore_case)
ZoneList< GuardedAlternative > * alternatives_
Definition: jsregexp.h:1104
ContainedInLattice AddRange(ContainedInLattice containment, const int *ranges, int ranges_length, Interval new_range)
Definition: jsregexp.cc:111
virtual int GreedyLoopTextLength()
Definition: jsregexp.cc:3475
static ByteArray * IrregexpByteCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:504
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3198
RegExpNode * set_replacement(RegExpNode *replacement)
Definition: jsregexp.h:637
static ActionNode * ClearCaptures(Interval range, RegExpNode *on_success)
Definition: jsregexp.cc:1584
void Set(int map_number, int character)
Definition: jsregexp.h:1299
static int IrregexpNumberOfRegisters(FixedArray *re)
Definition: jsregexp.cc:499
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2966
struct v8::internal::ActionNode::@10::@16 u_clear_captures
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
Definition: list-inl.h:38
Definition: jsregexp.h:349
bool Rationalize(bool ascii)
Definition: jsregexp.cc:2512
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.h:984
TextNode(ZoneList< TextElement > *elms, RegExpNode *on_success)
Definition: jsregexp.h:834
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 and VFP2 enable use of VFP2 instructions if available enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of MIPS FPU instructions if NULL
Definition: flags.cc:301
Object * get(int index)
Definition: objects-inl.h:1737
void AddLoopAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3536
CompilationResult(const char *error_message)
Definition: jsregexp.h:1622
SeqRegExpNode(RegExpNode *on_success)
Definition: jsregexp.h:722
virtual void Accept(NodeVisitor *visitor)=0
RegExpNode * loop_node()
Definition: jsregexp.h:1184
int EatsAtLeastHelper(int still_to_find, int recursion_depth, RegExpNode *ignore_this_node, bool not_at_start)
Definition: jsregexp.cc:2462
static TextElement Atom(RegExpAtom *atom)
Definition: jsregexp.cc:996
ZoneList< Guard * > * guards()
Definition: jsregexp.h:1051
virtual void Accept(NodeVisitor *visitor)
ElementInSetsRelation
Definition: jsregexp.h:249
static const int kFillInBMBudget
Definition: jsregexp.h:619
struct v8::internal::ActionNode::@10::@11 u_store_register
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3962
static Handle< Object > IrregexpExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:637
bool Matches(NodeInfo *that)
Definition: jsregexp.h:463
#define FOR_EACH_NODE_TYPE(VISIT)
Definition: jsregexp.h:402
T Min(T a, T b)
Definition: utils.h:229
void set_on_success(RegExpNode *node)
Definition: jsregexp.h:725
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 and VFP2 enable use of VFP2 instructions if available enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of MIPS FPU instructions if expose natives in global object expose gc extension number of stack frames to capture disable builtin natives files print a stack trace if an assertion failure occurs use random jit cookie to mask large constants trace lazy optimization use adaptive optimizations prepare for turning on always opt minimum length for automatic enable preparsing maximum number of optimization attempts before giving up cache prototype transitions automatically set the debug break flag when debugger commands are in the queue always cause a debug break before aborting maximum length of function source code printed in a stack trace max size of the new max size of the old max size of executable always perform global GCs print one trace line following each garbage collection do not print trace line after scavenger collection print more details following each garbage collection print amount of external allocated memory after each time it is adjusted flush code that we expect not to use again before full gc do incremental marking steps track object counts and memory usage use caching Perform compaction on every full GC Never perform compaction on full GC testing only Compact code space on full incremental collections Default seed for initializing random allows verbose printing trace parsing and preparsing Check icache flushes in ARM and MIPS simulator Stack alingment in bytes in print stack trace when throwing exceptions randomize hashes to avoid predictable hash Fixed seed to use to hash property activate a timer that switches between V8 threads testing_bool_flag float flag Seed used for threading test randomness A filename with extra code to be included in the Print usage including flags
Definition: flags.cc:495
virtual void Accept(NodeVisitor *visitor)
void Flush(RegExpCompiler *compiler, RegExpNode *successor)
Definition: jsregexp.cc:1435
int kUninitializedRegExpNodePlaceHolder
void set_quick_check_performed(QuickCheckDetails *d)
Definition: jsregexp.h:1482
DeferredAction(ActionNode::Type type, int reg)
Definition: jsregexp.h:1365
void InvalidateCurrentCharacter()
Definition: jsregexp.cc:3432
bool IsEverything(uc16 max)
Definition: jsregexp.h:284
static const Entry NoValue()
Definition: jsregexp.h:372
static void AddClassEscape(uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5231
OutSet * out_set()
Definition: jsregexp.h:360
static CharacterRange Singleton(uc16 value)
Definition: jsregexp.h:268
static AssertionNode * AtEnd(RegExpNode *on_success)
Definition: jsregexp.h:897
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2403
friend class DotPrinter
Definition: jsregexp.h:828
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start)
Definition: jsregexp.cc:3183
QuickCheckDetails(int characters)
Definition: jsregexp.h:522
void AddFromPreceding(NodeInfo *that)
Definition: jsregexp.h:472
void set_stop_node(RegExpNode *node)
Definition: jsregexp.h:1477
static CompilationResult Compile(RegExpCompileData *input, bool ignore_case, bool global, bool multiline, Handle< String > pattern, Handle< String > sample_subject, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:6042
virtual RegExpNode * FilterASCII(int depth)
Definition: jsregexp.cc:2899
virtual void FillInBMInfo(int offset, int recursion_depth, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:3001
static void SetCapture(FixedArray *array, int index, int to)
Definition: jsregexp.h:215
static AssertionNode * AtBoundary(RegExpNode *on_success)
Definition: jsregexp.h:903
bool mentions_reg(int reg)
Definition: jsregexp.cc:1236
void set_node(RegExpNode *node)
Definition: jsregexp.h:1050
const int MB
Definition: globals.h:208