v8  3.25.30(node0.11.13)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
jsregexp.h
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_JSREGEXP_H_
29 #define V8_JSREGEXP_H_
30 
31 #include "allocation.h"
32 #include "assembler.h"
33 #include "zone-inl.h"
34 
35 namespace v8 {
36 namespace internal {
37 
38 class NodeVisitor;
39 class RegExpCompiler;
40 class RegExpMacroAssembler;
41 class RegExpNode;
42 class RegExpTree;
43 class BoyerMooreLookahead;
44 
45 class RegExpImpl {
46  public:
47  // Whether V8 is compiled with native regexp support or not.
48  static bool UsesNativeRegExp() {
49 #ifdef V8_INTERPRETED_REGEXP
50  return false;
51 #else
52  return true;
53 #endif
54  }
55 
56  // Creates a regular expression literal in the old space.
57  // This function calls the garbage collector if necessary.
59  Handle<String> pattern,
61  bool* has_pending_exception);
62 
63  // Returns a string representation of a regular expression.
64  // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
65  // This function calls the garbage collector if necessary.
67 
68  // Parses the RegExp pattern and prepares the JSRegExp object with
69  // generic data and choice of implementation - as well as what
70  // the implementation wants to store in the data field.
71  // Returns false if compilation fails.
73  Handle<String> pattern,
75 
76  // See ECMA-262 section 15.10.6.2.
77  // This function calls the garbage collector if necessary.
78  static Handle<Object> Exec(Handle<JSRegExp> regexp,
79  Handle<String> subject,
80  int index,
81  Handle<JSArray> lastMatchInfo);
82 
83  // Prepares a JSRegExp object with Irregexp-specific data.
84  static void IrregexpInitialize(Handle<JSRegExp> re,
85  Handle<String> pattern,
87  int capture_register_count);
88 
89 
90  static void AtomCompile(Handle<JSRegExp> re,
91  Handle<String> pattern,
93  Handle<String> match_pattern);
94 
95 
96  static int AtomExecRaw(Handle<JSRegExp> regexp,
97  Handle<String> subject,
98  int index,
99  int32_t* output,
100  int output_size);
101 
102 
104  Handle<String> subject,
105  int index,
106  Handle<JSArray> lastMatchInfo);
107 
109 
110  // Prepare a RegExp for being executed one or more times (using
111  // IrregexpExecOnce) on the subject.
112  // This ensures that the regexp is compiled for the subject, and that
113  // the subject is flat.
114  // Returns the number of integer spaces required by IrregexpExecOnce
115  // as its "registers" argument. If the regexp cannot be compiled,
116  // an exception is set as pending, and this function returns negative.
117  static int IrregexpPrepare(Handle<JSRegExp> regexp,
118  Handle<String> subject);
119 
120  // Execute a regular expression on the subject, starting from index.
121  // If matching succeeds, return the number of matches. This can be larger
122  // than one in the case of global regular expressions.
123  // The captures and subcaptures are stored into the registers vector.
124  // If matching fails, returns RE_FAILURE.
125  // If execution fails, sets a pending exception and returns RE_EXCEPTION.
126  static int IrregexpExecRaw(Handle<JSRegExp> regexp,
127  Handle<String> subject,
128  int index,
129  int32_t* output,
130  int output_size);
131 
132  // Execute an Irregexp bytecode pattern.
133  // On a successful match, the result is a JSArray containing
134  // captured positions. On a failure, the result is the null value.
135  // Returns an empty handle in case of an exception.
137  Handle<String> subject,
138  int index,
139  Handle<JSArray> lastMatchInfo);
140 
141  // Set last match info. If match is NULL, then setting captures is omitted.
142  static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info,
143  Handle<String> subject,
144  int capture_count,
145  int32_t* match);
146 
147 
148  class GlobalCache {
149  public:
151  Handle<String> subject,
152  bool is_global,
153  Isolate* isolate);
154 
155  INLINE(~GlobalCache());
156 
157  // Fetch the next entry in the cache for global regexp match results.
158  // This does not set the last match info. Upon failure, NULL is returned.
159  // The cause can be checked with Result(). The previous
160  // result is still in available in memory when a failure happens.
161  INLINE(int32_t* FetchNext());
162 
163  INLINE(int32_t* LastSuccessfulMatch());
164 
165  INLINE(bool HasException()) { return num_matches_ < 0; }
166 
167  private:
168  int num_matches_;
169  int max_matches_;
170  int current_match_index_;
171  int registers_per_match_;
172  // Pointer to the last set of captures.
173  int32_t* register_array_;
174  int register_array_size_;
175  Handle<JSRegExp> regexp_;
176  Handle<String> subject_;
177  };
178 
179 
180  // Array index in the lastMatchInfo array.
181  static const int kLastCaptureCount = 0;
182  static const int kLastSubject = 1;
183  static const int kLastInput = 2;
184  static const int kFirstCapture = 3;
185  static const int kLastMatchOverhead = 3;
186 
187  // Direct offset into the lastMatchInfo array.
188  static const int kLastCaptureCountOffset =
190  static const int kLastSubjectOffset =
192  static const int kLastInputOffset =
194  static const int kFirstCaptureOffset =
196 
197  // Used to access the lastMatchInfo array.
198  static int GetCapture(FixedArray* array, int index) {
199  return Smi::cast(array->get(index + kFirstCapture))->value();
200  }
201 
202  static void SetLastCaptureCount(FixedArray* array, int to) {
203  array->set(kLastCaptureCount, Smi::FromInt(to));
204  }
205 
206  static void SetLastSubject(FixedArray* array, String* to) {
207  array->set(kLastSubject, to);
208  }
209 
210  static void SetLastInput(FixedArray* array, String* to) {
211  array->set(kLastInput, to);
212  }
213 
214  static void SetCapture(FixedArray* array, int index, int to) {
215  array->set(index + kFirstCapture, Smi::FromInt(to));
216  }
217 
218  static int GetLastCaptureCount(FixedArray* array) {
219  return Smi::cast(array->get(kLastCaptureCount))->value();
220  }
221 
222  // For acting on the JSRegExp data FixedArray.
223  static int IrregexpMaxRegisterCount(FixedArray* re);
224  static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
225  static int IrregexpNumberOfCaptures(FixedArray* re);
226  static int IrregexpNumberOfRegisters(FixedArray* re);
227  static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii);
228  static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii);
229 
230  // Limit the space regexps take up on the heap. In order to limit this we
231  // would like to keep track of the amount of regexp code on the heap. This
232  // is not tracked, however. As a conservative approximation we track the
233  // total regexp code compiled including code that has subsequently been freed
234  // and the total executable memory at any point.
235  static const int kRegExpExecutableMemoryLimit = 16 * MB;
236  static const int kRegWxpCompiledLimit = 1 * MB;
237 
238  private:
239  static bool CompileIrregexp(
240  Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
241  static inline bool EnsureCompiledIrregexp(
242  Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
243 };
244 
245 
246 // Represents the location of one element relative to the intersection of
247 // two sets. Corresponds to the four areas of a Venn diagram.
253 };
254 
255 
256 // Represents code units in the range from from_ to to_, both ends are
257 // inclusive.
259  public:
260  CharacterRange() : from_(0), to_(0) { }
261  // For compatibility with the CHECK_OK macro
262  CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
263  CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
264  static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
265  Zone* zone);
267  static inline CharacterRange Singleton(uc16 value) {
268  return CharacterRange(value, value);
269  }
270  static inline CharacterRange Range(uc16 from, uc16 to) {
271  ASSERT(from <= to);
272  return CharacterRange(from, to);
273  }
274  static inline CharacterRange Everything() {
275  return CharacterRange(0, 0xFFFF);
276  }
277  bool Contains(uc16 i) { return from_ <= i && i <= to_; }
278  uc16 from() const { return from_; }
279  void set_from(uc16 value) { from_ = value; }
280  uc16 to() const { return to_; }
281  void set_to(uc16 value) { to_ = value; }
282  bool is_valid() { return from_ <= to_; }
283  bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
284  bool IsSingleton() { return (from_ == to_); }
285  void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii,
286  Zone* zone);
287  static void Split(ZoneList<CharacterRange>* base,
288  Vector<const int> overlay,
289  ZoneList<CharacterRange>** included,
290  ZoneList<CharacterRange>** excluded,
291  Zone* zone);
292  // Whether a range list is in canonical form: Ranges ordered by from value,
293  // and ranges non-overlapping and non-adjacent.
294  static bool IsCanonical(ZoneList<CharacterRange>* ranges);
295  // Convert range list to canonical form. The characters covered by the ranges
296  // will still be the same, but no character is in more than one range, and
297  // adjacent ranges are merged. The resulting list may be shorter than the
298  // original, but cannot be longer.
299  static void Canonicalize(ZoneList<CharacterRange>* ranges);
300  // Negate the contents of a character range in canonical form.
301  static void Negate(ZoneList<CharacterRange>* src,
303  Zone* zone);
304  static const int kStartMarker = (1 << 24);
305  static const int kPayloadMask = (1 << 24) - 1;
306 
307  private:
308  uc16 from_;
309  uc16 to_;
310 };
311 
312 
313 // A set of unsigned integers that behaves especially well on small
314 // integers (< 32). May do zone-allocation.
315 class OutSet: public ZoneObject {
316  public:
317  OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
318  OutSet* Extend(unsigned value, Zone* zone);
319  bool Get(unsigned value);
320  static const unsigned kFirstLimit = 32;
321 
322  private:
323  // Destructively set a value in this set. In most cases you want
324  // to use Extend instead to ensure that only one instance exists
325  // that contains the same values.
326  void Set(unsigned value, Zone* zone);
327 
328  // The successors are a list of sets that contain the same values
329  // as this set and the one more value that is not present in this
330  // set.
331  ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
332 
333  OutSet(uint32_t first, ZoneList<unsigned>* remaining)
334  : first_(first), remaining_(remaining), successors_(NULL) { }
335  uint32_t first_;
336  ZoneList<unsigned>* remaining_;
337  ZoneList<OutSet*>* successors_;
338  friend class Trace;
339 };
340 
341 
342 // A mapping from integers, specified as ranges, to a set of integers.
343 // Used for mapping character ranges to choices.
344 class DispatchTable : public ZoneObject {
345  public:
346  explicit DispatchTable(Zone* zone) : tree_(zone) { }
347 
348  class Entry {
349  public:
350  Entry() : from_(0), to_(0), out_set_(NULL) { }
352  : from_(from), to_(to), out_set_(out_set) { }
353  uc16 from() { return from_; }
354  uc16 to() { return to_; }
355  void set_to(uc16 value) { to_ = value; }
356  void AddValue(int value, Zone* zone) {
357  out_set_ = out_set_->Extend(value, zone);
358  }
359  OutSet* out_set() { return out_set_; }
360  private:
361  uc16 from_;
362  uc16 to_;
363  OutSet* out_set_;
364  };
365 
366  class Config {
367  public:
368  typedef uc16 Key;
369  typedef Entry Value;
370  static const uc16 kNoKey;
371  static const Entry NoValue() { return Value(); }
372  static inline int Compare(uc16 a, uc16 b) {
373  if (a == b)
374  return 0;
375  else if (a < b)
376  return -1;
377  else
378  return 1;
379  }
380  };
381 
382  void AddRange(CharacterRange range, int value, Zone* zone);
383  OutSet* Get(uc16 value);
384  void Dump();
385 
386  template <typename Callback>
387  void ForEach(Callback* callback) {
388  return tree()->ForEach(callback);
389  }
390 
391  private:
392  // There can't be a static empty set since it allocates its
393  // successors in a zone and caches them.
394  OutSet* empty() { return &empty_; }
395  OutSet empty_;
396  ZoneSplayTree<Config>* tree() { return &tree_; }
397  ZoneSplayTree<Config> tree_;
398 };
399 
400 
401 #define FOR_EACH_NODE_TYPE(VISIT) \
402  VISIT(End) \
403  VISIT(Action) \
404  VISIT(Choice) \
405  VISIT(BackReference) \
406  VISIT(Assertion) \
407  VISIT(Text)
408 
409 
410 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
411  VISIT(Disjunction) \
412  VISIT(Alternative) \
413  VISIT(Assertion) \
414  VISIT(CharacterClass) \
415  VISIT(Atom) \
416  VISIT(Quantifier) \
417  VISIT(Capture) \
418  VISIT(Lookahead) \
419  VISIT(BackReference) \
420  VISIT(Empty) \
421  VISIT(Text)
422 
423 
424 #define FORWARD_DECLARE(Name) class RegExp##Name;
426 #undef FORWARD_DECLARE
427 
428 
429 class TextElement V8_FINAL BASE_EMBEDDED {
430  public:
431  enum TextType {
433  CHAR_CLASS
434  };
435 
436  static TextElement Atom(RegExpAtom* atom);
437  static TextElement CharClass(RegExpCharacterClass* char_class);
438 
439  int cp_offset() const { return cp_offset_; }
440  void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
441  int length() const;
442 
443  TextType text_type() const { return text_type_; }
444 
445  RegExpTree* tree() const { return tree_; }
446 
447  RegExpAtom* atom() const {
448  ASSERT(text_type() == ATOM);
449  return reinterpret_cast<RegExpAtom*>(tree());
450  }
451 
452  RegExpCharacterClass* char_class() const {
453  ASSERT(text_type() == CHAR_CLASS);
454  return reinterpret_cast<RegExpCharacterClass*>(tree());
455  }
456 
457  private:
458  TextElement(TextType text_type, RegExpTree* tree)
459  : cp_offset_(-1), text_type_(text_type), tree_(tree) {}
460 
461  int cp_offset_;
462  TextType text_type_;
463  RegExpTree* tree_;
464 };
465 
466 
467 class Trace;
468 
469 
470 struct NodeInfo {
477  at_end(false),
478  visited(false),
480 
481  // Returns true if the interests and assumptions of this node
482  // matches the given one.
483  bool Matches(NodeInfo* that) {
484  return (at_end == that->at_end) &&
488  }
489 
490  // Updates the interests of this node given the interests of the
491  // node preceding it.
493  at_end |= that->at_end;
497  }
498 
499  bool HasLookbehind() {
500  return follows_word_interest ||
503  }
504 
505  // Sets the interests of this node to include the interests of the
506  // following node.
511  }
512 
514  being_analyzed = false;
515  been_analyzed = false;
516  }
517 
518  bool being_analyzed: 1;
519  bool been_analyzed: 1;
520 
521  // These bits are set of this node has to know what the preceding
522  // character was.
526 
527  bool at_end: 1;
528  bool visited: 1;
530 };
531 
532 
533 // Details of a quick mask-compare check that can look ahead in the
534 // input stream.
536  public:
538  : characters_(0),
539  mask_(0),
540  value_(0),
541  cannot_match_(false) { }
543  : characters_(characters),
544  mask_(0),
545  value_(0),
546  cannot_match_(false) { }
547  bool Rationalize(bool ascii);
548  // Merge in the information from another branch of an alternation.
549  void Merge(QuickCheckDetails* other, int from_index);
550  // Advance the current position by some amount.
551  void Advance(int by, bool ascii);
552  void Clear();
553  bool cannot_match() { return cannot_match_; }
554  void set_cannot_match() { cannot_match_ = true; }
555  struct Position {
560  };
561  int characters() { return characters_; }
562  void set_characters(int characters) { characters_ = characters; }
563  Position* positions(int index) {
564  ASSERT(index >= 0);
565  ASSERT(index < characters_);
566  return positions_ + index;
567  }
568  uint32_t mask() { return mask_; }
569  uint32_t value() { return value_; }
570 
571  private:
572  // How many characters do we have quick check information from. This is
573  // the same for all branches of a choice node.
574  int characters_;
575  Position positions_[4];
576  // These values are the condensate of the above array after Rationalize().
577  uint32_t mask_;
578  uint32_t value_;
579  // If set to true, there is no way this quick check can match at all.
580  // E.g., if it requires to be at the start of the input, and isn't.
581  bool cannot_match_;
582 };
583 
584 
586 
587 
588 class RegExpNode: public ZoneObject {
589  public:
590  explicit RegExpNode(Zone* zone)
591  : replacement_(NULL), trace_count_(0), zone_(zone) {
592  bm_info_[0] = bm_info_[1] = NULL;
593  }
594  virtual ~RegExpNode();
595  virtual void Accept(NodeVisitor* visitor) = 0;
596  // Generates a goto to this node or actually generates the code at this point.
597  virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
598  // How many characters must this node consume at a minimum in order to
599  // succeed. If we have found at least 'still_to_find' characters that
600  // must be consumed there is no need to ask any following nodes whether
601  // they are sure to eat any more characters. The not_at_start argument is
602  // used to indicate that we know we are not at the start of the input. In
603  // this case anchored branches will always fail and can be ignored when
604  // determining how many characters are consumed on success.
605  virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start) = 0;
606  // Emits some quick code that checks whether the preloaded characters match.
607  // Falls through on certain failure, jumps to the label on possible success.
608  // If the node cannot make a quick check it does nothing and returns false.
609  bool EmitQuickCheck(RegExpCompiler* compiler,
610  Trace* trace,
611  bool preload_has_checked_bounds,
612  Label* on_possible_success,
613  QuickCheckDetails* details_return,
614  bool fall_through_on_failure);
615  // For a given number of characters this returns a mask and a value. The
616  // next n characters are anded with the mask and compared with the value.
617  // A comparison failure indicates the node cannot match the next n characters.
618  // A comparison success indicates the node may match.
619  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
620  RegExpCompiler* compiler,
621  int characters_filled_in,
622  bool not_at_start) = 0;
623  static const int kNodeIsTooComplexForGreedyLoops = -1;
625  // Only returns the successor for a text node of length 1 that matches any
626  // character and that has no guards on it.
628  RegExpCompiler* compiler) {
629  return NULL;
630  }
631 
632  // Collects information on the possible code units (mod 128) that can match if
633  // we look forward. This is used for a Boyer-Moore-like string searching
634  // implementation. TODO(erikcorry): This should share more code with
635  // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
636  // the number of nodes we are willing to look at in order to create this data.
637  static const int kRecursionBudget = 200;
638  virtual void FillInBMInfo(int offset,
639  int budget,
641  bool not_at_start) {
642  UNREACHABLE();
643  }
644 
645  // If we know that the input is ASCII then there are some nodes that can
646  // never match. This method returns a node that can be substituted for
647  // itself, or NULL if the node can never match.
648  virtual RegExpNode* FilterASCII(int depth, bool ignore_case) { return this; }
649  // Helper for FilterASCII.
651  ASSERT(info()->replacement_calculated);
652  return replacement_;
653  }
655  info()->replacement_calculated = true;
657  return replacement; // For convenience.
658  }
659 
660  // We want to avoid recalculating the lookahead info, so we store it on the
661  // node. Only info that is for this node is stored. We can tell that the
662  // info is for this node when offset == 0, so the information is calculated
663  // relative to this node.
664  void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
665  if (offset == 0) set_bm_info(not_at_start, bm);
666  }
667 
668  Label* label() { return &label_; }
669  // If non-generic code is generated for a node (i.e. the node is not at the
670  // start of the trace) then it cannot be reused. This variable sets a limit
671  // on how often we allow that to happen before we insist on starting a new
672  // trace and generating generic code for a node that can be reused by flushing
673  // the deferred actions in the current trace and generating a goto.
674  static const int kMaxCopiesCodeGenerated = 10;
675 
676  NodeInfo* info() { return &info_; }
677 
678  BoyerMooreLookahead* bm_info(bool not_at_start) {
679  return bm_info_[not_at_start ? 1 : 0];
680  }
681 
682  Zone* zone() const { return zone_; }
683 
684  protected:
687 
688  LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
689 
690  void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
691  bm_info_[not_at_start ? 1 : 0] = bm;
692  }
693 
694  private:
695  static const int kFirstCharBudget = 10;
696  Label label_;
697  NodeInfo info_;
698  // This variable keeps track of how many times code has been generated for
699  // this node (in different traces). We don't keep track of where the
700  // generated code is located unless the code is generated at the start of
701  // a trace, in which case it is generic and can be reused by flushing the
702  // deferred operations in the current trace and generating a goto.
703  int trace_count_;
704  BoyerMooreLookahead* bm_info_[2];
705 
706  Zone* zone_;
707 };
708 
709 
710 // A simple closed interval.
711 class Interval {
712  public:
713  Interval() : from_(kNone), to_(kNone) { }
714  Interval(int from, int to) : from_(from), to_(to) { }
716  if (that.from_ == kNone)
717  return *this;
718  else if (from_ == kNone)
719  return that;
720  else
721  return Interval(Min(from_, that.from_), Max(to_, that.to_));
722  }
723  bool Contains(int value) {
724  return (from_ <= value) && (value <= to_);
725  }
726  bool is_empty() { return from_ == kNone; }
727  int from() const { return from_; }
728  int to() const { return to_; }
729  static Interval Empty() { return Interval(); }
730  static const int kNone = -1;
731  private:
732  int from_;
733  int to_;
734 };
735 
736 
737 class SeqRegExpNode: public RegExpNode {
738  public:
740  : RegExpNode(on_success->zone()), on_success_(on_success) { }
741  RegExpNode* on_success() { return on_success_; }
742  void set_on_success(RegExpNode* node) { on_success_ = node; }
743  virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
744  virtual void FillInBMInfo(int offset,
745  int budget,
747  bool not_at_start) {
748  on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start);
749  if (offset == 0) set_bm_info(not_at_start, bm);
750  }
751 
752  protected:
753  RegExpNode* FilterSuccessor(int depth, bool ignore_case);
754 
755  private:
756  RegExpNode* on_success_;
757 };
758 
759 
760 class ActionNode: public SeqRegExpNode {
761  public:
762  enum ActionType {
770  };
771  static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
773  static ActionNode* StorePosition(int reg,
774  bool is_capture,
777  static ActionNode* BeginSubmatch(int stack_pointer_reg,
778  int position_reg,
780  static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
781  int restore_reg,
782  int clear_capture_count,
783  int clear_capture_from,
787  int repetition_limit,
789  virtual void Accept(NodeVisitor* visitor);
790  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
791  virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
793  RegExpCompiler* compiler,
794  int filled_in,
795  bool not_at_start) {
797  details, compiler, filled_in, not_at_start);
798  }
799  virtual void FillInBMInfo(int offset,
800  int budget,
802  bool not_at_start);
803  ActionType action_type() { return action_type_; }
804  // TODO(erikcorry): We should allow some action nodes in greedy loops.
806 
807  private:
808  union {
809  struct {
810  int reg;
811  int value;
813  struct {
814  int reg;
816  struct {
817  int reg;
820  struct {
825  } u_submatch;
826  struct {
831  struct {
833  int range_to;
835  } data_;
837  : SeqRegExpNode(on_success),
838  action_type_(action_type) { }
839  ActionType action_type_;
840  friend class DotPrinter;
841 };
842 
843 
844 class TextNode: public SeqRegExpNode {
845  public:
848  : SeqRegExpNode(on_success),
849  elms_(elms) { }
850  TextNode(RegExpCharacterClass* that,
852  : SeqRegExpNode(on_success),
853  elms_(new(zone()) ZoneList<TextElement>(1, zone())) {
854  elms_->Add(TextElement::CharClass(that), zone());
855  }
856  virtual void Accept(NodeVisitor* visitor);
857  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
858  virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
859  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
860  RegExpCompiler* compiler,
861  int characters_filled_in,
862  bool not_at_start);
863  ZoneList<TextElement>* elements() { return elms_; }
864  void MakeCaseIndependent(bool is_ascii);
865  virtual int GreedyLoopTextLength();
867  RegExpCompiler* compiler);
868  virtual void FillInBMInfo(int offset,
869  int budget,
871  bool not_at_start);
872  void CalculateOffsets();
873  virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
874 
875  private:
876  enum TextEmitPassType {
877  NON_ASCII_MATCH, // Check for characters that can't match.
878  SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
879  NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
880  CASE_CHARACTER_MATCH, // Case-independent single character check.
881  CHARACTER_CLASS_MATCH // Character class.
882  };
883  static bool SkipPass(int pass, bool ignore_case);
884  static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
885  static const int kLastPass = CHARACTER_CLASS_MATCH;
886  void TextEmitPass(RegExpCompiler* compiler,
887  TextEmitPassType pass,
888  bool preloaded,
889  Trace* trace,
890  bool first_element_checked,
891  int* checked_up_to);
892  int Length();
893  ZoneList<TextElement>* elms_;
894 };
895 
896 
898  public:
905  };
907  return new(on_success->zone()) AssertionNode(AT_END, on_success);
908  }
910  return new(on_success->zone()) AssertionNode(AT_START, on_success);
911  }
913  return new(on_success->zone()) AssertionNode(AT_BOUNDARY, on_success);
914  }
916  return new(on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success);
917  }
919  return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success);
920  }
921  virtual void Accept(NodeVisitor* visitor);
922  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
923  virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
924  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
925  RegExpCompiler* compiler,
926  int filled_in,
927  bool not_at_start);
928  virtual void FillInBMInfo(int offset,
929  int budget,
931  bool not_at_start);
932  AssertionType assertion_type() { return assertion_type_; }
933 
934  private:
935  void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
936  enum IfPrevious { kIsNonWord, kIsWord };
937  void BacktrackIfPrevious(RegExpCompiler* compiler,
938  Trace* trace,
939  IfPrevious backtrack_if_previous);
940  AssertionNode(AssertionType t, RegExpNode* on_success)
941  : SeqRegExpNode(on_success), assertion_type_(t) { }
942  AssertionType assertion_type_;
943 };
944 
945 
947  public:
948  BackReferenceNode(int start_reg,
949  int end_reg,
951  : SeqRegExpNode(on_success),
952  start_reg_(start_reg),
953  end_reg_(end_reg) { }
954  virtual void Accept(NodeVisitor* visitor);
955  int start_register() { return start_reg_; }
956  int end_register() { return end_reg_; }
957  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
958  virtual int EatsAtLeast(int still_to_find,
959  int recursion_depth,
960  bool not_at_start);
962  RegExpCompiler* compiler,
963  int characters_filled_in,
964  bool not_at_start) {
965  return;
966  }
967  virtual void FillInBMInfo(int offset,
968  int budget,
970  bool not_at_start);
971 
972  private:
973  int start_reg_;
974  int end_reg_;
975 };
976 
977 
978 class EndNode: public RegExpNode {
979  public:
981  explicit EndNode(Action action, Zone* zone)
982  : RegExpNode(zone), action_(action) { }
983  virtual void Accept(NodeVisitor* visitor);
984  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
985  virtual int EatsAtLeast(int still_to_find,
986  int recursion_depth,
987  bool not_at_start) { return 0; }
989  RegExpCompiler* compiler,
990  int characters_filled_in,
991  bool not_at_start) {
992  // Returning 0 from EatsAtLeast should ensure we never get here.
993  UNREACHABLE();
994  }
995  virtual void FillInBMInfo(int offset,
996  int budget,
998  bool not_at_start) {
999  // Returning 0 from EatsAtLeast should ensure we never get here.
1000  UNREACHABLE();
1001  }
1002 
1003  private:
1004  Action action_;
1005 };
1006 
1007 
1009  public:
1010  NegativeSubmatchSuccess(int stack_pointer_reg,
1011  int position_reg,
1012  int clear_capture_count,
1013  int clear_capture_start,
1014  Zone* zone)
1016  stack_pointer_register_(stack_pointer_reg),
1017  current_position_register_(position_reg),
1018  clear_capture_count_(clear_capture_count),
1019  clear_capture_start_(clear_capture_start) { }
1020  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1021 
1022  private:
1023  int stack_pointer_register_;
1024  int current_position_register_;
1025  int clear_capture_count_;
1026  int clear_capture_start_;
1027 };
1028 
1029 
1030 class Guard: public ZoneObject {
1031  public:
1032  enum Relation { LT, GEQ };
1034  : reg_(reg),
1035  op_(op),
1036  value_(value) { }
1037  int reg() { return reg_; }
1038  Relation op() { return op_; }
1039  int value() { return value_; }
1040 
1041  private:
1042  int reg_;
1043  Relation op_;
1044  int value_;
1045 };
1046 
1047 
1049  public:
1050  explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
1051  void AddGuard(Guard* guard, Zone* zone);
1052  RegExpNode* node() { return node_; }
1053  void set_node(RegExpNode* node) { node_ = node; }
1054  ZoneList<Guard*>* guards() { return guards_; }
1055 
1056  private:
1057  RegExpNode* node_;
1058  ZoneList<Guard*>* guards_;
1059 };
1060 
1061 
1062 class AlternativeGeneration;
1063 
1064 
1065 class ChoiceNode: public RegExpNode {
1066  public:
1068  : RegExpNode(zone),
1069  alternatives_(new(zone)
1070  ZoneList<GuardedAlternative>(expected_size, zone)),
1071  table_(NULL),
1072  not_at_start_(false),
1073  being_calculated_(false) { }
1074  virtual void Accept(NodeVisitor* visitor);
1076  alternatives()->Add(node, zone());
1077  }
1079  DispatchTable* GetTable(bool ignore_case);
1080  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1081  virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
1082  int EatsAtLeastHelper(int still_to_find,
1083  int budget,
1084  RegExpNode* ignore_this_node,
1085  bool not_at_start);
1086  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1087  RegExpCompiler* compiler,
1088  int characters_filled_in,
1089  bool not_at_start);
1090  virtual void FillInBMInfo(int offset,
1091  int budget,
1092  BoyerMooreLookahead* bm,
1093  bool not_at_start);
1094 
1095  bool being_calculated() { return being_calculated_; }
1096  bool not_at_start() { return not_at_start_; }
1097  void set_not_at_start() { not_at_start_ = true; }
1098  void set_being_calculated(bool b) { being_calculated_ = b; }
1099  virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
1100  virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
1101 
1102  protected:
1105 
1106  private:
1108  friend class Analysis;
1109  void GenerateGuard(RegExpMacroAssembler* macro_assembler,
1110  Guard* guard,
1111  Trace* trace);
1112  int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
1113  void EmitOutOfLineContinuation(RegExpCompiler* compiler,
1114  Trace* trace,
1115  GuardedAlternative alternative,
1116  AlternativeGeneration* alt_gen,
1117  int preload_characters,
1118  bool next_expects_preload);
1119  DispatchTable* table_;
1120  // If true, this node is never checked at the start of the input.
1121  // Allows a new trace to start with at_start() set to false.
1122  bool not_at_start_;
1123  bool being_calculated_;
1124 };
1125 
1126 
1128  public:
1130  GuardedAlternative then_do_this,
1131  Zone* zone)
1132  : ChoiceNode(2, zone) {
1133  AddAlternative(this_must_fail);
1134  AddAlternative(then_do_this);
1135  }
1136  virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
1137  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1138  RegExpCompiler* compiler,
1139  int characters_filled_in,
1140  bool not_at_start);
1141  virtual void FillInBMInfo(int offset,
1142  int budget,
1143  BoyerMooreLookahead* bm,
1144  bool not_at_start) {
1145  alternatives_->at(1).node()->FillInBMInfo(
1146  offset, budget - 1, bm, not_at_start);
1147  if (offset == 0) set_bm_info(not_at_start, bm);
1148  }
1149  // For a negative lookahead we don't emit the quick check for the
1150  // alternative that is expected to fail. This is because quick check code
1151  // starts by loading enough characters for the alternative that takes fewest
1152  // characters, but on a negative lookahead the negative branch did not take
1153  // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1154  virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1155  virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
1156 };
1157 
1158 
1160  public:
1162  : ChoiceNode(2, zone),
1163  loop_node_(NULL),
1164  continue_node_(NULL),
1165  body_can_be_zero_length_(body_can_be_zero_length) { }
1168  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1169  virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
1170  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1171  RegExpCompiler* compiler,
1172  int characters_filled_in,
1173  bool not_at_start);
1174  virtual void FillInBMInfo(int offset,
1175  int budget,
1176  BoyerMooreLookahead* bm,
1177  bool not_at_start);
1178  RegExpNode* loop_node() { return loop_node_; }
1179  RegExpNode* continue_node() { return continue_node_; }
1180  bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1181  virtual void Accept(NodeVisitor* visitor);
1182  virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
1183 
1184  private:
1185  // AddAlternative is made private for loop nodes because alternatives
1186  // should not be added freely, we need to keep track of which node
1187  // goes back to the node itself.
1188  void AddAlternative(GuardedAlternative node) {
1190  }
1191 
1192  RegExpNode* loop_node_;
1193  RegExpNode* continue_node_;
1194  bool body_can_be_zero_length_;
1195 };
1196 
1197 
1198 // Improve the speed that we scan for an initial point where a non-anchored
1199 // regexp can match by using a Boyer-Moore-like table. This is done by
1200 // identifying non-greedy non-capturing loops in the nodes that eat any
1201 // character one at a time. For example in the middle of the regexp
1202 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly
1203 // inserted at the start of any non-anchored regexp.
1204 //
1205 // When we have found such a loop we look ahead in the nodes to find the set of
1206 // characters that can come at given distances. For example for the regexp
1207 // /.?foo/ we know that there are at least 3 characters ahead of us, and the
1208 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in
1209 // the lookahead info where the set of characters is reasonably constrained. In
1210 // our example this is from index 1 to 2 (0 is not constrained). We can now
1211 // look 3 characters ahead and if we don't find one of [f, o] (the union of
1212 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
1213 //
1214 // For Unicode input strings we do the same, but modulo 128.
1215 //
1216 // We also look at the first string fed to the regexp and use that to get a hint
1217 // of the character frequencies in the inputs. This affects the assessment of
1218 // whether the set of characters is 'reasonably constrained'.
1219 //
1220 // We also have another lookahead mechanism (called quick check in the code),
1221 // which uses a wide load of multiple characters followed by a mask and compare
1222 // to determine whether a match is possible at this point.
1224  kNotYet = 0,
1227  kLatticeUnknown = 3 // Can also mean both in and out.
1228 };
1229 
1230 
1232  return static_cast<ContainedInLattice>(a | b);
1233 }
1234 
1235 
1237  const int* ranges,
1238  int ranges_size,
1239  Interval new_range);
1240 
1241 
1243  public:
1245  : map_(new(zone) ZoneList<bool>(kMapSize, zone)),
1246  map_count_(0),
1247  w_(kNotYet),
1248  s_(kNotYet),
1249  d_(kNotYet),
1250  surrogate_(kNotYet) {
1251  for (int i = 0; i < kMapSize; i++) {
1252  map_->Add(false, zone);
1253  }
1254  }
1255 
1256  bool& at(int i) { return map_->at(i); }
1257 
1258  static const int kMapSize = 128;
1259  static const int kMask = kMapSize - 1;
1260 
1261  int map_count() const { return map_count_; }
1262 
1263  void Set(int character);
1264  void SetInterval(const Interval& interval);
1265  void SetAll();
1266  bool is_non_word() { return w_ == kLatticeOut; }
1267  bool is_word() { return w_ == kLatticeIn; }
1268 
1269  private:
1270  ZoneList<bool>* map_;
1271  int map_count_; // Number of set bits in the map.
1272  ContainedInLattice w_; // The \w character class.
1273  ContainedInLattice s_; // The \s character class.
1274  ContainedInLattice d_; // The \d character class.
1275  ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
1276 };
1277 
1278 
1280  public:
1282 
1283  int length() { return length_; }
1284  int max_char() { return max_char_; }
1285  RegExpCompiler* compiler() { return compiler_; }
1286 
1287  int Count(int map_number) {
1288  return bitmaps_->at(map_number)->map_count();
1289  }
1290 
1291  BoyerMoorePositionInfo* at(int i) { return bitmaps_->at(i); }
1292 
1293  void Set(int map_number, int character) {
1294  if (character > max_char_) return;
1295  BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1296  info->Set(character);
1297  }
1298 
1299  void SetInterval(int map_number, const Interval& interval) {
1300  if (interval.from() > max_char_) return;
1301  BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1302  if (interval.to() > max_char_) {
1303  info->SetInterval(Interval(interval.from(), max_char_));
1304  } else {
1305  info->SetInterval(interval);
1306  }
1307  }
1308 
1309  void SetAll(int map_number) {
1310  bitmaps_->at(map_number)->SetAll();
1311  }
1312 
1313  void SetRest(int from_map) {
1314  for (int i = from_map; i < length_; i++) SetAll(i);
1315  }
1317 
1318  private:
1319  // This is the value obtained by EatsAtLeast. If we do not have at least this
1320  // many characters left in the sample string then the match is bound to fail.
1321  // Therefore it is OK to read a character this far ahead of the current match
1322  // point.
1323  int length_;
1324  RegExpCompiler* compiler_;
1325  // 0x7f for ASCII, 0xffff for UTF-16.
1326  int max_char_;
1328 
1329  int GetSkipTable(int min_lookahead,
1330  int max_lookahead,
1331  Handle<ByteArray> boolean_skip_table);
1332  bool FindWorthwhileInterval(int* from, int* to);
1333  int FindBestInterval(
1334  int max_number_of_chars, int old_biggest_points, int* from, int* to);
1335 };
1336 
1337 
1338 // There are many ways to generate code for a node. This class encapsulates
1339 // the current way we should be generating. In other words it encapsulates
1340 // the current state of the code generator. The effect of this is that we
1341 // generate code for paths that the matcher can take through the regular
1342 // expression. A given node in the regexp can be code-generated several times
1343 // as it can be part of several traces. For example for the regexp:
1344 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
1345 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code
1346 // to match foo is generated only once (the traces have a common prefix). The
1347 // code to store the capture is deferred and generated (twice) after the places
1348 // where baz has been matched.
1349 class Trace {
1350  public:
1351  // A value for a property that is either known to be true, know to be false,
1352  // or not known.
1353  enum TriBool {
1355  };
1356 
1358  public:
1360  : action_type_(action_type), reg_(reg), next_(NULL) { }
1361  DeferredAction* next() { return next_; }
1362  bool Mentions(int reg);
1363  int reg() { return reg_; }
1364  ActionNode::ActionType action_type() { return action_type_; }
1365  private:
1366  ActionNode::ActionType action_type_;
1367  int reg_;
1368  DeferredAction* next_;
1369  friend class Trace;
1370  };
1371 
1373  public:
1375  : DeferredAction(ActionNode::STORE_POSITION, reg),
1376  cp_offset_(trace->cp_offset()),
1377  is_capture_(is_capture) { }
1378  int cp_offset() { return cp_offset_; }
1379  bool is_capture() { return is_capture_; }
1380  private:
1381  int cp_offset_;
1382  bool is_capture_;
1383  void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
1384  };
1385 
1387  public:
1389  : DeferredAction(ActionNode::SET_REGISTER, reg),
1390  value_(value) { }
1391  int value() { return value_; }
1392  private:
1393  int value_;
1394  };
1395 
1397  public:
1399  : DeferredAction(ActionNode::CLEAR_CAPTURES, -1),
1400  range_(range) { }
1401  Interval range() { return range_; }
1402  private:
1403  Interval range_;
1404  };
1405 
1407  public:
1409  : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
1410  };
1411 
1413  : cp_offset_(0),
1414  actions_(NULL),
1415  backtrack_(NULL),
1416  stop_node_(NULL),
1417  loop_label_(NULL),
1418  characters_preloaded_(0),
1419  bound_checked_up_to_(0),
1420  flush_budget_(100),
1421  at_start_(UNKNOWN) { }
1422 
1423  // End the trace. This involves flushing the deferred actions in the trace
1424  // and pushing a backtrack location onto the backtrack stack. Once this is
1425  // done we can start a new trace or go to one that has already been
1426  // generated.
1427  void Flush(RegExpCompiler* compiler, RegExpNode* successor);
1428  int cp_offset() { return cp_offset_; }
1429  DeferredAction* actions() { return actions_; }
1430  // A trivial trace is one that has no deferred actions or other state that
1431  // affects the assumptions used when generating code. There is no recorded
1432  // backtrack location in a trivial trace, so with a trivial trace we will
1433  // generate code that, on a failure to match, gets the backtrack location
1434  // from the backtrack stack rather than using a direct jump instruction. We
1435  // always start code generation with a trivial trace and non-trivial traces
1436  // are created as we emit code for nodes or add to the list of deferred
1437  // actions in the trace. The location of the code generated for a node using
1438  // a trivial trace is recorded in a label in the node so that gotos can be
1439  // generated to that code.
1440  bool is_trivial() {
1441  return backtrack_ == NULL &&
1442  actions_ == NULL &&
1443  cp_offset_ == 0 &&
1444  characters_preloaded_ == 0 &&
1445  bound_checked_up_to_ == 0 &&
1446  quick_check_performed_.characters() == 0 &&
1447  at_start_ == UNKNOWN;
1448  }
1449  TriBool at_start() { return at_start_; }
1450  void set_at_start(bool at_start) {
1451  at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE;
1452  }
1453  Label* backtrack() { return backtrack_; }
1454  Label* loop_label() { return loop_label_; }
1455  RegExpNode* stop_node() { return stop_node_; }
1456  int characters_preloaded() { return characters_preloaded_; }
1457  int bound_checked_up_to() { return bound_checked_up_to_; }
1458  int flush_budget() { return flush_budget_; }
1459  QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
1460  bool mentions_reg(int reg);
1461  // Returns true if a deferred position store exists to the specified
1462  // register and stores the offset in the out-parameter. Otherwise
1463  // returns false.
1464  bool GetStoredPosition(int reg, int* cp_offset);
1465  // These set methods and AdvanceCurrentPositionInTrace should be used only on
1466  // new traces - the intention is that traces are immutable after creation.
1467  void add_action(DeferredAction* new_action) {
1468  ASSERT(new_action->next_ == NULL);
1469  new_action->next_ = actions_;
1470  actions_ = new_action;
1471  }
1472  void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
1473  void set_stop_node(RegExpNode* node) { stop_node_ = node; }
1474  void set_loop_label(Label* label) { loop_label_ = label; }
1475  void set_characters_preloaded(int count) { characters_preloaded_ = count; }
1476  void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
1477  void set_flush_budget(int to) { flush_budget_ = to; }
1479  quick_check_performed_ = *d;
1480  }
1482  void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
1483 
1484  private:
1485  int FindAffectedRegisters(OutSet* affected_registers, Zone* zone);
1486  void PerformDeferredActions(RegExpMacroAssembler* macro,
1487  int max_register,
1488  OutSet& affected_registers,
1489  OutSet* registers_to_pop,
1490  OutSet* registers_to_clear,
1491  Zone* zone);
1492  void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
1493  int max_register,
1494  OutSet& registers_to_pop,
1495  OutSet& registers_to_clear);
1496  int cp_offset_;
1497  DeferredAction* actions_;
1498  Label* backtrack_;
1499  RegExpNode* stop_node_;
1500  Label* loop_label_;
1501  int characters_preloaded_;
1502  int bound_checked_up_to_;
1503  QuickCheckDetails quick_check_performed_;
1504  int flush_budget_;
1505  TriBool at_start_;
1506 };
1507 
1508 
1510  public:
1511  virtual ~NodeVisitor() { }
1512 #define DECLARE_VISIT(Type) \
1513  virtual void Visit##Type(Type##Node* that) = 0;
1515 #undef DECLARE_VISIT
1516  virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
1517 };
1518 
1519 
1520 // Node visitor used to add the start set of the alternatives to the
1521 // dispatch table of a choice node.
1523  public:
1525  Zone* zone)
1526  : table_(table),
1527  choice_index_(-1),
1528  ignore_case_(ignore_case),
1529  zone_(zone) { }
1530 
1531  void BuildTable(ChoiceNode* node);
1532 
1533  void AddRange(CharacterRange range) {
1534  table()->AddRange(range, choice_index_, zone_);
1535  }
1536 
1537  void AddInverse(ZoneList<CharacterRange>* ranges);
1538 
1539 #define DECLARE_VISIT(Type) \
1540  virtual void Visit##Type(Type##Node* that);
1542 #undef DECLARE_VISIT
1543 
1544  DispatchTable* table() { return table_; }
1545  void set_choice_index(int value) { choice_index_ = value; }
1546 
1547  protected:
1552 };
1553 
1554 
1555 // Assertion propagation moves information about assertions such as
1556 // \b to the affected nodes. For instance, in /.\b./ information must
1557 // be propagated to the first '.' that whatever follows needs to know
1558 // if it matched a word or a non-word, and to the second '.' that it
1559 // has to check if it succeeds a word or non-word. In this case the
1560 // result will be something like:
1561 //
1562 // +-------+ +------------+
1563 // | . | | . |
1564 // +-------+ ---> +------------+
1565 // | word? | | check word |
1566 // +-------+ +------------+
1567 class Analysis: public NodeVisitor {
1568  public:
1569  Analysis(bool ignore_case, bool is_ascii)
1570  : ignore_case_(ignore_case),
1571  is_ascii_(is_ascii),
1572  error_message_(NULL) { }
1573  void EnsureAnalyzed(RegExpNode* node);
1574 
1575 #define DECLARE_VISIT(Type) \
1576  virtual void Visit##Type(Type##Node* that);
1578 #undef DECLARE_VISIT
1579  virtual void VisitLoopChoice(LoopChoiceNode* that);
1580 
1581  bool has_failed() { return error_message_ != NULL; }
1582  const char* error_message() {
1583  ASSERT(error_message_ != NULL);
1584  return error_message_;
1585  }
1586  void fail(const char* error_message) {
1587  error_message_ = error_message;
1588  }
1589 
1590  private:
1591  bool ignore_case_;
1592  bool is_ascii_;
1593  const char* error_message_;
1594 
1595  DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
1596 };
1597 
1598 
1601  : tree(NULL),
1602  node(NULL),
1603  simple(true),
1605  capture_count(0) { }
1608  bool simple;
1612 };
1613 
1614 
1615 class RegExpEngine: public AllStatic {
1616  public:
1618  CompilationResult(Isolate* isolate, const char* error_message)
1619  : error_message(error_message),
1620  code(isolate->heap()->the_hole_value()),
1621  num_registers(0) {}
1622  CompilationResult(Object* code, int registers)
1623  : error_message(NULL),
1624  code(code),
1625  num_registers(registers) {}
1626  const char* error_message;
1629  };
1630 
1632  bool ignore_case,
1633  bool global,
1634  bool multiline,
1635  Handle<String> pattern,
1636  Handle<String> sample_subject,
1637  bool is_ascii, Zone* zone);
1638 
1639  static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1640 };
1641 
1642 
1643 } } // namespace v8::internal
1644 
1645 #endif // V8_JSREGEXP_H_
#define FORWARD_DECLARE(Name)
Definition: jsregexp.h:424
void SetInterval(const Interval &interval)
Definition: jsregexp.cc:3629
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter NULL
Definition: flags.cc:269
OutSet * Get(uc16 value)
Definition: jsregexp.cc:5675
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.h:648
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5803
Analysis(bool ignore_case, bool is_ascii)
Definition: jsregexp.h:1569
struct v8::internal::ActionNode::@17::@22 u_empty_match_check
RegExpNode * stop_node()
Definition: jsregexp.h:1455
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT)
Definition: jsregexp.h:410
int GreedyLoopTextLengthForAlternative(GuardedAlternative *alternative)
Definition: jsregexp.cc:3476
virtual void Accept(NodeVisitor *visitor)
RegExpCharacterClass * char_class() const
Definition: jsregexp.h:452
static Vector< const int > GetWordBounds()
Definition: jsregexp.cc:5247
virtual bool try_to_emit_quick_check_for_alternative(int i)
Definition: jsregexp.h:1154
static AssertionNode * AtStart(RegExpNode *on_success)
Definition: jsregexp.h:909
void SetInterval(int map_number, const Interval &interval)
Definition: jsregexp.h:1299
static ActionNode * SetRegister(int reg, int val, RegExpNode *on_success)
Definition: jsregexp.cc:1497
static Code * IrregexpNativeCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:511
void set(int index, Object *value)
Definition: objects-inl.h:2147
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths true
Definition: flags.cc:208
RegExpNode * continue_node()
Definition: jsregexp.h:1179
TextNode(RegExpCharacterClass *that, RegExpNode *on_success)
Definition: jsregexp.h:850
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5788
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1439
DeferredCapture(int reg, bool is_capture, Trace *trace)
Definition: jsregexp.h:1374
static ActionNode * BeginSubmatch(int stack_pointer_reg, int position_reg, RegExpNode *on_success)
Definition: jsregexp.cc:1537
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.cc:2356
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3345
void set_characters(int characters)
Definition: jsregexp.h:562
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.h:627
static Smi * FromInt(int value)
Definition: objects-inl.h:1209
bool Contains(int value)
Definition: jsregexp.h:723
int to() const
Definition: jsregexp.h:728
void set_at_start(bool at_start)
Definition: jsregexp.h:1450
static const int kLastCaptureCount
Definition: jsregexp.h:181
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:744
GlobalCache(Handle< JSRegExp > regexp, Handle< String > subject, bool is_global, Isolate *isolate)
Definition: jsregexp.cc:711
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:995
struct v8::internal::ActionNode::@17::@20 u_position_register
bool EmitQuickCheck(RegExpCompiler *compiler, Trace *trace, bool preload_has_checked_bounds, Label *on_possible_success, QuickCheckDetails *details_return, bool fall_through_on_failure)
Definition: jsregexp.cc:2477
void set_loop_label(Label *label)
Definition: jsregexp.h:1474
static ActionNode * EmptyMatchCheck(int start_register, int repetition_register, int repetition_limit, RegExpNode *on_success)
Definition: jsregexp.cc:1563
T Max(T a, T b)
Definition: utils.h:227
Position * positions(int index)
Definition: jsregexp.h:563
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
Definition: jsregexp.cc:3448
RegExpNode(Zone *zone)
Definition: jsregexp.h:590
LimitResult LimitVersions(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:2261
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:1466
static Handle< Object > CreateRegExpLiteral(Handle< JSFunction > constructor, Handle< String > pattern, Handle< String > flags, bool *has_pending_exception)
Definition: jsregexp.cc:69
virtual int GreedyLoopTextLength()
Definition: jsregexp.h:805
void AddFromFollowing(NodeInfo *that)
Definition: jsregexp.h:507
static ActionNode * IncrementRegister(int reg, RegExpNode *on_success)
Definition: jsregexp.cc:1508
void AddRange(CharacterRange range, int value, Zone *zone)
Definition: jsregexp.cc:5586
uc16 to()
Definition: jsregexp.h:354
T & at(int i) const
Definition: list.h:90
static int IrregexpNumberOfCaptures(FixedArray *re)
Definition: jsregexp.cc:496
void set_to(uc16 value)
Definition: jsregexp.h:355
int int32_t
Definition: unicode.cc:47
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2779
RegExpNode * replacement_
Definition: jsregexp.h:686
static void Canonicalize(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5475
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2959
RegExpNode * FilterSuccessor(int depth, bool ignore_case)
Definition: jsregexp.cc:2788
void EnsureAnalyzed(RegExpNode *node)
Definition: jsregexp.cc:5691
void set_flush_budget(int to)
Definition: jsregexp.h:1477
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2972
CompilationResult(Object *code, int registers)
Definition: jsregexp.h:1622
static CharacterRange Everything()
Definition: jsregexp.h:274
DispatchTableConstructor(DispatchTable *table, bool ignore_case, Zone *zone)
Definition: jsregexp.h:1524
#define DECLARE_VISIT(Type)
Definition: jsregexp.h:1575
virtual bool try_to_emit_quick_check_for_alternative(int i)
Definition: jsregexp.h:1099
void AddValue(int value, Zone *zone)
Definition: jsregexp.h:356
bool Get(unsigned value)
Definition: jsregexp.cc:5572
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2366
#define ASSERT(condition)
Definition: checks.h:329
static ActionNode * StorePosition(int reg, bool is_capture, RegExpNode *on_success)
Definition: jsregexp.cc:1516
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2316
void set_characters_preloaded(int count)
Definition: jsregexp.h:1475
TextType text_type() const
Definition: jsregexp.h:443
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler *compiler)
Definition: jsregexp.cc:3404
const char * error_message()
Definition: jsregexp.h:1582
void BuildTable(ChoiceNode *node)
Definition: jsregexp.cc:5895
RegExpCompiler * compiler()
Definition: jsregexp.h:1285
Interval(int from, int to)
Definition: jsregexp.h:714
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.h:961
bool EmitSkipInstructions(RegExpMacroAssembler *masm)
Definition: jsregexp.cc:3782
ZoneList< TextElement > * elements()
Definition: jsregexp.h:863
DeferredSetRegister(int reg, int value)
Definition: jsregexp.h:1388
void SetAll(int map_number)
Definition: jsregexp.h:1309
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2379
BoyerMoorePositionInfo * at(int i)
Definition: jsregexp.h:1291
static Interval Empty()
Definition: jsregexp.h:729
static void SetLastSubject(FixedArray *array, String *to)
Definition: jsregexp.h:206
struct v8::internal::ActionNode::@17::@21 u_submatch
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2867
static void Negate(ZoneList< CharacterRange > *src, ZoneList< CharacterRange > *dst, Zone *zone)
Definition: jsregexp.cc:5512
static const int kLastSubjectOffset
Definition: jsregexp.h:190
static Handle< Object > AtomExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:342
static Smi * cast(Object *object)
static const int kRegExpExecutableMemoryLimit
Definition: jsregexp.h:235
virtual void Emit(RegExpCompiler *compiler, Trace *trace)=0
static ActionNode * PositiveSubmatchSuccess(int stack_pointer_reg, int restore_reg, int clear_capture_count, int clear_capture_from, RegExpNode *on_success)
Definition: jsregexp.cc:1548
static void Split(ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5282
static const int kLastCaptureCountOffset
Definition: jsregexp.h:188
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2936
static void SetLastInput(FixedArray *array, String *to)
Definition: jsregexp.h:210
GuardedAlternative(RegExpNode *node)
Definition: jsregexp.h:1050
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:1141
static const int kRegWxpCompiledLimit
Definition: jsregexp.h:236
void Advance(int by, bool ascii)
Definition: jsregexp.cc:2715
void Merge(QuickCheckDetails *other, int from_index)
Definition: jsregexp.cc:2736
Label * loop_label()
Definition: jsregexp.h:1454
#define UNREACHABLE()
Definition: checks.h:52
static int IrregexpPrepare(Handle< JSRegExp > regexp, Handle< String > subject)
Definition: jsregexp.cc:529
RegExpTree * tree() const
Definition: jsregexp.h:445
NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, GuardedAlternative then_do_this, Zone *zone)
Definition: jsregexp.h:1129
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4314
static const int kRecursionBudget
Definition: jsregexp.h:637
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:5822
static const unsigned kFirstLimit
Definition: jsregexp.h:320
void AddAlternative(GuardedAlternative node)
Definition: jsregexp.h:1075
static const int kLastSubject
Definition: jsregexp.h:182
static void SetLastCaptureCount(FixedArray *array, int to)
Definition: jsregexp.h:202
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function not JSFunction itself flushes the cache of optimized code for closures on every GC functions with arguments object maximum number of escape analysis fix point iterations allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms concurrent on stack replacement do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes number of stack frames inspected by the profiler percentage of ICs that must have type info to allow optimization extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long expose natives in global object expose freeBuffer extension expose gc extension under the specified name expose externalize string extension number of stack frames to capture disable builtin natives files print name of functions for which code is generated use random jit cookie to mask large constants trace lazy optimization use adaptive optimizations always try to OSR functions trace optimize function deoptimization minimum length for automatic enable preparsing maximum number of optimization attempts before giving up cache prototype transitions trace debugging JSON request response trace out of bounds accesses to external arrays trace_js_array_abuse automatically set the debug break flag when debugger commands are in the queue abort by crashing maximum length of function source code printed in a stack trace max size of the new max size of the old max size of executable always perform global GCs print one trace line following each garbage collection do not print trace line after scavenger collection print statistics of the maximum memory committed for the heap in only print modified registers Don t break for ASM_UNIMPLEMENTED_BREAK macros print stack trace when an illegal exception is thrown randomize hashes to avoid predictable hash Fixed seed to use to hash property Print the time it takes to deserialize the snapshot testing_bool_flag testing_int_flag string flag tmp file in which to serialize heap Print the time it takes to lazily compile hydrogen code stubs concurrent_recompilation concurrent_sweeping Print usage including flags
Definition: flags.cc:665
void fail(const char *error_message)
Definition: jsregexp.h:1586
static CharacterRange Range(uc16 from, uc16 to)
Definition: jsregexp.h:270
RegExpNode * replacement()
Definition: jsregexp.h:650
CharacterRange(uc16 from, uc16 to)
Definition: jsregexp.h:263
Interval Union(Interval that)
Definition: jsregexp.h:715
void set_bound_checked_up_to(int to)
Definition: jsregexp.h:1476
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2885
void set_backtrack(Label *backtrack)
Definition: jsregexp.h:1472
virtual void Accept(NodeVisitor *visitor)
Definition: jsregexp.cc:1584
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2986
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start)
Definition: jsregexp.h:792
ChoiceNode(int expected_size, Zone *zone)
Definition: jsregexp.h:1067
const int kPointerSize
Definition: globals.h:268
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2390
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5367
virtual void Accept(NodeVisitor *visitor)
QuickCheckDetails * quick_check_performed()
Definition: jsregexp.h:1459
static Handle< JSArray > SetLastMatchInfo(Handle< JSArray > last_match_info, Handle< String > subject, int capture_count, int32_t *match)
Definition: jsregexp.cc:688
void AddGuard(Guard *guard, Zone *zone)
Definition: jsregexp.cc:1490
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2329
static int GetCapture(FixedArray *array, int index)
Definition: jsregexp.h:198
void set_bm_info(bool not_at_start, BoyerMooreLookahead *bm)
Definition: jsregexp.h:690
LoopChoiceNode(bool body_can_be_zero_length, Zone *zone)
Definition: jsregexp.h:1161
void AddInverse(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5950
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.h:1516
AssertionType assertion_type()
Definition: jsregexp.h:932
void AddCaseEquivalents(ZoneList< CharacterRange > *ranges, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5301
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:4192
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2305
static void IrregexpInitialize(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, int capture_register_count)
Definition: jsregexp.cc:516
static const int kFirstCapture
Definition: jsregexp.h:184
TriBool at_start()
Definition: jsregexp.h:1449
Label * backtrack()
Definition: jsregexp.h:1453
static Handle< Object > Exec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:234
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2432
Entry(uc16 from, uc16 to, OutSet *out_set)
Definition: jsregexp.h:351
virtual int EatsAtLeast(int still_to_find, int recursion_depth, bool not_at_start)
Definition: jsregexp.h:985
RegExpAtom * atom() const
Definition: jsregexp.h:447
static int IrregexpExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, int32_t *output, int output_size)
Definition: jsregexp.cc:552
Zone * zone() const
Definition: jsregexp.h:682
ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b)
Definition: jsregexp.h:1231
EndNode(Action action, Zone *zone)
Definition: jsregexp.h:981
void add_action(DeferredAction *new_action)
Definition: jsregexp.h:1467
static int Compare(uc16 a, uc16 b)
Definition: jsregexp.h:372
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3513
static int GetLastCaptureCount(FixedArray *array)
Definition: jsregexp.h:218
virtual void Accept(NodeVisitor *visitor)
BoyerMooreLookahead(int length, RegExpCompiler *compiler, Zone *zone)
Definition: jsregexp.cc:3662
NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg, int clear_capture_count, int clear_capture_start, Zone *zone)
Definition: jsregexp.h:1010
void SaveBMInfo(BoyerMooreLookahead *bm, bool not_at_start, int offset)
Definition: jsregexp.h:664
static const int kLastInput
Definition: jsregexp.h:183
void AddRange(CharacterRange range)
Definition: jsregexp.h:1533
void set_cp_offset(int cp_offset)
Definition: jsregexp.h:440
void set_being_calculated(bool b)
Definition: jsregexp.h:1098
static void AtomCompile(Handle< JSRegExp > re, Handle< String > pattern, JSRegExp::Flags flags, Handle< String > match_pattern)
Definition: jsregexp.cc:258
#define BASE_EMBEDDED
Definition: allocation.h:68
void ResetCompilationState()
Definition: jsregexp.h:513
int characters_preloaded()
Definition: jsregexp.h:1456
DeferredAction * actions()
Definition: jsregexp.h:1429
static const int kNodeIsTooComplexForGreedyLoops
Definition: jsregexp.h:623
uc16 from()
Definition: jsregexp.h:353
DispatchTable * GetTable(bool ignore_case)
Definition: jsregexp.cc:962
static int IrregexpMaxRegisterCount(FixedArray *re)
Definition: jsregexp.cc:485
int EatsAtLeastHelper(int still_to_find, int budget, RegExpNode *ignore_this_node, bool not_at_start)
Definition: jsregexp.cc:2402
RegExpNode * on_success()
Definition: jsregexp.h:741
static void SetIrregexpMaxRegisterCount(FixedArray *re, int value)
Definition: jsregexp.cc:491
BoyerMooreLookahead * bm_info(bool not_at_start)
Definition: jsregexp.h:678
static int AtomExecRaw(Handle< JSRegExp > regexp, Handle< String > subject, int index, int32_t *output, int output_size)
Definition: jsregexp.cc:283
static Handle< String > ToString(Handle< Object > value)
virtual RegExpNode * FilterASCII(int depth, bool ignore_case)
Definition: jsregexp.cc:2813
static const int kLastMatchOverhead
Definition: jsregexp.h:185
struct v8::internal::ActionNode::@17::@19 u_increment_register
Entry()
Definition: jsregexp.h:350
static const int kHeaderSize
Definition: objects.h:3016
void set_from(uc16 value)
Definition: jsregexp.h:279
static const int kNone
Definition: jsregexp.h:730
int bound_checked_up_to()
Definition: jsregexp.h:1457
virtual int GreedyLoopTextLength()
Definition: jsregexp.h:624
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)
Definition: jsregexp.cc:2422
void AddContinueAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3506
Guard(int reg, Relation op, int value)
Definition: jsregexp.h:1033
void set_to(uc16 value)
Definition: jsregexp.h:281
bool GetStoredPosition(int reg, int *cp_offset)
Definition: jsregexp.cc:1190
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.cc:2345
static AssertionNode * AfterNewline(RegExpNode *on_success)
Definition: jsregexp.h:918
int from() const
Definition: jsregexp.h:727
static AssertionNode * AtNonBoundary(RegExpNode *on_success)
Definition: jsregexp.h:915
BackReferenceNode(int start_reg, int end_reg, RegExpNode *on_success)
Definition: jsregexp.h:948
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function info
Definition: flags.cc:317
virtual void VisitLoopChoice(LoopChoiceNode *that)
Definition: jsregexp.cc:5759
static bool UsesNativeRegExp()
Definition: jsregexp.h:48
static const int kMaxCopiesCodeGenerated
Definition: jsregexp.h:674
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2553
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)=0
static const int kFirstCaptureOffset
Definition: jsregexp.h:194
int Count(int map_number)
Definition: jsregexp.h:1287
uint16_t uc16
Definition: globals.h:309
static const int kLastInputOffset
Definition: jsregexp.h:192
void MakeCaseIndependent(bool is_ascii)
Definition: jsregexp.cc:3423
virtual void Accept(NodeVisitor *visitor)
ZoneList< GuardedAlternative > * alternatives()
Definition: jsregexp.h:1078
static const int kPayloadMask
Definition: jsregexp.h:305
void SetRest(int from_map)
Definition: jsregexp.h:1313
static const int kStartMarker
Definition: jsregexp.h:304
#define ASSERT_EQ(v1, v2)
Definition: checks.h:330
void ForEach(Callback *callback)
Definition: jsregexp.h:387
OutSet * Extend(unsigned value, Zone *zone)
Definition: jsregexp.cc:5541
static void DotPrint(const char *label, RegExpNode *node, bool ignore_case)
struct v8::internal::ActionNode::@17::@18 u_store_register
ZoneList< GuardedAlternative > * alternatives_
Definition: jsregexp.h:1104
ContainedInLattice AddRange(ContainedInLattice containment, const int *ranges, int ranges_length, Interval new_range)
Definition: jsregexp.cc:114
virtual int GreedyLoopTextLength()
Definition: jsregexp.cc:3442
static ByteArray * IrregexpByteCode(FixedArray *re, bool is_ascii)
Definition: jsregexp.cc:506
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3169
RegExpNode * set_replacement(RegExpNode *replacement)
Definition: jsregexp.h:654
static ActionNode * ClearCaptures(Interval range, RegExpNode *on_success)
Definition: jsregexp.cc:1527
void Set(int map_number, int character)
Definition: jsregexp.h:1293
static int IrregexpNumberOfRegisters(FixedArray *re)
Definition: jsregexp.cc:501
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
Definition: list-inl.h:39
Definition: jsregexp.h:348
bool Rationalize(bool ascii)
Definition: jsregexp.cc:2453
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.h:988
TextNode(ZoneList< TextElement > *elms, RegExpNode *on_success)
Definition: jsregexp.h:846
Object * get(int index)
Definition: objects-inl.h:2127
void AddLoopAlternative(GuardedAlternative alt)
Definition: jsregexp.cc:3499
SeqRegExpNode(RegExpNode *on_success)
Definition: jsregexp.h:739
virtual void Accept(NodeVisitor *visitor)=0
RegExpNode * loop_node()
Definition: jsregexp.h:1178
static Handle< Object > Compile(Handle< JSRegExp > re, Handle< String > pattern, Handle< String > flags)
Definition: jsregexp.cc:171
ActionNode::ActionType action_type()
Definition: jsregexp.h:1364
CompilationResult(Isolate *isolate, const char *error_message)
Definition: jsregexp.h:1618
ZoneList< Guard * > * guards()
Definition: jsregexp.h:1054
virtual void Accept(NodeVisitor *visitor)
ElementInSetsRelation
Definition: jsregexp.h:248
virtual void Emit(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:3928
static Handle< Object > IrregexpExec(Handle< JSRegExp > regexp, Handle< String > subject, int index, Handle< JSArray > lastMatchInfo)
Definition: jsregexp.cc:640
bool Matches(NodeInfo *that)
Definition: jsregexp.h:483
#define FOR_EACH_NODE_TYPE(VISIT)
Definition: jsregexp.h:401
T Min(T a, T b)
Definition: utils.h:234
ActionType action_type()
Definition: jsregexp.h:803
void set_on_success(RegExpNode *node)
Definition: jsregexp.h:742
virtual void Accept(NodeVisitor *visitor)
void Flush(RegExpCompiler *compiler, RegExpNode *successor)
Definition: jsregexp.cc:1378
int kUninitializedRegExpNodePlaceHolder
void set_quick_check_performed(QuickCheckDetails *d)
Definition: jsregexp.h:1478
struct v8::internal::ActionNode::@17::@23 u_clear_captures
int expected_size
void InvalidateCurrentCharacter()
Definition: jsregexp.cc:3399
bool IsEverything(uc16 max)
Definition: jsregexp.h:283
DeferredAction(ActionNode::ActionType action_type, int reg)
Definition: jsregexp.h:1359
static const Entry NoValue()
Definition: jsregexp.h:371
static void AddClassEscape(uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5199
OutSet * out_set()
Definition: jsregexp.h:359
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:638
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)=0
static CharacterRange Singleton(uc16 value)
Definition: jsregexp.h:267
static AssertionNode * AtEnd(RegExpNode *on_success)
Definition: jsregexp.h:906
friend class DotPrinter
Definition: jsregexp.h:840
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start)
Definition: jsregexp.cc:3154
QuickCheckDetails(int characters)
Definition: jsregexp.h:542
void AddFromPreceding(NodeInfo *that)
Definition: jsregexp.h:492
void set_stop_node(RegExpNode *node)
Definition: jsregexp.h:1473
static CompilationResult Compile(RegExpCompileData *input, bool ignore_case, bool global, bool multiline, Handle< String > pattern, Handle< String > sample_subject, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:6001
static void SetCapture(FixedArray *array, int index, int to)
Definition: jsregexp.h:214
static AssertionNode * AtBoundary(RegExpNode *on_success)
Definition: jsregexp.h:912
bool mentions_reg(int reg)
Definition: jsregexp.cc:1179
void set_node(RegExpNode *node)
Definition: jsregexp.h:1053
const int MB
Definition: globals.h:246