v8  3.14.5(node0.10.28)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
regexp-macro-assembler.h
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_REGEXP_MACRO_ASSEMBLER_H_
29 #define V8_REGEXP_MACRO_ASSEMBLER_H_
30 
31 #include "ast.h"
32 
33 namespace v8 {
34 namespace internal {
35 
38  Label* on_match;
39 };
40 
41 
43  public:
44  // The implementation must be able to handle at least:
45  static const int kMaxRegister = (1 << 16) - 1;
46  static const int kMaxCPOffset = (1 << 15) - 1;
47  static const int kMinCPOffset = -(1 << 15);
48 
49  static const int kTableSizeBits = 7;
50  static const int kTableSize = 1 << kTableSizeBits;
51  static const int kTableMask = kTableSize - 1;
52 
59  };
60 
64  };
65 
66  explicit RegExpMacroAssembler(Zone* zone);
67  virtual ~RegExpMacroAssembler();
68  // The maximal number of pushes between stack checks. Users must supply the
69  // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
70  // at least once for every stack_limit_slack() pushes that are executed.
71  virtual int stack_limit_slack() = 0;
72  virtual bool CanReadUnaligned();
73  virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change.
74  virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by.
75  // Continues execution from the position pushed on the top of the backtrack
76  // stack by an earlier PushBacktrack(Label*).
77  virtual void Backtrack() = 0;
78  virtual void Bind(Label* label) = 0;
79  virtual void CheckAtStart(Label* on_at_start) = 0;
80  // Check the current character for a match with c and go to the on_equal
81  // label if it matches.
82  virtual void CheckCharacter(unsigned c, Label* on_equal) = 0;
83  // Bitwise and the current character with the given constant and then
84  // check for a match with c.
85  virtual void CheckCharacterAfterAnd(unsigned c,
86  unsigned and_with,
87  Label* on_equal) = 0;
88  virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
89  virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
90  // Check the current character for a match with a literal string. If we
91  // fail to match then goto the on_failure label. If check_eos is set then
92  // the end of input always fails. If check_eos is clear then it is the
93  // caller's responsibility to ensure that the end of string is not hit.
94  // If the label is NULL then we should pop a backtrack address off
95  // the stack and go to that.
96  virtual void CheckCharacters(
98  int cp_offset,
99  Label* on_failure,
100  bool check_eos) = 0;
101  virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
102  virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
103  virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
104  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
105  Label* on_no_match) = 0;
106  // Check the current character for a match with a literal character. If we
107  // fail to match then goto the on_not_equal label. End of input always
108  // matches. If the label is NULL then we should pop a backtrack address off
109  // the stack and go to that.
110  virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0;
111  virtual void CheckNotCharacterAfterAnd(unsigned c,
112  unsigned and_with,
113  Label* on_not_equal) = 0;
114  // Subtract a constant from the current character, then and with the given
115  // constant and then check for a match with c.
116  virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
117  uc16 minus,
118  uc16 and_with,
119  Label* on_not_equal) = 0;
120  virtual void CheckCharacterInRange(uc16 from,
121  uc16 to, // Both inclusive.
122  Label* on_in_range) = 0;
123  virtual void CheckCharacterNotInRange(uc16 from,
124  uc16 to, // Both inclusive.
125  Label* on_not_in_range) = 0;
126 
127  // The current character (modulo kTableSize) is looked up in the byte
128  // array, and if the found byte is non-zero, we jump to the on_bit_set label.
129  virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0;
130 
131  // Checks whether the given offset from the current position is before
132  // the end of the string. May overwrite the current character.
133  virtual void CheckPosition(int cp_offset, Label* on_outside_input) {
134  LoadCurrentCharacter(cp_offset, on_outside_input, true);  // Third argument is check_bounds; on_outside_input is passed as the load's on_end_of_input label.
135  }
136  // Check whether a standard/default character class matches the current
137  // character. Returns false if the type of special character class does
138  // not have custom support.
139  // May clobber the current loaded character.
140  virtual bool CheckSpecialCharacterClass(uc16 type,
141  Label* on_no_match) {
142  return false;  // Default implementation: reports no custom support, whatever the type.
143  }
144  virtual void Fail() = 0;
145  virtual Handle<HeapObject> GetCode(Handle<String> source) = 0;
146  virtual void GoTo(Label* label) = 0;
147  // Check whether a register is >= a given constant and go to a label if it
148  // is. Backtracks instead if the label is NULL.
149  virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
150  // Check whether a register is < a given constant and go to a label if it is.
151  // Backtracks instead if the label is NULL.
152  virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
153  // Check whether a register is equal to the current position and go to a
154  // label if it is.
155  virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
157  virtual void LoadCurrentCharacter(int cp_offset,
158  Label* on_end_of_input,
159  bool check_bounds = true,
160  int characters = 1) = 0;
161  virtual void PopCurrentPosition() = 0;
162  virtual void PopRegister(int register_index) = 0;
163  // Pushes the label on the backtrack stack, so that a following Backtrack
164  // will go to this label. Always checks the backtrack stack limit.
165  virtual void PushBacktrack(Label* label) = 0;
166  virtual void PushCurrentPosition() = 0;
167  virtual void PushRegister(int register_index,
168  StackCheckFlag check_stack_limit) = 0;
169  virtual void ReadCurrentPositionFromRegister(int reg) = 0;
170  virtual void ReadStackPointerFromRegister(int reg) = 0;
171  virtual void SetCurrentPositionFromEnd(int by) = 0;
172  virtual void SetRegister(int register_index, int to) = 0;
173  // Return whether the matching (with a global regexp) will be restarted.
174  virtual bool Succeed() = 0;
175  virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
176  virtual void ClearRegisters(int reg_from, int reg_to) = 0;
177  virtual void WriteStackPointerToRegister(int reg) = 0;
178 
179  // Controls the generation of large inlined constants in the code.
180  void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
181  bool slow_safe() { return slow_safe_compiler_; }
182 
184  // Set whether the regular expression has the global flag. Exiting due to
185  // a failure in a global regexp may still mean success overall.
186  inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
187  inline bool global() { return global_mode_ != NOT_GLOBAL; }
189  return global_mode_ == GLOBAL;
190  }
191 
192  Zone* zone() const { return zone_; }
193 
194  private:
195  bool slow_safe_compiler_;
196  GlobalMode global_mode_;  // Stored as the enum, not bool: a bool would fold every mode other than NOT_GLOBAL into a single value.
197  Zone* zone_;
198 };
199 
200 
201 #ifndef V8_INTERPRETED_REGEXP // Avoid compiling unused code.
202 
204  public:
205  // Type of input string to generate code for.
206  enum Mode { ASCII = 1, UC16 = 2 };
207 
208  // Result of calling generated native RegExp code.
209  // RETRY: Something significant changed during execution, and the matching
210  // should be retried from scratch.
211  // EXCEPTION: Something failed during execution. If no exception has been
212  // thrown, it's an internal out-of-memory, and the caller should
213  // throw the exception.
214  // FAILURE: Matching failed.
215  // SUCCESS: Matching succeeded, and the output array has been filled with
216  // capture positions.
217  enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
218 
220  virtual ~NativeRegExpMacroAssembler();
221  virtual bool CanReadUnaligned();
222 
223  static Result Match(Handle<Code> regexp,
224  Handle<String> subject,
225  int* offsets_vector,
226  int offsets_vector_length,
227  int previous_index,
228  Isolate* isolate);
229 
230  // Compares two-byte strings case insensitively.
231  // Called from generated RegExp code.
232  static int CaseInsensitiveCompareUC16(Address byte_offset1,
233  Address byte_offset2,
234  size_t byte_length,
235  Isolate* isolate);
236 
237  // Called from RegExp if the backtrack stack limit is hit.
238  // Tries to expand the stack. Returns the new stack-pointer if
239  // successful, and updates the stack_top address, or returns 0 if unable
240  // to grow the stack.
241  // This function must not trigger a garbage collection.
242  static Address GrowStack(Address stack_pointer, Address* stack_top,
243  Isolate* isolate);
244 
245  static const byte* StringCharacterPosition(String* subject, int start_index);
246 
247  // Byte map of ASCII characters with a 0xff if the character is a word
248  // character (digit, letter or underscore) and 0x00 otherwise.
249  // Used by generated RegExp code.
250  static const byte word_character_map[128];
251 
253  return const_cast<Address>(&word_character_map[0]);
254  }
255 
256  static Result Execute(Code* code,
257  String* input,
258  int start_offset,
259  const byte* input_start,
260  const byte* input_end,
261  int* output,
262  int output_size,
263  Isolate* isolate);
264 };
265 
266 #endif // V8_INTERPRETED_REGEXP
267 
268 } } // namespace v8::internal
269 
270 #endif // V8_REGEXP_MACRO_ASSEMBLER_H_
byte * Address
Definition: globals.h:157
virtual void WriteStackPointerToRegister(int reg)=0
virtual IrregexpImplementation Implementation()=0
virtual void GoTo(Label *label)=0
static Result Execute(Code *code, String *input, int start_offset, const byte *input_start, const byte *input_end, int *output, int output_size, Isolate *isolate)
virtual void CheckNotBackReference(int start_reg, Label *on_no_match)=0
virtual void SetRegister(int register_index, int to)=0
virtual void ClearRegisters(int reg_from, int reg_to)=0
virtual void LoadCurrentCharacter(int cp_offset, Label *on_end_of_input, bool check_bounds=true, int characters=1)=0
virtual void ReadCurrentPositionFromRegister(int reg)=0
virtual void AdvanceCurrentPosition(int by)=0
uint8_t byte
Definition: globals.h:156
virtual void CheckCharacterGT(uc16 limit, Label *on_greater)=0
static int CaseInsensitiveCompareUC16(Address byte_offset1, Address byte_offset2, size_t byte_length, Isolate *isolate)
virtual void PushRegister(int register_index, StackCheckFlag check_stack_limit)=0
virtual void ReadStackPointerFromRegister(int reg)=0
virtual void CheckAtStart(Label *on_at_start)=0
virtual void CheckCharacterNotInRange(uc16 from, uc16 to, Label *on_not_in_range)=0
virtual void IfRegisterLT(int reg, int comparand, Label *if_lt)=0
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, Label *on_no_match)=0
virtual void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 and_with, Label *on_not_equal)=0
virtual void CheckNotAtStart(Label *on_not_at_start)=0
virtual void CheckCharacter(unsigned c, Label *on_equal)=0
virtual void AdvanceRegister(int reg, int by)=0
virtual void PopRegister(int register_index)=0
static const byte * StringCharacterPosition(String *subject, int start_index)
virtual bool CheckSpecialCharacterClass(uc16 type, Label *on_no_match)
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset)=0
virtual void Bind(Label *label)=0
uint16_t uc16
Definition: globals.h:259
virtual void CheckCharacterLT(uc16 limit, Label *on_less)=0
static Result Match(Handle< Code > regexp, Handle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
virtual void SetCurrentPositionFromEnd(int by)=0
static Address GrowStack(Address stack_pointer, Address *stack_top, Isolate *isolate)
virtual void CheckCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_equal)=0
virtual void IfRegisterEqPos(int reg, Label *if_eq)=0
virtual void CheckBitInTable(Handle< ByteArray > table, Label *on_bit_set)=0
virtual void CheckGreedyLoop(Label *on_tos_equals_current_position)=0
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra code(assertions) for debugging") DEFINE_bool(code_comments
virtual void CheckCharacters(Vector< const uc16 > str, int cp_offset, Label *on_failure, bool check_eos)=0
virtual void CheckNotCharacter(unsigned c, Label *on_not_equal)=0
virtual void CheckPosition(int cp_offset, Label *on_outside_input)
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_not_equal)=0
virtual void CheckCharacterInRange(uc16 from, uc16 to, Label *on_in_range)=0
virtual Handle< HeapObject > GetCode(Handle< String > source)=0
virtual void IfRegisterGE(int reg, int comparand, Label *if_ge)=0
virtual void PushBacktrack(Label *label)=0