v8  3.25.30(node0.11.13)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
regexp-macro-assembler.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #include "v8.h"
29 #include "ast.h"
30 #include "assembler.h"
31 #include "regexp-stack.h"
32 #include "regexp-macro-assembler.h"
33 #include "simulator.h"
34 
35 namespace v8 {
36 namespace internal {
37 
// Constructor initializer list followed by an empty body: clears
// slow_safe_compiler_, sets global_mode_ to NOT_GLOBAL and stores the |zone|
// argument in zone_.
// NOTE(review): the line naming this constructor (listing line 38) is missing
// from this extract. Presumably it is the RegExpMacroAssembler constructor,
// since a later initializer list in this file invokes
// RegExpMacroAssembler(zone) -- confirm against the original file.
39  : slow_safe_compiler_(false),
40  global_mode_(NOT_GLOBAL),
41  zone_(zone) {
42 }
43 
44 
46 }
47 
48 
// Body of a boolean function whose signature line (listing line 49) is
// missing from this extract. The answer is fixed by the preprocessor: it
// returns true when the build defines V8_HOST_CAN_READ_UNALIGNED and false
// otherwise.
50 #ifdef V8_HOST_CAN_READ_UNALIGNED
51  return true;
52 #else
53  return false;
54 #endif
55 }
56 
57 
58 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
59 
// Constructor initializer list followed by an empty body: forwards |zone| to
// the RegExpMacroAssembler constructor and does nothing else.
// NOTE(review): the line naming this constructor (listing line 60) is missing
// from this extract; presumably it belongs to a class derived from
// RegExpMacroAssembler -- confirm against the original file.
61  : RegExpMacroAssembler(zone) {
62 }
63 
64 
66 }
67 
68 
// Body of a boolean function whose signature line (listing line 69) is
// missing from this extract. Returns true only when
// FLAG_enable_unaligned_accesses is set and slow_safe() is false.
70  return FLAG_enable_unaligned_accesses && !slow_safe();
71 }
72 
// NOTE(review): the line carrying this function's return type and name
// (listing line 73) is missing from this extract; the cross-reference index at
// the end of this listing records it as
// `static const byte* StringCharacterPosition(String* subject, int start_index)`.
//
// Returns the address of the character at |start_index| inside the character
// data of |subject|.
//
// Parameters:
//   subject     - must be an external or sequential string (asserted).
//   start_index - character index; must satisfy
//                 0 <= start_index <= subject->length() (asserted), so the
//                 position one past the last character is accepted.
// Returns a byte pointer. For one-byte strings the index is added as a byte
// offset; for two-byte strings it is added to a uc16 pointer, so the offset is
// scaled by the width of uc16.
74  String* subject,
75  int start_index) {
76  // Not just flat, but ultra flat.
77  ASSERT(subject->IsExternalString() || subject->IsSeqString());
78  ASSERT(start_index >= 0);
79  ASSERT(start_index <= subject->length());
 // One-byte representation: fetch the character pointer from whichever of the
 // two concrete string classes applies, then offset it in bytes.
80  if (subject->IsOneByteRepresentation()) {
81  const byte* address;
82  if (StringShape(subject).IsExternal()) {
83  const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars();
84  address = reinterpret_cast<const byte*>(data);
85  } else {
86  ASSERT(subject->IsSeqOneByteString());
87  const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
88  address = reinterpret_cast<const byte*>(data);
89  }
90  return address + start_index;
91  }
 // Otherwise the string is handled as two-byte: the index is applied to the
 // uc16 pointer before the result is converted to a byte pointer.
92  const uc16* data;
93  if (StringShape(subject).IsExternal()) {
94  data = ExternalTwoByteString::cast(subject)->GetChars();
95  } else {
96  ASSERT(subject->IsSeqTwoByteString());
97  data = SeqTwoByteString::cast(subject)->GetChars();
98  }
99  return reinterpret_cast<const byte*>(data + start_index);
100 }
101 
102 
// NOTE(review): the line carrying this function's return type and name
// (listing line 103) is missing from this extract; the cross-reference index
// at the end of this listing records it as
// `static Result Match(Handle<Code> regexp, Handle<String> subject,
// int* offsets_vector, int offsets_vector_length, int previous_index,
// Isolate* isolate)`.
//
// Runs |regexp_code| against the flat string |subject| starting at character
// index |previous_index|: it locates the raw character data, computes the
// input byte range, and hands everything to Execute().
//
// Parameters:
//   regexp_code           - dereferenced and passed to Execute().
//   subject               - subject string; must be flat (asserted).
//   offsets_vector        - forwarded unchanged to Execute() as its output
//                           buffer.
//   offsets_vector_length - forwarded unchanged to Execute() as the output
//                           size.
//   previous_index        - character index to start at; must satisfy
//                           0 <= previous_index <= subject->length()
//                           (asserted).
//   isolate               - forwarded unchanged to Execute().
// Returns the Result produced by Execute().
104  Handle<Code> regexp_code,
105  Handle<String> subject,
106  int* offsets_vector,
107  int offsets_vector_length,
108  int previous_index,
109  Isolate* isolate) {
110 
111  ASSERT(subject->IsFlat());
112  ASSERT(previous_index >= 0);
113  ASSERT(previous_index <= subject->length());
114 
115  // No allocations before calling the regexp, but we can't use
116  // DisallowHeapAllocation, since regexps might be preempted, and another
117  // thread might do allocation anyway.
118 
119  String* subject_ptr = *subject;
120  // Character offsets into string.
121  int start_offset = previous_index;
 // Characters remaining from start_offset to the end of the subject; taken
 // from the original string, before any unwrapping below.
122  int char_length = subject_ptr->length() - start_offset;
123  int slice_offset = 0;
124 
125  // The string has been flattened, so if it is a cons string it contains the
126  // full string in the first part.
127  if (StringShape(subject_ptr).IsCons()) {
128  ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
129  subject_ptr = ConsString::cast(subject_ptr)->first();
130  } else if (StringShape(subject_ptr).IsSliced()) {
 // For a sliced string, continue with its parent and remember the slice's
 // offset so it can be added to the start position below.
131  SlicedString* slice = SlicedString::cast(subject_ptr);
132  subject_ptr = slice->parent();
133  slice_offset = slice->offset();
134  }
135  // Ensure that an underlying string has the same ASCII-ness.
136  bool is_ascii = subject_ptr->IsOneByteRepresentation();
137  ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
138  // String is now either Sequential or External
 // Shift that turns a character count into a byte count: 0 for one-byte
 // strings, 1 for two-byte strings.
139  int char_size_shift = is_ascii ? 0 : 1;
140 
141  const byte* input_start =
142  StringCharacterPosition(subject_ptr, start_offset + slice_offset);
143  int byte_length = char_length << char_size_shift;
144  const byte* input_end = input_start + byte_length;
 // Note that the original *subject is passed as the input string, whereas the
 // input_start/input_end addresses were derived from the unwrapped
 // subject_ptr.
145  Result res = Execute(*regexp_code,
146  *subject,
147  start_offset,
148  input_start,
149  input_end,
150  offsets_vector,
151  offsets_vector_length,
152  isolate);
153  return res;
154 }
155 
156 
// NOTE(review): the line carrying this function's return type and name
// (listing line 157) is missing from this extract; the cross-reference index
// at the end of this listing records it as
// `static Result Execute(Code* code, String* input, int start_offset,
// const byte* input_start, const byte* input_end, int* output,
// int output_size, Isolate* isolate)`.
//
// Calls the generated regexp code at code->entry() through the
// CALL_GENERATED_REGEXP_CODE macro and returns its integer result converted
// to Result.
//
// |input|, |start_offset|, |input_start|, |input_end|, |output|,
// |output_size| and |isolate| are passed through to the generated code as
// given, together with the regexp stack base and a direct_call value of 0.
//
// If the generated code reports EXCEPTION while the isolate has no pending
// exception, isolate->StackOverflow() is called before returning.
158  Code* code,
159  String* input, // This needs to be the unpacked (sliced, cons) string.
160  int start_offset,
161  const byte* input_start,
162  const byte* input_end,
163  int* output,
164  int output_size,
165  Isolate* isolate) {
166  // Ensure that the minimum stack has been allocated.
167  RegExpStackScope stack_scope(isolate);
168  Address stack_base = stack_scope.stack()->stack_base();
169 
 // direct_call is always 0 on this path.
 // NOTE(review): presumably this tells the generated code that it was entered
 // from C++ rather than from other generated code -- confirm against the
 // architecture-specific code generators.
170  int direct_call = 0;
171  int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
172  input,
173  start_offset,
174  input_start,
175  input_end,
176  output,
177  output_size,
178  stack_base,
179  direct_call,
180  isolate);
 // RETRY is expected to be the smallest value the generated code returns.
181  ASSERT(result >= RETRY);
182 
183  if (result == EXCEPTION && !isolate->has_pending_exception()) {
184  // We detected a stack overflow (on the backtrack stack) in RegExp code,
185  // but haven't created the exception yet.
186  isolate->StackOverflow();
187  }
188  return static_cast<Result>(result);
189 }
190 
191 
// Initializer of a 256-entry table (32 rows of 8 values) whose position
// corresponds to a character code, as the per-row comments show. An entry is
// 0xff for '0'-'9', 'A'-'Z', '_' and 'a'-'z', and 0x00 for every other code,
// including the whole 0x80-0xff (Latin-1) half.
// NOTE(review): the declaration line that names and types this array (listing
// line 192) is missing from this extract; presumably this is the table used
// to classify regexp word characters -- confirm against the original file.
 // Codes 0x00-0x1f: no entries set.
193  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197 
 // Codes 0x20-0x3f: only the ten digits are set.
198  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
201  0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
202 
 // Codes 0x40-0x5f: upper-case letters and '_' are set; '@' (first entry) and
 // the four codes between 'Z' and '_' are not.
203  0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
204  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
205  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
206  0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
207 
 // Codes 0x60-0x7f: lower-case letters are set; '`' (first entry) and the
 // five codes after 'z' are not.
208  0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
209  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
210  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
211  0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
212  // Latin-1 range
 // Codes 0x80-0xff: every entry is 0x00.
213  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
214  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
215  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
216  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
217 
218  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
219  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
220  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
221  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
222 
223  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
224  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
225  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
226  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
227 
228  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
229  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
230  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
231  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
232 };
233 
234 
// NOTE(review): the line carrying this function's return type and name
// (listing line 235) is missing from this extract; the cross-reference index
// at the end of this listing records it as
// `static int CaseInsensitiveCompareUC16(Address byte_offset1,
// Address byte_offset2, size_t byte_length, Isolate* isolate)`.
//
// Compares two runs of uc16 code units, each |byte_length| bytes long,
// starting at |byte_offset1| and |byte_offset2|. Two code units are treated
// as equal when they are identical or when the values left in their
// one-element buffers after canonicalize->get() agree.
//
// Parameters:
//   byte_offset1, byte_offset2 - start addresses, reinterpreted as uc16*.
//   byte_length                - length in bytes; must be even (asserted).
//   isolate                    - not used in the lines visible here.
// Returns 1 if every position matches, 0 at the first position that does not.
236  Address byte_offset1,
237  Address byte_offset2,
238  size_t byte_length,
239  Isolate* isolate) {
 // NOTE(review): listing lines 240-241 are missing from this extract.
 // `canonicalize`, used below, is not declared in the visible lines;
 // presumably it is obtained from
 // isolate->regexp_macro_assembler_canonicalize(), which appears in the
 // cross-reference index -- confirm against the original file.
242  // This function is not allowed to cause a garbage collection.
243  // A GC might move the calling generated code and invalidate the
244  // return address on the stack.
245  ASSERT(byte_length % 2 == 0);
246  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
247  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
 // Number of uc16 code units to compare.
248  size_t length = byte_length >> 1;
249 
250  for (size_t i = 0; i < length; i++) {
251  unibrow::uchar c1 = substring1[i];
252  unibrow::uchar c2 = substring2[i];
 // Identical code units match without consulting the mapping.
253  if (c1 != c2) {
 // The buffer starts out holding c1 itself.
 // NOTE(review): presumably get() leaves the buffer untouched when the
 // character has no mapping, so the unmapped value is what gets compared
 // -- confirm in unicode-inl.h.
254  unibrow::uchar s1[1] = { c1 };
255  canonicalize->get(c1, '\0', s1);
 // c2 may already equal the mapped form of c1; c2 is only mapped when it
 // does not.
256  if (s1[0] != c2) {
257  unibrow::uchar s2[1] = { c2 };
258  canonicalize->get(c2, '\0', s2);
259  if (s1[0] != s2[0]) {
260  return 0;
261  }
262  }
263  }
264  }
265  return 1;
266 }
267 
268 
// NOTE(review): the line carrying this function's return type, name and first
// parameter (listing line 269) is missing from this extract; the
// cross-reference index at the end of this listing records the signature as
// `static Address GrowStack(Address stack_pointer, Address* stack_top,
// Isolate* isolate)`. The second parameter is named |stack_base| here.
//
// Asks the isolate's RegExpStack for twice its current capacity and
// translates |stack_pointer| so that it refers to the resulting stack.
//
// Parameters:
//   stack_pointer - current stack pointer; must not be above the current
//                   stack base and must lie within the current capacity of
//                   it (asserted).
//   stack_base    - in/out. Must equal the RegExpStack's current base on
//                   entry (asserted); overwritten with the new base on
//                   success and left unchanged on failure.
//   isolate       - supplies the RegExpStack via regexp_stack().
// Returns NULL if EnsureCapacity() returns NULL; otherwise the new base minus
// the number of bytes that lay between the old stack pointer and the old
// base.
270  Address* stack_base,
271  Isolate* isolate) {
272  RegExpStack* regexp_stack = isolate->regexp_stack();
273  size_t size = regexp_stack->stack_capacity();
274  Address old_stack_base = regexp_stack->stack_base();
275  ASSERT(old_stack_base == *stack_base);
276  ASSERT(stack_pointer <= old_stack_base);
277  ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
278  Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
279  if (new_stack_base == NULL) {
280  return NULL;
281  }
282  *stack_base = new_stack_base;
 // Keep the same distance from the base as before.
 // NOTE(review): this assumes EnsureCapacity() preserved the existing stack
 // contents at the same offsets from the new base -- confirm in
 // regexp-stack.cc.
283  intptr_t stack_content_size = old_stack_base - stack_pointer;
284  return new_stack_base - stack_content_size;
285 }
286 
287 #endif // V8_INTERPRETED_REGEXP
288 
289 } } // namespace v8::internal
byte * Address
Definition: globals.h:186
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter NULL
Definition: flags.cc:269
const SwVfpRegister s2
Failure * StackOverflow()
Definition: isolate.cc:871
#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8)
Definition: simulator-arm.h:59
Address EnsureCapacity(size_t size)
Definition: regexp-stack.cc:88
static Result Execute(Code *code, String *input, int start_offset, const byte *input_start, const byte *input_end, int *output, int output_size, Isolate *isolate)
static ExternalTwoByteString * cast(Object *obj)
static SeqOneByteString * cast(Object *obj)
RegExpStack * regexp_stack()
Definition: isolate.h:961
#define ASSERT(condition)
Definition: checks.h:329
RegExpStack * stack() const
Definition: regexp-stack.h:49
static ExternalAsciiString * cast(Object *obj)
int get(uchar c, uchar n, uchar *result)
Definition: unicode-inl.h:50
uint8_t byte
Definition: globals.h:185
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object size
Definition: flags.cc:211
static SlicedString * cast(Object *obj)
unibrow::Mapping< unibrow::Ecma262Canonicalize > * regexp_macro_assembler_canonicalize()
Definition: isolate.h:957
static int CaseInsensitiveCompareUC16(Address byte_offset1, Address byte_offset2, size_t byte_length, Isolate *isolate)
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function not JSFunction itself flushes the cache of optimized code for closures on every GC functions with arguments object maximum number of escape analysis fix point iterations allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms concurrent on 
stack replacement do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes number of stack frames inspected by the profiler percentage of ICs that must have type info to allow optimization extra verbose compilation tracing generate extra code(assertions) for debugging") DEFINE_bool(code_comments
bool has_pending_exception()
Definition: isolate.h:587
const SwVfpRegister s1
static SeqTwoByteString * cast(Object *obj)
static const byte * StringCharacterPosition(String *subject, int start_index)
uint16_t uc16
Definition: globals.h:309
static Result Match(Handle< Code > regexp, Handle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
#define ASSERT_EQ(v1, v2)
Definition: checks.h:330
static Address GrowStack(Address stack_pointer, Address *stack_top, Isolate *isolate)
bool IsOneByteRepresentation()
Definition: objects-inl.h:321
static ConsString * cast(Object *obj)
unsigned int uchar
Definition: unicode.h:40