v8  3.14.5(node0.10.28)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
regexp-macro-assembler.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #include "v8.h"
29 #include "ast.h"
30 #include "assembler.h"
31 #include "regexp-stack.h"
32 #include "regexp-macro-assembler.h"
33 #include "simulator.h"
34 
35 namespace v8 {
36 namespace internal {
37 
39  : slow_safe_compiler_(false),
40  global_mode_(NOT_GLOBAL),
41  zone_(zone) {
42 }
43 
44 
46 }
47 
48 
50 #ifdef V8_HOST_CAN_READ_UNALIGNED
51  return true;
52 #else
53  return false;
54 #endif
55 }
56 
57 
58 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
59 
61  : RegExpMacroAssembler(zone) {
62 }
63 
64 
66 }
67 
68 
70  return FLAG_enable_unaligned_accesses && !slow_safe();
71 }
72 
74  String* subject,
75  int start_index) {
76  // Not just flat, but ultra flat.
77  ASSERT(subject->IsExternalString() || subject->IsSeqString());
78  ASSERT(start_index >= 0);
79  ASSERT(start_index <= subject->length());
80  if (subject->IsAsciiRepresentation()) {
81  const byte* address;
82  if (StringShape(subject).IsExternal()) {
83  const char* data = ExternalAsciiString::cast(subject)->GetChars();
84  address = reinterpret_cast<const byte*>(data);
85  } else {
86  ASSERT(subject->IsSeqAsciiString());
87  char* data = SeqAsciiString::cast(subject)->GetChars();
88  address = reinterpret_cast<const byte*>(data);
89  }
90  return address + start_index;
91  }
92  const uc16* data;
93  if (StringShape(subject).IsExternal()) {
94  data = ExternalTwoByteString::cast(subject)->GetChars();
95  } else {
96  ASSERT(subject->IsSeqTwoByteString());
97  data = SeqTwoByteString::cast(subject)->GetChars();
98  }
99  return reinterpret_cast<const byte*>(data + start_index);
100 }
101 
102 
104  Handle<Code> regexp_code,
105  Handle<String> subject,
106  int* offsets_vector,
107  int offsets_vector_length,
108  int previous_index,
109  Isolate* isolate) {
110 
111  ASSERT(subject->IsFlat());
112  ASSERT(previous_index >= 0);
113  ASSERT(previous_index <= subject->length());
114 
115  // No allocations before calling the regexp, but we can't use
116  // AssertNoAllocation, since regexps might be preempted, and another thread
117  // might do allocation anyway.
118 
119  String* subject_ptr = *subject;
120  // Character offsets into string.
121  int start_offset = previous_index;
122  int char_length = subject_ptr->length() - start_offset;
123  int slice_offset = 0;
124 
125  // The string has been flattened, so if it is a cons string it contains the
126  // full string in the first part.
127  if (StringShape(subject_ptr).IsCons()) {
128  ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
129  subject_ptr = ConsString::cast(subject_ptr)->first();
130  } else if (StringShape(subject_ptr).IsSliced()) {
131  SlicedString* slice = SlicedString::cast(subject_ptr);
132  subject_ptr = slice->parent();
133  slice_offset = slice->offset();
134  }
135  // Ensure that an underlying string has the same ASCII-ness.
136  bool is_ascii = subject_ptr->IsAsciiRepresentation();
137  ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
138  // String is now either Sequential or External
139  int char_size_shift = is_ascii ? 0 : 1;
140 
141  const byte* input_start =
142  StringCharacterPosition(subject_ptr, start_offset + slice_offset);
143  int byte_length = char_length << char_size_shift;
144  const byte* input_end = input_start + byte_length;
145  Result res = Execute(*regexp_code,
146  *subject,
147  start_offset,
148  input_start,
149  input_end,
150  offsets_vector,
151  offsets_vector_length,
152  isolate);
153  return res;
154 }
155 
156 
158  Code* code,
159  String* input, // This needs to be the unpacked (sliced, cons) string.
160  int start_offset,
161  const byte* input_start,
162  const byte* input_end,
163  int* output,
164  int output_size,
165  Isolate* isolate) {
166  ASSERT(isolate == Isolate::Current());
167  // Ensure that the minimum stack has been allocated.
168  RegExpStackScope stack_scope(isolate);
169  Address stack_base = stack_scope.stack()->stack_base();
170 
171  int direct_call = 0;
172  int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
173  input,
174  start_offset,
175  input_start,
176  input_end,
177  output,
178  output_size,
179  stack_base,
180  direct_call,
181  isolate);
182  ASSERT(result >= RETRY);
183 
184  if (result == EXCEPTION && !isolate->has_pending_exception()) {
185  // We detected a stack overflow (on the backtrack stack) in RegExp code,
186  // but haven't created the exception yet.
187  isolate->StackOverflow();
188  }
189  return static_cast<Result>(result);
190 }
191 
192 
194  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198 
199  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
202  0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
203 
204  0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
205  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
206  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
207  0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
208 
209  0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
210  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
211  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
212  0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
213 };
214 
215 
217  Address byte_offset1,
218  Address byte_offset2,
219  size_t byte_length,
220  Isolate* isolate) {
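221  ASSERT(isolate == Isolate::Current());
222  unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
223  isolate->regexp_macro_assembler_canonicalize();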
224  // This function is not allowed to cause a garbage collection.
225  // A GC might move the calling generated code and invalidate the
226  // return address on the stack.
227  ASSERT(byte_length % 2 == 0);
228  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
229  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
230  size_t length = byte_length >> 1;
231 
232  for (size_t i = 0; i < length; i++) {
233  unibrow::uchar c1 = substring1[i];
234  unibrow::uchar c2 = substring2[i];
235  if (c1 != c2) {
236  unibrow::uchar s1[1] = { c1 };
237  canonicalize->get(c1, '\0', s1);
238  if (s1[0] != c2) {
239  unibrow::uchar s2[1] = { c2 };
240  canonicalize->get(c2, '\0', s2);
241  if (s1[0] != s2[0]) {
242  return 0;
243  }
244  }
245  }
246  }
247  return 1;
248 }
249 
250 
252  Address* stack_base,
253  Isolate* isolate) {
254  ASSERT(isolate == Isolate::Current());
255  RegExpStack* regexp_stack = isolate->regexp_stack();
256  size_t size = regexp_stack->stack_capacity();
257  Address old_stack_base = regexp_stack->stack_base();
258  ASSERT(old_stack_base == *stack_base);
259  ASSERT(stack_pointer <= old_stack_base);
260  ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
261  Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
262  if (new_stack_base == NULL) {
263  return NULL;
264  }
265  *stack_base = new_stack_base;
266  intptr_t stack_content_size = old_stack_base - stack_pointer;
267  return new_stack_base - stack_content_size;
268 }
269 
270 #endif // V8_INTERPRETED_REGEXP
271 
272 } } // namespace v8::internal
byte * Address
Definition: globals.h:157
const SwVfpRegister s2
Failure * StackOverflow()
Definition: isolate.cc:924
#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8)
Definition: simulator-arm.h:59
Address EnsureCapacity(size_t size)
Definition: regexp-stack.cc:90
static Result Execute(Code *code, String *input, int start_offset, const byte *input_start, const byte *input_end, int *output, int output_size, Isolate *isolate)
bool IsAsciiRepresentation()
Definition: objects-inl.h:290
static ExternalTwoByteString * cast(Object *obj)
RegExpStack * regexp_stack()
Definition: isolate.h:937
#define ASSERT(condition)
Definition: checks.h:270
RegExpStack * stack() const
Definition: regexp-stack.h:49
static ExternalAsciiString * cast(Object *obj)
int get(uchar c, uchar n, uchar *result)
Definition: unicode-inl.h:49
uint8_t byte
Definition: globals.h:156
static SlicedString * cast(Object *obj)
unibrow::Mapping< unibrow::Ecma262Canonicalize > * regexp_macro_assembler_canonicalize()
Definition: isolate.h:933
static int CaseInsensitiveCompareUC16(Address byte_offset1, Address byte_offset2, size_t byte_length, Isolate *isolate)
static SeqAsciiString * cast(Object *obj)
bool has_pending_exception()
Definition: isolate.h:561
activate correct semantics for inheriting readonliness false
Definition: flags.cc:141
const SwVfpRegister s1
static SeqTwoByteString * cast(Object *obj)
static const byte * StringCharacterPosition(String *subject, int start_index)
uint16_t uc16
Definition: globals.h:259
static Result Match(Handle< Code > regexp, Handle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
#define ASSERT_EQ(v1, v2)
Definition: checks.h:271
static Address GrowStack(Address stack_pointer, Address *stack_top, Isolate *isolate)
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 and VFP2 enable use of VFP2 instructions if available enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of MIPS FPU instructions if NULL
Definition: flags.cc:301
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra code(assertions) for debugging") DEFINE_bool(code_comments
static ConsString * cast(Object *obj)
unsigned int uchar
Definition: unicode.h:40