v8  3.11.10(node0.8.26)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
regexp-macro-assembler.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #include "v8.h"
29 #include "ast.h"
30 #include "assembler.h"
31 #include "regexp-stack.h"
32 #include "regexp-macro-assembler.h"
33 #include "simulator.h"
34 
35 namespace v8 {
36 namespace internal {
37 
// Constructor initializer list (the line naming the constructor is absent
// from this listing). slow_safe_compiler_ starts out false, global_mode_
// starts out as NOT_GLOBAL, and the zone argument is stored in zone_.
// The constructor body itself is empty.
39  : slow_safe_compiler_(false),
40  global_mode_(NOT_GLOBAL),
41  zone_(zone) {
42 }
43 
44 
46 }
47 
48 
// Body of a predicate whose signature line is absent from this listing.
// The result is fixed at build time: true only when the build defines
// V8_HOST_CAN_READ_UNALIGNED, false otherwise.
50 #ifdef V8_HOST_CAN_READ_UNALIGNED
51  return true;
52 #else
53  return false;
54 #endif
55 }
56 
57 
58 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
59 
// Constructor initializer list (signature line absent from this listing):
// passes zone on to RegExpMacroAssembler (presumably the base class) and
// does nothing else.
61  : RegExpMacroAssembler(zone) {
62 }
63 
64 
66 }
67 
68 
// Body of a predicate whose signature line is absent from this listing.
// When the build defines V8_TARGET_CAN_READ_UNALIGNED the answer is the
// negation of slow_safe(); without that define the answer is always false.
70 #ifdef V8_TARGET_CAN_READ_UNALIGNED
71  return !slow_safe();
72 #else
73  return false;
74 #endif
75 }
76 
// Parameter list and body of StringCharacterPosition (the line carrying the
// function name is absent from this listing).
//
// Returns the address of the character at position start_index inside the
// character data of subject.
//
// subject:      must be an external or a sequential string (asserted); cons
//               and sliced strings are not accepted here.
// start_index:  character (not byte) index, 0 <= start_index <= length
//               (asserted), so the one-past-the-end position is allowed.
78  String* subject,
79  int start_index) {
80  // Not just flat, but ultra flat.
81  ASSERT(subject->IsExternalString() || subject->IsSeqString());
82  ASSERT(start_index >= 0);
83  ASSERT(start_index <= subject->length());
    // ASCII representation: the data is viewed as bytes, so the character
    // index is added directly to the byte address.
84  if (subject->IsAsciiRepresentation()) {
85  const byte* address;
86  if (StringShape(subject).IsExternal()) {
87  const char* data = ExternalAsciiString::cast(subject)->GetChars();
88  address = reinterpret_cast<const byte*>(data);
89  } else {
90  ASSERT(subject->IsSeqAsciiString());
91  char* data = SeqAsciiString::cast(subject)->GetChars();
92  address = reinterpret_cast<const byte*>(data);
93  }
94  return address + start_index;
95  }
    // Two-byte representation: the index is added to a uc16 pointer, so the
    // pointer arithmetic scales it by the character size before the result
    // is converted to a byte address.
96  const uc16* data;
97  if (StringShape(subject).IsExternal()) {
98  data = ExternalTwoByteString::cast(subject)->GetChars();
99  } else {
100  ASSERT(subject->IsSeqTwoByteString());
101  data = SeqTwoByteString::cast(subject)->GetChars();
102  }
103  return reinterpret_cast<const byte*>(data + start_index);
104 }
105 
106 
// Parameter list and body of Match (the line carrying the function name is
// absent from this listing).
//
// Locates the raw character data of a flat subject string, computes the
// byte range [input_start, input_end) that begins at previous_index, and
// hands it to Execute together with the compiled regexp code.
//
// regexp_code:            compiled code object passed through to Execute.
// subject:                must be flat (asserted); may be a cons or sliced
//                         string, which is unwrapped below.
// offsets_vector,
// offsets_vector_length:  output buffer and its size, passed through to
//                         Execute unchanged.
// previous_index:         character index at which to start,
//                         0 <= previous_index <= subject length (asserted).
// isolate:                passed through to Execute.
//
// Returns whatever Execute returns.
108  Handle<Code> regexp_code,
109  Handle<String> subject,
110  int* offsets_vector,
111  int offsets_vector_length,
112  int previous_index,
113  Isolate* isolate) {
114 
115  ASSERT(subject->IsFlat());
116  ASSERT(previous_index >= 0);
117  ASSERT(previous_index <= subject->length());
118 
119  // No allocations before calling the regexp, but we can't use
120  // AssertNoAllocation, since regexps might be preempted, and another thread
121  // might do allocation anyway.
122 
123  String* subject_ptr = *subject;
124  // Character offsets into string.
125  int start_offset = previous_index;
    // Number of characters from the start position to the end of the
    // original (not yet unwrapped) subject.
126  int char_length = subject_ptr->length() - start_offset;
    // Stays 0 unless the subject is a slice; then it is the slice's offset
    // into its parent string.
127  int slice_offset = 0;
128 
129  // The string has been flattened, so if it is a cons string it contains the
130  // full string in the first part.
131  if (StringShape(subject_ptr).IsCons()) {
132  ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
133  subject_ptr = ConsString::cast(subject_ptr)->first();
134  } else if (StringShape(subject_ptr).IsSliced()) {
135  SlicedString* slice = SlicedString::cast(subject_ptr);
136  subject_ptr = slice->parent();
137  slice_offset = slice->offset();
138  }
139  // Ensure that an underlying string has the same ASCII-ness.
140  bool is_ascii = subject_ptr->IsAsciiRepresentation();
141  ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
142  // String is now either Sequential or External
    // Shift that converts a character count into a byte count: 0 for ASCII
    // data, 1 (i.e. multiply by two) otherwise.
143  int char_size_shift = is_ascii ? 0 : 1;
144 
    // The start address is taken in the unwrapped string, so the slice
    // offset has to be added to the requested start position.
145  const byte* input_start =
146  StringCharacterPosition(subject_ptr, start_offset + slice_offset);
147  int byte_length = char_length << char_size_shift;
148  const byte* input_end = input_start + byte_length;
    // NOTE(review): Execute receives *subject (the original string), while
    // input_start/input_end were derived from the unwrapped subject_ptr.
    // Confirm this is what the generated code expects.
149  Result res = Execute(*regexp_code,
150  *subject,
151  start_offset,
152  input_start,
153  input_end,
154  offsets_vector,
155  offsets_vector_length,
156  isolate);
157  return res;
158 }
159 
160 
// Parameter list and body of Execute (the line carrying the function name is
// absent from this listing).
//
// Invokes the generated regexp code through CALL_GENERATED_REGEXP_CODE and
// converts its integer result into a Result value.
//
// code:          code object whose entry() is called.
// input:         subject string handed to the generated code.
// start_offset:  start position handed to the generated code.
// input_start,
// input_end:     byte range of the character data to match against.
// output,
// output_size:   result buffer and its size, handed to the generated code.
// isolate:       must be the current isolate (asserted).
//
// If the generated code reports EXCEPTION while the isolate has no pending
// exception, isolate->StackOverflow() is called before returning.
162  Code* code,
163  String* input, // This needs to be the unpacked (sliced, cons) string.
164  int start_offset,
165  const byte* input_start,
166  const byte* input_end,
167  int* output,
168  int output_size,
169  Isolate* isolate) {
170  ASSERT(isolate == Isolate::Current());
171  // Ensure that the minimum stack has been allocated.
172  RegExpStackScope stack_scope(isolate);
173  Address stack_base = stack_scope.stack()->stack_base();
174 
    // Always 0 on this path. NOTE(review): presumably tells the generated
    // code that it was not entered directly from other generated code;
    // confirm against the macro assembler implementations.
175  int direct_call = 0;
176  int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
177  input,
178  start_offset,
179  input_start,
180  input_end,
181  output,
182  output_size,
183  stack_base,
184  direct_call,
185  isolate);
    // Sanity check: the generated code must not return anything below RETRY.
186  ASSERT(result >= RETRY);
187 
188  if (result == EXCEPTION && !isolate->has_pending_exception()) {
189  // We detected a stack overflow (on the backtrack stack) in RegExp code,
190  // but haven't created the exception yet.
191  isolate->StackOverflow();
192  }
193  return static_cast<Result>(result);
194 }
195 
196 
// Initializer of a 128-entry lookup table (the declaration line is absent
// from this listing). As the per-row comments indicate, entry i describes
// character code i: it is 0xff for the digits, the upper- and lower-case
// ASCII letters and '_', and 0x00 for every other code in 0x00-0x7f.
    // Codes 0x00 - 0x1f: no word characters.
198  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
202 
    // Codes 0x20 - 0x3f: only the ten digits are set.
203  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
204  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
205  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
206  0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
207 
    // Codes 0x40 - 0x5f: upper-case letters and '_' (the last entry).
208  0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
209  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
210  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
211  0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
212 
    // Codes 0x60 - 0x7f: lower-case letters.
213  0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
214  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
215  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
216  0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
217 };
218 
219 
// Parameter list and body of CaseInsensitiveCompareUC16 (the line carrying
// the function name is absent from this listing).
//
// Compares two equally long runs of uc16 characters, treating characters as
// equal when they are identical or when their canonicalized forms agree.
//
// byte_offset1,
// byte_offset2:  addresses of the two runs; reinterpreted as uc16*.
// byte_length:   length of each run in bytes; must be even (asserted).
// isolate:       must be the current isolate (asserted).
//
// Returns 1 if every character pair matches (including when byte_length is
// 0) and 0 at the first pair that does not.
221  Address byte_offset1,
222  Address byte_offset2,
223  size_t byte_length,
224  Isolate* isolate) {
225  ASSERT(isolate == Isolate::Current());
    // NOTE(review): `canonicalize`, used below, is declared on listing lines
    // 226-227, which are absent here; presumably it is obtained from
    // isolate->regexp_macro_assembler_canonicalize() - confirm.
228  // This function is not allowed to cause a garbage collection.
229  // A GC might move the calling generated code and invalidate the
230  // return address on the stack.
231  ASSERT(byte_length % 2 == 0);
232  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
233  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
    // Two bytes per character: convert the byte count to a character count.
234  size_t length = byte_length >> 1;
235 
236  for (size_t i = 0; i < length; i++) {
237  unibrow::uchar c1 = substring1[i];
238  unibrow::uchar c2 = substring2[i];
    // Identical characters need no canonicalization at all.
239  if (c1 != c2) {
    // s1 is pre-filled with c1, presumably so that it keeps the original
    // character if get() writes no mapping.
240  unibrow::uchar s1[1] = { c1 };
241  canonicalize->get(c1, '\0', s1);
    // If canonicalizing c1 already yields c2 the pair matches and c2 is not
    // canonicalized; otherwise both canonical forms are compared.
242  if (s1[0] != c2) {
243  unibrow::uchar s2[1] = { c2 };
244  canonicalize->get(c2, '\0', s2);
245  if (s1[0] != s2[0]) {
246  return 0;
247  }
248  }
249  }
250  }
251  return 1;
252 }
253 
254 
// Remainder of the parameter list and body of GrowStack (the line carrying
// the function name and the first parameter, stack_pointer, is absent from
// this listing).
//
// Asks the isolate's RegExpStack for twice its current capacity and
// translates the caller's stack pointer to the resized stack.
//
// stack_pointer:  current position in the stack; must not be above the
//                 stack base and must lie within the current capacity
//                 (asserted).
// stack_base:     in/out; must equal the current stack base on entry
//                 (asserted) and is overwritten with the new base on
//                 success. Left untouched on failure.
// isolate:        must be the current isolate (asserted).
//
// Returns NULL if EnsureCapacity returns NULL; otherwise the address that
// lies as far below the new base as stack_pointer was below the old base.
256  Address* stack_base,
257  Isolate* isolate) {
258  ASSERT(isolate == Isolate::Current());
259  RegExpStack* regexp_stack = isolate->regexp_stack();
260  size_t size = regexp_stack->stack_capacity();
261  Address old_stack_base = regexp_stack->stack_base();
262  ASSERT(old_stack_base == *stack_base);
263  ASSERT(stack_pointer <= old_stack_base);
264  ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
    // NOTE(review): the existing stack contents are presumably carried over
    // by EnsureCapacity; nothing is copied here - confirm.
265  Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
266  if (new_stack_base == NULL) {
267  return NULL;
268  }
269  *stack_base = new_stack_base;
    // Amount of stack in use, measured from the old base down to the
    // caller's stack pointer.
270  intptr_t stack_content_size = old_stack_base - stack_pointer;
271  return new_stack_base - stack_content_size;
272 }
273 
274 #endif // V8_INTERPRETED_REGEXP
275 
276 } } // namespace v8::internal
byte * Address
Definition: globals.h:172
const SwVfpRegister s2
Failure * StackOverflow()
Definition: isolate.cc:897
#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8)
Definition: simulator-arm.h:59
Address EnsureCapacity(size_t size)
Definition: regexp-stack.cc:90
static Result Execute(Code *code, String *input, int start_offset, const byte *input_start, const byte *input_end, int *output, int output_size, Isolate *isolate)
bool IsAsciiRepresentation()
Definition: objects-inl.h:289
static ExternalTwoByteString * cast(Object *obj)
RegExpStack * regexp_stack()
Definition: isolate.h:922
#define ASSERT(condition)
Definition: checks.h:270
RegExpStack * stack() const
Definition: regexp-stack.h:49
static ExternalAsciiString * cast(Object *obj)
int get(uchar c, uchar n, uchar *result)
Definition: unicode-inl.h:48
uint8_t byte
Definition: globals.h:171
static SlicedString * cast(Object *obj)
unibrow::Mapping< unibrow::Ecma262Canonicalize > * regexp_macro_assembler_canonicalize()
Definition: isolate.h:918
static int CaseInsensitiveCompareUC16(Address byte_offset1, Address byte_offset2, size_t byte_length, Isolate *isolate)
static SeqAsciiString * cast(Object *obj)
bool has_pending_exception()
Definition: isolate.h:554
const SwVfpRegister s1
static SeqTwoByteString * cast(Object *obj)
static const byte * StringCharacterPosition(String *subject, int start_index)
uint16_t uc16
Definition: globals.h:273
static Result Match(Handle< Code > regexp, Handle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
#define ASSERT_EQ(v1, v2)
Definition: checks.h:271
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination trace on stack replacement optimize closures functions with arguments object optimize functions containing for in loops profiler considers IC stability primitive functions trigger their own optimization re try self optimization if it failed insert an interrupt check at function exit execution budget before interrupt is triggered call count before self optimization self_optimization count_based_interrupts weighted_back_edges trace_opt emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 enable use of ARMv7 instructions if enable use of MIPS FPU instructions if NULL
Definition: flags.cc:274
static Address GrowStack(Address stack_pointer, Address *stack_top, Isolate *isolate)
static ConsString * cast(Object *obj)
unsigned int uchar
Definition: unicode.h:40