v8  3.11.10(node0.8.26)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
regexp-macro-assembler-x64.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #include "v8.h"
29 
30 #if defined(V8_TARGET_ARCH_X64)
31 
32 #include "serialize.h"
33 #include "unicode.h"
34 #include "log.h"
35 #include "regexp-stack.h"
36 #include "macro-assembler.h"
37 #include "regexp-macro-assembler.h"
39 
40 namespace v8 {
41 namespace internal {
42 
43 #ifndef V8_INTERPRETED_REGEXP
44 
45 /*
46  * This assembler uses the following register assignment convention
47  * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded
48  * using LoadCurrentCharacter before using any of the dispatch methods.
49  * Temporarily stores the index of capture start after a matching pass
50  * for a global regexp.
51  * - rdi : Current position in input, as negative offset from end of string.
52  * Please notice that this is the byte offset, not the character
53  * offset! Is always a 32-bit signed (negative) offset, but must be
54  * maintained sign-extended to 64 bits, since it is used as index.
55  * - rsi : End of input (points to byte after last character in input),
56  * so that rsi+rdi points to the current character.
57  * - rbp : Frame pointer. Used to access arguments, local variables and
58  * RegExp registers.
59  * - rsp : Points to tip of C stack.
60  * - rcx : Points to tip of backtrack stack. The backtrack stack contains
61  * only 32-bit values. Most are offsets from some base (e.g., character
62  * positions from end of string or code location from Code* pointer).
63  * - r8 : Code object pointer. Used to convert between absolute and
64  * code-object-relative addresses.
65  *
66  * The registers rax, rbx, r9 and r11 are free to use for computations.
67  * If changed to use r12+, they should be saved as callee-save registers.
68  * The macro assembler special registers r12 and r13 (kSmiConstantRegister,
69  * kRootRegister) aren't special during execution of RegExp code (they don't
70  * hold the values assumed when creating JS code), so no Smi or Root related
71  * macro operations can be used.
72  *
73  * Each call to a C++ method should retain these registers.
74  *
75  * The stack will have the following content, in some order, indexable from the
76  * frame pointer (see, e.g., kStackHighEnd):
77  * - Isolate* isolate (address of the current isolate)
78  * - direct_call (if 1, direct call from JavaScript code, if 0 call
79  * through the runtime system)
80  * - stack_area_base (high end of the memory area to use as
81  * backtracking stack)
82  * - capture array size (may fit multiple sets of matches)
83  * - int* capture_array (int[num_saved_registers_], for output).
84  * - end of input (address of end of string)
85  * - start of input (address of first character in string)
86  * - start index (character index of start)
87  * - String* input_string (input string)
88  * - return address
89  * - backup of callee save registers (rbx, possibly rsi and rdi).
90  * - success counter (only useful for global regexp to count matches)
91  * - Offset of location before start of input (effectively character
92  * position -1). Used to initialize capture registers to a non-position.
93  * - At start of string (if 1, we are starting at the start of the
94  * string, otherwise 0)
95  * - register 0 rbp[-n] (Only positions must be stored in the first
96  * - register 1 rbp[-n-8] num_saved_registers_ registers)
97  * - ...
98  *
99  * The first num_saved_registers_ registers are initialized to point to
100  * "character -1" in the string (i.e., char_size() bytes before the first
101  * character of the string). The remaining registers start out uninitialized.
102  *
103  * The first seven values must be provided by the calling code by
104  * calling the code's entry address cast to a function pointer with the
105  * following signature:
106  * int (*match)(String* input_string,
107  * int start_index,
108  * Address start,
109  * Address end,
110  * int* capture_output_array,
111  * bool at_start,
112  * byte* stack_area_base,
113  * bool direct_call)
114  */
115 
116 #define __ ACCESS_MASM((&masm_))
117 
// NOTE(review): listing line 118, expected to carry the constructor's
// qualified name and opening parenthesis, is missing from this extract; only
// the parameter tail, the initializer list and the body are visible.
//
// Constructor: stores the matching mode, uses `registers_to_save` for both the
// total and the saved register counts, and emits the first two pieces of
// generated code (a jump to the not-yet-written entry code, and the label at
// which the regexp body starts).
119  Mode mode,
120  int registers_to_save,
121  Zone* zone)
122  : NativeRegExpMacroAssembler(zone),
123  masm_(Isolate::Current(), NULL, kRegExpCodeSize),
124  no_root_array_scope_(&masm_),
125  code_relative_fixup_positions_(4, zone),
126  mode_(mode),
127  num_registers_(registers_to_save),
128  num_saved_registers_(registers_to_save),
129  entry_label_(),
130  start_label_(),
131  success_label_(),
132  backtrack_label_(),
133  exit_label_() {
// The saved-register count must be even (presumably because captures occupy
// a start/end register pair -- confirm against callers).
134  ASSERT_EQ(0, registers_to_save % 2);
135  __ jmp(&entry_label_); // We'll write the entry code when we know more.
136  __ bind(&start_label_); // And then continue from here.
137 }
138 
139 
// Destructor: calls Unuse() on every label member of the assembler.
140 RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() {
141  // Unuse labels in case we throw away the assembler without calling GetCode.
142  entry_label_.Unuse();
143  start_label_.Unuse();
144  success_label_.Unuse();
145  backtrack_label_.Unuse();
146  exit_label_.Unuse();
147  check_preempt_label_.Unuse();
148  stack_overflow_label_.Unuse();
149 }
150 
151 
// Returns the constant RegExpStack::kStackLimitSlack.
152 int RegExpMacroAssemblerX64::stack_limit_slack() {
153  return RegExpStack::kStackLimitSlack;
154 }
155 
156 
// Emits code that adds `by` characters to the current input position held in
// rdi. `by` is scaled by char_size() because rdi is a byte offset. No code is
// emitted when `by` is zero.
157 void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) {
158  if (by != 0) {
159  __ addq(rdi, Immediate(by * char_size()));
160  }
161 }
162 
163 
// Emits code that adds `by` to the memory slot of regexp register `reg`.
// `reg` must lie in [0, num_registers_). No code is emitted when `by` is zero.
164 void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) {
165  ASSERT(reg >= 0);
166  ASSERT(reg < num_registers_);
167  if (by != 0) {
168  __ addq(register_location(reg), Immediate(by));
169  }
170 }
171 
172 
// Emits a backtrack: after CheckPreemption(), the top entry of the backtrack
// stack (a code-object-relative offset) is popped into rbx, rebased on the
// code object pointer, and jumped to.
173 void RegExpMacroAssemblerX64::Backtrack() {
174  CheckPreemption();
175  // Pop Code* offset from backtrack stack, add Code* and jump to location.
176  Pop(rbx);
177  __ addq(rbx, code_object_pointer());
178  __ jmp(rbx);
179 }
180 
181 
// Binds `label` at the current position of the underlying macro assembler.
182 void RegExpMacroAssemblerX64::Bind(Label* label) {
183  __ bind(label);
184 }
185 
186 
// Emits a 32-bit compare of the current character with `c` and passes the
// `equal` condition and `on_equal` to BranchOrBacktrack.
187 void RegExpMacroAssemblerX64::CheckCharacter(uint32_t c, Label* on_equal) {
188  __ cmpl(current_character(), Immediate(c));
189  BranchOrBacktrack(equal, on_equal);
190 }
191 
192 
// Emits a 32-bit compare of the current character with `limit` and passes the
// (signed) `greater` condition and `on_greater` to BranchOrBacktrack.
193 void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) {
194  __ cmpl(current_character(), Immediate(limit));
195  BranchOrBacktrack(greater, on_greater);
196 }
197 
198 
// Emits code that goes to `on_at_start` when the current position is the very
// start of the input: the start index stored in the frame must be zero and
// the current address (rsi + rdi) must equal the stored input start address.
// Otherwise execution falls through.
199 void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) {
200  Label not_at_start;
201  // Did we start the match at the start of the string at all?
202  __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
203  BranchOrBacktrack(not_equal, &not_at_start);
204  // If we did, are we still at the start of the input?
205  __ lea(rax, Operand(rsi, rdi, times_1, 0));
206  __ cmpq(rax, Operand(rbp, kInputStart));
207  BranchOrBacktrack(equal, on_at_start);
208  __ bind(&not_at_start);
209 }
210 
211 
// Emits the inverse of CheckAtStart: goes to `on_not_at_start` when the stored
// start index is non-zero, or when the current address (rsi + rdi) differs
// from the stored input start address. Otherwise execution falls through.
212 void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) {
213  // Did we start the match at the start of the string at all?
214  __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
215  BranchOrBacktrack(not_equal, on_not_at_start);
216  // If we did, are we still at the start of the input?
217  __ lea(rax, Operand(rsi, rdi, times_1, 0));
218  __ cmpq(rax, Operand(rbp, kInputStart));
219  BranchOrBacktrack(not_equal, on_not_at_start);
220 }
221 
222 
// Emits a 32-bit compare of the current character with `limit` and passes the
// (signed) `less` condition and `on_less` to BranchOrBacktrack.
223 void RegExpMacroAssemblerX64::CheckCharacterLT(uc16 limit, Label* on_less) {
224  __ cmpl(current_character(), Immediate(limit));
225  BranchOrBacktrack(less, on_less);
226 }
227 
228 
229 void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str,
230  int cp_offset,
231  Label* on_failure,
232  bool check_end_of_string) {
233 #ifdef DEBUG
234  // If input is ASCII, don't even bother calling here if the string to
235  // match contains a non-ASCII character.
236  if (mode_ == ASCII) {
237  ASSERT(String::IsAscii(str.start(), str.length()));
238  }
239 #endif
240  int byte_length = str.length() * char_size();
241  int byte_offset = cp_offset * char_size();
242  if (check_end_of_string) {
243  // Check that there are at least str.length() characters left in the input.
244  __ cmpl(rdi, Immediate(-(byte_offset + byte_length)));
245  BranchOrBacktrack(greater, on_failure);
246  }
247 
248  if (on_failure == NULL) {
249  // Instead of inlining a backtrack, (re)use the global backtrack target.
250  on_failure = &backtrack_label_;
251  }
252 
253  // Do one character test first to minimize loading for the case that
254  // we don't match at all (loading more than one character introduces that
255  // chance of reading unaligned and reading across cache boundaries).
256  // If the first character matches, expect a larger chance of matching the
257  // string, and start loading more characters at a time.
258  if (mode_ == ASCII) {
259  __ cmpb(Operand(rsi, rdi, times_1, byte_offset),
260  Immediate(static_cast<int8_t>(str[0])));
261  } else {
262  // Don't use 16-bit immediate. The size changing prefix throws off
263  // pre-decoding.
264  __ movzxwl(rax,
265  Operand(rsi, rdi, times_1, byte_offset));
266  __ cmpl(rax, Immediate(static_cast<int32_t>(str[0])));
267  }
268  BranchOrBacktrack(not_equal, on_failure);
269 
270  __ lea(rbx, Operand(rsi, rdi, times_1, 0));
271  for (int i = 1, n = str.length(); i < n; ) {
272  if (mode_ == ASCII) {
273  if (i + 8 <= n) {
274  uint64_t combined_chars =
275  (static_cast<uint64_t>(str[i + 0]) << 0) ||
276  (static_cast<uint64_t>(str[i + 1]) << 8) ||
277  (static_cast<uint64_t>(str[i + 2]) << 16) ||
278  (static_cast<uint64_t>(str[i + 3]) << 24) ||
279  (static_cast<uint64_t>(str[i + 4]) << 32) ||
280  (static_cast<uint64_t>(str[i + 5]) << 40) ||
281  (static_cast<uint64_t>(str[i + 6]) << 48) ||
282  (static_cast<uint64_t>(str[i + 7]) << 56);
283  __ movq(rax, combined_chars, RelocInfo::NONE);
284  __ cmpq(rax, Operand(rbx, byte_offset + i));
285  i += 8;
286  } else if (i + 4 <= n) {
287  uint32_t combined_chars =
288  (static_cast<uint32_t>(str[i + 0]) << 0) ||
289  (static_cast<uint32_t>(str[i + 1]) << 8) ||
290  (static_cast<uint32_t>(str[i + 2]) << 16) ||
291  (static_cast<uint32_t>(str[i + 3]) << 24);
292  __ cmpl(Operand(rbx, byte_offset + i), Immediate(combined_chars));
293  i += 4;
294  } else {
295  __ cmpb(Operand(rbx, byte_offset + i),
296  Immediate(static_cast<int8_t>(str[i])));
297  i++;
298  }
299  } else {
300  ASSERT(mode_ == UC16);
301  if (i + 4 <= n) {
302  uint64_t combined_chars = *reinterpret_cast<const uint64_t*>(&str[i]);
303  __ movq(rax, combined_chars, RelocInfo::NONE);
304  __ cmpq(rax,
305  Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)));
306  i += 4;
307  } else if (i + 2 <= n) {
308  uint32_t combined_chars = *reinterpret_cast<const uint32_t*>(&str[i]);
309  __ cmpl(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)),
310  Immediate(combined_chars));
311  i += 2;
312  } else {
313  __ movzxwl(rax,
314  Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)));
315  __ cmpl(rax, Immediate(str[i]));
316  i++;
317  }
318  }
319  BranchOrBacktrack(not_equal, on_failure);
320  }
321 }
322 
323 
// Emits code that compares the current position (rdi) with the 32-bit value
// on top of the backtrack stack. When they are equal, that entry is dropped
// and control goes unconditionally to `on_equal` via BranchOrBacktrack;
// otherwise execution falls through with the backtrack stack untouched.
324 void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
325  Label fallthrough;
326  __ cmpl(rdi, Operand(backtrack_stackpointer(), 0));
327  __ j(not_equal, &fallthrough);
328  Drop();
329  BranchOrBacktrack(no_condition, on_equal);
330  __ bind(&fallthrough);
331 }
332 
333 
// Emits code for a case-insensitive back-reference. The capture is delimited
// by registers `start_reg` (start offset) and `start_reg + 1` (end offset).
// An empty capture succeeds immediately. On a mismatch control goes to
// `on_no_match`; on success the current position (rdi) is moved past the
// matched text.
// ASCII mode compares inline, byte by byte. UC16 mode calls the C function
// referenced by ExternalReference::re_case_insensitive_compare_uc16.
334 void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
335  int start_reg,
336  Label* on_no_match) {
337  Label fallthrough;
338  __ movq(rdx, register_location(start_reg)); // Offset of start of capture
339  __ movq(rbx, register_location(start_reg + 1)); // Offset of end of capture
340  __ subq(rbx, rdx); // Length of capture.
341
342  // -----------------------
343  // rdx = Start offset of capture.
344  // rbx = Length of capture
345
346  // If length is negative, this code will fail (it's a symptom of a partial or
347  // illegal capture where start of capture after end of capture).
348  // This must not happen (no back-reference can reference a capture that wasn't
349  // closed before in the reg-exp, and we must not generate code that can cause
350  // this condition).
351
352  // If length is zero, either the capture is empty or it is nonparticipating.
353  // In either case succeed immediately.
  // The flags tested here are those set by the subq above.
354  __ j(equal, &fallthrough);
355
356  if (mode_ == ASCII) {
357  Label loop_increment;
  // The loop below uses plain jumps to the label, so a NULL target is
  // replaced by the shared backtrack label first.
358  if (on_no_match == NULL) {
359  on_no_match = &backtrack_label_;
360  }
361
362  __ lea(r9, Operand(rsi, rdx, times_1, 0));
363  __ lea(r11, Operand(rsi, rdi, times_1, 0));
364  __ addq(rbx, r9); // End of capture
365  // ---------------------
366  // r11 - current input character address
367  // r9 - current capture character address
368  // rbx - end of capture
369
370  Label loop;
371  __ bind(&loop);
372  __ movzxbl(rdx, Operand(r9, 0));
373  __ movzxbl(rax, Operand(r11, 0));
374  // al - input character
375  // dl - capture character
376  __ cmpb(rax, rdx);
377  __ j(equal, &loop_increment);
378
379  // Mismatch, try case-insensitive match (converting letters to lower-case).
380  // I.e., if or-ing with 0x20 makes values equal and in range 'a'-'z', it's
381  // a match.
382  __ or_(rax, Immediate(0x20)); // Convert match character to lower-case.
383  __ or_(rdx, Immediate(0x20)); // Convert capture character to lower-case.
384  __ cmpb(rax, rdx);
385  __ j(not_equal, on_no_match); // Definitely not equal.
  // Unsigned range check: (c - 'a') <= ('z' - 'a').
386  __ subb(rax, Immediate('a'));
387  __ cmpb(rax, Immediate('z' - 'a'));
388  __ j(above, on_no_match); // Weren't letters anyway.
389
390  __ bind(&loop_increment);
391  // Increment pointers into match and capture strings.
392  __ addq(r11, Immediate(1));
393  __ addq(r9, Immediate(1));
394  // Compare to end of capture, and loop if not done.
395  __ cmpq(r9, rbx);
396  __ j(below, &loop);
397
398  // Compute new value of character position after the matched part.
399  __ movq(rdi, r11);
400  __ subq(rdi, rsi);
401  } else {
402  ASSERT(mode_ == UC16);
403  // Save important/volatile registers before calling C function.
404 #ifndef _WIN64
405  // Caller save on Linux and callee save in Windows.
406  __ push(rsi);
407  __ push(rdi);
408 #endif
409  __ push(backtrack_stackpointer());
410
411  static const int num_arguments = 4;
412  __ PrepareCallCFunction(num_arguments);
413
414  // Put arguments into parameter registers. Parameters are
415  // Address byte_offset1 - Address captured substring's start.
416  // Address byte_offset2 - Address of current character position.
417  // size_t byte_length - length of capture in bytes(!)
418  // Isolate* isolate
419 #ifdef _WIN64
420  // Compute and set byte_offset1 (start of capture).
421  __ lea(rcx, Operand(rsi, rdx, times_1, 0));
422  // Set byte_offset2.
423  __ lea(rdx, Operand(rsi, rdi, times_1, 0));
424  // Set byte_length.
425  __ movq(r8, rbx);
426  // Isolate.
427  __ LoadAddress(r9, ExternalReference::isolate_address());
428 #else // AMD64 calling convention
  // rdi and rsi are both inputs and argument registers here, so the
  // current position is computed into rax before rdi is overwritten.
429  // Compute byte_offset2 (current position = rsi+rdi).
430  __ lea(rax, Operand(rsi, rdi, times_1, 0));
431  // Compute and set byte_offset1 (start of capture).
432  __ lea(rdi, Operand(rsi, rdx, times_1, 0));
433  // Set byte_offset2.
434  __ movq(rsi, rax);
435  // Set byte_length.
436  __ movq(rdx, rbx);
437  // Isolate.
438  __ LoadAddress(rcx, ExternalReference::isolate_address());
439 #endif
440
441  { // NOLINT: Can't find a way to open this scope without confusing the
442  // linter.
443  AllowExternalCallThatCantCauseGC scope(&masm_);
444  ExternalReference compare =
445  ExternalReference::re_case_insensitive_compare_uc16(masm_.isolate());
446  __ CallCFunction(compare, num_arguments);
447  }
448
449  // Restore original values before reacting on result value.
450  __ Move(code_object_pointer(), masm_.CodeObject());
451  __ pop(backtrack_stackpointer());
452 #ifndef _WIN64
453  __ pop(rdi);
454  __ pop(rsi);
455 #endif
456
457  // Check if function returned non-zero for success or zero for failure.
458  __ testq(rax, rax);
459  BranchOrBacktrack(zero, on_no_match);
460  // On success, increment position by length of capture.
461  // Requires that rbx is callee save (true for both Win64 and AMD64 ABIs).
462  __ addq(rdi, rbx);
463  }
464  __ bind(&fallthrough);
465 }
466 
467 
// Emits code for a case-sensitive back-reference. The capture is delimited by
// registers `start_reg` (start offset) and `start_reg + 1` (end offset). An
// empty capture succeeds immediately. If too little input remains, or any
// character differs, control goes to `on_no_match`. On success the current
// position (rdi) is moved past the matched text.
468 void RegExpMacroAssemblerX64::CheckNotBackReference(
469  int start_reg,
470  Label* on_no_match) {
471  Label fallthrough;
472
473  // Find length of back-referenced capture.
474  __ movq(rdx, register_location(start_reg));
475  __ movq(rax, register_location(start_reg + 1));
476  __ subq(rax, rdx); // Length to check.
477
478  // Fail on partial or illegal capture (start of capture after end of capture).
479  // This must not happen (no back-reference can reference a capture that wasn't
480  // closed before in the reg-exp).
481  __ Check(greater_equal, "Invalid capture referenced");
482
483  // Succeed on empty capture (including non-participating capture)
484  __ j(equal, &fallthrough);
485
486  // -----------------------
487  // rdx - Start of capture
488  // rax - length of capture
489
490  // Check that there are sufficient characters left in the input.
  // rdi is a negative offset from the end of input, so a positive sum means
  // the capture is longer than the remaining input.
491  __ movl(rbx, rdi);
492  __ addl(rbx, rax);
493  BranchOrBacktrack(greater, on_no_match);
494
495  // Compute pointers to match string and capture string
496  __ lea(rbx, Operand(rsi, rdi, times_1, 0)); // Start of match.
497  __ addq(rdx, rsi); // Start of capture.
498  __ lea(r9, Operand(rdx, rax, times_1, 0)); // End of capture
499
500  // -----------------------
501  // rdx - current capture character address.
502  // rbx - current input character address .
503  // r9 - end of capture (capture length after rdx).
504
505  Label loop;
506  __ bind(&loop);
507  if (mode_ == ASCII) {
508  __ movzxbl(rax, Operand(rdx, 0));
509  __ cmpb(rax, Operand(rbx, 0));
510  } else {
511  ASSERT(mode_ == UC16);
512  __ movzxwl(rax, Operand(rdx, 0));
513  __ cmpw(rax, Operand(rbx, 0));
514  }
515  BranchOrBacktrack(not_equal, on_no_match);
516  // Increment pointers into capture and match string.
517  __ addq(rbx, Immediate(char_size()));
518  __ addq(rdx, Immediate(char_size()));
519  // Check if we have reached end of match area.
520  __ cmpq(rdx, r9);
521  __ j(below, &loop);
522
523  // Success.
524  // Set current character position to position after match.
525  __ movq(rdi, rbx);
526  __ subq(rdi, rsi);
527
528  __ bind(&fallthrough);
529 }
530 
531 
// Emits a 32-bit compare of the current character with `c` and passes the
// `not_equal` condition and `on_not_equal` to BranchOrBacktrack.
532 void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c,
533  Label* on_not_equal) {
534  __ cmpl(current_character(), Immediate(c));
535  BranchOrBacktrack(not_equal, on_not_equal);
536 }
537 
538 
// Emits code that goes to `on_equal` when (current_character & mask) == c.
// For c == 0 a single testl is emitted instead of the move/and/compare
// sequence through rax.
539 void RegExpMacroAssemblerX64::CheckCharacterAfterAnd(uint32_t c,
540  uint32_t mask,
541  Label* on_equal) {
542  if (c == 0) {
543  __ testl(current_character(), Immediate(mask));
544  } else {
545  __ movl(rax, Immediate(mask));
546  __ and_(rax, current_character());
547  __ cmpl(rax, Immediate(c));
548  }
549  BranchOrBacktrack(equal, on_equal);
550 }
551 
552 
// Emits code that goes to `on_not_equal` when (current_character & mask) != c.
// For c == 0 a single testl is emitted instead of the move/and/compare
// sequence through rax.
553 void RegExpMacroAssemblerX64::CheckNotCharacterAfterAnd(uint32_t c,
554  uint32_t mask,
555  Label* on_not_equal) {
556  if (c == 0) {
557  __ testl(current_character(), Immediate(mask));
558  } else {
559  __ movl(rax, Immediate(mask));
560  __ and_(rax, current_character());
561  __ cmpl(rax, Immediate(c));
562  }
563  BranchOrBacktrack(not_equal, on_not_equal);
564 }
565 
566 
// Emits code that goes to `on_not_equal` when
// ((current_character - minus) & mask) != c. The subtraction is done with an
// lea into rax, so the current character register is left unchanged.
567 void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd(
568  uc16 c,
569  uc16 minus,
570  uc16 mask,
571  Label* on_not_equal) {
572  ASSERT(minus < String::kMaxUtf16CodeUnit);
573  __ lea(rax, Operand(current_character(), -minus));
574  __ and_(rax, Immediate(mask));
575  __ cmpl(rax, Immediate(c));
576  BranchOrBacktrack(not_equal, on_not_equal);
577 }
578 
579 
// Emits code that goes to `on_in_range` when the current character lies in
// [from, to], using one unsigned comparison: (c - from) <= (to - from).
580 void RegExpMacroAssemblerX64::CheckCharacterInRange(
581  uc16 from,
582  uc16 to,
583  Label* on_in_range) {
584  __ leal(rax, Operand(current_character(), -from));
585  __ cmpl(rax, Immediate(to - from));
586  BranchOrBacktrack(below_equal, on_in_range);
587 }
588 
589 
// Emits code that goes to `on_not_in_range` when the current character lies
// outside [from, to], using one unsigned comparison: (c - from) > (to - from).
590 void RegExpMacroAssemblerX64::CheckCharacterNotInRange(
591  uc16 from,
592  uc16 to,
593  Label* on_not_in_range) {
594  __ leal(rax, Operand(current_character(), -from));
595  __ cmpl(rax, Immediate(to - from));
596  BranchOrBacktrack(above, on_not_in_range);
597 }
598 
599 
// Emits code that looks up the current character in the byte array `table`
// and goes to `on_bit_set` when the addressed byte is non-zero.
// The index is the current character itself only when the mode is ASCII and
// kTableMask equals String::kMaxAsciiCharCode; otherwise the character is
// first masked with kTableMask into rbx.
600 void RegExpMacroAssemblerX64::CheckBitInTable(
601  Handle<ByteArray> table,
602  Label* on_bit_set) {
603  __ Move(rax, table);
604  Register index = current_character();
605  if (mode_ != ASCII || kTableMask != String::kMaxAsciiCharCode) {
606  __ movq(rbx, current_character());
607  __ and_(rbx, Immediate(kTableMask));
608  index = rbx;
609  }
610  __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize),
611  Immediate(0));
612  BranchOrBacktrack(not_equal, on_bit_set);
613 }
614 
615 
// Emits a specialised test of the current character against the character
// class named by `type` ('s', 'S', 'd', 'D', '.', 'n', 'w', 'W' or '*'),
// going to `on_no_match` when the character is not in the class.
// Returns true when code for the class was emitted (or, for '*', when no code
// is needed). Returns false when there is no custom implementation for this
// type/mode combination; nothing is emitted in that case.
616 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
617  Label* on_no_match) {
618  // Range checks (c in min..max) are generally implemented by an unsigned
619  // (c - min) <= (max - min) check, using the sequence:
620  // lea(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))
621  // cmp(rax, Immediate(max - min))
622  switch (type) {
623  case 's':
624  // Match space-characters
625  if (mode_ == ASCII) {
626  // ASCII space characters are '\t'..'\r' and ' '.
627  Label success;
628  __ cmpl(current_character(), Immediate(' '));
629  __ j(equal, &success);
630  // Check range 0x09..0x0d
631  __ lea(rax, Operand(current_character(), -'\t'));
632  __ cmpl(rax, Immediate('\r' - '\t'));
633  BranchOrBacktrack(above, on_no_match);
634  __ bind(&success);
635  return true;
636  }
637  return false;
638  case 'S':
639  // Match non-space characters.
640  if (mode_ == ASCII) {
641  // ASCII space characters are '\t'..'\r' and ' '.
642  __ cmpl(current_character(), Immediate(' '));
643  BranchOrBacktrack(equal, on_no_match);
644  __ lea(rax, Operand(current_character(), -'\t'));
645  __ cmpl(rax, Immediate('\r' - '\t'));
646  BranchOrBacktrack(below_equal, on_no_match);
647  return true;
648  }
649  return false;
650  case 'd':
651  // Match ASCII digits ('0'..'9')
652  __ lea(rax, Operand(current_character(), -'0'));
653  __ cmpl(rax, Immediate('9' - '0'));
654  BranchOrBacktrack(above, on_no_match);
655  return true;
656  case 'D':
657  // Match non ASCII-digits
658  __ lea(rax, Operand(current_character(), -'0'));
659  __ cmpl(rax, Immediate('9' - '0'));
660  BranchOrBacktrack(below_equal, on_no_match);
661  return true;
662  case '.': {
663  // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
664  __ movl(rax, current_character());
665  __ xor_(rax, Immediate(0x01));
666  // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
667  __ subl(rax, Immediate(0x0b));
668  __ cmpl(rax, Immediate(0x0c - 0x0b));
669  BranchOrBacktrack(below_equal, on_no_match);
670  if (mode_ == UC16) {
671  // Compare original value to 0x2028 and 0x2029, using the already
672  // computed (current_char ^ 0x01 - 0x0b). I.e., check for
673  // 0x201d (0x2028 - 0x0b) or 0x201e.
674  __ subl(rax, Immediate(0x2028 - 0x0b));
675  __ cmpl(rax, Immediate(0x2029 - 0x2028));
676  BranchOrBacktrack(below_equal, on_no_match);
677  }
678  return true;
679  }
680  case 'n': {
681  // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
682  __ movl(rax, current_character());
683  __ xor_(rax, Immediate(0x01));
684  // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
685  __ subl(rax, Immediate(0x0b));
686  __ cmpl(rax, Immediate(0x0c - 0x0b));
687  if (mode_ == ASCII) {
688  BranchOrBacktrack(above, on_no_match);
689  } else {
690  Label done;
691  BranchOrBacktrack(below_equal, &done);
692  // Compare original value to 0x2028 and 0x2029, using the already
693  // computed (current_char ^ 0x01 - 0x0b). I.e., check for
694  // 0x201d (0x2028 - 0x0b) or 0x201e.
695  __ subl(rax, Immediate(0x2028 - 0x0b));
696  __ cmpl(rax, Immediate(0x2029 - 0x2028));
697  BranchOrBacktrack(above, on_no_match);
698  __ bind(&done);
699  }
700  return true;
701  }
702  case 'w': {
703  if (mode_ != ASCII) {
704  // Table is 128 entries, so all ASCII characters can be tested.
705  __ cmpl(current_character(), Immediate('z'));
706  BranchOrBacktrack(above, on_no_match);
707  }
708  __ movq(rbx, ExternalReference::re_word_character_map());
709  ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
  // The table byte is tested against the character register itself; the
  // assert above covers the character-0 case.
  // NOTE(review): this relies on the table's non-zero entries having bits
  // in common with every word character's low byte -- confirm against the
  // table definition.
710  __ testb(Operand(rbx, current_character(), times_1, 0),
711  current_character());
712  BranchOrBacktrack(zero, on_no_match);
713  return true;
714  }
715  case 'W': {
716  Label done;
717  if (mode_ != ASCII) {
718  // Table is 128 entries, so all ASCII characters can be tested.
719  __ cmpl(current_character(), Immediate('z'));
720  __ j(above, &done);
721  }
722  __ movq(rbx, ExternalReference::re_word_character_map());
723  ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
724  __ testb(Operand(rbx, current_character(), times_1, 0),
725  current_character());
726  BranchOrBacktrack(not_zero, on_no_match);
727  if (mode_ != ASCII) {
728  __ bind(&done);
729  }
730  return true;
731  }
732
733  case '*':
734  // Match any character.
735  return true;
736  // No custom implementation (yet): s(UC16), S(UC16).
737  default:
738  return false;
739  }
740 }
741 
742 
// NOTE(review): listing line 743, expected to carry this function's signature,
// is missing from this extract; the name cannot be confirmed from here.
//
// Emits the failure exit. For a non-global regexp, rax is set to FAILURE
// (statically asserted to be 0) before jumping to exit_label_. For a global
// regexp, rax is left untouched here and the jump is still emitted.
744  STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
745  if (!global()) {
746  __ Set(rax, FAILURE);
747  }
748  __ jmp(&exit_label_);
749 }
750 
751 
752 Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
753  Label return_rax;
754  // Finalize code - write the entry point code now we know how many
755  // registers we need.
756  // Entry code:
757  __ bind(&entry_label_);
758 
759  // Tell the system that we have a stack frame. Because the type is MANUAL, no
760  // code is generated.
761  FrameScope scope(&masm_, StackFrame::MANUAL);
762 
763  // Actually emit code to start a new stack frame.
764  __ push(rbp);
765  __ movq(rbp, rsp);
766  // Save parameters and callee-save registers. Order here should correspond
767  // to order of kBackup_ebx etc.
768 #ifdef _WIN64
769  // MSVC passes arguments in rcx, rdx, r8, r9, with backing stack slots.
770  // Store register parameters in pre-allocated stack slots,
771  __ movq(Operand(rbp, kInputString), rcx);
772  __ movq(Operand(rbp, kStartIndex), rdx); // Passed as int32 in edx.
773  __ movq(Operand(rbp, kInputStart), r8);
774  __ movq(Operand(rbp, kInputEnd), r9);
775  // Callee-save on Win64.
776  __ push(rsi);
777  __ push(rdi);
778  __ push(rbx);
779 #else
780  // GCC passes arguments in rdi, rsi, rdx, rcx, r8, r9 (and then on stack).
781  // Push register parameters on stack for reference.
782  ASSERT_EQ(kInputString, -1 * kPointerSize);
783  ASSERT_EQ(kStartIndex, -2 * kPointerSize);
784  ASSERT_EQ(kInputStart, -3 * kPointerSize);
785  ASSERT_EQ(kInputEnd, -4 * kPointerSize);
786  ASSERT_EQ(kRegisterOutput, -5 * kPointerSize);
787  ASSERT_EQ(kNumOutputRegisters, -6 * kPointerSize);
788  __ push(rdi);
789  __ push(rsi);
790  __ push(rdx);
791  __ push(rcx);
792  __ push(r8);
793  __ push(r9);
794 
795  __ push(rbx); // Callee-save
796 #endif
797 
798  __ push(Immediate(0)); // Number of successful matches in a global regexp.
799  __ push(Immediate(0)); // Make room for "input start - 1" constant.
800 
801  // Check if we have space on the stack for registers.
802  Label stack_limit_hit;
803  Label stack_ok;
804 
805  ExternalReference stack_limit =
806  ExternalReference::address_of_stack_limit(masm_.isolate());
807  __ movq(rcx, rsp);
808  __ movq(kScratchRegister, stack_limit);
809  __ subq(rcx, Operand(kScratchRegister, 0));
810  // Handle it if the stack pointer is already below the stack limit.
811  __ j(below_equal, &stack_limit_hit);
812  // Check if there is room for the variable number of registers above
813  // the stack limit.
814  __ cmpq(rcx, Immediate(num_registers_ * kPointerSize));
815  __ j(above_equal, &stack_ok);
816  // Exit with OutOfMemory exception. There is not enough space on the stack
817  // for our working registers.
818  __ Set(rax, EXCEPTION);
819  __ jmp(&return_rax);
820 
821  __ bind(&stack_limit_hit);
822  __ Move(code_object_pointer(), masm_.CodeObject());
823  CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp.
824  __ testq(rax, rax);
825  // If returned value is non-zero, we exit with the returned value as result.
826  __ j(not_zero, &return_rax);
827 
828  __ bind(&stack_ok);
829 
830  // Allocate space on stack for registers.
831  __ subq(rsp, Immediate(num_registers_ * kPointerSize));
832  // Load string length.
833  __ movq(rsi, Operand(rbp, kInputEnd));
834  // Load input position.
835  __ movq(rdi, Operand(rbp, kInputStart));
836  // Set up rdi to be negative offset from string end.
837  __ subq(rdi, rsi);
838  // Set rax to address of char before start of the string
839  // (effectively string position -1).
840  __ movq(rbx, Operand(rbp, kStartIndex));
841  __ neg(rbx);
842  if (mode_ == UC16) {
843  __ lea(rax, Operand(rdi, rbx, times_2, -char_size()));
844  } else {
845  __ lea(rax, Operand(rdi, rbx, times_1, -char_size()));
846  }
847  // Store this value in a local variable, for use when clearing
848  // position registers.
849  __ movq(Operand(rbp, kInputStartMinusOne), rax);
850 
851 #ifdef WIN32
852  // Ensure that we have written to each stack page, in order. Skipping a page
853  // on Windows can cause segmentation faults. Assuming page size is 4k.
854  const int kPageSize = 4096;
855  const int kRegistersPerPage = kPageSize / kPointerSize;
856  for (int i = num_saved_registers_ + kRegistersPerPage - 1;
857  i < num_registers_;
858  i += kRegistersPerPage) {
859  __ movq(register_location(i), rax); // One write every page.
860  }
861 #endif // WIN32
862 
863  // Initialize code object pointer.
864  __ Move(code_object_pointer(), masm_.CodeObject());
865 
866  Label load_char_start_regexp, start_regexp;
867  // Load newline if index is at start, previous character otherwise.
868  __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
869  __ j(not_equal, &load_char_start_regexp, Label::kNear);
870  __ Set(current_character(), '\n');
871  __ jmp(&start_regexp, Label::kNear);
872 
873  // Global regexp restarts matching here.
874  __ bind(&load_char_start_regexp);
875  // Load previous char as initial value of current character register.
876  LoadCurrentCharacterUnchecked(-1, 1);
877  __ bind(&start_regexp);
878 
879  // Initialize on-stack registers.
880  if (num_saved_registers_ > 0) {
881  // Fill saved registers with initial value = start offset - 1
882  // Fill in stack push order, to avoid accessing across an unwritten
883  // page (a problem on Windows).
884  if (num_saved_registers_ > 8) {
885  __ Set(rcx, kRegisterZero);
886  Label init_loop;
887  __ bind(&init_loop);
888  __ movq(Operand(rbp, rcx, times_1, 0), rax);
889  __ subq(rcx, Immediate(kPointerSize));
890  __ cmpq(rcx,
891  Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
892  __ j(greater, &init_loop);
893  } else { // Unroll the loop.
894  for (int i = 0; i < num_saved_registers_; i++) {
895  __ movq(register_location(i), rax);
896  }
897  }
898  }
899 
900  // Initialize backtrack stack pointer.
901  __ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
902 
903  __ jmp(&start_label_);
904 
905  // Exit code:
906  if (success_label_.is_linked()) {
907  // Save captures when successful.
908  __ bind(&success_label_);
909  if (num_saved_registers_ > 0) {
910  // copy captures to output
911  __ movq(rdx, Operand(rbp, kStartIndex));
912  __ movq(rbx, Operand(rbp, kRegisterOutput));
913  __ movq(rcx, Operand(rbp, kInputEnd));
914  __ subq(rcx, Operand(rbp, kInputStart));
915  if (mode_ == UC16) {
916  __ lea(rcx, Operand(rcx, rdx, times_2, 0));
917  } else {
918  __ addq(rcx, rdx);
919  }
920  for (int i = 0; i < num_saved_registers_; i++) {
921  __ movq(rax, register_location(i));
922  if (i == 0 && global_with_zero_length_check()) {
923  // Keep capture start in rdx for the zero-length check later.
924  __ movq(rdx, rax);
925  }
926  __ addq(rax, rcx); // Convert to index from start, not end.
927  if (mode_ == UC16) {
928  __ sar(rax, Immediate(1)); // Convert byte index to character index.
929  }
930  __ movl(Operand(rbx, i * kIntSize), rax);
931  }
932  }
933 
934  if (global()) {
935  // Restart matching if the regular expression is flagged as global.
936  // Increment success counter.
937  __ incq(Operand(rbp, kSuccessfulCaptures));
938  // Capture results have been stored, so the number of remaining global
939  // output registers is reduced by the number of stored captures.
940  __ movsxlq(rcx, Operand(rbp, kNumOutputRegisters));
941  __ subq(rcx, Immediate(num_saved_registers_));
942  // Check whether we have enough room for another set of capture results.
943  __ cmpq(rcx, Immediate(num_saved_registers_));
944  __ j(less, &exit_label_);
945 
946  __ movq(Operand(rbp, kNumOutputRegisters), rcx);
947  // Advance the location for output.
948  __ addq(Operand(rbp, kRegisterOutput),
949  Immediate(num_saved_registers_ * kIntSize));
950 
951  // Prepare rax to initialize registers with its value in the next run.
952  __ movq(rax, Operand(rbp, kInputStartMinusOne));
953 
954  if (global_with_zero_length_check()) {
955  // Special case for zero-length matches.
956  // rdx: capture start index
957  __ cmpq(rdi, rdx);
958  // Not a zero-length match, restart.
959  __ j(not_equal, &load_char_start_regexp);
960  // rdi (offset from the end) is zero if we already reached the end.
961  __ testq(rdi, rdi);
962  __ j(zero, &exit_label_, Label::kNear);
963  // Advance current position after a zero-length match.
964  if (mode_ == UC16) {
965  __ addq(rdi, Immediate(2));
966  } else {
967  __ incq(rdi);
968  }
969  }
970 
971  __ jmp(&load_char_start_regexp);
972  } else {
973  __ movq(rax, Immediate(SUCCESS));
974  }
975  }
976 
977  __ bind(&exit_label_);
978  if (global()) {
979  // Return the number of successful captures.
980  __ movq(rax, Operand(rbp, kSuccessfulCaptures));
981  }
982 
983  __ bind(&return_rax);
984 #ifdef _WIN64
985  // Restore callee save registers.
986  __ lea(rsp, Operand(rbp, kLastCalleeSaveRegister));
987  __ pop(rbx);
988  __ pop(rdi);
989  __ pop(rsi);
990  // Stack now at rbp.
991 #else
992  // Restore callee save register.
993  __ movq(rbx, Operand(rbp, kBackup_rbx));
994  // Skip rsp to rbp.
995  __ movq(rsp, rbp);
996 #endif
997  // Exit function frame, restore previous one.
998  __ pop(rbp);
999  __ ret(0);
1000 
1001  // Backtrack code (branch target for conditional backtracks).
1002  if (backtrack_label_.is_linked()) {
1003  __ bind(&backtrack_label_);
1004  Backtrack();
1005  }
1006 
1007  Label exit_with_exception;
1008 
1009  // Preempt-code
1010  if (check_preempt_label_.is_linked()) {
1011  SafeCallTarget(&check_preempt_label_);
1012 
1013  __ push(backtrack_stackpointer());
1014  __ push(rdi);
1015 
1016  CallCheckStackGuardState();
1017  __ testq(rax, rax);
1018  // If returning non-zero, we should end execution with the given
1019  // result as return value.
1020  __ j(not_zero, &return_rax);
1021 
1022  // Restore registers.
1023  __ Move(code_object_pointer(), masm_.CodeObject());
1024  __ pop(rdi);
1025  __ pop(backtrack_stackpointer());
1026  // String might have moved: Reload rsi from frame.
1027  __ movq(rsi, Operand(rbp, kInputEnd));
1028  SafeReturn();
1029  }
1030 
1031  // Backtrack stack overflow code.
1032  if (stack_overflow_label_.is_linked()) {
1033  SafeCallTarget(&stack_overflow_label_);
1034  // Reached if the backtrack-stack limit has been hit.
1035 
1036  Label grow_failed;
1037  // Save registers before calling C function
1038 #ifndef _WIN64
1039  // Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI.
1040  __ push(rsi);
1041  __ push(rdi);
1042 #endif
1043 
1044  // Call GrowStack(backtrack_stackpointer())
1045  static const int num_arguments = 3;
1046  __ PrepareCallCFunction(num_arguments);
1047 #ifdef _WIN64
1048  // Microsoft passes parameters in rcx, rdx, r8.
1049  // First argument, backtrack stackpointer, is already in rcx.
1050  __ lea(rdx, Operand(rbp, kStackHighEnd)); // Second argument
1051  __ LoadAddress(r8, ExternalReference::isolate_address());
1052 #else
1053  // AMD64 ABI passes parameters in rdi, rsi, rdx.
1054  __ movq(rdi, backtrack_stackpointer()); // First argument.
1055  __ lea(rsi, Operand(rbp, kStackHighEnd)); // Second argument.
1056  __ LoadAddress(rdx, ExternalReference::isolate_address());
1057 #endif
1058  ExternalReference grow_stack =
1059  ExternalReference::re_grow_stack(masm_.isolate());
1060  __ CallCFunction(grow_stack, num_arguments);
1061  // If return NULL, we have failed to grow the stack, and
1062  // must exit with a stack-overflow exception.
1063  __ testq(rax, rax);
1064  __ j(equal, &exit_with_exception);
1065  // Otherwise use return value as new stack pointer.
1066  __ movq(backtrack_stackpointer(), rax);
1067  // Restore saved registers and continue.
1068  __ Move(code_object_pointer(), masm_.CodeObject());
1069 #ifndef _WIN64
1070  __ pop(rdi);
1071  __ pop(rsi);
1072 #endif
1073  SafeReturn();
1074  }
1075 
1076  if (exit_with_exception.is_linked()) {
1077  // If any of the code above needed to exit with an exception.
1078  __ bind(&exit_with_exception);
1079  // Exit with Result EXCEPTION(-1) to signal thrown exception.
1080  __ Set(rax, EXCEPTION);
1081  __ jmp(&return_rax);
1082  }
1083 
1084  FixupCodeRelativePositions();
1085 
1086  CodeDesc code_desc;
1087  masm_.GetCode(&code_desc);
1088  Isolate* isolate = ISOLATE;
1089  Handle<Code> code = isolate->factory()->NewCode(
1090  code_desc, Code::ComputeFlags(Code::REGEXP),
1091  masm_.CodeObject());
1092  PROFILE(isolate, RegExpCodeCreateEvent(*code, *source));
1093  return Handle<HeapObject>::cast(code);
1094 }
1095 
1096 
1097 void RegExpMacroAssemblerX64::GoTo(Label* to) {
1098  BranchOrBacktrack(no_condition, to);
1099 }
1100 
1101 
1102 void RegExpMacroAssemblerX64::IfRegisterGE(int reg,
1103  int comparand,
1104  Label* if_ge) {
1105  __ cmpq(register_location(reg), Immediate(comparand));
1106  BranchOrBacktrack(greater_equal, if_ge);
1107 }
1108 
1109 
1110 void RegExpMacroAssemblerX64::IfRegisterLT(int reg,
1111  int comparand,
1112  Label* if_lt) {
1113  __ cmpq(register_location(reg), Immediate(comparand));
1114  BranchOrBacktrack(less, if_lt);
1115 }
1116 
1117 
1118 void RegExpMacroAssemblerX64::IfRegisterEqPos(int reg,
1119  Label* if_eq) {
1120  __ cmpq(rdi, register_location(reg));
1121  BranchOrBacktrack(equal, if_eq);
1122 }
1123 
1124 
1125 RegExpMacroAssembler::IrregexpImplementation
1126  RegExpMacroAssemblerX64::Implementation() {
1127  return kX64Implementation;
1128 }
1129 
1130 
1131 void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset,
1132  Label* on_end_of_input,
1133  bool check_bounds,
1134  int characters) {
1135  ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1136  ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1137  if (check_bounds) {
1138  CheckPosition(cp_offset + characters - 1, on_end_of_input);
1139  }
1140  LoadCurrentCharacterUnchecked(cp_offset, characters);
1141 }
1142 
1143 
1144 void RegExpMacroAssemblerX64::PopCurrentPosition() {
1145  Pop(rdi);
1146 }
1147 
1148 
1149 void RegExpMacroAssemblerX64::PopRegister(int register_index) {
1150  Pop(rax);
1151  __ movq(register_location(register_index), rax);
1152 }
1153 
1154 
1155 void RegExpMacroAssemblerX64::PushBacktrack(Label* label) {
1156  Push(label);
1157  CheckStackLimit();
1158 }
1159 
1160 
1161 void RegExpMacroAssemblerX64::PushCurrentPosition() {
1162  Push(rdi);
1163 }
1164 
1165 
1166 void RegExpMacroAssemblerX64::PushRegister(int register_index,
1167  StackCheckFlag check_stack_limit) {
1168  __ movq(rax, register_location(register_index));
1169  Push(rax);
1170  if (check_stack_limit) CheckStackLimit();
1171 }
1172 
1173 
1174 void RegExpMacroAssemblerX64::ReadCurrentPositionFromRegister(int reg) {
1175  __ movq(rdi, register_location(reg));
1176 }
1177 
1178 
1179 void RegExpMacroAssemblerX64::ReadStackPointerFromRegister(int reg) {
1180  __ movq(backtrack_stackpointer(), register_location(reg));
1181  __ addq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
1182 }
1183 
1184 
1185 void RegExpMacroAssemblerX64::SetCurrentPositionFromEnd(int by) {
1186  Label after_position;
1187  __ cmpq(rdi, Immediate(-by * char_size()));
1188  __ j(greater_equal, &after_position, Label::kNear);
1189  __ movq(rdi, Immediate(-by * char_size()));
1190  // On RegExp code entry (where this operation is used), the character before
1191  // the current position is expected to be already loaded.
1192  // We have advanced the position, so it's safe to read backwards.
1193  LoadCurrentCharacterUnchecked(-1, 1);
1194  __ bind(&after_position);
1195 }
1196 
1197 
1198 void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
1199  ASSERT(register_index >= num_saved_registers_); // Reserved for positions!
1200  __ movq(register_location(register_index), Immediate(to));
1201 }
1202 
1203 
1204 bool RegExpMacroAssemblerX64::Succeed() {
1205  __ jmp(&success_label_);
1206  return global();
1207 }
1208 
1209 
1210 void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg,
1211  int cp_offset) {
1212  if (cp_offset == 0) {
1213  __ movq(register_location(reg), rdi);
1214  } else {
1215  __ lea(rax, Operand(rdi, cp_offset * char_size()));
1216  __ movq(register_location(reg), rax);
1217  }
1218 }
1219 
1220 
1221 void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) {
1222  ASSERT(reg_from <= reg_to);
1223  __ movq(rax, Operand(rbp, kInputStartMinusOne));
1224  for (int reg = reg_from; reg <= reg_to; reg++) {
1225  __ movq(register_location(reg), rax);
1226  }
1227 }
1228 
1229 
1230 void RegExpMacroAssemblerX64::WriteStackPointerToRegister(int reg) {
1231  __ movq(rax, backtrack_stackpointer());
1232  __ subq(rax, Operand(rbp, kStackHighEnd));
1233  __ movq(register_location(reg), rax);
1234 }
1235 
1236 
1237 // Private methods:
1238 
1239 void RegExpMacroAssemblerX64::CallCheckStackGuardState() {
1240  // This function call preserves no register values. Caller should
1241  // store anything volatile in a C call or overwritten by this function.
1242  static const int num_arguments = 3;
1243  __ PrepareCallCFunction(num_arguments);
1244 #ifdef _WIN64
1245  // Second argument: Code* of self. (Do this before overwriting r8).
1246  __ movq(rdx, code_object_pointer());
1247  // Third argument: RegExp code frame pointer.
1248  __ movq(r8, rbp);
1249  // First argument: Next address on the stack (will be address of
1250  // return address).
1251  __ lea(rcx, Operand(rsp, -kPointerSize));
1252 #else
1253  // Third argument: RegExp code frame pointer.
1254  __ movq(rdx, rbp);
1255  // Second argument: Code* of self.
1256  __ movq(rsi, code_object_pointer());
1257  // First argument: Next address on the stack (will be address of
1258  // return address).
1259  __ lea(rdi, Operand(rsp, -kPointerSize));
1260 #endif
1261  ExternalReference stack_check =
1262  ExternalReference::re_check_stack_guard_state(masm_.isolate());
1263  __ CallCFunction(stack_check, num_arguments);
1264 }
1265 
1266 
1267 // Helper function for reading a value out of a stack frame.
1268 template <typename T>
1269 static T& frame_entry(Address re_frame, int frame_offset) {
1270  return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
1271 }
1272 
1273 
1274 int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
1275  Code* re_code,
1276  Address re_frame) {
1277  Isolate* isolate = frame_entry<Isolate*>(re_frame, kIsolate);
1278  ASSERT(isolate == Isolate::Current());
1279  if (isolate->stack_guard()->IsStackOverflow()) {
1280  isolate->StackOverflow();
1281  return EXCEPTION;
1282  }
1283 
1284  // If not real stack overflow the stack guard was used to interrupt
1285  // execution for another purpose.
1286 
1287  // If this is a direct call from JavaScript retry the RegExp forcing the call
1288  // through the runtime system. Currently the direct call cannot handle a GC.
1289  if (frame_entry<int>(re_frame, kDirectCall) == 1) {
1290  return RETRY;
1291  }
1292 
1293  // Prepare for possible GC.
1294  HandleScope handles(isolate);
1295  Handle<Code> code_handle(re_code);
1296 
1297  Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
1298 
1299  // Current string.
1300  bool is_ascii = subject->IsAsciiRepresentationUnderneath();
1301 
1302  ASSERT(re_code->instruction_start() <= *return_address);
1303  ASSERT(*return_address <=
1304  re_code->instruction_start() + re_code->instruction_size());
1305 
1306  MaybeObject* result = Execution::HandleStackGuardInterrupt(isolate);
1307 
1308  if (*code_handle != re_code) { // Return address no longer valid
1309  intptr_t delta = code_handle->address() - re_code->address();
1310  // Overwrite the return address on the stack.
1311  *return_address += delta;
1312  }
1313 
1314  if (result->IsException()) {
1315  return EXCEPTION;
1316  }
1317 
1318  Handle<String> subject_tmp = subject;
1319  int slice_offset = 0;
1320 
1321  // Extract the underlying string and the slice offset.
1322  if (StringShape(*subject_tmp).IsCons()) {
1323  subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
1324  } else if (StringShape(*subject_tmp).IsSliced()) {
1325  SlicedString* slice = SlicedString::cast(*subject_tmp);
1326  subject_tmp = Handle<String>(slice->parent());
1327  slice_offset = slice->offset();
1328  }
1329 
1330  // String might have changed.
1331  if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
1332  // If we changed between an ASCII and an UC16 string, the specialized
1333  // code cannot be used, and we need to restart regexp matching from
1334  // scratch (including, potentially, compiling a new version of the code).
1335  return RETRY;
1336  }
1337 
1338  // Otherwise, the content of the string might have moved. It must still
1339  // be a sequential or external string with the same content.
1340  // Update the start and end pointers in the stack frame to the current
1341  // location (whether it has actually moved or not).
1342  ASSERT(StringShape(*subject_tmp).IsSequential() ||
1343  StringShape(*subject_tmp).IsExternal());
1344 
1345  // The original start address of the characters to match.
1346  const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
1347 
1348  // Find the current start address of the same character at the current string
1349  // position.
1350  int start_index = frame_entry<int>(re_frame, kStartIndex);
1351  const byte* new_address = StringCharacterPosition(*subject_tmp,
1352  start_index + slice_offset);
1353 
1354  if (start_address != new_address) {
1355  // If there is a difference, update the object pointer and start and end
1356  // addresses in the RegExp stack frame to match the new value.
1357  const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
1358  int byte_length = static_cast<int>(end_address - start_address);
1359  frame_entry<const String*>(re_frame, kInputString) = *subject;
1360  frame_entry<const byte*>(re_frame, kInputStart) = new_address;
1361  frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
1362  } else if (frame_entry<const String*>(re_frame, kInputString) != *subject) {
1363  // Subject string might have been a ConsString that underwent
1364  // short-circuiting during GC. That will not change start_address but
1365  // will change pointer inside the subject handle.
1366  frame_entry<const String*>(re_frame, kInputString) = *subject;
1367  }
1368 
1369  return 0;
1370 }
1371 
1372 
1373 Operand RegExpMacroAssemblerX64::register_location(int register_index) {
1374  ASSERT(register_index < (1<<30));
1375  if (num_registers_ <= register_index) {
1376  num_registers_ = register_index + 1;
1377  }
1378  return Operand(rbp, kRegisterZero - register_index * kPointerSize);
1379 }
1380 
1381 
1382 void RegExpMacroAssemblerX64::CheckPosition(int cp_offset,
1383  Label* on_outside_input) {
1384  __ cmpl(rdi, Immediate(-cp_offset * char_size()));
1385  BranchOrBacktrack(greater_equal, on_outside_input);
1386 }
1387 
1388 
1389 void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition,
1390  Label* to) {
1391  if (condition < 0) { // No condition
1392  if (to == NULL) {
1393  Backtrack();
1394  return;
1395  }
1396  __ jmp(to);
1397  return;
1398  }
1399  if (to == NULL) {
1400  __ j(condition, &backtrack_label_);
1401  return;
1402  }
1403  __ j(condition, to);
1404 }
1405 
1406 
1407 void RegExpMacroAssemblerX64::SafeCall(Label* to) {
1408  __ call(to);
1409 }
1410 
1411 
1412 void RegExpMacroAssemblerX64::SafeCallTarget(Label* label) {
1413  __ bind(label);
1414  __ subq(Operand(rsp, 0), code_object_pointer());
1415 }
1416 
1417 
1418 void RegExpMacroAssemblerX64::SafeReturn() {
1419  __ addq(Operand(rsp, 0), code_object_pointer());
1420  __ ret(0);
1421 }
1422 
1423 
1424 void RegExpMacroAssemblerX64::Push(Register source) {
1425  ASSERT(!source.is(backtrack_stackpointer()));
1426  // Notice: This updates flags, unlike normal Push.
1427  __ subq(backtrack_stackpointer(), Immediate(kIntSize));
1428  __ movl(Operand(backtrack_stackpointer(), 0), source);
1429 }
1430 
1431 
1432 void RegExpMacroAssemblerX64::Push(Immediate value) {
1433  // Notice: This updates flags, unlike normal Push.
1434  __ subq(backtrack_stackpointer(), Immediate(kIntSize));
1435  __ movl(Operand(backtrack_stackpointer(), 0), value);
1436 }
1437 
1438 
1439 void RegExpMacroAssemblerX64::FixupCodeRelativePositions() {
1440  for (int i = 0, n = code_relative_fixup_positions_.length(); i < n; i++) {
1441  int position = code_relative_fixup_positions_[i];
1442  // The position succeeds a relative label offset from position.
1443  // Patch the relative offset to be relative to the Code object pointer
1444  // instead.
1445  int patch_position = position - kIntSize;
1446  int offset = masm_.long_at(patch_position);
1447  masm_.long_at_put(patch_position,
1448  offset
1449  + position
1450  + Code::kHeaderSize
1451  - kHeapObjectTag);
1452  }
1453  code_relative_fixup_positions_.Clear();
1454 }
1455 
1456 
1457 void RegExpMacroAssemblerX64::Push(Label* backtrack_target) {
1458  __ subq(backtrack_stackpointer(), Immediate(kIntSize));
1459  __ movl(Operand(backtrack_stackpointer(), 0), backtrack_target);
1460  MarkPositionForCodeRelativeFixup();
1461 }
1462 
1463 
1464 void RegExpMacroAssemblerX64::Pop(Register target) {
1465  ASSERT(!target.is(backtrack_stackpointer()));
1466  __ movsxlq(target, Operand(backtrack_stackpointer(), 0));
1467  // Notice: This updates flags, unlike normal Pop.
1468  __ addq(backtrack_stackpointer(), Immediate(kIntSize));
1469 }
1470 
1471 
1472 void RegExpMacroAssemblerX64::Drop() {
1473  __ addq(backtrack_stackpointer(), Immediate(kIntSize));
1474 }
1475 
1476 
1477 void RegExpMacroAssemblerX64::CheckPreemption() {
1478  // Check for preemption.
1479  Label no_preempt;
1480  ExternalReference stack_limit =
1481  ExternalReference::address_of_stack_limit(masm_.isolate());
1482  __ load_rax(stack_limit);
1483  __ cmpq(rsp, rax);
1484  __ j(above, &no_preempt);
1485 
1486  SafeCall(&check_preempt_label_);
1487 
1488  __ bind(&no_preempt);
1489 }
1490 
1491 
1492 void RegExpMacroAssemblerX64::CheckStackLimit() {
1493  Label no_stack_overflow;
1494  ExternalReference stack_limit =
1495  ExternalReference::address_of_regexp_stack_limit(masm_.isolate());
1496  __ load_rax(stack_limit);
1497  __ cmpq(backtrack_stackpointer(), rax);
1498  __ j(above, &no_stack_overflow);
1499 
1500  SafeCall(&stack_overflow_label_);
1501 
1502  __ bind(&no_stack_overflow);
1503 }
1504 
1505 
1506 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,
1507  int characters) {
1508  if (mode_ == ASCII) {
1509  if (characters == 4) {
1510  __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1511  } else if (characters == 2) {
1512  __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1513  } else {
1514  ASSERT(characters == 1);
1515  __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1516  }
1517  } else {
1518  ASSERT(mode_ == UC16);
1519  if (characters == 2) {
1520  __ movl(current_character(),
1521  Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
1522  } else {
1523  ASSERT(characters == 1);
1524  __ movzxwl(current_character(),
1525  Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
1526  }
1527  }
1528 }
1529 
1530 #undef __
1531 
1532 #endif // V8_INTERPRETED_REGEXP
1533 
1534 }} // namespace v8::internal
1535 
1536 #endif // V8_TARGET_ARCH_X64
byte * Address
Definition: globals.h:172
const Register rdx
unsigned char byte
Definition: disasm.h:33
v8::Handle< v8::Value > Fail(const v8::Arguments &args)
const Register r11
const Register rbp
const Register rsi
#define ASSERT(condition)
Definition: checks.h:270
#define PROFILE(isolate, Call)
Definition: cpu-profiler.h:190
const int kIntSize
Definition: globals.h:231
RegExpMacroAssemblerX64(Mode mode, int registers_to_save, Zone *zone)
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination trace on stack replacement optimize closures functions with arguments object optimize functions containing for in loops profiler considers IC stability primitive functions trigger their own optimization re try self optimization if it failed insert an interrupt check at function exit execution budget before interrupt is triggered call count before self optimization self_optimization count_based_interrupts weighted_back_edges trace_opt emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 enable use of ARMv7 instructions if enable use of MIPS FPU instructions if NULL
const Register r9
const int kPointerSize
Definition: globals.h:234
Operand FieldOperand(Register object, int offset)
const int kHeapObjectTag
Definition: v8.h:3848
const Register rbx
const Register rsp
#define __
const Register rax
const Register rdi
#define T(name, string, precedence)
Definition: token.cc:48
#define ISOLATE
Definition: isolate.h:1410
const Register kScratchRegister
uint16_t uc16
Definition: globals.h:273
const Register r8
const Register rcx
#define ASSERT_EQ(v1, v2)
Definition: checks.h:271
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination trace on stack replacement optimize closures functions with arguments object optimize functions containing for in loops profiler considers IC stability primitive functions trigger their own optimization re try self optimization if it failed insert an interrupt check at function exit execution budget before interrupt is triggered call count before self optimization self_optimization count_based_interrupts weighted_back_edges trace_opt emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 enable use of ARMv7 instructions if enable use of MIPS FPU instructions if NULL
Definition: flags.cc:274
#define STATIC_ASSERT(test)
Definition: checks.h:283
const uc32 kMaxAsciiCharCode
Definition: globals.h:277
FlagType type() const
Definition: flags.cc:1358