v8  3.14.5(node0.10.28)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
test-regexp.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 
29 #include <stdlib.h>
30 
31 #include "v8.h"
32 
33 #include "ast.h"
34 #include "char-predicates-inl.h"
35 #include "cctest.h"
36 #include "jsregexp.h"
37 #include "parser.h"
38 #include "regexp-macro-assembler.h"
40 #include "string-stream.h"
41 #include "zone-inl.h"
42 #ifdef V8_INTERPRETED_REGEXP
43 #include "interpreter-irregexp.h"
44 #else // V8_INTERPRETED_REGEXP
45 #include "macro-assembler.h"
46 #include "code.h"
47 #ifdef V8_TARGET_ARCH_ARM
48 #include "arm/assembler-arm.h"
51 #endif
52 #ifdef V8_TARGET_ARCH_MIPS
53 #include "mips/assembler-mips.h"
56 #endif
57 #ifdef V8_TARGET_ARCH_X64
58 #include "x64/assembler-x64.h"
61 #endif
62 #ifdef V8_TARGET_ARCH_IA32
63 #include "ia32/assembler-ia32.h"
66 #endif
67 #endif // V8_INTERPRETED_REGEXP
68 
69 using namespace v8::internal;
70 
71 
// Runs the regexp parser over |input| inside a fresh handle scope and a zone
// scope on the isolate's runtime zone (released on exit).
// NOTE(review): the listing's own numbering skips 73 and 78, so the statement
// that opens the call ending on line 79 is not shown. Presumably this returns
// the parser's success flag (CHECK_PARSE_ERROR negates the result) -- confirm
// against the real source file.
72 static bool CheckParse(const char* input) {
74  v8::HandleScope scope;
75  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
76  FlatStringReader reader(Isolate::Current(), CStrVector(input));
77  RegExpCompileData result;
79  &reader, false, &result, Isolate::Current()->runtime_zone());
80 }
81 
82 
// Parses |input| and hands back a textual rendering of the resulting tree,
// produced by result.tree->ToString(). Aborts via CHECK if no tree was built
// or an error was recorded.
// NOTE(review): listing lines 84, 89 and 93 are absent; line 93 presumably
// declares |output| -- confirm against the real source file.
83 static SmartArrayPointer<const char> Parse(const char* input) {
85  v8::HandleScope scope;
86  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
87  FlatStringReader reader(Isolate::Current(), CStrVector(input));
88  RegExpCompileData result;
90  &reader, false, &result, Isolate::Current()->runtime_zone()));
 // A successful parse must yield a tree and leave the error handle empty.
91  CHECK(result.tree != NULL);
92  CHECK(result.error.is_null());
94  result.tree->ToString(Isolate::Current()->runtime_zone());
95  return output;
96 }
97 
// Parses |input| (which must parse cleanly) and returns the |simple| flag the
// parser stored in the compile data.
// NOTE(review): listing lines 99 and 105 are absent, so the opening of the
// parse call is not shown. |buffer| is not referenced again in the lines that
// are visible here.
98 static bool CheckSimple(const char* input) {
100  v8::HandleScope scope;
101  unibrow::Utf8InputBuffer<> buffer(input, StrLength(input));
102  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
103  FlatStringReader reader(Isolate::Current(), CStrVector(input));
104  RegExpCompileData result;
106  &reader, false, &result, Isolate::Current()->runtime_zone()));
107  CHECK(result.tree != NULL);
108  CHECK(result.error.is_null());
109  return result.simple;
110 }
111 
// Result pair returned by CheckMinMaxMatch().
// NOTE(review): the member declarations (listing lines 113-114) are absent
// here; code in this file reads the fields min_match and max_match and
// aggregate-initializes the struct in that order.
112 struct MinMaxPair {
115 };
116 
// Parses |input| (which must parse cleanly) and returns the tree's
// min_match() and max_match() values as a MinMaxPair.
// NOTE(review): listing lines 118 and 124 are absent, so the opening of the
// parse call is not shown. |buffer| is not referenced again in the lines that
// are visible here.
117 static MinMaxPair CheckMinMaxMatch(const char* input) {
119  v8::HandleScope scope;
120  unibrow::Utf8InputBuffer<> buffer(input, StrLength(input));
121  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
122  FlatStringReader reader(Isolate::Current(), CStrVector(input));
123  RegExpCompileData result;
125  &reader, false, &result, Isolate::Current()->runtime_zone()));
126  CHECK(result.tree != NULL);
127  CHECK(result.error.is_null());
128  int min_match = result.tree->min_match();
129  int max_match = result.tree->max_match();
130  MinMaxPair pair = { min_match, max_match };
131  return pair;
132 }
133 
134 
// Assertion helpers for the parser tests. Every macro expands to a single
// statement that needs a terminating ';' at the call site, so each one is safe
// inside an unbraced if/else.

// Asserts that the parser rejects |input|.
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))

// Asserts that |input| parses and that its tree prints as |expected|.
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))

// Asserts that the parser's "simple" flag for |input| equals |simple|.
// (No trailing ';' in the expansion: the call site supplies it.)
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))

// Asserts the minimum and maximum match lengths computed for |input|.
// Wrapped in do/while(false) so the expansion behaves as one statement.
#define CHECK_MIN_MAX(input, min, max)                                   \
  do {                                                                   \
    MinMaxPair min_max = CheckMinMaxMatch(input);                        \
    CHECK_EQ(min, min_max.min_match);                                    \
    CHECK_EQ(max, min_max.max_match);                                    \
  } while (false)
143 
146 
147  CHECK_PARSE_ERROR("?");
148 
149  CHECK_PARSE_EQ("abc", "'abc'");
150  CHECK_PARSE_EQ("", "%");
151  CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
152  CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
153  CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
154  CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
155  CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
156  CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
157  CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
158  CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
159  CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
160  CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
161  CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
162  CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
163  CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
164  CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
165  CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
166  CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
167  CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
168  CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
169  CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
170  CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
171  CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
172  CHECK_PARSE_EQ("(?:foo)", "'foo'");
173  CHECK_PARSE_EQ("(?: foo )", "' foo '");
174  CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
175  CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
176  CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
177  CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
178  CHECK_PARSE_EQ("()", "(^ %)");
179  CHECK_PARSE_EQ("(?=)", "(-> + %)");
180  CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
181  CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
182  CHECK_PARSE_EQ("[x]", "[x]");
183  CHECK_PARSE_EQ("[xyz]", "[x y z]");
184  CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
185  CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
186  CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
187  CHECK_PARSE_EQ("]", "']'");
188  CHECK_PARSE_EQ("}", "'}'");
189  CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
190  CHECK_PARSE_EQ("[\\d]", "[0-9]");
191  CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
192  CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
193  CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
194  CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");
195  // Control character outside character class.
196  CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
197  "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
198  CHECK_PARSE_EQ("\\c!", "'\\c!'");
199  CHECK_PARSE_EQ("\\c_", "'\\c_'");
200  CHECK_PARSE_EQ("\\c~", "'\\c~'");
201  CHECK_PARSE_EQ("\\c1", "'\\c1'");
202  // Control character inside character class.
203  CHECK_PARSE_EQ("[\\c!]", "[\\ c !]");
204  CHECK_PARSE_EQ("[\\c_]", "[\\x1f]");
205  CHECK_PARSE_EQ("[\\c~]", "[\\ c ~]");
206  CHECK_PARSE_EQ("[\\ca]", "[\\x01]");
207  CHECK_PARSE_EQ("[\\cz]", "[\\x1a]");
208  CHECK_PARSE_EQ("[\\cA]", "[\\x01]");
209  CHECK_PARSE_EQ("[\\cZ]", "[\\x1a]");
210  CHECK_PARSE_EQ("[\\c1]", "[\\x11]");
211 
212  CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
213  CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
214  CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
215  CHECK_PARSE_EQ("\\0", "'\\x00'");
216  CHECK_PARSE_EQ("\\8", "'8'");
217  CHECK_PARSE_EQ("\\9", "'9'");
218  CHECK_PARSE_EQ("\\11", "'\\x09'");
219  CHECK_PARSE_EQ("\\11a", "'\\x09a'");
220  CHECK_PARSE_EQ("\\011", "'\\x09'");
221  CHECK_PARSE_EQ("\\00011", "'\\x0011'");
222  CHECK_PARSE_EQ("\\118", "'\\x098'");
223  CHECK_PARSE_EQ("\\111", "'I'");
224  CHECK_PARSE_EQ("\\1111", "'I1'");
225  CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
226  CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
227  CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
228  CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
229  CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
230  " (# 0 - g (<- 1)))");
231  CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
232  " (# 0 - g (<- 2)))");
233  CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
234  " (# 0 - g (<- 3)))");
235  CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
236  " (# 0 - g '\\x04'))");
237  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
238  "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
239  " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
240  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
241  "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
242  " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
243  CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
244  CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
245  CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
246  CHECK_PARSE_EQ("(?=a)?a", "'a'");
247  CHECK_PARSE_EQ("(?=a){0,10}a", "'a'");
248  CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')");
249  CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')");
250  CHECK_PARSE_EQ("(?!a)?a", "'a'");
251  CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
252  CHECK_PARSE_EQ("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
253  CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
254  CHECK_PARSE_EQ("[\\0]", "[\\x00]");
255  CHECK_PARSE_EQ("[\\11]", "[\\x09]");
256  CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
257  CHECK_PARSE_EQ("[\\011]", "[\\x09]");
258  CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
259  CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
260  CHECK_PARSE_EQ("[\\111]", "[I]");
261  CHECK_PARSE_EQ("[\\1111]", "[I 1]");
262  CHECK_PARSE_EQ("\\x34", "'\x34'");
263  CHECK_PARSE_EQ("\\x60", "'\x60'");
264  CHECK_PARSE_EQ("\\x3z", "'x3z'");
265  CHECK_PARSE_EQ("\\c", "'\\c'");
266  CHECK_PARSE_EQ("\\u0034", "'\x34'");
267  CHECK_PARSE_EQ("\\u003z", "'u003z'");
268  CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
269 
270  CHECK_SIMPLE("", false);
271  CHECK_SIMPLE("a", true);
272  CHECK_SIMPLE("a|b", false);
273  CHECK_SIMPLE("a\\n", false);
274  CHECK_SIMPLE("^a", false);
275  CHECK_SIMPLE("a$", false);
276  CHECK_SIMPLE("a\\b!", false);
277  CHECK_SIMPLE("a\\Bb", false);
278  CHECK_SIMPLE("a*", false);
279  CHECK_SIMPLE("a*?", false);
280  CHECK_SIMPLE("a?", false);
281  CHECK_SIMPLE("a??", false);
282  CHECK_SIMPLE("a{0,1}?", false);
283  CHECK_SIMPLE("a{1,1}?", false);
284  CHECK_SIMPLE("a{1,2}?", false);
285  CHECK_SIMPLE("a+?", false);
286  CHECK_SIMPLE("(a)", false);
287  CHECK_SIMPLE("(a)\\1", false);
288  CHECK_SIMPLE("(\\1a)", false);
289  CHECK_SIMPLE("\\1(a)", false);
290  CHECK_SIMPLE("a\\s", false);
291  CHECK_SIMPLE("a\\S", false);
292  CHECK_SIMPLE("a\\d", false);
293  CHECK_SIMPLE("a\\D", false);
294  CHECK_SIMPLE("a\\w", false);
295  CHECK_SIMPLE("a\\W", false);
296  CHECK_SIMPLE("a.", false);
297  CHECK_SIMPLE("a\\q", false);
298  CHECK_SIMPLE("a[a]", false);
299  CHECK_SIMPLE("a[^a]", false);
300  CHECK_SIMPLE("a[a-z]", false);
301  CHECK_SIMPLE("a[\\q]", false);
302  CHECK_SIMPLE("a(?:b)", false);
303  CHECK_SIMPLE("a(?=b)", false);
304  CHECK_SIMPLE("a(?!b)", false);
305  CHECK_SIMPLE("\\x60", false);
306  CHECK_SIMPLE("\\u0060", false);
307  CHECK_SIMPLE("\\cA", false);
308  CHECK_SIMPLE("\\q", false);
309  CHECK_SIMPLE("\\1112", false);
310  CHECK_SIMPLE("\\0", false);
311  CHECK_SIMPLE("(a)\\1", false);
312  CHECK_SIMPLE("(?=a)?a", false);
313  CHECK_SIMPLE("(?!a)?a\\1", false);
314  CHECK_SIMPLE("(?:(?=a))a\\1", false);
315 
316  CHECK_PARSE_EQ("a{}", "'a{}'");
317  CHECK_PARSE_EQ("a{,}", "'a{,}'");
318  CHECK_PARSE_EQ("a{", "'a{'");
319  CHECK_PARSE_EQ("a{z}", "'a{z}'");
320  CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
321  CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
322  CHECK_PARSE_EQ("a{12,", "'a{12,'");
323  CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
324  CHECK_PARSE_EQ("{}", "'{}'");
325  CHECK_PARSE_EQ("{,}", "'{,}'");
326  CHECK_PARSE_EQ("{", "'{'");
327  CHECK_PARSE_EQ("{z}", "'{z}'");
328  CHECK_PARSE_EQ("{1z}", "'{1z}'");
329  CHECK_PARSE_EQ("{12z}", "'{12z}'");
330  CHECK_PARSE_EQ("{12,", "'{12,'");
331  CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
332 
333  CHECK_MIN_MAX("a", 1, 1);
334  CHECK_MIN_MAX("abc", 3, 3);
335  CHECK_MIN_MAX("a[bc]d", 3, 3);
336  CHECK_MIN_MAX("a|bc", 1, 2);
337  CHECK_MIN_MAX("ab|c", 1, 2);
338  CHECK_MIN_MAX("a||bc", 0, 2);
339  CHECK_MIN_MAX("|", 0, 0);
340  CHECK_MIN_MAX("(?:ab)", 2, 2);
341  CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
342  CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
343  CHECK_MIN_MAX("(ab)", 2, 2);
344  CHECK_MIN_MAX("(ab|cde)", 2, 3);
345  CHECK_MIN_MAX("(ab)\\1", 2, 4);
346  CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
347  CHECK_MIN_MAX("(?:ab)?", 0, 2);
348  CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
349  CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
350  CHECK_MIN_MAX("a?", 0, 1);
353  CHECK_MIN_MAX("a??", 0, 1);
356  CHECK_MIN_MAX("(?:a?)?", 0, 1);
357  CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
358  CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
359  CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
360  CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
361  CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
362  CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
363  CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
364  CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
365  CHECK_MIN_MAX("a{0}", 0, 0);
366  CHECK_MIN_MAX("(?:a+){0}", 0, 0);
367  CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
372  CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
373  CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
374  CHECK_MIN_MAX("a\\bc", 2, 2);
375  CHECK_MIN_MAX("a\\Bc", 2, 2);
376  CHECK_MIN_MAX("a\\sc", 3, 3);
377  CHECK_MIN_MAX("a\\Sc", 3, 3);
378  CHECK_MIN_MAX("a(?=b)c", 2, 2);
379  CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
380  CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
381 }
382 
// Pins the printed parse tree for a handful of individual patterns: a class
// containing '$' and a trailing '-', a malformed "{3,4*}" quantifier that is
// kept as literal text around a starred '4', a lone '{', and an alternation
// with an empty right-hand side.
383 TEST(ParserRegression) {
384  CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
385  CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
386  CHECK_PARSE_EQ("{", "'{'");
387  CHECK_PARSE_EQ("a|", "(| 'a' %)");
388 }
389 
// Parses |input|, which is expected to fail, and asserts that no tree was
// produced and that the recorded error message equals |expected|.
// NOTE(review): listing lines 392 and 397 are absent, so the opening of the
// parse call (whose closing parentheses appear on line 398) is not shown.
390 static void ExpectError(const char* input,
391  const char* expected) {
393  v8::HandleScope scope;
394  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
395  FlatStringReader reader(Isolate::Current(), CStrVector(input));
396  RegExpCompileData result;
398  &reader, false, &result, Isolate::Current()->runtime_zone()));
399  CHECK(result.tree == NULL);
400  CHECK(!result.error.is_null());
 // Compare the error text as a C string.
401  SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
402  CHECK_EQ(expected, *str);
403 }
404 
405 
// Checks the exact error message reported for a set of malformed patterns,
// then for a pattern holding one capture group more than kMaxCaptures.
// NOTE(review): listing line 407 is absent from this listing.
406 TEST(Errors) {
408  const char* kEndBackslash = "\\ at end of pattern";
409  ExpectError("\\", kEndBackslash);
410  const char* kUnterminatedGroup = "Unterminated group";
411  ExpectError("(foo", kUnterminatedGroup);
412  const char* kInvalidGroup = "Invalid group";
413  ExpectError("(?", kInvalidGroup);
414  const char* kUnterminatedCharacterClass = "Unterminated character class";
415  ExpectError("[", kUnterminatedCharacterClass);
416  ExpectError("[a-", kUnterminatedCharacterClass);
 // Quantifiers with no preceding atom.
417  const char* kNothingToRepeat = "Nothing to repeat";
418  ExpectError("*", kNothingToRepeat);
419  ExpectError("?", kNothingToRepeat);
420  ExpectError("+", kNothingToRepeat);
421  ExpectError("{1}", kNothingToRepeat);
422  ExpectError("{1,2}", kNothingToRepeat);
423  ExpectError("{1,}", kNothingToRepeat);
424 
425  // Check that we don't allow more than kMaxCaptures captures.
426  const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
427  const char* kTooManyCaptures = "Too many captures";
428  HeapStringAllocator allocator;
429  StringStream accumulator(&allocator);
 // The inclusive bound appends kMaxCaptures + 1 empty groups.
430  for (int i = 0; i <= kMaxCaptures; i++) {
431  accumulator.Add("()");
432  }
433  SmartArrayPointer<const char> many_captures(accumulator.ToCString());
434  ExpectError(*many_captures, kTooManyCaptures);
435 }
436 
437 
438 static bool IsDigit(uc16 c) {
439  return ('0' <= c && c <= '9');
440 }
441 
442 
443 static bool NotDigit(uc16 c) {
444  return !IsDigit(c);
445 }
446 
447 
448 static bool IsWhiteSpace(uc16 c) {
449  switch (c) {
450  case 0x09:
451  case 0x0A:
452  case 0x0B:
453  case 0x0C:
454  case 0x0d:
455  case 0x20:
456  case 0xA0:
457  case 0x2028:
458  case 0x2029:
459  case 0xFEFF:
460  return true;
461  default:
462  return unibrow::Space::Is(c);
463  }
464 }
465 
466 
467 static bool NotWhiteSpace(uc16 c) {
468  return !IsWhiteSpace(c);
469 }
470 
471 
472 static bool NotWord(uc16 c) {
473  return !IsRegExpWord(c);
474 }
475 
476 
477 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
478  ZoneScope scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
479  Zone* zone = Isolate::Current()->runtime_zone();
480  ZoneList<CharacterRange>* ranges =
481  new(zone) ZoneList<CharacterRange>(2, zone);
482  CharacterRange::AddClassEscape(c, ranges, zone);
483  for (unsigned i = 0; i < (1 << 16); i++) {
484  bool in_class = false;
485  for (int j = 0; !in_class && j < ranges->length(); j++) {
486  CharacterRange& range = ranges->at(j);
487  in_class = (range.from() <= i && i <= range.to());
488  }
489  CHECK_EQ(pred(i), in_class);
490  }
491 }
492 
493 
// Cross-checks each class escape against its reference predicate over the
// whole 16-bit range (see TestCharacterClassEscapes).
// NOTE(review): listing line 495 is absent from this listing.
494 TEST(CharacterClassEscapes) {
496  TestCharacterClassEscapes('.', IsRegExpNewline);
497  TestCharacterClassEscapes('d', IsDigit);
498  TestCharacterClassEscapes('D', NotDigit);
499  TestCharacterClassEscapes('s', IsWhiteSpace);
500  TestCharacterClassEscapes('S', NotWhiteSpace);
501  TestCharacterClassEscapes('w', IsRegExpWord);
502  TestCharacterClassEscapes('W', NotWord);
503 }
504 
505 
// Parses |input| and runs it through RegExpEngine::Compile, returning the
// node stored in the compile data. Returns NULL when parsing fails.
// The value returned by RegExpEngine::Compile itself is not inspected here.
// NOTE(review): listing line 507 is absent from this listing.
506 static RegExpNode* Compile(const char* input, bool multiline, bool is_ascii) {
508  Isolate* isolate = Isolate::Current();
509  FlatStringReader reader(isolate, CStrVector(input));
510  RegExpCompileData compile_data;
511  if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
512  &compile_data,
513  isolate->runtime_zone()))
514  return NULL;
515  Handle<String> pattern = isolate->factory()->
516  NewStringFromUtf8(CStrVector(input));
 // An empty string is passed as the sample subject.
517  Handle<String> sample_subject =
518  isolate->factory()->NewStringFromUtf8(CStrVector(""));
519  RegExpEngine::Compile(&compile_data,
520  false,
521  false,
522  multiline,
523  pattern,
524  sample_subject,
525  is_ascii,
526  isolate->runtime_zone());
527  return compile_data.node;
528 }
529 
530 
531 static void Execute(const char* input,
532  bool multiline,
533  bool is_ascii,
534  bool dot_output = false) {
535  v8::HandleScope scope;
536  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
537  RegExpNode* node = Compile(input, multiline, is_ascii);
538  USE(node);
539 #ifdef DEBUG
540  if (dot_output) {
541  RegExpEngine::DotPrint(input, node, false);
542  exit(0);
543  }
544 #endif // DEBUG
545 }
546 
547 
// Configuration type plugged into ZoneSplayTree by the splay tree test:
// int keys, int values, kNoKey == 0 and NoValue() == 0.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static int NoValue() { return 0; }
  // Three-way comparison: -1 if a < b, 1 if a > b, 0 if they are equal.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
566 
567 
// Deterministic mixing function used to generate test data: XORs the two
// scaled inputs and returns the result as an unsigned value.
static unsigned PseudoRandom(int i, int j) {
  const int scaled_i = i * 781;
  const int scaled_j = j * 329;
  // Applying bitwise NOT twice is an identity, so the XOR is returned as is.
  return static_cast<unsigned>(scaled_i ^ scaled_j);
}
571 
572 
// Drives a ZoneSplayTree<TestConfig> with pseudo-random keys below kLimit and
// mirrors the expected membership in the |seen| array. A key that is already
// present is verified and removed; a new key is inserted with value 3 * key.
// NOTE(review): listing lines 574, 590, 596, 599, 605, 609 and 616 are absent,
// including the declaration(s) of |loc| and any use of CHECK_MAPS_EQUAL.
573 TEST(SplayTreeSimple) {
575  static const unsigned kLimit = 1000;
576  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
577  ZoneSplayTree<TestConfig> tree(Isolate::Current()->runtime_zone());
578  bool seen[kLimit];
579  for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
 // Checks that tree membership matches |seen| for every key below kLimit;
 // expects a locator named |loc| in the expanding scope.
580 #define CHECK_MAPS_EQUAL() do { \
581  for (unsigned k = 0; k < kLimit; k++) \
582  CHECK_EQ(seen[k], tree.Find(k, &loc)); \
583  } while (false)
584  for (int i = 0; i < 50; i++) {
585  for (int j = 0; j < 50; j++) {
586  unsigned next = PseudoRandom(i, j) % kLimit;
587  if (seen[next]) {
588  // We've already seen this one. Check the value and remove
589  // it.
591  CHECK(tree.Find(next, &loc));
592  CHECK_EQ(next, loc.key());
593  CHECK_EQ(3 * next, loc.value());
594  tree.Remove(next);
595  seen[next] = false;
597  } else {
598  // Check that it wasn't there already and then add it.
600  CHECK(!tree.Find(next, &loc));
601  CHECK(tree.Insert(next, &loc));
602  CHECK_EQ(next, loc.key());
603  loc.set_value(3 * next);
604  seen[next] = true;
606  }
 // For a key known to be present, the lookup must land on that exact key.
 // The break then leaves the inner j loop.
607  int val = PseudoRandom(j, i) % kLimit;
608  if (seen[val]) {
610  CHECK(tree.FindGreatestLessThan(val, &loc));
611  CHECK_EQ(loc.key(), val);
612  break;
613  }
 // Same exact-hit check for the lookup in the other direction.
614  val = PseudoRandom(i + j, i - j) % kLimit;
615  if (seen[val]) {
617  CHECK(tree.FindLeastGreaterThan(val, &loc));
618  CHECK_EQ(loc.key(), val);
619  break;
620  }
621  }
622  }
623 }
624 
625 
// Builds kRangeCount sets of kRangeSize pseudo-random character ranges, adds
// them to a DispatchTable under the set's index, and then checks for every
// value below kLimit that the table's out-set reports exactly the sets whose
// ranges contain that value.
// NOTE(review): listing line 627 is absent from this listing.
626 TEST(DispatchTableConstruction) {
628  // Initialize test data.
629  static const int kLimit = 1000;
630  static const int kRangeCount = 8;
631  static const int kRangeSize = 16;
632  uc16 ranges[kRangeCount][2 * kRangeSize];
633  for (int i = 0; i < kRangeCount; i++) {
634  Vector<uc16> range(ranges[i], 2 * kRangeSize);
635  for (int j = 0; j < 2 * kRangeSize; j++) {
636  range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
637  }
 // After sorting, consecutive pairs (2k, 2k + 1) are used as [from, to].
638  range.Sort();
639  for (int j = 1; j < 2 * kRangeSize; j++) {
640  CHECK(range[j-1] <= range[j]);
641  }
642  }
643  // Enter test data into dispatch table.
644  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
645  DispatchTable table(Isolate::Current()->runtime_zone());
646  for (int i = 0; i < kRangeCount; i++) {
647  uc16* range = ranges[i];
648  for (int j = 0; j < 2 * kRangeSize; j += 2)
649  table.AddRange(CharacterRange(range[j], range[j + 1]), i,
650  Isolate::Current()->runtime_zone());
651  }
652  // Check that the table looks as we would expect
653  for (int p = 0; p < kLimit; p++) {
654  OutSet* outs = table.Get(p);
655  for (int j = 0; j < kRangeCount; j++) {
656  uc16* range = ranges[j];
 // Recompute membership of |p| in set |j| by scanning its ranges.
657  bool is_on = false;
658  for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
659  is_on = (range[k] <= p && p <= range[k + 1]);
660  CHECK_EQ(is_on, outs->Get(j));
661  }
662  }
663 }
664 
665 // Test of debug-only syntax.
666 #ifdef DEBUG
667 
// Verifies that possessive quantifiers ("*+", "++", "?+", "{m,n}+") parse to
// 'p' quantifier nodes when FLAG_regexp_possessive_quantifier is on, and are
// rejected as parse errors when it is off. The flag's previous value is saved
// first and restored at the end.
668 TEST(ParsePossessiveRepetition) {
669  bool old_flag_value = FLAG_regexp_possessive_quantifier;
670 
671  // Enable possessive quantifier syntax.
672  FLAG_regexp_possessive_quantifier = true;
673 
674  CHECK_PARSE_EQ("a*+", "(# 0 - p 'a')");
675  CHECK_PARSE_EQ("a++", "(# 1 - p 'a')");
676  CHECK_PARSE_EQ("a?+", "(# 0 1 p 'a')");
677  CHECK_PARSE_EQ("a{10,20}+", "(# 10 20 p 'a')");
678  CHECK_PARSE_EQ("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
679 
680  // Disable possessive quantifier syntax.
681  FLAG_regexp_possessive_quantifier = false;
682 
683  CHECK_PARSE_ERROR("a*+");
684  CHECK_PARSE_ERROR("a++");
685  CHECK_PARSE_ERROR("a?+");
686  CHECK_PARSE_ERROR("a{10,20}+");
687  CHECK_PARSE_ERROR("a{10,20}+b");
688 
689  FLAG_regexp_possessive_quantifier = old_flag_value;
690 }
691 
692 #endif
693 
694 // Tests of interpreter.
695 
696 
697 #ifndef V8_INTERPRETED_REGEXP
698 
// Aliases the regexp macro assembler of the build's target architecture as
// ArchRegExpMacroAssembler, the name the native tests below use.
// NOTE(review): there is no #else branch, so on any other target the alias is
// simply left undeclared. Also note that the include section of this file
// tests these macros with #ifdef while this chain uses #if.
699 #if V8_TARGET_ARCH_IA32
700 typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
701 #elif V8_TARGET_ARCH_X64
702 typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
703 #elif V8_TARGET_ARCH_ARM
704 typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
705 #elif V8_TARGET_ARCH_MIPS
706 typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
707 #endif
708 
710  public:
712  : env_(), scope_(), zone_(Isolate::Current()->runtime_zone(),
713  DELETE_ON_EXIT) {
714  env_ = v8::Context::New();
715  env_->Enter();
716  }
718  env_->Exit();
719  env_.Dispose();
720  }
721  private:
723  v8::HandleScope scope_;
724  v8::internal::ZoneScope zone_;
725 };
726 
727 
// Overload used by the native macro assembler tests: forwards the generated
// |code|, the |input| string, the start offset, the raw [input_start,
// input_end) byte pointers and the |captures| output array, followed by a
// literal 0 and the current isolate, to the call whose arguments are listed
// below.
// NOTE(review): listing line 734 is absent, so the callee and the return
// statement that open this call are not shown.
728 static ArchRegExpMacroAssembler::Result Execute(Code* code,
729  String* input,
730  int start_offset,
731  const byte* input_start,
732  const byte* input_end,
733  int* captures) {
735  code,
736  input,
737  start_offset,
738  input_start,
739  input_end,
740  captures,
741  0,
742  Isolate::Current());
743 }
744 
745 
// Assembles a program consisting only of Succeed(), runs it on "foofoo" and
// expects all four capture slots to read -1 afterwards.
// NOTE(review): listing lines 747, 762, 766 and 774 are absent, including the
// declaration of |seq_input| and of the variable receiving Execute's result.
746 TEST(MacroAssemblerNativeSuccess) {
748  ContextInitializer initializer;
749  Factory* factory = Isolate::Current()->factory();
750 
751  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
752  Isolate::Current()->runtime_zone());
753 
754  m.Succeed();
755 
756  Handle<String> source = factory->NewStringFromAscii(CStrVector(""));
757  Handle<Object> code_object = m.GetCode(source);
758  Handle<Code> code = Handle<Code>::cast(code_object);
759 
 // Seed the captures with arbitrary values so that the -1 checks below show
 // that the run overwrote them.
760  int captures[4] = {42, 37, 87, 117};
761  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
763  const byte* start_adr =
764  reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
765 
767  Execute(*code,
768  *input,
769  0,
770  start_adr,
771  start_adr + seq_input->length(),
772  captures);
773 
775  CHECK_EQ(-1, captures[0]);
776  CHECK_EQ(-1, captures[1]);
777  CHECK_EQ(-1, captures[2]);
778  CHECK_EQ(-1, captures[3]);
779 }
780 
781 
// Assembles a program that checks for "foo" at the current position, records
// the positions before and after it in registers 0 and 1, and succeeds;
// otherwise it fails. Runs it on "foofoo" (captures expected to be 0, 3, -1,
// -1) and then on "barbarbar".
// NOTE(review): listing lines 783, 808, 811, 819 and 836 are absent, including
// the declarations of |seq_input| and |result| and the checks on |result|.
782 TEST(MacroAssemblerNativeSimple) {
784  ContextInitializer initializer;
785  Factory* factory = Isolate::Current()->factory();
786 
787  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
788  Isolate::Current()->runtime_zone());
789 
790  uc16 foo_chars[3] = {'f', 'o', 'o'};
791  Vector<const uc16> foo(foo_chars, 3);
792 
793  Label fail;
794  m.CheckCharacters(foo, 0, &fail, true);
795  m.WriteCurrentPositionToRegister(0, 0);
796  m.AdvanceCurrentPosition(3);
797  m.WriteCurrentPositionToRegister(1, 0);
798  m.Succeed();
799  m.Bind(&fail);
800  m.Fail();
801 
802  Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo"));
803  Handle<Object> code_object = m.GetCode(source);
804  Handle<Code> code = Handle<Code>::cast(code_object);
805 
806  int captures[4] = {42, 37, 87, 117};
807  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
809  Address start_adr = seq_input->GetCharsAddress();
810 
812  Execute(*code,
813  *input,
814  0,
815  start_adr,
816  start_adr + input->length(),
817  captures);
818 
820  CHECK_EQ(0, captures[0]);
821  CHECK_EQ(3, captures[1]);
822  CHECK_EQ(-1, captures[2]);
823  CHECK_EQ(-1, captures[3]);
824 
 // Second run: same code against an input that does not start with "foo".
825  input = factory->NewStringFromAscii(CStrVector("barbarbar"));
826  seq_input = Handle<SeqAsciiString>::cast(input);
827  start_adr = seq_input->GetCharsAddress();
828 
829  result = Execute(*code,
830  *input,
831  0,
832  start_adr,
833  start_adr + input->length(),
834  captures);
835 
837 }
838 
839 
// Two-byte (UC16) variant of MacroAssemblerNativeSimple: the same "foo"
// program is assembled in UC16 mode and run against two-byte input strings.
// NOTE(review): listing lines 841, 869, 872, 880 and 899 are absent, including
// the declarations of |seq_input| and |result| and the checks on |result|.
840 TEST(MacroAssemblerNativeSimpleUC16) {
842  ContextInitializer initializer;
843  Factory* factory = Isolate::Current()->factory();
844 
845  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4,
846  Isolate::Current()->runtime_zone());
847 
848  uc16 foo_chars[3] = {'f', 'o', 'o'};
849  Vector<const uc16> foo(foo_chars, 3);
850 
851  Label fail;
852  m.CheckCharacters(foo, 0, &fail, true);
853  m.WriteCurrentPositionToRegister(0, 0);
854  m.AdvanceCurrentPosition(3);
855  m.WriteCurrentPositionToRegister(1, 0);
856  m.Succeed();
857  m.Bind(&fail);
858  m.Fail();
859 
860  Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo"));
861  Handle<Object> code_object = m.GetCode(source);
862  Handle<Code> code = Handle<Code>::cast(code_object);
863 
864  int captures[4] = {42, 37, 87, 117};
 // NOTE(review): '\xa0' is a plain char literal; where char is signed the
 // cast to uc16 would not give 0x00A0 -- confirm the intended code unit.
865  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
866  static_cast<uc16>('\xa0')};
867  Handle<String> input =
868  factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
870  Address start_adr = seq_input->GetCharsAddress();
871 
 // NOTE(review): the end pointer here adds input->length() unscaled, while
 // the second run below multiplies the length by 2 for its two-byte input --
 // confirm whether this difference is intentional.
873  Execute(*code,
874  *input,
875  0,
876  start_adr,
877  start_adr + input->length(),
878  captures);
879 
881  CHECK_EQ(0, captures[0]);
882  CHECK_EQ(3, captures[1]);
883  CHECK_EQ(-1, captures[2]);
884  CHECK_EQ(-1, captures[3]);
885 
 // Second run: input that does not start with "foo".
886  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
887  static_cast<uc16>('\xa0')};
888  input = factory->NewStringFromTwoByte(Vector<const uc16>(input_data2, 9));
889  seq_input = Handle<SeqTwoByteString>::cast(input);
890  start_adr = seq_input->GetCharsAddress();
891 
892  result = Execute(*code,
893  *input,
894  0,
895  start_adr,
896  start_adr + input->length() * 2,
897  captures);
898 
900 }
901 
902 
// Assembles a program that tries to load the character at offset 10, and on
// failure pushes a backtrack target and tries the same load again with a NULL
// label; the backtrack target fails the match. It is run on the six-character
// input "foofoo" with no capture array.
// NOTE(review): listing lines 904, 927, 930 and 938 are absent, including the
// declaration of |seq_input| and the check on Execute's result.
903 TEST(MacroAssemblerNativeBacktrack) {
905  ContextInitializer initializer;
906  Factory* factory = Isolate::Current()->factory();
907 
908  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0,
909  Isolate::Current()->runtime_zone());
910 
911  Label fail;
912  Label backtrack;
913  m.LoadCurrentCharacter(10, &fail);
914  m.Succeed();
915  m.Bind(&fail);
916  m.PushBacktrack(&backtrack);
917  m.LoadCurrentCharacter(10, NULL);
918  m.Succeed();
919  m.Bind(&backtrack);
920  m.Fail();
921 
922  Handle<String> source = factory->NewStringFromAscii(CStrVector(".........."));
923  Handle<Object> code_object = m.GetCode(source);
924  Handle<Code> code = Handle<Code>::cast(code_object);
925 
926  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
928  Address start_adr = seq_input->GetCharsAddress();
929 
931  Execute(*code,
932  *input,
933  0,
934  start_adr,
935  start_adr + input->length(),
936  NULL);
937 
939 }
940 
941 
// Assembles a program that records a two-character capture in registers 0/1,
// expects the back reference check to fail immediately after it, skips two
// more characters, expects the back reference to match there, and stores the
// final position in register 2. Run on "fooofo", the expected output is
// 0, 2, 6, -1.
// NOTE(review): listing lines 943, 970, 974 and 982 are absent, including the
// declaration of |seq_input| and the check on Execute's result.
942 TEST(MacroAssemblerNativeBackReferenceASCII) {
944  ContextInitializer initializer;
945  Factory* factory = Isolate::Current()->factory();
946 
947  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
948  Isolate::Current()->runtime_zone());
949 
950  m.WriteCurrentPositionToRegister(0, 0);
951  m.AdvanceCurrentPosition(2);
952  m.WriteCurrentPositionToRegister(1, 0);
953  Label nomatch;
954  m.CheckNotBackReference(0, &nomatch);
955  m.Fail();
956  m.Bind(&nomatch);
957  m.AdvanceCurrentPosition(2);
958  Label missing_match;
959  m.CheckNotBackReference(0, &missing_match);
960  m.WriteCurrentPositionToRegister(2, 0);
961  m.Succeed();
962  m.Bind(&missing_match);
963  m.Fail();
964 
 // NOTE(review): "\1" inside this C string literal is an octal escape (the
 // character with value 1), not a backslash followed by '1' -- confirm
 // whether "\\1" was intended.
965  Handle<String> source = factory->NewStringFromAscii(CStrVector("^(..)..\1"));
966  Handle<Object> code_object = m.GetCode(source);
967  Handle<Code> code = Handle<Code>::cast(code_object);
968 
969  Handle<String> input = factory->NewStringFromAscii(CStrVector("fooofo"));
971  Address start_adr = seq_input->GetCharsAddress();
972 
973  int output[4];
975  Execute(*code,
976  *input,
977  0,
978  start_adr,
979  start_adr + input->length(),
980  output);
981 
983  CHECK_EQ(0, output[0]);
984  CHECK_EQ(2, output[1]);
985  CHECK_EQ(6, output[2]);
986  CHECK_EQ(-1, output[3]);
987 }
988 
989 
// Two-byte (UC16) variant of the back reference test: the same program is
// assembled in UC16 mode and run on a six-code-unit input that contains
// 0x2028. The expected output is again 0, 2, 6, -1.
// NOTE(review): listing lines 991, 1020, 1024 and 1032 are absent, including
// the declaration of |seq_input| and the check on Execute's result.
990 TEST(MacroAssemblerNativeBackReferenceUC16) {
992  ContextInitializer initializer;
993  Factory* factory = Isolate::Current()->factory();
994 
995  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4,
996  Isolate::Current()->runtime_zone());
997 
998  m.WriteCurrentPositionToRegister(0, 0);
999  m.AdvanceCurrentPosition(2);
1000  m.WriteCurrentPositionToRegister(1, 0);
1001  Label nomatch;
1002  m.CheckNotBackReference(0, &nomatch);
1003  m.Fail();
1004  m.Bind(&nomatch);
1005  m.AdvanceCurrentPosition(2);
1006  Label missing_match;
1007  m.CheckNotBackReference(0, &missing_match);
1008  m.WriteCurrentPositionToRegister(2, 0);
1009  m.Succeed();
1010  m.Bind(&missing_match);
1011  m.Fail();
1012 
 // NOTE(review): "\1" inside this C string literal is an octal escape (the
 // character with value 1), not a backslash followed by '1' -- confirm
 // whether "\\1" was intended.
1013  Handle<String> source = factory->NewStringFromAscii(CStrVector("^(..)..\1"));
1014  Handle<Object> code_object = m.GetCode(source);
1015  Handle<Code> code = Handle<Code>::cast(code_object);
1016 
1017  const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
1018  Handle<String> input =
1019  factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
1021  Address start_adr = seq_input->GetCharsAddress();
1022 
1023  int output[4];
 // The end pointer is a byte address, so the length is scaled by 2 here.
1025  Execute(*code,
1026  *input,
1027  0,
1028  start_adr,
1029  start_adr + input->length() * 2,
1030  output);
1031 
1033  CHECK_EQ(0, output[0]);
1034  CHECK_EQ(2, output[1]);
1035  CHECK_EQ(6, output[2]);
1036  CHECK_EQ(-1, output[3]);
1037 }
1038 
1039 
1040 
// Hand-assembles native ASCII code that branches on CheckNotAtStart and then
// inspects characters on each path, and executes it twice on "foobar": once
// from offset 0 and once from offset 3.
// NOTE(review): several numbered lines of this listing are absent; the
// declarations of `seq_input` and `result`, and the checks on `result` after
// each Execute(), are not visible.
// NOTE(review): the test name spells "native" in lower case, unlike the
// sibling MacroAssemblerNative* tests.
1041 TEST(MacroAssemblernativeAtStart) {
1043  ContextInitializer initializer;
1044  Factory* factory = Isolate::Current()->factory();
1045 
1046  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0,
1047  Isolate::Current()->runtime_zone());
1048 
1049  Label not_at_start, newline, fail;
1050  m.CheckNotAtStart(&not_at_start);
1051  // Check that prevchar = '\n' and current = 'f'.
1052  m.CheckCharacter('\n', &newline);
// `fail` is a shared failure exit; both paths below jump back to it.
1053  m.Bind(&fail);
1054  m.Fail();
1055  m.Bind(&newline);
1056  m.LoadCurrentCharacter(0, &fail);
1057  m.CheckNotCharacter('f', &fail);
1058  m.Succeed();
1059 
1060  m.Bind(&not_at_start);
1061  // Check that prevchar = 'o' and current = 'b'.
1062  Label prevo;
1063  m.CheckCharacter('o', &prevo);
1064  m.Fail();
1065  m.Bind(&prevo);
1066  m.LoadCurrentCharacter(0, &fail);
1067  m.CheckNotCharacter('b', &fail);
1068  m.Succeed();
1069 
1070  Handle<String> source = factory->NewStringFromAscii(CStrVector("(^f|ob)"));
1071  Handle<Object> code_object = m.GetCode(source);
1072  Handle<Code> code = Handle<Code>::cast(code_object);
1073 
1074  Handle<String> input = factory->NewStringFromAscii(CStrVector("foobar"));
1076  Address start_adr = seq_input->GetCharsAddress();
1077 
// First run: start offset 0, input pointer at the beginning of the string.
1079  Execute(*code,
1080  *input,
1081  0,
1082  start_adr,
1083  start_adr + input->length(),
1084  NULL);
1085 
1087 
// Second run: start offset 3, with the input pointer advanced by the same
// amount so the code starts in the middle of the string.
1088  result = Execute(*code,
1089  *input,
1090  3,
1091  start_adr + 3,
1092  start_adr + input->length(),
1093  NULL);
1094 
1096 }
1097 
1098 
// Hand-assembles native ASCII code that captures the first three characters
// in registers 2/3 and probes that capture repeatedly with
// CheckNotBackReferenceIgnoreCase over the input "aBcAbCABCxYzab".
// NOTE(review): several numbered lines of this listing are absent; the
// declarations of `seq_input` and of the Execute() result are not visible.
1099 TEST(MacroAssemblerNativeBackRefNoCase) {
1101  ContextInitializer initializer;
1102  Factory* factory = Isolate::Current()->factory();
1103 
1104  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
1105  Isolate::Current()->runtime_zone());
1106 
1107  Label fail, succ;
1108 
// Register 0 records the match start; registers 2/3 bracket "aBc".
1109  m.WriteCurrentPositionToRegister(0, 0);
1110  m.WriteCurrentPositionToRegister(2, 0);
1111  m.AdvanceCurrentPosition(3);
1112  m.WriteCurrentPositionToRegister(3, 0);
1113  m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
1114  m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
// Third probe lands on "xYz" and is expected to branch; falling through
// reaches `fail`.
1115  Label expected_fail;
1116  m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
1117  m.Bind(&fail);
1118  m.Fail();
1119 
1120  m.Bind(&expected_fail);
1121  m.AdvanceCurrentPosition(3); // Skip "xYz"
// Only "ab" remains at offset 12, so this probe is expected to branch too.
1122  m.CheckNotBackReferenceIgnoreCase(2, &succ);
1123  m.Fail();
1124 
1125  m.Bind(&succ);
1126  m.WriteCurrentPositionToRegister(1, 0);
1127  m.Succeed();
1128 
// NOTE(review): each "\1" here is an octal escape (byte 0x01) in a C++
// string literal, not a backslash followed by '1'.
1129  Handle<String> source =
1130  factory->NewStringFromAscii(CStrVector("^(abc)\1\1(?!\1)...(?!\1)"));
1131  Handle<Object> code_object = m.GetCode(source);
1132  Handle<Code> code = Handle<Code>::cast(code_object);
1133 
1134  Handle<String> input =
1135  factory->NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
1137  Address start_adr = seq_input->GetCharsAddress();
1138 
1139  int output[4];
1141  Execute(*code,
1142  *input,
1143  0,
1144  start_adr,
1145  start_adr + input->length(),
1146  output);
1147 
// Whole match is [0, 12); the capture is [0, 3).
1149  CHECK_EQ(0, output[0]);
1150  CHECK_EQ(12, output[1]);
1151  CHECK_EQ(0, output[2]);
1152  CHECK_EQ(3, output[3]);
1153 }
1154 
1155 
1156 
// Exercises the register, backtrack-stack and loop primitives of the native
// macro assembler (push/pop register, stack pointer save/restore,
// SetRegister/AdvanceRegister, IfRegisterLT/GE, CheckGreedyLoop) and checks
// the six output registers afterwards.
// NOTE(review): several numbered lines of this listing are absent; the
// declarations of `seq_input` and of the Execute() result are not visible.
1157 TEST(MacroAssemblerNativeRegisters) {
1159  ContextInitializer initializer;
1160  Factory* factory = Isolate::Current()->factory();
1161 
1162  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 6,
1163  Isolate::Current()->runtime_zone());
1164 
// NOTE(review): foo_chars / foo are not referenced again in the visible
// part of this test.
1165  uc16 foo_chars[3] = {'f', 'o', 'o'};
1166  Vector<const uc16> foo(foo_chars, 3);
1167 
// out1..out6 are indices 0..5 (the six values checked at the end); sp and
// loop_cnt are indices 6 and 7 and are used as scratch registers.
1168  enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1169  Label fail;
1170  Label backtrack;
1171  m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
1172  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1173  m.PushBacktrack(&backtrack);
1174  m.WriteStackPointerToRegister(sp);
1175  // Fill stack and registers
1176  m.AdvanceCurrentPosition(2);
1177  m.WriteCurrentPositionToRegister(out1, 0);
1178  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1179  m.PushBacktrack(&fail);
1180  // Drop backtrack stack frames.
1181  m.ReadStackPointerFromRegister(sp);
1182  // And take the first backtrack (to &backtrack)
1183  m.Backtrack();
1184 
// NOTE(review): these three instructions are emitted after the Backtrack()
// above and before any bound label; presumably they are never executed --
// confirm.
1185  m.PushCurrentPosition();
1186  m.AdvanceCurrentPosition(2);
1187  m.PopCurrentPosition();
1188 
1189  m.Bind(&backtrack);
// Restores the position saved in out1 before the stack was filled (0).
1190  m.PopRegister(out1);
1191  m.ReadCurrentPositionFromRegister(out1);
1192  m.AdvanceCurrentPosition(3);
1193  m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
1194 
// Counting-up loop: three iterations, one character each.
1195  Label loop;
1196  m.SetRegister(loop_cnt, 0); // loop counter
1197  m.Bind(&loop);
1198  m.AdvanceRegister(loop_cnt, 1);
1199  m.AdvanceCurrentPosition(1);
1200  m.IfRegisterLT(loop_cnt, 3, &loop);
1201  m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
1202 
// Counting-down loop: counter goes 2 -> 1 -> 0 -> -1, three iterations.
1203  Label loop2;
1204  m.SetRegister(loop_cnt, 2); // loop counter
1205  m.Bind(&loop2);
1206  m.AdvanceRegister(loop_cnt, -1);
1207  m.AdvanceCurrentPosition(1);
1208  m.IfRegisterGE(loop_cnt, 0, &loop2);
1209  m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
1210 
// out4 is pushed twice, the position is rewound to out3, and the loop
// advances one character at a time until CheckGreedyLoop exits.
1211  Label loop3;
1212  Label exit_loop3;
1213  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1214  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1215  m.ReadCurrentPositionFromRegister(out3);
1216  m.Bind(&loop3);
1217  m.AdvanceCurrentPosition(1);
1218  m.CheckGreedyLoop(&exit_loop3);
1219  m.GoTo(&loop3);
1220  m.Bind(&exit_loop3);
1221  m.PopCurrentPosition();
1222  m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1]
1223 
1224  m.Succeed();
1225 
1226  m.Bind(&fail);
1227  m.Fail();
1228 
1229  Handle<String> source =
1230  factory->NewStringFromAscii(CStrVector("<loop test>"));
1231  Handle<Object> code_object = m.GetCode(source);
1232  Handle<Code> code = Handle<Code>::cast(code_object);
1233 
1234  // String long enough for test (content doesn't matter).
1235  Handle<String> input =
1236  factory->NewStringFromAscii(CStrVector("foofoofoofoofoo"));
1238  Address start_adr = seq_input->GetCharsAddress();
1239 
1240  int output[6];
1242  Execute(*code,
1243  *input,
1244  0,
1245  start_adr,
1246  start_adr + input->length(),
1247  output);
1248 
// out6 is never written by the assembled code, hence the expected -1.
1250  CHECK_EQ(0, output[0]);
1251  CHECK_EQ(3, output[1]);
1252  CHECK_EQ(6, output[2]);
1253  CHECK_EQ(9, output[3]);
1254  CHECK_EQ(9, output[4]);
1255  CHECK_EQ(-1, output[5]);
1256 }
1257 
1258 
// Assembles an endless loop that pushes a backtrack entry on every iteration,
// runs it, and checks that the isolate ends up with a pending exception.
// NOTE(review): several numbered lines of this listing are absent; the
// declarations of `seq_input` and of the Execute() result, and the check on
// that result, are not visible.
1259 TEST(MacroAssemblerStackOverflow) {
1261  ContextInitializer initializer;
1262  Isolate* isolate = Isolate::Current();
1263  Factory* factory = isolate->factory();
1264 
1265  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0,
1266  Isolate::Current()->runtime_zone());
1267 
// No exit from this loop in the generated code: each pass pushes one more
// backtrack entry and jumps back.
1268  Label loop;
1269  m.Bind(&loop);
1270  m.PushBacktrack(&loop);
1271  m.GoTo(&loop);
1272 
1273  Handle<String> source =
1274  factory->NewStringFromAscii(CStrVector("<stack overflow test>"));
1275  Handle<Object> code_object = m.GetCode(source);
1276  Handle<Code> code = Handle<Code>::cast(code_object);
1277 
1278  // String long enough for test (content doesn't matter).
1279  Handle<String> input =
1280  factory->NewStringFromAscii(CStrVector("dummy"));
1282  Address start_adr = seq_input->GetCharsAddress();
1283 
1285  Execute(*code,
1286  *input,
1287  0,
1288  start_adr,
1289  start_adr + input->length(),
1290  NULL);
1291 
// The exception is cleared so it does not leak out of this test.
1293  CHECK(isolate->has_pending_exception());
1294  isolate->clear_pending_exception();
1295 }
1296 
1297 
// Uses a very high register index (8000) alongside registers 0 and 1, moves
// the high register's value into register 1 via the backtrack stack, and
// checks the two captured values.
// NOTE(review): several numbered lines of this listing are absent; the
// declarations of `seq_input` and of the Execute() result are not visible.
1298 TEST(MacroAssemblerNativeLotsOfRegisters) {
1300  ContextInitializer initializer;
1301  Isolate* isolate = Isolate::Current();
1302  Factory* factory = isolate->factory();
1303 
1304  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 2,
1305  Isolate::Current()->runtime_zone());
1306 
1307  // At least 2048, to ensure the allocated space for registers
1308  // span one full page.
1309  const int large_number = 8000;
// The second argument is an offset added to the current position, so the
// high register holds 42 when execution starts at offset 0.
1310  m.WriteCurrentPositionToRegister(large_number, 42);
1311  m.WriteCurrentPositionToRegister(0, 0);
1312  m.WriteCurrentPositionToRegister(1, 1);
1313  Label done;
// Both outcomes of the check continue at `done`; only its side effect on
// the system stack matters here.
1314  m.CheckNotBackReference(0, &done); // Performs a system-stack push.
1315  m.Bind(&done);
// Copies register 8000 into register 1 through a push/pop pair.
1316  m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1317  m.PopRegister(1);
1318  m.Succeed();
1319 
1320  Handle<String> source =
1321  factory->NewStringFromAscii(CStrVector("<huge register space test>"));
1322  Handle<Object> code_object = m.GetCode(source);
1323  Handle<Code> code = Handle<Code>::cast(code_object);
1324 
1325  // String long enough for test (content doesn't matter).
1326  Handle<String> input =
1327  factory->NewStringFromAscii(CStrVector("sample text"));
1329  Address start_adr = seq_input->GetCharsAddress();
1330 
1331  int captures[2];
1333  Execute(*code,
1334  *input,
1335  0,
1336  start_adr,
1337  start_adr + input->length(),
1338  captures);
1339 
1341  CHECK_EQ(0, captures[0]);
1342  CHECK_EQ(42, captures[1]);
1343 
1344  isolate->clear_pending_exception();
1345 }
1346 
1347 #else // V8_INTERPRETED_REGEXP
1348 
// Body of the interpreted-regexp (V8_INTERPRETED_REGEXP) macro assembler
// test: assembles bytecode for "^f(o)o" by hand and runs it through
// IrregexpInterpreter::Match on a matching and a non-matching two-byte string.
// NOTE(review): the line that opens this definition is absent from this
// listing; only the body and the closing brace are visible.
1351  byte codes[1024];
1352  RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024),
1353  Isolate::Current()->runtime_zone());
1354  // ^f(o)o.
1355  Label fail, fail2, start;
1356  uc16 foo_chars[3];
1357  foo_chars[0] = 'f';
1358  foo_chars[1] = 'o';
1359  foo_chars[2] = 'o';
1360  Vector<const uc16> foo(foo_chars, 3);
// Register 4 is set to 42, pushed, then bumped to 84; the pushed 42 is
// popped into register 0 on the failure path below.
1361  m.SetRegister(4, 42);
1362  m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1363  m.AdvanceRegister(4, 42);
1364  m.GoTo(&start);
// Skipped by the GoTo above.
1365  m.Fail();
1366  m.Bind(&start);
1367  m.PushBacktrack(&fail2);
1368  m.CheckCharacters(foo, 0, &fail, true);
// Success path: registers 0..3 receive positions 0, 3, 1 and 2.
1369  m.WriteCurrentPositionToRegister(0, 0);
1370  m.PushCurrentPosition();
1371  m.AdvanceCurrentPosition(3);
1372  m.WriteCurrentPositionToRegister(1, 0);
1373  m.PopCurrentPosition();
1374  m.AdvanceCurrentPosition(1);
1375  m.WriteCurrentPositionToRegister(2, 0);
1376  m.AdvanceCurrentPosition(1);
1377  m.WriteCurrentPositionToRegister(3, 0);
1378  m.Succeed();
1379 
// Character mismatch: backtrack to the entry pushed at `start` (fail2).
1380  m.Bind(&fail);
1381  m.Backtrack();
1382  m.Succeed();
1383 
1384  m.Bind(&fail2);
1385  m.PopRegister(0);
1386  m.Fail();
1387 
1388  Isolate* isolate = Isolate::Current();
1389  Factory* factory = isolate->factory();
1390  HandleScope scope(isolate);
1391 
1392  Handle<String> source = factory->NewStringFromAscii(CStrVector("^f(o)o"));
1393  Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1394  int captures[5];
1395 
1396  const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
1397  Handle<String> f1_16 =
1398  factory->NewStringFromTwoByte(Vector<const uc16>(str1, 6));
1399 
// "foobar" starts with "foo", so the success path is expected.
1400  CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
1401  CHECK_EQ(0, captures[0]);
1402  CHECK_EQ(3, captures[1]);
1403  CHECK_EQ(1, captures[2]);
1404  CHECK_EQ(2, captures[3]);
1405  CHECK_EQ(84, captures[4]);
1406 
1407  const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
1408  Handle<String> f2_16 =
1409  factory->NewStringFromTwoByte(Vector<const uc16>(str2, 6));
1410 
// "barfoo" does not start with "foo": the match fails and register 0
// holds the popped 42.
1411  CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
1412  CHECK_EQ(42, captures[0]);
1413 }
1414 
1415 #endif // V8_INTERPRETED_REGEXP
1416 
1417 
// Checks DispatchTableConstructor::AddInverse: after adding the inverse of a
// set of ranges under choice index 0, a table entry must have bit 0 set
// exactly when the value lies in none of the ranges.
// NOTE(review): one numbered line right after the opening line is absent
// from this listing. PseudoRandom is not defined in the visible lines.
1418 TEST(AddInverseToTable) {
1420  static const int kLimit = 1000;
1421  static const int kRangeCount = 16;
// Ten rounds, each with kRangeCount pseudo-random ranges clamped to kLimit.
1422  for (int t = 0; t < 10; t++) {
1423  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
1424  Zone* zone = Isolate::Current()->runtime_zone();
1425  ZoneList<CharacterRange>* ranges =
1426  new(zone)
1427  ZoneList<CharacterRange>(kRangeCount, zone);
1428  for (int i = 0; i < kRangeCount; i++) {
1429  int from = PseudoRandom(t + 87, i + 25) % kLimit;
1430  int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1431  if (to > kLimit) to = kLimit;
1432  ranges->Add(CharacterRange(from, to), zone);
1433  }
1434  DispatchTable table(zone);
1435  DispatchTableConstructor cons(&table, false,
1436  Isolate::Current()->runtime_zone());
1437  cons.set_choice_index(0);
1438  cons.AddInverse(ranges);
// Brute-force membership test against every range, compared with the
// table: inside a range <=> bit 0 is clear.
1439  for (int i = 0; i < kLimit; i++) {
1440  bool is_on = false;
1441  for (int j = 0; !is_on && j < kRangeCount; j++)
1442  is_on = ranges->at(j).Contains(i);
1443  OutSet* set = table.Get(i);
1444  CHECK_EQ(is_on, set->Get(0) == false);
1445  }
1446  }
// Edge case near the top of the 16-bit range: inverting [0xFFF0, 0xFFFE]
// must leave 0xFFFE clear and 0xFFFF set.
1447  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
1448  Zone* zone = Isolate::Current()->runtime_zone();
1449  ZoneList<CharacterRange>* ranges =
1450  new(zone) ZoneList<CharacterRange>(1, zone);
1451  ranges->Add(CharacterRange(0xFFF0, 0xFFFE), zone);
1452  DispatchTable table(zone);
1453  DispatchTableConstructor cons(&table, false,
1454  Isolate::Current()->runtime_zone());
1455  cons.set_choice_index(0);
1456  cons.AddInverse(ranges);
1457  CHECK(!table.Get(0xFFFE)->Get(0));
1458  CHECK(table.Get(0xFFFF)->Get(0));
1459 }
1460 
1461 
// Returns the result of unibrow::Ecma262Canonicalize::Convert for `c`.
// A result count of 0 is treated as "maps to itself" and `c` is returned;
// otherwise exactly one output character is required and returned.
// NOTE(review): the declaration of the `canon` buffer is on a numbered line
// that is absent from this listing.
1462 static uc32 canonicalize(uc32 c) {
1464  int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1465  if (count == 0) {
1466  return c;
1467  } else {
1468  CHECK_EQ(1, count);
1469  return canon[0];
1470  }
1471 }
1472 
1473 
// Checks canonicalize() on Latin letters and against an upper-casing mapping:
// each a-z / A-Z pair canonicalizes identically, no value >= 128 canonicalizes
// below 128, and for every 16-bit value the canonical form equals the
// upper-case form except where that form is multi-character or would move a
// non-ASCII character into ASCII.
// NOTE(review): several numbered lines of this listing are absent; the
// declarations of `un_canonicalize`, `uncanon`, `to_upper` and the `upper`
// buffer used in the last loop are not visible.
1474 TEST(LatinCanonicalize) {
1476  for (char lower = 'a'; lower <= 'z'; lower++) {
1477  char upper = lower + ('A' - 'a');
1478  CHECK_EQ(canonicalize(lower), canonicalize(upper));
// The un-canonicalization of a lower-case letter is expected to yield
// exactly the upper- and lower-case forms, in that order.
1480  int length = un_canonicalize.get(lower, '\0', uncanon);
1481  CHECK_EQ(2, length);
1482  CHECK_EQ(upper, uncanon[0]);
1483  CHECK_EQ(lower, uncanon[1]);
1484  }
1485  for (uc32 c = 128; c < (1 << 21); c++)
1486  CHECK_GE(canonicalize(c), 128);
1488  // Canonicalization is only defined for the Basic Multilingual Plane.
1489  for (uc32 c = 0; c < (1 << 16); c++) {
1491  int length = to_upper.get(c, '\0', upper);
// A length of 0 is treated as "maps to itself".
1492  if (length == 0) {
1493  length = 1;
1494  upper[0] = c;
1495  }
1496  uc32 u = upper[0];
// Multi-character results and non-ASCII -> ASCII mappings are ignored:
// the character is then expected to canonicalize to itself.
1497  if (length > 1 || (c >= 128 && u < 128))
1498  u = c;
1499  CHECK_EQ(u, canonicalize(c));
1500  }
1501 }
1502 
1503 
// Returns the result of unibrow::CanonicalizationRange::Convert for `c`; the
// caller in this file uses it as the last code point of the block that starts
// at `c`. A result count of 0 yields `c` itself; otherwise exactly one output
// value is required and returned.
// NOTE(review): the declaration of the `canon` buffer is on a numbered line
// that is absent from this listing.
1504 static uc32 CanonRangeEnd(uc32 c) {
1506  int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1507  if (count == 0) {
1508  return c;
1509  } else {
1510  CHECK_EQ(1, count);
1511  return canon[0];
1512  }
1513 }
1514 
1515 
// Walks the 16-bit range block by block (block ends come from CanonRangeEnd)
// and checks that, inside each block, the un-canonicalization of
// block_start + i equals the un-canonicalization of block_start shifted by i.
// NOTE(review): several numbered lines of this listing are absent; the
// declarations of `un_canonicalize`, `first` and `succ` are not visible.
1516 TEST(RangeCanonicalization) {
1517  // Check that we arrive at the same result when using the basic
1518  // range canonicalization primitives as when using immediate
1519  // canonicalization.
1521  int block_start = 0;
1522  while (block_start <= 0xFFFF) {
1523  uc32 block_end = CanonRangeEnd(block_start);
// block_end is inclusive, hence the + 1.
1524  unsigned block_length = block_end - block_start + 1;
// Single-element blocks have nothing to compare.
1525  if (block_length > 1) {
1527  int first_length = un_canonicalize.get(block_start, '\0', first);
1528  for (unsigned i = 1; i < block_length; i++) {
1530  int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1531  CHECK_EQ(first_length, succ_length);
1532  for (int j = 0; j < succ_length; j++) {
1533  int calc = first[j] + i;
1534  int found = succ[j];
1535  CHECK_EQ(calc, found);
1536  }
1537  }
1538  }
1539  block_start = block_start + block_length;
1540  }
1541 }
1542 
1543 
// For every 16-bit value, checks that each member of its un-canonicalization
// set un-canonicalizes to that very same set (same length, same elements in
// the same order).
// NOTE(review): several numbered lines of this listing are absent; the
// declarations of `un_canonicalize`, `chars` and `chars2` are not visible.
1544 TEST(UncanonicalizeEquivalence) {
1547  for (int i = 0; i < (1 << 16); i++) {
1548  int length = un_canonicalize.get(i, '\0', chars);
1549  for (int j = 0; j < length; j++) {
1551  int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1552  CHECK_EQ(length, length2);
1553  for (int k = 0; k < length; k++)
1554  CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1555  }
1556  }
1557 }
1558 
1559 
// Calls input.AddCaseEquivalents() into a fresh zone-allocated list and checks
// that the list matches `expected` element by element (same count, same
// from/to bounds, same order).
//   input:    the range whose case equivalents are computed.
//   expected: the ranges AddCaseEquivalents is expected to append.
1560 static void TestRangeCaseIndependence(CharacterRange input,
1561  Vector<CharacterRange> expected) {
1562  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
1563  Zone* zone = Isolate::Current()->runtime_zone();
1564  int count = expected.length();
// The list is created with capacity `count` and starts out empty.
1565  ZoneList<CharacterRange>* list =
1566  new(zone) ZoneList<CharacterRange>(count, zone);
1567  input.AddCaseEquivalents(list, false, zone);
1568  CHECK_EQ(count, list->length());
1569  for (int i = 0; i < list->length(); i++) {
1570  CHECK_EQ(expected[i].from(), list->at(i).from());
1571  CHECK_EQ(expected[i].to(), list->at(i).to());
1572  }
1573 }
1574 
1575 
// Convenience wrapper around TestRangeCaseIndependence for the case where a
// single range is expected: stores `expected` as element 0 of `vector` and
// forwards it together with `input`.
// NOTE(review): the declaration of `vector` is on a numbered line that is
// absent from this listing.
1576 static void TestSimpleRangeCaseIndependence(CharacterRange input,
1577  CharacterRange expected) {
1579  vector[0] = expected;
1580  TestRangeCaseIndependence(input, vector);
1581 }
1582 
1583 
// Table of input range -> expected case-equivalent range pairs, each checked
// through TestSimpleRangeCaseIndependence.
// NOTE(review): several numbered lines of this listing are absent; in
// particular the second argument and closing parenthesis of the first two
// calls (the Singleton('a') and Singleton('z') cases) are not visible.
1584 TEST(CharacterRangeCaseIndependence) {
1586  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
1588  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
1590  TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
1591  CharacterRange('A', 'Z'));
1592  TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
1593  CharacterRange('C', 'F'));
1594  TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
1595  CharacterRange('A', 'B'));
1596  TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
1597  CharacterRange('Y', 'Z'));
// One character of slack on each side of a-z still yields only A-Z.
1598  TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
1599  CharacterRange('A', 'Z'));
1600  TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
1601  CharacterRange('a', 'z'));
1602  TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
1603  CharacterRange('c', 'f'));
1604  TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
1605  CharacterRange('a', 'z'));
1606  // Here we need to add [l-z] to complete the case independence of
1607  // [A-Za-z] but we expect [a-z] to be added since we always add a
1608  // whole block at a time.
1609  TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
1610  CharacterRange('a', 'z'));
1611 }
1612 
1613 
// Returns true if `c` lies inside at least one range of `ranges`; both range
// bounds are treated as inclusive. A NULL list is treated as empty and yields
// false. The list is scanned linearly.
1614 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1615  if (ranges == NULL)
1616  return false;
1617  for (int i = 0; i < ranges->length(); i++) {
1618  CharacterRange range = ranges->at(i);
1619  if (range.from() <= c && c <= range.to())
1620  return true;
1621  }
1622  return false;
1623 }
1624 
1625 
// Splits the "everything" character class against `overlay` with
// CharacterRange::Split and checks, for every 16-bit value, that it lands in
// `included` exactly when the overlay covers it and in `excluded` otherwise.
// NOTE(review): several numbered lines of this listing are absent; the
// declaration of `overlay` is not visible.
1626 TEST(CharClassDifference) {
1628  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
1629  Zone* zone = Isolate::Current()->runtime_zone();
1630  ZoneList<CharacterRange>* base =
1631  new(zone) ZoneList<CharacterRange>(1, zone);
1632  base->Add(CharacterRange::Everything(), zone);
1634  ZoneList<CharacterRange>* included = NULL;
1635  ZoneList<CharacterRange>* excluded = NULL;
1636  CharacterRange::Split(base, overlay, &included, &excluded,
1637  Isolate::Current()->runtime_zone());
1638  for (int i = 0; i < (1 << 16); i++) {
1639  bool in_base = InClass(i, base);
1640  if (in_base) {
// `overlay` is read as consecutive pairs forming half-open intervals
// [overlay[j], overlay[j+1]).
1641  bool in_overlay = false;
1642  for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
1643  if (overlay[j] <= i && i < overlay[j+1])
1644  in_overlay = true;
1645  }
1646  CHECK_EQ(in_overlay, InClass(i, included));
1647  CHECK_EQ(!in_overlay, InClass(i, excluded));
1648  } else {
// Values outside the base class must appear in neither output.
1649  CHECK(!InClass(i, included));
1650  CHECK(!InClass(i, excluded));
1651  }
1652  }
1653 }
1654 
1655 
// Feeds CharacterSet::Canonicalize four range lists (already sorted, out of
// order, with singletons, and adjacent ranges) and checks the resulting list
// contents after each call.
// NOTE(review): one numbered line right after the opening line is absent
// from this listing.
// NOTE(review): unlike the other tests here this one uses ASSERT_EQ rather
// than CHECK_EQ; if ASSERT_EQ is compiled out in non-debug builds these
// checks do nothing there -- confirm against checks.h.
1656 TEST(CanonicalizeCharacterSets) {
1658  ZoneScope scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
1659  Zone* zone = Isolate::Current()->runtime_zone();
1660  ZoneList<CharacterRange>* list =
1661  new(zone) ZoneList<CharacterRange>(4, zone);
// `set` wraps `list`; the assertions read the results back through `list`.
1662  CharacterSet set(list);
1663 
// Case 1: already sorted and disjoint -- expected unchanged.
1664  list->Add(CharacterRange(10, 20), zone);
1665  list->Add(CharacterRange(30, 40), zone);
1666  list->Add(CharacterRange(50, 60), zone);
1667  set.Canonicalize();
1668  ASSERT_EQ(3, list->length());
1669  ASSERT_EQ(10, list->at(0).from());
1670  ASSERT_EQ(20, list->at(0).to());
1671  ASSERT_EQ(30, list->at(1).from());
1672  ASSERT_EQ(40, list->at(1).to());
1673  ASSERT_EQ(50, list->at(2).from());
1674  ASSERT_EQ(60, list->at(2).to());
1675 
// Case 2: same ranges out of order -- expected sorted.
1676  list->Rewind(0);
1677  list->Add(CharacterRange(10, 20), zone);
1678  list->Add(CharacterRange(50, 60), zone);
1679  list->Add(CharacterRange(30, 40), zone);
1680  set.Canonicalize();
1681  ASSERT_EQ(3, list->length());
1682  ASSERT_EQ(10, list->at(0).from());
1683  ASSERT_EQ(20, list->at(0).to());
1684  ASSERT_EQ(30, list->at(1).from());
1685  ASSERT_EQ(40, list->at(1).to());
1686  ASSERT_EQ(50, list->at(2).from());
1687  ASSERT_EQ(60, list->at(2).to());
1688 
// Case 3: five disjoint ranges including singletons, out of order --
// expected sorted with nothing merged.
1689  list->Rewind(0);
1690  list->Add(CharacterRange(30, 40), zone);
1691  list->Add(CharacterRange(10, 20), zone);
1692  list->Add(CharacterRange(25, 25), zone);
1693  list->Add(CharacterRange(100, 100), zone);
1694  list->Add(CharacterRange(1, 1), zone);
1695  set.Canonicalize();
1696  ASSERT_EQ(5, list->length());
1697  ASSERT_EQ(1, list->at(0).from());
1698  ASSERT_EQ(1, list->at(0).to());
1699  ASSERT_EQ(10, list->at(1).from());
1700  ASSERT_EQ(20, list->at(1).to());
1701  ASSERT_EQ(25, list->at(2).from());
1702  ASSERT_EQ(25, list->at(2).to());
1703  ASSERT_EQ(30, list->at(3).from());
1704  ASSERT_EQ(40, list->at(3).to());
1705  ASSERT_EQ(100, list->at(4).from());
1706  ASSERT_EQ(100, list->at(4).to());
1707 
// Case 4: [10,19], [21,30] and the singleton 20 that bridges them --
// expected to collapse into the single range [10,30].
1708  list->Rewind(0);
1709  list->Add(CharacterRange(10, 19), zone);
1710  list->Add(CharacterRange(21, 30), zone);
1711  list->Add(CharacterRange(20, 20), zone);
1712  set.Canonicalize();
1713  ASSERT_EQ(1, list->length());
1714  ASSERT_EQ(10, list->at(0).from());
1715  ASSERT_EQ(30, list->at(0).to());
1716 }
1717 
1718 
// Builds two range lists, l1 (the X ranges) and l2 (the Y ranges), laid out
// so that every relative position of a Y range against an X range occurs
// once; `offset` moves each scenario to a fresh region of code points.
// NOTE(review): several numbered lines of this listing are absent. In the
// visible lines the lists are only populated; first_only, second_only and
// both are declared but not used.
1719 TEST(CharacterRangeMerge) {
1721  ZoneScope zone_scope(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
1722  ZoneList<CharacterRange> l1(4, Isolate::Current()->runtime_zone());
1723  ZoneList<CharacterRange> l2(4, Isolate::Current()->runtime_zone());
1724  Zone* zone = Isolate::Current()->runtime_zone();
1725  // Create all combinations of intersections of ranges, both singletons and
1726  // longer.
1727 
1728  int offset = 0;
1729 
1730  // The five kinds of singleton intersections:
1731  // X
1732  // Y - outside before
1733  // Y - outside touching start
1734  // Y - overlap
1735  // Y - outside touching end
1736  // Y - outside after
1737 
// X is fixed at offset + 2 while Y slides from offset + 0 to offset + 4.
1738  for (int i = 0; i < 5; i++) {
1739  l1.Add(CharacterRange::Singleton(offset + 2), zone);
1740  l2.Add(CharacterRange::Singleton(offset + i), zone);
1741  offset += 6;
1742  }
1743 
1744  // The seven kinds of singleton/non-singleton intersections:
1745  // XXX
1746  // Y - outside before
1747  // Y - outside touching start
1748  // Y - inside touching start
1749  // Y - entirely inside
1750  // Y - inside touching end
1751  // Y - outside touching end
1752  // Y - disjoint after
1753 
// X covers offset + 2 .. offset + 4 while Y slides across offset + 0 .. + 6.
1754  for (int i = 0; i < 7; i++) {
1755  l1.Add(CharacterRange::Range(offset + 2, offset + 4), zone);
1756  l2.Add(CharacterRange::Singleton(offset + i), zone);
1757  offset += 8;
1758  }
1759 
1760  // The eleven kinds of non-singleton intersections:
1761  //
1762  // XXXXXXXX
1763  // YYYY - outside before.
1764  // YYYY - outside touching start.
1765  // YYYY - overlapping start
1766  // YYYY - inside touching start
1767  // YYYY - entirely inside
1768  // YYYY - inside touching end
1769  // YYYY - overlapping end
1770  // YYYY - outside touching end
1771  // YYYY - outside after
1772  // YYYYYYYY - identical
1773  // YYYYYYYYYYYY - containing entirely.
1774 
// The loop covers the sliding four-wide Y cases in steps of two; the
// "identical" and "containing entirely" cases are added after the loop.
// NOTE(review): offset + 6 .. offset + 15 spans 10 code points if both
// bounds are inclusive, which does not match "Length 8" below -- confirm.
1775  for (int i = 0; i < 9; i++) {
1776  l1.Add(CharacterRange::Range(offset + 6, offset + 15), zone); // Length 8.
1777  l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), zone);
1778  offset += 22;
1779  }
1780  l1.Add(CharacterRange::Range(offset + 6, offset + 15), zone);
1781  l2.Add(CharacterRange::Range(offset + 6, offset + 15), zone);
1782  offset += 22;
1783  l1.Add(CharacterRange::Range(offset + 6, offset + 15), zone);
1784  l2.Add(CharacterRange::Range(offset + 4, offset + 17), zone);
1785  offset += 22;
1786 
1787  // Different kinds of multi-range overlap:
1788  // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
1789  // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
1790 
// Two wide X ranges are added up front; the loop then adds six
// (four-wide range, singleton) Y pairs, advancing offset by 9 each time.
1791  l1.Add(CharacterRange::Range(offset, offset + 21), zone);
1792  l1.Add(CharacterRange::Range(offset + 31, offset + 52), zone);
1793  for (int i = 0; i < 6; i++) {
1794  l2.Add(CharacterRange::Range(offset + 2, offset + 5), zone);
1795  l2.Add(CharacterRange::Singleton(offset + 8), zone);
1796  offset += 9;
1797  }
1798 
1801 
1802  ZoneList<CharacterRange> first_only(4, Isolate::Current()->runtime_zone());
1803  ZoneList<CharacterRange> second_only(4, Isolate::Current()->runtime_zone());
1804  ZoneList<CharacterRange> both(4, Isolate::Current()->runtime_zone());
1805 }
1806 
1807 
// Runs the file's Execute() helper on the pattern \b\w+\b (the literal's
// doubled backslashes are C++ escapes for single backslashes) with the flags
// false, true, true. The helper is not defined in the visible lines, so the
// meaning of the flags cannot be stated here.
// NOTE(review): one numbered line right after the opening line is absent
// from this listing.
1808 TEST(Graph) {
1810  Execute("\\b\\w+\\b", false, true, true);
1811 }
byte * Address
Definition: globals.h:157
#define CHECK_MIN_MAX(input, min, max)
Definition: test-regexp.cc:138
OutSet * Get(uc16 value)
Definition: jsregexp.cc:5707
static bool Initialize(Deserializer *des)
Definition: v8.cc:64
static bool ParseRegExp(FlatStringReader *input, bool multiline, RegExpCompileData *result, Zone *zone)
Definition: parser.cc:5921
bool Find(const Key &key, Locator *locator)
#define CHECK_EQ(expected, value)
Definition: checks.h:219
static Vector< const int > GetWordBounds()
Definition: jsregexp.cc:5279
bool Insert(const Key &key, Locator *locator)
static Result Execute(Code *code, String *input, int start_offset, const byte *input_start, const byte *input_end, int *output, int output_size, Isolate *isolate)
void Sort(int(*cmp)(const T *, const T *))
Definition: utils.h:411
static int Convert(uchar c, uchar n, uchar *result, bool *allow_caching_ptr)
Definition: unicode.cc:1783
static Handle< T > cast(Handle< S > that)
Definition: handles.h:81
bool IsRegExpWord(uc16 c)
int32_t uc32
Definition: globals.h:260
void AddRange(CharacterRange range, int value, Zone *zone)
Definition: jsregexp.cc:5618
#define CHECK_PARSE_EQ(input, expected)
Definition: test-regexp.cc:136
SmartArrayPointer< const char > ToString(Zone *zone)
Definition: ast.cc:956
static CharacterRange Everything()
Definition: jsregexp.h:275
bool Get(unsigned value)
Definition: jsregexp.cc:5604
#define ASSERT(condition)
Definition: checks.h:270
static const int kMaxWidth
Definition: unicode.h:318
void clear_pending_exception()
Definition: isolate.h:555
#define CHECK(condition)
Definition: checks.h:56
Factory * factory()
Definition: isolate.h:992
#define CHECK_GE(a, b)
Definition: checks.h:228
int get(uchar c, uchar n, uchar *result)
Definition: unicode-inl.h:49
int foo
void Add(Vector< const char > format, Vector< FmtElm > elms)
static void Split(ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5314
uint8_t byte
Definition: globals.h:156
Handle< String > NewStringFromUtf8(Vector< const char > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:207
const Register sp
static int Convert(uchar c, uchar n, uchar *result, bool *allow_caching_ptr)
Definition: unicode.cc:1327
static int NoValue()
Definition: test-regexp.cc:553
#define CHECK_PARSE_ERROR(input)
Definition: test-regexp.cc:135
static CharacterRange Range(uc16 from, uc16 to)
Definition: jsregexp.h:271
static const int kInfinity
Definition: ast.h:2125
static const int kMaxWidth
Definition: unicode.h:297
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5399
void AddInverse(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5991
static int Compare(int a, int b)
Definition: test-regexp.cc:554
virtual int min_match()=0
void AddCaseEquivalents(ZoneList< CharacterRange > *ranges, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5333
#define CHECK_MAPS_EQUAL()
bool IsRegExpNewline(uc16 c)
static RegExpImpl::IrregexpResult Match(Isolate *isolate, Handle< ByteArray > code, Handle< String > subject, int *captures, int start_position)
Handle< String > NewStringFromTwoByte(Vector< const uc16 > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:216
int length() const
Definition: utils.h:384
#define CHECK_SIMPLE(input, simple)
Definition: test-regexp.cc:137
bool has_pending_exception()
Definition: isolate.h:561
bool FindLeastGreaterThan(const Key &key, Locator *locator)
Vector< const char > CStrVector(const char *data)
Definition: utils.h:526
static const int kNoKey
Definition: test-regexp.cc:552
int StrLength(const char *string)
Definition: utils.h:234
Handle< String > NewStringFromAscii(Vector< const char > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:199
bool is_null() const
Definition: handles.h:87
bool FindGreatestLessThan(const Key &key, Locator *locator)
static const int kMaxWidth
Definition: unicode.h:311
uint16_t uc16
Definition: globals.h:259
Zone * runtime_zone()
Definition: isolate.h:868
#define ASSERT_EQ(v1, v2)
Definition: checks.h:271
static void DotPrint(const char *label, RegExpNode *node, bool ignore_case)
void USE(T)
Definition: globals.h:289
static bool Is(uchar c)
Definition: unicode.cc:724
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 and VFP2 enable use of VFP2 instructions if available enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of MIPS FPU instructions if NULL
Definition: flags.cc:301
static Persistent< Context > New(ExtensionConfiguration *extensions=NULL, Handle< ObjectTemplate > global_template=Handle< ObjectTemplate >(), Handle< Value > global_object=Handle< Value >())
Definition: api.cc:4411
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra code(assertions) for debugging") DEFINE_bool(code_comments
SmartArrayPointer< const char > ToCString() const
virtual int max_match()=0
static const int kMaxWidth
Definition: unicode.h:304
static void AddClassEscape(uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5231
static bool Initialize()
Definition: api.cc:4269
static CharacterRange Singleton(uc16 value)
Definition: jsregexp.h:268
unsigned int uchar
Definition: unicode.h:40
static CompilationResult Compile(RegExpCompileData *input, bool ignore_case, bool global, bool multiline, Handle< String > pattern, Handle< String > sample_subject, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:6042