v8  3.11.10(node0.8.26)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
test-regexp.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 
29 #include <stdlib.h>
30 
31 #include "v8.h"
32 
33 #include "ast.h"
34 #include "char-predicates-inl.h"
35 #include "cctest.h"
36 #include "jsregexp.h"
37 #include "parser.h"
38 #include "regexp-macro-assembler.h"
40 #include "string-stream.h"
41 #include "zone-inl.h"
42 #ifdef V8_INTERPRETED_REGEXP
43 #include "interpreter-irregexp.h"
44 #else // V8_INTERPRETED_REGEXP
45 #include "macro-assembler.h"
46 #include "code.h"
47 #ifdef V8_TARGET_ARCH_ARM
48 #include "arm/assembler-arm.h"
51 #endif
52 #ifdef V8_TARGET_ARCH_MIPS
53 #include "mips/assembler-mips.h"
56 #endif
57 #ifdef V8_TARGET_ARCH_X64
58 #include "x64/assembler-x64.h"
61 #endif
62 #ifdef V8_TARGET_ARCH_IA32
63 #include "ia32/assembler-ia32.h"
66 #endif
67 #endif // V8_INTERPRETED_REGEXP
68 
69 using namespace v8::internal;
70 
71 
72 static bool CheckParse(const char* input) {
74  v8::HandleScope scope;
75  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
76  FlatStringReader reader(Isolate::Current(), CStrVector(input));
77  RegExpCompileData result;
78  return v8::internal::RegExpParser::ParseRegExp(&reader, false, &result);
79 }
80 
81 
82 static SmartArrayPointer<const char> Parse(const char* input) {
84  v8::HandleScope scope;
85  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
86  FlatStringReader reader(Isolate::Current(), CStrVector(input));
87  RegExpCompileData result;
88  CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, &result));
89  CHECK(result.tree != NULL);
90  CHECK(result.error.is_null());
92  result.tree->ToString(Isolate::Current()->zone());
93  return output;
94 }
95 
96 static bool CheckSimple(const char* input) {
98  v8::HandleScope scope;
99  unibrow::Utf8InputBuffer<> buffer(input, StrLength(input));
100  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
101  FlatStringReader reader(Isolate::Current(), CStrVector(input));
102  RegExpCompileData result;
103  CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, &result));
104  CHECK(result.tree != NULL);
105  CHECK(result.error.is_null());
106  return result.simple;
107 }
108 
// Minimum and maximum match length of a parsed pattern, as returned by
// CheckMinMaxMatch. Kept as a plain aggregate so that it can be initialised
// with `{ min, max }`.
struct MinMaxPair {
  int min_match;  // Value of RegExpTree::min_match() for the pattern.
  int max_match;  // Value of RegExpTree::max_match() for the pattern.
};
113 
114 static MinMaxPair CheckMinMaxMatch(const char* input) {
116  v8::HandleScope scope;
117  unibrow::Utf8InputBuffer<> buffer(input, StrLength(input));
118  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
119  FlatStringReader reader(Isolate::Current(), CStrVector(input));
120  RegExpCompileData result;
121  CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, &result));
122  CHECK(result.tree != NULL);
123  CHECK(result.error.is_null());
124  int min_match = result.tree->min_match();
125  int max_match = result.tree->max_match();
126  MinMaxPair pair = { min_match, max_match };
127  return pair;
128 }
129 
130 
// Assertion helpers for the parser tests. Each expands to a single statement
// and expects the caller to supply the terminating semicolon.
//
// CHECK_PARSE_ERROR: |input| must be rejected by the parser.
// CHECK_PARSE_EQ:    the printed syntax tree of |input| must equal |expected|.
// CHECK_SIMPLE:      the parser's "simple" flag for |input| must equal
//                    |simple|.
// CHECK_MIN_MAX:     the min/max match lengths of |input| must equal
//                    |min| and |max|.
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))
#define CHECK_MIN_MAX(input, min, max)                                         \
  do {                                                                         \
    MinMaxPair min_max = CheckMinMaxMatch(input);                              \
    CHECK_EQ(min, min_max.min_match);                                          \
    CHECK_EQ(max, min_max.max_match);                                          \
  } while (false)
139 
142 
143  CHECK_PARSE_ERROR("?");
144 
145  CHECK_PARSE_EQ("abc", "'abc'");
146  CHECK_PARSE_EQ("", "%");
147  CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
148  CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
149  CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
150  CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
151  CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
152  CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
153  CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
154  CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
155  CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
156  CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
157  CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
158  CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
159  CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
160  CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
161  CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
162  CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
163  CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
164  CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
165  CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
166  CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
167  CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
168  CHECK_PARSE_EQ("(?:foo)", "'foo'");
169  CHECK_PARSE_EQ("(?: foo )", "' foo '");
170  CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
171  CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
172  CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
173  CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
174  CHECK_PARSE_EQ("()", "(^ %)");
175  CHECK_PARSE_EQ("(?=)", "(-> + %)");
176  CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
177  CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
178  CHECK_PARSE_EQ("[x]", "[x]");
179  CHECK_PARSE_EQ("[xyz]", "[x y z]");
180  CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
181  CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
182  CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
183  CHECK_PARSE_EQ("]", "']'");
184  CHECK_PARSE_EQ("}", "'}'");
185  CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
186  CHECK_PARSE_EQ("[\\d]", "[0-9]");
187  CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
188  CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
189  CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
190  CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");
191  // Control character outside character class.
192  CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
193  "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
194  CHECK_PARSE_EQ("\\c!", "'\\c!'");
195  CHECK_PARSE_EQ("\\c_", "'\\c_'");
196  CHECK_PARSE_EQ("\\c~", "'\\c~'");
197  CHECK_PARSE_EQ("\\c1", "'\\c1'");
198  // Control character inside character class.
199  CHECK_PARSE_EQ("[\\c!]", "[\\ c !]");
200  CHECK_PARSE_EQ("[\\c_]", "[\\x1f]");
201  CHECK_PARSE_EQ("[\\c~]", "[\\ c ~]");
202  CHECK_PARSE_EQ("[\\ca]", "[\\x01]");
203  CHECK_PARSE_EQ("[\\cz]", "[\\x1a]");
204  CHECK_PARSE_EQ("[\\cA]", "[\\x01]");
205  CHECK_PARSE_EQ("[\\cZ]", "[\\x1a]");
206  CHECK_PARSE_EQ("[\\c1]", "[\\x11]");
207 
208  CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
209  CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
210  CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
211  CHECK_PARSE_EQ("\\0", "'\\x00'");
212  CHECK_PARSE_EQ("\\8", "'8'");
213  CHECK_PARSE_EQ("\\9", "'9'");
214  CHECK_PARSE_EQ("\\11", "'\\x09'");
215  CHECK_PARSE_EQ("\\11a", "'\\x09a'");
216  CHECK_PARSE_EQ("\\011", "'\\x09'");
217  CHECK_PARSE_EQ("\\00011", "'\\x0011'");
218  CHECK_PARSE_EQ("\\118", "'\\x098'");
219  CHECK_PARSE_EQ("\\111", "'I'");
220  CHECK_PARSE_EQ("\\1111", "'I1'");
221  CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
222  CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
223  CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
224  CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
225  CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
226  " (# 0 - g (<- 1)))");
227  CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
228  " (# 0 - g (<- 2)))");
229  CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
230  " (# 0 - g (<- 3)))");
231  CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
232  " (# 0 - g '\\x04'))");
233  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
234  "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
235  " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
236  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
237  "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
238  " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
239  CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
240  CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
241  CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
242  CHECK_PARSE_EQ("(?=a)?a", "'a'");
243  CHECK_PARSE_EQ("(?=a){0,10}a", "'a'");
244  CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')");
245  CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')");
246  CHECK_PARSE_EQ("(?!a)?a", "'a'");
247  CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
248  CHECK_PARSE_EQ("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
249  CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
250  CHECK_PARSE_EQ("[\\0]", "[\\x00]");
251  CHECK_PARSE_EQ("[\\11]", "[\\x09]");
252  CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
253  CHECK_PARSE_EQ("[\\011]", "[\\x09]");
254  CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
255  CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
256  CHECK_PARSE_EQ("[\\111]", "[I]");
257  CHECK_PARSE_EQ("[\\1111]", "[I 1]");
258  CHECK_PARSE_EQ("\\x34", "'\x34'");
259  CHECK_PARSE_EQ("\\x60", "'\x60'");
260  CHECK_PARSE_EQ("\\x3z", "'x3z'");
261  CHECK_PARSE_EQ("\\c", "'\\c'");
262  CHECK_PARSE_EQ("\\u0034", "'\x34'");
263  CHECK_PARSE_EQ("\\u003z", "'u003z'");
264  CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
265 
266  CHECK_SIMPLE("a", true);
267  CHECK_SIMPLE("a|b", false);
268  CHECK_SIMPLE("a\\n", false);
269  CHECK_SIMPLE("^a", false);
270  CHECK_SIMPLE("a$", false);
271  CHECK_SIMPLE("a\\b!", false);
272  CHECK_SIMPLE("a\\Bb", false);
273  CHECK_SIMPLE("a*", false);
274  CHECK_SIMPLE("a*?", false);
275  CHECK_SIMPLE("a?", false);
276  CHECK_SIMPLE("a??", false);
277  CHECK_SIMPLE("a{0,1}?", false);
278  CHECK_SIMPLE("a{1,1}?", false);
279  CHECK_SIMPLE("a{1,2}?", false);
280  CHECK_SIMPLE("a+?", false);
281  CHECK_SIMPLE("(a)", false);
282  CHECK_SIMPLE("(a)\\1", false);
283  CHECK_SIMPLE("(\\1a)", false);
284  CHECK_SIMPLE("\\1(a)", false);
285  CHECK_SIMPLE("a\\s", false);
286  CHECK_SIMPLE("a\\S", false);
287  CHECK_SIMPLE("a\\d", false);
288  CHECK_SIMPLE("a\\D", false);
289  CHECK_SIMPLE("a\\w", false);
290  CHECK_SIMPLE("a\\W", false);
291  CHECK_SIMPLE("a.", false);
292  CHECK_SIMPLE("a\\q", false);
293  CHECK_SIMPLE("a[a]", false);
294  CHECK_SIMPLE("a[^a]", false);
295  CHECK_SIMPLE("a[a-z]", false);
296  CHECK_SIMPLE("a[\\q]", false);
297  CHECK_SIMPLE("a(?:b)", false);
298  CHECK_SIMPLE("a(?=b)", false);
299  CHECK_SIMPLE("a(?!b)", false);
300  CHECK_SIMPLE("\\x60", false);
301  CHECK_SIMPLE("\\u0060", false);
302  CHECK_SIMPLE("\\cA", false);
303  CHECK_SIMPLE("\\q", false);
304  CHECK_SIMPLE("\\1112", false);
305  CHECK_SIMPLE("\\0", false);
306  CHECK_SIMPLE("(a)\\1", false);
307  CHECK_SIMPLE("(?=a)?a", false);
308  CHECK_SIMPLE("(?!a)?a\\1", false);
309  CHECK_SIMPLE("(?:(?=a))a\\1", false);
310 
311  CHECK_PARSE_EQ("a{}", "'a{}'");
312  CHECK_PARSE_EQ("a{,}", "'a{,}'");
313  CHECK_PARSE_EQ("a{", "'a{'");
314  CHECK_PARSE_EQ("a{z}", "'a{z}'");
315  CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
316  CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
317  CHECK_PARSE_EQ("a{12,", "'a{12,'");
318  CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
319  CHECK_PARSE_EQ("{}", "'{}'");
320  CHECK_PARSE_EQ("{,}", "'{,}'");
321  CHECK_PARSE_EQ("{", "'{'");
322  CHECK_PARSE_EQ("{z}", "'{z}'");
323  CHECK_PARSE_EQ("{1z}", "'{1z}'");
324  CHECK_PARSE_EQ("{12z}", "'{12z}'");
325  CHECK_PARSE_EQ("{12,", "'{12,'");
326  CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
327 
328  CHECK_MIN_MAX("a", 1, 1);
329  CHECK_MIN_MAX("abc", 3, 3);
330  CHECK_MIN_MAX("a[bc]d", 3, 3);
331  CHECK_MIN_MAX("a|bc", 1, 2);
332  CHECK_MIN_MAX("ab|c", 1, 2);
333  CHECK_MIN_MAX("a||bc", 0, 2);
334  CHECK_MIN_MAX("|", 0, 0);
335  CHECK_MIN_MAX("(?:ab)", 2, 2);
336  CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
337  CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
338  CHECK_MIN_MAX("(ab)", 2, 2);
339  CHECK_MIN_MAX("(ab|cde)", 2, 3);
340  CHECK_MIN_MAX("(ab)\\1", 2, 4);
341  CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
342  CHECK_MIN_MAX("(?:ab)?", 0, 2);
343  CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
344  CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
345  CHECK_MIN_MAX("a?", 0, 1);
348  CHECK_MIN_MAX("a??", 0, 1);
351  CHECK_MIN_MAX("(?:a?)?", 0, 1);
352  CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
353  CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
354  CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
355  CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
356  CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
357  CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
358  CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
359  CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
360  CHECK_MIN_MAX("a{0}", 0, 0);
361  CHECK_MIN_MAX("(?:a+){0}", 0, 0);
362  CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
367  CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
368  CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
369  CHECK_MIN_MAX("a\\bc", 2, 2);
370  CHECK_MIN_MAX("a\\Bc", 2, 2);
371  CHECK_MIN_MAX("a\\sc", 3, 3);
372  CHECK_MIN_MAX("a\\Sc", 3, 3);
373  CHECK_MIN_MAX("a(?=b)c", 2, 2);
374  CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
375  CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
376 }
377 
378 TEST(ParserRegression) {
379  CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
380  CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
381  CHECK_PARSE_EQ("{", "'{'");
382  CHECK_PARSE_EQ("a|", "(| 'a' %)");
383 }
384 
385 static void ExpectError(const char* input,
386  const char* expected) {
388  v8::HandleScope scope;
389  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
390  FlatStringReader reader(Isolate::Current(), CStrVector(input));
391  RegExpCompileData result;
392  CHECK(!v8::internal::RegExpParser::ParseRegExp(&reader, false, &result));
393  CHECK(result.tree == NULL);
394  CHECK(!result.error.is_null());
395  SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
396  CHECK_EQ(expected, *str);
397 }
398 
399 
400 TEST(Errors) {
402  const char* kEndBackslash = "\\ at end of pattern";
403  ExpectError("\\", kEndBackslash);
404  const char* kUnterminatedGroup = "Unterminated group";
405  ExpectError("(foo", kUnterminatedGroup);
406  const char* kInvalidGroup = "Invalid group";
407  ExpectError("(?", kInvalidGroup);
408  const char* kUnterminatedCharacterClass = "Unterminated character class";
409  ExpectError("[", kUnterminatedCharacterClass);
410  ExpectError("[a-", kUnterminatedCharacterClass);
411  const char* kNothingToRepeat = "Nothing to repeat";
412  ExpectError("*", kNothingToRepeat);
413  ExpectError("?", kNothingToRepeat);
414  ExpectError("+", kNothingToRepeat);
415  ExpectError("{1}", kNothingToRepeat);
416  ExpectError("{1,2}", kNothingToRepeat);
417  ExpectError("{1,}", kNothingToRepeat);
418 
419  // Check that we don't allow more than kMaxCapture captures
420  const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
421  const char* kTooManyCaptures = "Too many captures";
422  HeapStringAllocator allocator;
423  StringStream accumulator(&allocator);
424  for (int i = 0; i <= kMaxCaptures; i++) {
425  accumulator.Add("()");
426  }
427  SmartArrayPointer<const char> many_captures(accumulator.ToCString());
428  ExpectError(*many_captures, kTooManyCaptures);
429 }
430 
431 
432 static bool IsDigit(uc16 c) {
433  return ('0' <= c && c <= '9');
434 }
435 
436 
437 static bool NotDigit(uc16 c) {
438  return !IsDigit(c);
439 }
440 
441 
442 static bool IsWhiteSpace(uc16 c) {
443  switch (c) {
444  case 0x09:
445  case 0x0A:
446  case 0x0B:
447  case 0x0C:
448  case 0x0d:
449  case 0x20:
450  case 0xA0:
451  case 0x2028:
452  case 0x2029:
453  case 0xFEFF:
454  return true;
455  default:
456  return unibrow::Space::Is(c);
457  }
458 }
459 
460 
461 static bool NotWhiteSpace(uc16 c) {
462  return !IsWhiteSpace(c);
463 }
464 
465 
466 static bool NotWord(uc16 c) {
467  return !IsRegExpWord(c);
468 }
469 
470 
471 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
472  ZoneScope scope(Isolate::Current(), DELETE_ON_EXIT);
473  Zone* zone = Isolate::Current()->zone();
474  ZoneList<CharacterRange>* ranges =
475  new(zone) ZoneList<CharacterRange>(2, zone);
476  CharacterRange::AddClassEscape(c, ranges, zone);
477  for (unsigned i = 0; i < (1 << 16); i++) {
478  bool in_class = false;
479  for (int j = 0; !in_class && j < ranges->length(); j++) {
480  CharacterRange& range = ranges->at(j);
481  in_class = (range.from() <= i && i <= range.to());
482  }
483  CHECK_EQ(pred(i), in_class);
484  }
485 }
486 
487 
488 TEST(CharacterClassEscapes) {
490  TestCharacterClassEscapes('.', IsRegExpNewline);
491  TestCharacterClassEscapes('d', IsDigit);
492  TestCharacterClassEscapes('D', NotDigit);
493  TestCharacterClassEscapes('s', IsWhiteSpace);
494  TestCharacterClassEscapes('S', NotWhiteSpace);
495  TestCharacterClassEscapes('w', IsRegExpWord);
496  TestCharacterClassEscapes('W', NotWord);
497 }
498 
499 
500 static RegExpNode* Compile(const char* input, bool multiline, bool is_ascii) {
502  Isolate* isolate = Isolate::Current();
503  FlatStringReader reader(isolate, CStrVector(input));
504  RegExpCompileData compile_data;
505  if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
506  &compile_data))
507  return NULL;
508  Handle<String> pattern = isolate->factory()->
509  NewStringFromUtf8(CStrVector(input));
510  Handle<String> sample_subject =
511  isolate->factory()->NewStringFromUtf8(CStrVector(""));
512  RegExpEngine::Compile(&compile_data,
513  false,
514  false,
515  multiline,
516  pattern,
517  sample_subject,
518  is_ascii,
519  isolate->zone());
520  return compile_data.node;
521 }
522 
523 
524 static void Execute(const char* input,
525  bool multiline,
526  bool is_ascii,
527  bool dot_output = false) {
528  v8::HandleScope scope;
529  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
530  RegExpNode* node = Compile(input, multiline, is_ascii);
531  USE(node);
532 #ifdef DEBUG
533  if (dot_output) {
534  RegExpEngine::DotPrint(input, node, false);
535  exit(0);
536  }
537 #endif // DEBUG
538 }
539 
540 
// Configuration type handed to ZoneSplayTree in the splay tree test: integer
// keys and values, with 0 as both the "no key" and "no value" markers.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static int NoValue() { return 0; }
  // Three-way comparison: -1 if a < b, 1 if a > b, 0 if equal.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
559 
560 
// Deterministic mixing function used to generate test data: multiplies each
// argument by a fixed constant, XORs the products, and returns the result
// converted to unsigned.
static unsigned PseudoRandom(int i, int j) {
  // Complementing twice yields the original bits, so the XOR is returned
  // unchanged.
  const int mixed = (i * 781) ^ (j * 329);
  return mixed;
}
564 
565 
566 TEST(SplayTreeSimple) {
568  static const unsigned kLimit = 1000;
569  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
570  ZoneSplayTree<TestConfig> tree(Isolate::Current()->zone());
571  bool seen[kLimit];
572  for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
573 #define CHECK_MAPS_EQUAL() do { \
574  for (unsigned k = 0; k < kLimit; k++) \
575  CHECK_EQ(seen[k], tree.Find(k, &loc)); \
576  } while (false)
577  for (int i = 0; i < 50; i++) {
578  for (int j = 0; j < 50; j++) {
579  unsigned next = PseudoRandom(i, j) % kLimit;
580  if (seen[next]) {
581  // We've already seen this one. Check the value and remove
582  // it.
584  CHECK(tree.Find(next, &loc));
585  CHECK_EQ(next, loc.key());
586  CHECK_EQ(3 * next, loc.value());
587  tree.Remove(next);
588  seen[next] = false;
590  } else {
591  // Check that it wasn't there already and then add it.
593  CHECK(!tree.Find(next, &loc));
594  CHECK(tree.Insert(next, &loc));
595  CHECK_EQ(next, loc.key());
596  loc.set_value(3 * next);
597  seen[next] = true;
599  }
600  int val = PseudoRandom(j, i) % kLimit;
601  if (seen[val]) {
603  CHECK(tree.FindGreatestLessThan(val, &loc));
604  CHECK_EQ(loc.key(), val);
605  break;
606  }
607  val = PseudoRandom(i + j, i - j) % kLimit;
608  if (seen[val]) {
610  CHECK(tree.FindLeastGreaterThan(val, &loc));
611  CHECK_EQ(loc.key(), val);
612  break;
613  }
614  }
615  }
616 }
617 
618 
619 TEST(DispatchTableConstruction) {
621  // Initialize test data.
622  static const int kLimit = 1000;
623  static const int kRangeCount = 8;
624  static const int kRangeSize = 16;
625  uc16 ranges[kRangeCount][2 * kRangeSize];
626  for (int i = 0; i < kRangeCount; i++) {
627  Vector<uc16> range(ranges[i], 2 * kRangeSize);
628  for (int j = 0; j < 2 * kRangeSize; j++) {
629  range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
630  }
631  range.Sort();
632  for (int j = 1; j < 2 * kRangeSize; j++) {
633  CHECK(range[j-1] <= range[j]);
634  }
635  }
636  // Enter test data into dispatch table.
637  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
638  DispatchTable table(Isolate::Current()->zone());
639  for (int i = 0; i < kRangeCount; i++) {
640  uc16* range = ranges[i];
641  for (int j = 0; j < 2 * kRangeSize; j += 2)
642  table.AddRange(CharacterRange(range[j], range[j + 1]), i,
643  Isolate::Current()->zone());
644  }
645  // Check that the table looks as we would expect
646  for (int p = 0; p < kLimit; p++) {
647  OutSet* outs = table.Get(p);
648  for (int j = 0; j < kRangeCount; j++) {
649  uc16* range = ranges[j];
650  bool is_on = false;
651  for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
652  is_on = (range[k] <= p && p <= range[k + 1]);
653  CHECK_EQ(is_on, outs->Get(j));
654  }
655  }
656 }
657 
658 // Test of debug-only syntax.
659 #ifdef DEBUG
660 
661 TEST(ParsePossessiveRepetition) {
662  bool old_flag_value = FLAG_regexp_possessive_quantifier;
663 
664  // Enable possessive quantifier syntax.
665  FLAG_regexp_possessive_quantifier = true;
666 
667  CHECK_PARSE_EQ("a*+", "(# 0 - p 'a')");
668  CHECK_PARSE_EQ("a++", "(# 1 - p 'a')");
669  CHECK_PARSE_EQ("a?+", "(# 0 1 p 'a')");
670  CHECK_PARSE_EQ("a{10,20}+", "(# 10 20 p 'a')");
671  CHECK_PARSE_EQ("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
672 
673  // Disable possessive quantifier syntax.
674  FLAG_regexp_possessive_quantifier = false;
675 
676  CHECK_PARSE_ERROR("a*+");
677  CHECK_PARSE_ERROR("a++");
678  CHECK_PARSE_ERROR("a?+");
679  CHECK_PARSE_ERROR("a{10,20}+");
680  CHECK_PARSE_ERROR("a{10,20}+b");
681 
682  FLAG_regexp_possessive_quantifier = old_flag_value;
683 }
684 
685 #endif
686 
687 // Tests of interpreter.
688 
689 
690 #ifndef V8_INTERPRETED_REGEXP
691 
692 #if V8_TARGET_ARCH_IA32
693 typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
694 #elif V8_TARGET_ARCH_X64
695 typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
696 #elif V8_TARGET_ARCH_ARM
697 typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
698 #elif V8_TARGET_ARCH_MIPS
699 typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
700 #endif
701 
703  public:
705  : env_(), scope_(), zone_(Isolate::Current(), DELETE_ON_EXIT) {
706  env_ = v8::Context::New();
707  env_->Enter();
708  }
710  env_->Exit();
711  env_.Dispose();
712  }
713  private:
715  v8::HandleScope scope_;
716  v8::internal::ZoneScope zone_;
717 };
718 
719 
720 static ArchRegExpMacroAssembler::Result Execute(Code* code,
721  String* input,
722  int start_offset,
723  const byte* input_start,
724  const byte* input_end,
725  int* captures) {
727  code,
728  input,
729  start_offset,
730  input_start,
731  input_end,
732  captures,
733  0,
734  Isolate::Current());
735 }
736 
737 
738 TEST(MacroAssemblerNativeSuccess) {
740  ContextInitializer initializer;
741  Factory* factory = Isolate::Current()->factory();
742 
743  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
744  Isolate::Current()->zone());
745 
746  m.Succeed();
747 
748  Handle<String> source = factory->NewStringFromAscii(CStrVector(""));
749  Handle<Object> code_object = m.GetCode(source);
750  Handle<Code> code = Handle<Code>::cast(code_object);
751 
752  int captures[4] = {42, 37, 87, 117};
753  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
755  const byte* start_adr =
756  reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
757 
759  Execute(*code,
760  *input,
761  0,
762  start_adr,
763  start_adr + seq_input->length(),
764  captures);
765 
767  CHECK_EQ(-1, captures[0]);
768  CHECK_EQ(-1, captures[1]);
769  CHECK_EQ(-1, captures[2]);
770  CHECK_EQ(-1, captures[3]);
771 }
772 
773 
774 TEST(MacroAssemblerNativeSimple) {
776  ContextInitializer initializer;
777  Factory* factory = Isolate::Current()->factory();
778 
779  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
780  Isolate::Current()->zone());
781 
782  uc16 foo_chars[3] = {'f', 'o', 'o'};
783  Vector<const uc16> foo(foo_chars, 3);
784 
785  Label fail;
786  m.CheckCharacters(foo, 0, &fail, true);
787  m.WriteCurrentPositionToRegister(0, 0);
788  m.AdvanceCurrentPosition(3);
789  m.WriteCurrentPositionToRegister(1, 0);
790  m.Succeed();
791  m.Bind(&fail);
792  m.Fail();
793 
794  Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo"));
795  Handle<Object> code_object = m.GetCode(source);
796  Handle<Code> code = Handle<Code>::cast(code_object);
797 
798  int captures[4] = {42, 37, 87, 117};
799  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
801  Address start_adr = seq_input->GetCharsAddress();
802 
804  Execute(*code,
805  *input,
806  0,
807  start_adr,
808  start_adr + input->length(),
809  captures);
810 
812  CHECK_EQ(0, captures[0]);
813  CHECK_EQ(3, captures[1]);
814  CHECK_EQ(-1, captures[2]);
815  CHECK_EQ(-1, captures[3]);
816 
817  input = factory->NewStringFromAscii(CStrVector("barbarbar"));
818  seq_input = Handle<SeqAsciiString>::cast(input);
819  start_adr = seq_input->GetCharsAddress();
820 
821  result = Execute(*code,
822  *input,
823  0,
824  start_adr,
825  start_adr + input->length(),
826  captures);
827 
829 }
830 
831 
832 TEST(MacroAssemblerNativeSimpleUC16) {
834  ContextInitializer initializer;
835  Factory* factory = Isolate::Current()->factory();
836 
837  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4,
838  Isolate::Current()->zone());
839 
840  uc16 foo_chars[3] = {'f', 'o', 'o'};
841  Vector<const uc16> foo(foo_chars, 3);
842 
843  Label fail;
844  m.CheckCharacters(foo, 0, &fail, true);
845  m.WriteCurrentPositionToRegister(0, 0);
846  m.AdvanceCurrentPosition(3);
847  m.WriteCurrentPositionToRegister(1, 0);
848  m.Succeed();
849  m.Bind(&fail);
850  m.Fail();
851 
852  Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo"));
853  Handle<Object> code_object = m.GetCode(source);
854  Handle<Code> code = Handle<Code>::cast(code_object);
855 
856  int captures[4] = {42, 37, 87, 117};
857  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
858  static_cast<uc16>('\xa0')};
859  Handle<String> input =
860  factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
862  Address start_adr = seq_input->GetCharsAddress();
863 
865  Execute(*code,
866  *input,
867  0,
868  start_adr,
869  start_adr + input->length(),
870  captures);
871 
873  CHECK_EQ(0, captures[0]);
874  CHECK_EQ(3, captures[1]);
875  CHECK_EQ(-1, captures[2]);
876  CHECK_EQ(-1, captures[3]);
877 
878  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
879  static_cast<uc16>('\xa0')};
880  input = factory->NewStringFromTwoByte(Vector<const uc16>(input_data2, 9));
881  seq_input = Handle<SeqTwoByteString>::cast(input);
882  start_adr = seq_input->GetCharsAddress();
883 
884  result = Execute(*code,
885  *input,
886  0,
887  start_adr,
888  start_adr + input->length() * 2,
889  captures);
890 
892 }
893 
894 
895 TEST(MacroAssemblerNativeBacktrack) {
897  ContextInitializer initializer;
898  Factory* factory = Isolate::Current()->factory();
899 
900  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0,
901  Isolate::Current()->zone());
902 
903  Label fail;
904  Label backtrack;
905  m.LoadCurrentCharacter(10, &fail);
906  m.Succeed();
907  m.Bind(&fail);
908  m.PushBacktrack(&backtrack);
909  m.LoadCurrentCharacter(10, NULL);
910  m.Succeed();
911  m.Bind(&backtrack);
912  m.Fail();
913 
914  Handle<String> source = factory->NewStringFromAscii(CStrVector(".........."));
915  Handle<Object> code_object = m.GetCode(source);
916  Handle<Code> code = Handle<Code>::cast(code_object);
917 
918  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
920  Address start_adr = seq_input->GetCharsAddress();
921 
923  Execute(*code,
924  *input,
925  0,
926  start_adr,
927  start_adr + input->length(),
928  NULL);
929 
931 }
932 
933 
934 TEST(MacroAssemblerNativeBackReferenceASCII) {
936  ContextInitializer initializer;
937  Factory* factory = Isolate::Current()->factory();
938 
939  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
940  Isolate::Current()->zone());
941 
942  m.WriteCurrentPositionToRegister(0, 0);
943  m.AdvanceCurrentPosition(2);
944  m.WriteCurrentPositionToRegister(1, 0);
945  Label nomatch;
946  m.CheckNotBackReference(0, &nomatch);
947  m.Fail();
948  m.Bind(&nomatch);
949  m.AdvanceCurrentPosition(2);
950  Label missing_match;
951  m.CheckNotBackReference(0, &missing_match);
952  m.WriteCurrentPositionToRegister(2, 0);
953  m.Succeed();
954  m.Bind(&missing_match);
955  m.Fail();
956 
957  Handle<String> source = factory->NewStringFromAscii(CStrVector("^(..)..\1"));
958  Handle<Object> code_object = m.GetCode(source);
959  Handle<Code> code = Handle<Code>::cast(code_object);
960 
961  Handle<String> input = factory->NewStringFromAscii(CStrVector("fooofo"));
963  Address start_adr = seq_input->GetCharsAddress();
964 
965  int output[4];
967  Execute(*code,
968  *input,
969  0,
970  start_adr,
971  start_adr + input->length(),
972  output);
973 
975  CHECK_EQ(0, output[0]);
976  CHECK_EQ(2, output[1]);
977  CHECK_EQ(6, output[2]);
978  CHECK_EQ(-1, output[3]);
979 }
980 
981 
// UC16 variant of the back-reference test: the same assembled program is run
// over a six-element two-byte input and the same output registers are checked.
// NOTE(review): listing lines 983, 1012, 1016 and 1024 are not present in this
// rendering, so the declaration of `seq_input` and the check of the Execute
// result are not visible here.
982 TEST(MacroAssemblerNativeBackReferenceUC16) {
984  ContextInitializer initializer;
985  Factory* factory = Isolate::Current()->factory();
986 
987  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4,
988  Isolate::Current()->zone());
989 
990  m.WriteCurrentPositionToRegister(0, 0);
991  m.AdvanceCurrentPosition(2);
992  m.WriteCurrentPositionToRegister(1, 0);
993  Label nomatch;
// The first check is expected to branch to `nomatch`; falling through fails.
994  m.CheckNotBackReference(0, &nomatch);
995  m.Fail();
996  m.Bind(&nomatch);
997  m.AdvanceCurrentPosition(2);
998  Label missing_match;
// The second check is expected to fall through; branching fails the match.
999  m.CheckNotBackReference(0, &missing_match);
1000  m.WriteCurrentPositionToRegister(2, 0);
1001  m.Succeed();
1002  m.Bind(&missing_match);
1003  m.Fail();
1004 
// NOTE(review): "\1" inside a C++ string literal is an octal escape (the
// character 0x01), not a backslash followed by '1' -- confirm this is intended.
1005  Handle<String> source = factory->NewStringFromAscii(CStrVector("^(..)..\1"));
1006  Handle<Object> code_object = m.GetCode(source);
1007  Handle<Code> code = Handle<Code>::cast(code_object);
1008 
// Input includes 0x2028, a value that does not fit in a single byte.
1009  const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
1010  Handle<String> input =
1011  factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
1013  Address start_adr = seq_input->GetCharsAddress();
1014 
1015  int output[4];
// The end address is start + length * 2: Address is a byte pointer and each
// uc16 element occupies two bytes.
1017  Execute(*code,
1018  *input,
1019  0,
1020  start_adr,
1021  start_adr + input->length() * 2,
1022  output);
1023 
1025  CHECK_EQ(0, output[0]);
1026  CHECK_EQ(2, output[1]);
1027  CHECK_EQ(6, output[2]);
1028  CHECK_EQ(-1, output[3]);
1029 }
1030 
1031 
1032 
// Assembles native ASCII code around CheckNotAtStart and executes it twice on
// "foobar": once from offset 0 and once from offset 3.
// NOTE(review): the test name spells "native" in lower case, unlike the
// neighbouring MacroAssemblerNative* tests; renaming would change the test id.
// NOTE(review): listing lines 1034, 1067, 1070, 1078 and 1087 are not present
// in this rendering, so the declarations of `seq_input` and `result` and the
// checks on the Execute results are not visible here.
1033 TEST(MacroAssemblernativeAtStart) {
1035  ContextInitializer initializer;
1036  Factory* factory = Isolate::Current()->factory();
1037 
1038  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0,
1039  Isolate::Current()->zone());
1040 
1041  Label not_at_start, newline, fail;
1042  m.CheckNotAtStart(&not_at_start);
1043  // Check that prevchar = '\n' and current = 'f'.
1044  m.CheckCharacter('\n', &newline);
// `fail` is bound here and shared by both paths below.
1045  m.Bind(&fail);
1046  m.Fail();
1047  m.Bind(&newline);
1048  m.LoadCurrentCharacter(0, &fail);
1049  m.CheckNotCharacter('f', &fail);
1050  m.Succeed();
1051 
1052  m.Bind(&not_at_start);
1053  // Check that prevchar = 'o' and current = 'b'.
1054  Label prevo;
1055  m.CheckCharacter('o', &prevo);
1056  m.Fail();
1057  m.Bind(&prevo);
1058  m.LoadCurrentCharacter(0, &fail);
1059  m.CheckNotCharacter('b', &fail);
1060  m.Succeed();
1061 
1062  Handle<String> source = factory->NewStringFromAscii(CStrVector("(^f|ob)"));
1063  Handle<Object> code_object = m.GetCode(source);
1064  Handle<Code> code = Handle<Code>::cast(code_object);
1065 
1066  Handle<String> input = factory->NewStringFromAscii(CStrVector("foobar"));
1068  Address start_adr = seq_input->GetCharsAddress();
1069 
// First run: start offset 0, input start at the first character.
1071  Execute(*code,
1072  *input,
1073  0,
1074  start_adr,
1075  start_adr + input->length(),
1076  NULL);
1077 
1079 
// Second run: start offset 3, input start advanced by three bytes.
1080  result = Execute(*code,
1081  *input,
1082  3,
1083  start_adr + 3,
1084  start_adr + input->length(),
1085  NULL);
1086 
1088 }
1089 
1090 
// Assembles native ASCII code around CheckNotBackReferenceIgnoreCase, runs it
// on "aBcAbCABCxYzab" and checks the four output registers (0, 12, 0, 3).
// NOTE(review): listing lines 1092, 1128, 1132 and 1140 are not present in this
// rendering, so the declaration of `seq_input` and the check of the Execute
// result are not visible here.
1091 TEST(MacroAssemblerNativeBackRefNoCase) {
1093  ContextInitializer initializer;
1094  Factory* factory = Isolate::Current()->factory();
1095 
1096  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
1097  Isolate::Current()->zone());
1098 
1099  Label fail, succ;
1100 
// Registers 2 and 3 are written at positions 0 and 3; register 2 is the start
// register passed to every back-reference check below.
1101  m.WriteCurrentPositionToRegister(0, 0);
1102  m.WriteCurrentPositionToRegister(2, 0);
1103  m.AdvanceCurrentPosition(3);
1104  m.WriteCurrentPositionToRegister(3, 0);
1105  m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
1106  m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
// The third check is expected to branch; falling through reaches Fail().
1107  Label expected_fail;
1108  m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
1109  m.Bind(&fail);
1110  m.Fail();
1111 
1112  m.Bind(&expected_fail);
1113  m.AdvanceCurrentPosition(3); // Skip "xYz"
// The final check is expected to branch to `succ`; falling through fails.
1114  m.CheckNotBackReferenceIgnoreCase(2, &succ);
1115  m.Fail();
1116 
1117  m.Bind(&succ);
1118  m.WriteCurrentPositionToRegister(1, 0);
1119  m.Succeed();
1120 
// NOTE(review): each "\1" in this literal is an octal escape (character 0x01),
// not a backslash followed by '1' -- confirm this is intended.
1121  Handle<String> source =
1122  factory->NewStringFromAscii(CStrVector("^(abc)\1\1(?!\1)...(?!\1)"));
1123  Handle<Object> code_object = m.GetCode(source);
1124  Handle<Code> code = Handle<Code>::cast(code_object);
1125 
1126  Handle<String> input =
1127  factory->NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
1129  Address start_adr = seq_input->GetCharsAddress();
1130 
1131  int output[4];
1133  Execute(*code,
1134  *input,
1135  0,
1136  start_adr,
1137  start_adr + input->length(),
1138  output);
1139 
1141  CHECK_EQ(0, output[0]);
1142  CHECK_EQ(12, output[1]);
1143  CHECK_EQ(0, output[2]);
1144  CHECK_EQ(3, output[3]);
1145 }
1146 
1147 
1148 
// Assembles native ASCII code that exercises register, backtrack-stack and
// loop operations, runs it on a 15-character input and checks six output
// registers (0, 3, 6, 9, 9, -1).
// NOTE(review): listing lines 1150, 1229, 1233 and 1241 are not present in this
// rendering, so the declaration of `seq_input` and the check of the Execute
// result are not visible here.
1149 TEST(MacroAssemblerNativeRegisters) {
1151  ContextInitializer initializer;
1152  Factory* factory = Isolate::Current()->factory();
1153 
1154  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 6,
1155  Isolate::Current()->zone());
1156 
// NOTE(review): `foo_chars` and `foo` are not referenced anywhere in the
// visible part of this test.
1157  uc16 foo_chars[3] = {'f', 'o', 'o'};
1158  Vector<const uc16> foo(foo_chars, 3);
1159 
// out1..out6 are indices 0..5 (the ones checked in `output` below); `sp` and
// `loop_cnt` are indices 6 and 7 and are used as scratch registers.
1160  enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1161  Label fail;
1162  Label backtrack;
1163  m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
1164  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1165  m.PushBacktrack(&backtrack);
1166  m.WriteStackPointerToRegister(sp);
1167  // Fill stack and registers
1168  m.AdvanceCurrentPosition(2);
1169  m.WriteCurrentPositionToRegister(out1, 0);
1170  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1171  m.PushBacktrack(&fail);
1172  // Drop backtrack stack frames.
1173  m.ReadStackPointerFromRegister(sp);
1174  // And take the first backtrack (to &backtrack)
1175  m.Backtrack();
1176 
// These three operations are emitted directly after an unconditional
// Backtrack() and before the next bound label.
1177  m.PushCurrentPosition();
1178  m.AdvanceCurrentPosition(2);
1179  m.PopCurrentPosition();
1180 
1181  m.Bind(&backtrack);
1182  m.PopRegister(out1);
1183  m.ReadCurrentPositionFromRegister(out1);
1184  m.AdvanceCurrentPosition(3);
1185  m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
1186 
// Counting-up loop: advances one position per iteration while loop_cnt < 3.
1187  Label loop;
1188  m.SetRegister(loop_cnt, 0); // loop counter
1189  m.Bind(&loop);
1190  m.AdvanceRegister(loop_cnt, 1);
1191  m.AdvanceCurrentPosition(1);
1192  m.IfRegisterLT(loop_cnt, 3, &loop);
1193  m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
1194 
// Counting-down loop: advances one position per iteration while loop_cnt >= 0.
1195  Label loop2;
1196  m.SetRegister(loop_cnt, 2); // loop counter
1197  m.Bind(&loop2);
1198  m.AdvanceRegister(loop_cnt, -1);
1199  m.AdvanceCurrentPosition(1);
1200  m.IfRegisterGE(loop_cnt, 0, &loop2);
1201  m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
1202 
// Loop driven by CheckGreedyLoop, starting from the position saved in out3
// with out4 pushed twice on the stack beforehand.
1203  Label loop3;
1204  Label exit_loop3;
1205  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1206  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1207  m.ReadCurrentPositionFromRegister(out3);
1208  m.Bind(&loop3);
1209  m.AdvanceCurrentPosition(1);
1210  m.CheckGreedyLoop(&exit_loop3);
1211  m.GoTo(&loop3);
1212  m.Bind(&exit_loop3);
1213  m.PopCurrentPosition();
1214  m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1]
1215 
1216  m.Succeed();
1217 
1218  m.Bind(&fail);
1219  m.Fail();
1220 
1221  Handle<String> source =
1222  factory->NewStringFromAscii(CStrVector("<loop test>"));
1223  Handle<Object> code_object = m.GetCode(source);
1224  Handle<Code> code = Handle<Code>::cast(code_object);
1225 
1226  // String long enough for test (content doesn't matter).
1227  Handle<String> input =
1228  factory->NewStringFromAscii(CStrVector("foofoofoofoofoo"));
1230  Address start_adr = seq_input->GetCharsAddress();
1231 
1232  int output[6];
1234  Execute(*code,
1235  *input,
1236  0,
1237  start_adr,
1238  start_adr + input->length(),
1239  output);
1240 
// out6 (index 5) is never written by the assembled code; -1 is expected.
1242  CHECK_EQ(0, output[0]);
1243  CHECK_EQ(3, output[1]);
1244  CHECK_EQ(6, output[2]);
1245  CHECK_EQ(9, output[3]);
1246  CHECK_EQ(9, output[4]);
1247  CHECK_EQ(-1, output[5]);
1248 }
1249 
1250 
// Assembles native ASCII code consisting of an unconditional loop that pushes
// a backtrack entry on every iteration, executes it, and checks that the
// isolate ends up with a pending exception (which is then cleared).
// NOTE(review): listing lines 1252, 1273, 1276 and 1284 are not present in this
// rendering, so the declaration of `seq_input` and the check of the Execute
// result are not visible here.
1251 TEST(MacroAssemblerStackOverflow) {
1253  ContextInitializer initializer;
1254  Isolate* isolate = Isolate::Current();
1255  Factory* factory = isolate->factory();
1256 
1257  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0,
1258  Isolate::Current()->zone());
1259 
// The loop has no exit: every pass pushes another backtrack entry.
1260  Label loop;
1261  m.Bind(&loop);
1262  m.PushBacktrack(&loop);
1263  m.GoTo(&loop);
1264 
1265  Handle<String> source =
1266  factory->NewStringFromAscii(CStrVector("<stack overflow test>"));
1267  Handle<Object> code_object = m.GetCode(source);
1268  Handle<Code> code = Handle<Code>::cast(code_object);
1269 
1270  // String long enough for test (content doesn't matter).
1271  Handle<String> input =
1272  factory->NewStringFromAscii(CStrVector("dummy"));
1274  Address start_adr = seq_input->GetCharsAddress();
1275 
1277  Execute(*code,
1278  *input,
1279  0,
1280  start_adr,
1281  start_adr + input->length(),
1282  NULL);
1283 
// Clear the exception so it is not left pending on the isolate after the test.
1285  CHECK(isolate->has_pending_exception());
1286  isolate->clear_pending_exception();
1287 }
1288 
1289 
// Assembles native ASCII code that touches a very high register index (8000)
// alongside registers 0 and 1, executes it and checks the two captures (0, 42).
// NOTE(review): listing lines 1291, 1320, 1324 and 1332 are not present in this
// rendering, so the declaration of `seq_input` and the check of the Execute
// result are not visible here.
1290 TEST(MacroAssemblerNativeLotsOfRegisters) {
1292  ContextInitializer initializer;
1293  Isolate* isolate = Isolate::Current();
1294  Factory* factory = isolate->factory();
1295 
1296  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 2,
1297  Isolate::Current()->zone());
1298 
1299  // At least 2048, to ensure the allocated space for registers
1300  // spans one full page.
1301  const int large_number = 8000;
1302  m.WriteCurrentPositionToRegister(large_number, 42);
1303  m.WriteCurrentPositionToRegister(0, 0);
1304  m.WriteCurrentPositionToRegister(1, 1);
1305  Label done;
// `done` is bound immediately after the check, so both outcomes continue here.
1306  m.CheckNotBackReference(0, &done); // Performs a system-stack push.
1307  m.Bind(&done);
// Copies the large register's value into register 1 via the stack; this is
// the 42 checked in captures[1] below.
1308  m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1309  m.PopRegister(1);
1310  m.Succeed();
1311 
1312  Handle<String> source =
1313  factory->NewStringFromAscii(CStrVector("<huge register space test>"));
1314  Handle<Object> code_object = m.GetCode(source);
1315  Handle<Code> code = Handle<Code>::cast(code_object);
1316 
1317  // String long enough for test (content doesn't matter).
1318  Handle<String> input =
1319  factory->NewStringFromAscii(CStrVector("sample text"));
1321  Address start_adr = seq_input->GetCharsAddress();
1322 
1323  int captures[2];
1325  Execute(*code,
1326  *input,
1327  0,
1328  start_adr,
1329  start_adr + input->length(),
1330  captures);
1331 
1333  CHECK_EQ(0, captures[0]);
1334  CHECK_EQ(42, captures[1]);
1335 
1336  isolate->clear_pending_exception();
1337 }
1338 
1339 #else // V8_INTERPRETED_REGEXP
1340 
// Body of the test compiled in the V8_INTERPRETED_REGEXP branch: assembles
// bytecode with RegExpMacroAssemblerIrregexp and runs it through
// IrregexpInterpreter::Match on two two-byte inputs.
// NOTE(review): listing lines 1341 and 1342 (which should hold the test's
// opening line) are not present in this rendering.
1343  byte codes[1024];
1344  RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024));
1345  // ^f(o)o.
1346  Label fail, fail2, start;
1347  uc16 foo_chars[3];
1348  foo_chars[0] = 'f';
1349  foo_chars[1] = 'o';
1350  foo_chars[2] = 'o';
1351  Vector<const uc16> foo(foo_chars, 3);
// Register 4 is set to 42, pushed, then advanced by 42 (to 84).
1352  m.SetRegister(4, 42);
1353  m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1354  m.AdvanceRegister(4, 42);
1355  m.GoTo(&start);
1356  m.Fail();
1357  m.Bind(&start);
1358  m.PushBacktrack(&fail2);
1359  m.CheckCharacters(foo, 0, &fail, true);
1360  m.WriteCurrentPositionToRegister(0, 0);
1361  m.PushCurrentPosition();
1362  m.AdvanceCurrentPosition(3);
1363  m.WriteCurrentPositionToRegister(1, 0);
1364  m.PopCurrentPosition();
1365  m.AdvanceCurrentPosition(1);
1366  m.WriteCurrentPositionToRegister(2, 0);
1367  m.AdvanceCurrentPosition(1);
1368  m.WriteCurrentPositionToRegister(3, 0);
1369  m.Succeed();
1370 
1371  m.Bind(&fail);
1372  m.Backtrack();
1373  m.Succeed();
1374 
// Failure path: pops into register 0 before failing; the value pushed earlier
// was register 4 (42), which matches the captures[0] check at the end.
1375  m.Bind(&fail2);
1376  m.PopRegister(0);
1377  m.Fail();
1378 
1379  Isolate* isolate = Isolate::Current();
1380  Factory* factory = isolate->factory();
1381  HandleScope scope(isolate);
1382 
1383  Handle<String> source = factory->NewStringFromAscii(CStrVector("^f(o)o"));
1384  Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1385  int captures[5];
1386 
// First subject: "foobar" -- expected to match.
1387  const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
1388  Handle<String> f1_16 =
1389  factory->NewStringFromTwoByte(Vector<const uc16>(str1, 6));
1390 
1391  CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
1392  CHECK_EQ(0, captures[0]);
1393  CHECK_EQ(3, captures[1]);
1394  CHECK_EQ(1, captures[2]);
1395  CHECK_EQ(2, captures[3]);
1396  CHECK_EQ(84, captures[4]);
1397 
// Second subject: "barfoo" -- expected not to match at position 0.
1398  const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
1399  Handle<String> f2_16 =
1400  factory->NewStringFromTwoByte(Vector<const uc16>(str2, 6));
1401 
1402  CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
1403  CHECK_EQ(42, captures[0]);
1404 }
1405 
1406 #endif // V8_INTERPRETED_REGEXP
1407 
1408 
// Checks DispatchTableConstructor::AddInverse: for pseudo-random range lists,
// choice index 0 must be set in the table exactly for the values that are NOT
// covered by any range; then checks one fixed range near the top of the
// 16-bit value space.
// NOTE(review): listing line 1410 is not present in this rendering.
1409 TEST(AddInverseToTable) {
1411  static const int kLimit = 1000;
1412  static const int kRangeCount = 16;
1413  for (int t = 0; t < 10; t++) {
1414  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
1415  Zone* zone = Isolate::Current()->zone();
1416  ZoneList<CharacterRange>* ranges =
1417  new(zone)
1418  ZoneList<CharacterRange>(kRangeCount, zone);
// Build kRangeCount ranges with starts below kLimit and ends clamped to kLimit.
1419  for (int i = 0; i < kRangeCount; i++) {
1420  int from = PseudoRandom(t + 87, i + 25) % kLimit;
1421  int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1422  if (to > kLimit) to = kLimit;
1423  ranges->Add(CharacterRange(from, to), zone);
1424  }
1425  DispatchTable table(zone);
1426  DispatchTableConstructor cons(&table, false, Isolate::Current()->zone());
1427  cons.set_choice_index(0);
1428  cons.AddInverse(ranges);
// Brute-force membership test against every range, compared with the table.
1429  for (int i = 0; i < kLimit; i++) {
1430  bool is_on = false;
1431  for (int j = 0; !is_on && j < kRangeCount; j++)
1432  is_on = ranges->at(j).Contains(i);
1433  OutSet* set = table.Get(i);
1434  CHECK_EQ(is_on, set->Get(0) == false);
1435  }
1436  }
// Fixed case: inverting [0xFFF0, 0xFFFE] must leave 0xFFFE unset and set 0xFFFF.
1437  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
1438  Zone* zone = Isolate::Current()->zone();
1439  ZoneList<CharacterRange>* ranges =
1440  new(zone) ZoneList<CharacterRange>(1, zone);
1441  ranges->Add(CharacterRange(0xFFF0, 0xFFFE), zone);
1442  DispatchTable table(zone);
1443  DispatchTableConstructor cons(&table, false, Isolate::Current()->zone());
1444  cons.set_choice_index(0);
1445  cons.AddInverse(ranges);
1446  CHECK(!table.Get(0xFFFE)->Get(0));
1447  CHECK(table.Get(0xFFFF)->Get(0));
1448 }
1449 
1450 
// Returns the result of unibrow::Ecma262Canonicalize::Convert for `c`, or `c`
// itself when Convert reports zero output characters. Any non-zero count other
// than 1 fails the CHECK_EQ.
// NOTE(review): listing line 1452, which presumably declares `canon`, is not
// present in this rendering.
1451 static uc32 canonicalize(uc32 c) {
1453  int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1454  if (count == 0) {
1455  return c;
1456  } else {
1457  CHECK_EQ(1, count);
1458  return canon[0];
1459  }
1460 }
1461 
1462 
// Checks canonicalization of Latin letters and compares canonicalize() with an
// upper-casing mapping over the 16-bit range.
// NOTE(review): listing lines 1464, 1468, 1476 and 1479 are not present in this
// rendering, so the declarations of `un_canonicalize`, `uncanon`, `to_upper`
// and `upper` are not visible here.
1463 TEST(LatinCanonicalize) {
// Each lower/upper ASCII pair must canonicalize to the same value, and
// un-canonicalizing the lower-case letter must yield {upper, lower}.
1465  for (char lower = 'a'; lower <= 'z'; lower++) {
1466  char upper = lower + ('A' - 'a');
1467  CHECK_EQ(canonicalize(lower), canonicalize(upper));
1469  int length = un_canonicalize.get(lower, '\0', uncanon);
1470  CHECK_EQ(2, length);
1471  CHECK_EQ(upper, uncanon[0]);
1472  CHECK_EQ(lower, uncanon[1]);
1473  }
// No value >= 128 may canonicalize to a value below 128.
1474  for (uc32 c = 128; c < (1 << 21); c++)
1475  CHECK_GE(canonicalize(c), 128);
1477  // Canonicalization is only defined for the Basic Multilingual Plane.
1478  for (uc32 c = 0; c < (1 << 16); c++) {
1480  int length = to_upper.get(c, '\0', upper);
// A zero length is treated as "maps to itself".
1481  if (length == 0) {
1482  length = 1;
1483  upper[0] = c;
1484  }
1485  uc32 u = upper[0];
// Multi-character results, and mappings from >= 128 down to < 128, are
// expected to leave the character unchanged.
1486  if (length > 1 || (c >= 128 && u < 128))
1487  u = c;
1488  CHECK_EQ(u, canonicalize(c));
1489  }
1490 }
1491 
1492 
// Returns the result of unibrow::CanonicalizationRange::Convert for `c`, or `c`
// itself when Convert reports zero output characters. Any non-zero count other
// than 1 fails the CHECK_EQ.
// NOTE(review): listing line 1494, which presumably declares `canon`, is not
// present in this rendering.
1493 static uc32 CanonRangeEnd(uc32 c) {
1495  int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1496  if (count == 0) {
1497  return c;
1498  } else {
1499  CHECK_EQ(1, count);
1500  return canon[0];
1501  }
1502 }
1503 
1504 
// NOTE(review): listing lines 1509, 1515 and 1518 are not present in this
// rendering, so the declarations of `un_canonicalize`, `first` and `succ` are
// not visible here.
1505 TEST(RangeCanonicalization) {
1506  // Check that we arrive at the same result when using the basic
1507  // range canonicalization primitives as when using immediate
1508  // canonicalization.
1510  int block_start = 0;
// Walk the 16-bit range block by block; CanonRangeEnd gives each block's end.
1511  while (block_start <= 0xFFFF) {
1512  uc32 block_end = CanonRangeEnd(block_start);
1513  unsigned block_length = block_end - block_start + 1;
1514  if (block_length > 1) {
1516  int first_length = un_canonicalize.get(block_start, '\0', first);
// Every later member of the block must have the same number of results as the
// first member, each offset by the member's distance from the block start.
1517  for (unsigned i = 1; i < block_length; i++) {
1519  int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1520  CHECK_EQ(first_length, succ_length);
1521  for (int j = 0; j < succ_length; j++) {
1522  int calc = first[j] + i;
1523  int found = succ[j];
1524  CHECK_EQ(calc, found);
1525  }
1526  }
1527  }
1528  block_start = block_start + block_length;
1529  }
1530 }
1531 
1532 
// For every 16-bit value, each member of its un-canonicalized set must itself
// un-canonicalize to the same set (same length, same elements in order).
// NOTE(review): listing lines 1534, 1535 and 1539 are not present in this
// rendering, so the declarations of `un_canonicalize`, `chars` and `chars2`
// are not visible here.
1533 TEST(UncanonicalizeEquivalence) {
1536  for (int i = 0; i < (1 << 16); i++) {
1537  int length = un_canonicalize.get(i, '\0', chars);
1538  for (int j = 0; j < length; j++) {
1540  int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1541  CHECK_EQ(length, length2);
1542  for (int k = 0; k < length; k++)
1543  CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1544  }
1545  }
1546 }
1547 
1548 
// Calls AddCaseEquivalents on `input` into a fresh zone-allocated list and
// checks that the list has exactly the ranges in `expected`, in order
// (compared by from() and to()).
1549 static void TestRangeCaseIndependence(CharacterRange input,
1550  Vector<CharacterRange> expected) {
1551  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
1552  Zone* zone = Isolate::Current()->zone();
1553  int count = expected.length();
1554  ZoneList<CharacterRange>* list =
1555  new(zone) ZoneList<CharacterRange>(count, zone);
// Second argument is the `is_ascii` flag of AddCaseEquivalents.
1556  input.AddCaseEquivalents(list, false, zone);
1557  CHECK_EQ(count, list->length());
1558  for (int i = 0; i < list->length(); i++) {
1559  CHECK_EQ(expected[i].from(), list->at(i).from());
1560  CHECK_EQ(expected[i].to(), list->at(i).to());
1561  }
1562 }
1563 
1564 
// Convenience wrapper: checks that `input` has exactly one case-equivalent
// range, `expected`, by delegating to TestRangeCaseIndependence.
// NOTE(review): listing line 1567, which presumably declares `vector`, is not
// present in this rendering.
1565 static void TestSimpleRangeCaseIndependence(CharacterRange input,
1566  CharacterRange expected) {
1568  vector[0] = expected;
1569  TestRangeCaseIndependence(input, vector);
1570 }
1571 
1572 
// Table of (input range, expected case-equivalent range) pairs checked via
// TestSimpleRangeCaseIndependence.
// NOTE(review): listing lines 1574, 1576 and 1578 are not present in this
// rendering, so the second arguments of the first two calls are not visible.
1573 TEST(CharacterRangeCaseIndependence) {
1575  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
1577  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
1579  TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
1580  CharacterRange('A', 'Z'));
1581  TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
1582  CharacterRange('C', 'F'));
1583  TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
1584  CharacterRange('A', 'B'));
1585  TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
1586  CharacterRange('Y', 'Z'));
// Inputs extended one character beyond the letters on each side still expect
// only the letter range back.
1587  TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
1588  CharacterRange('A', 'Z'));
1589  TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
1590  CharacterRange('a', 'z'));
1591  TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
1592  CharacterRange('c', 'f'));
1593  TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
1594  CharacterRange('a', 'z'));
1595  // Here we need to add [l-z] to complete the case independence of
1596  // [A-Za-z] but we expect [a-z] to be added since we always add a
1597  // whole block at a time.
1598  TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
1599  CharacterRange('a', 'z'));
1600 }
1601 
1602 
// Returns true if `c` lies within any range of `ranges` (both ends inclusive).
// A NULL list is treated as empty and yields false.
1603 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1604  if (ranges == NULL)
1605  return false;
1606  for (int i = 0; i < ranges->length(); i++) {
1607  CharacterRange range = ranges->at(i);
1608  if (range.from() <= c && c <= range.to())
1609  return true;
1610  }
1611  return false;
1612 }
1613 
1614 
// Splits the "everything" character class against `overlay` with
// CharacterRange::Split and verifies, for every 16-bit value, that it lands in
// exactly the expected one of `included` / `excluded`.
// NOTE(review): listing lines 1616 and 1622 are not present in this rendering,
// so the definition of `overlay` is not visible here.
1615 TEST(CharClassDifference) {
1617  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
1618  Zone* zone = Isolate::Current()->zone();
1619  ZoneList<CharacterRange>* base =
1620  new(zone) ZoneList<CharacterRange>(1, zone);
1621  base->Add(CharacterRange::Everything(), zone);
1623  ZoneList<CharacterRange>* included = NULL;
1624  ZoneList<CharacterRange>* excluded = NULL;
1625  CharacterRange::Split(base, overlay, &included, &excluded,
1626  Isolate::Current()->zone());
1627  for (int i = 0; i < (1 << 16); i++) {
1628  bool in_base = InClass(i, base);
1629  if (in_base) {
1630  bool in_overlay = false;
// `overlay` is read as consecutive pairs: start inclusive, end exclusive.
1631  for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
1632  if (overlay[j] <= i && i < overlay[j+1])
1633  in_overlay = true;
1634  }
1635  CHECK_EQ(in_overlay, InClass(i, included));
1636  CHECK_EQ(!in_overlay, InClass(i, excluded));
1637  } else {
1638  CHECK(!InClass(i, included));
1639  CHECK(!InClass(i, excluded));
1640  }
1641  }
1642 }
1643 
1644 
// Checks CharacterSet::Canonicalize on four hand-built range lists: already
// sorted, unsorted, unsorted with singletons, and adjacent ranges that must be
// merged into one.
// The expectations use CHECK_EQ, like every other test in this file, so they
// are also enforced in builds where the ASSERT family of macros is disabled.
// NOTE(review): listing line 1646 is not present in this rendering.
1645 TEST(CanonicalizeCharacterSets) {
1647  ZoneScope scope(Isolate::Current(), DELETE_ON_EXIT);
1648  Zone* zone = Isolate::Current()->zone();
1649  ZoneList<CharacterRange>* list =
1650  new(zone) ZoneList<CharacterRange>(4, zone);
// `set` wraps `list`, so Canonicalize() is observed through `list` below.
1651  CharacterSet set(list);
1652 
// Case 1: already sorted and disjoint -- must be left unchanged.
1653  list->Add(CharacterRange(10, 20), zone);
1654  list->Add(CharacterRange(30, 40), zone);
1655  list->Add(CharacterRange(50, 60), zone);
1656  set.Canonicalize();
1657  CHECK_EQ(3, list->length());
1658  CHECK_EQ(10, list->at(0).from());
1659  CHECK_EQ(20, list->at(0).to());
1660  CHECK_EQ(30, list->at(1).from());
1661  CHECK_EQ(40, list->at(1).to());
1662  CHECK_EQ(50, list->at(2).from());
1663  CHECK_EQ(60, list->at(2).to());
1664 
// Case 2: disjoint but out of order -- must come back sorted.
1665  list->Rewind(0);
1666  list->Add(CharacterRange(10, 20), zone);
1667  list->Add(CharacterRange(50, 60), zone);
1668  list->Add(CharacterRange(30, 40), zone);
1669  set.Canonicalize();
1670  CHECK_EQ(3, list->length());
1671  CHECK_EQ(10, list->at(0).from());
1672  CHECK_EQ(20, list->at(0).to());
1673  CHECK_EQ(30, list->at(1).from());
1674  CHECK_EQ(40, list->at(1).to());
1675  CHECK_EQ(50, list->at(2).from());
1676  CHECK_EQ(60, list->at(2).to());
1677 
// Case 3: unsorted mix of ranges and singletons, none adjacent.
1678  list->Rewind(0);
1679  list->Add(CharacterRange(30, 40), zone);
1680  list->Add(CharacterRange(10, 20), zone);
1681  list->Add(CharacterRange(25, 25), zone);
1682  list->Add(CharacterRange(100, 100), zone);
1683  list->Add(CharacterRange(1, 1), zone);
1684  set.Canonicalize();
1685  CHECK_EQ(5, list->length());
1686  CHECK_EQ(1, list->at(0).from());
1687  CHECK_EQ(1, list->at(0).to());
1688  CHECK_EQ(10, list->at(1).from());
1689  CHECK_EQ(20, list->at(1).to());
1690  CHECK_EQ(25, list->at(2).from());
1691  CHECK_EQ(25, list->at(2).to());
1692  CHECK_EQ(30, list->at(3).from());
1693  CHECK_EQ(40, list->at(3).to());
1694  CHECK_EQ(100, list->at(4).from());
1695  CHECK_EQ(100, list->at(4).to());
1696 
// Case 4: [10,19], [21,30] and the bridging singleton 20 -- must merge to [10,30].
1697  list->Rewind(0);
1698  list->Add(CharacterRange(10, 19), zone);
1699  list->Add(CharacterRange(21, 30), zone);
1700  list->Add(CharacterRange(20, 20), zone);
1701  set.Canonicalize();
1702  CHECK_EQ(1, list->length());
1703  CHECK_EQ(10, list->at(0).from());
1704  CHECK_EQ(30, list->at(0).to());
1705 }
1706 
1707 
// Builds two range lists, l1 and l2, covering the different ways two ranges
// can be positioned relative to each other (singletons and longer ranges).
// NOTE(review): listing lines 1709, 1788 and 1789 are not present in this
// rendering, and nothing after the construction of `first_only`,
// `second_only` and `both` is visible, so no checks on these lists appear here.
1708 TEST(CharacterRangeMerge) {
1710  ZoneScope zone_scope(Isolate::Current(), DELETE_ON_EXIT);
1711  ZoneList<CharacterRange> l1(4, Isolate::Current()->zone());
1712  ZoneList<CharacterRange> l2(4, Isolate::Current()->zone());
1713  Zone* zone = Isolate::Current()->zone();
1714  // Create all combinations of intersections of ranges, both singletons and
1715  // longer.
1716 
// `offset` moves each test case into its own region of the value space.
1717  int offset = 0;
1718 
1719  // The five kinds of singleton intersections:
1720  // X
1721  // Y - outside before
1722  // Y - outside touching start
1723  // Y - overlap
1724  // Y - outside touching end
1725  // Y - outside after
1726 
1727  for (int i = 0; i < 5; i++) {
1728  l1.Add(CharacterRange::Singleton(offset + 2), zone);
1729  l2.Add(CharacterRange::Singleton(offset + i), zone);
1730  offset += 6;
1731  }
1732 
1733  // The seven kinds of singleton/non-singleton intersections:
1734  // XXX
1735  // Y - outside before
1736  // Y - outside touching start
1737  // Y - inside touching start
1738  // Y - entirely inside
1739  // Y - inside touching end
1740  // Y - outside touching end
1741  // Y - disjoint after
1742 
1743  for (int i = 0; i < 7; i++) {
1744  l1.Add(CharacterRange::Range(offset + 2, offset + 4), zone);
1745  l2.Add(CharacterRange::Singleton(offset + i), zone);
1746  offset += 8;
1747  }
1748 
1749  // The eleven kinds of non-singleton intersections:
1750  //
1751  // XXXXXXXX
1752  // YYYY - outside before.
1753  // YYYY - outside touching start.
1754  // YYYY - overlapping start
1755  // YYYY - inside touching start
1756  // YYYY - entirely inside
1757  // YYYY - inside touching end
1758  // YYYY - overlapping end
1759  // YYYY - outside touching end
1760  // YYYY - outside after
1761  // YYYYYYYY - identical
1762  // YYYYYYYYYYYY - containing entirely.
1763 
// The first nine kinds come from the loop; the last two are added explicitly.
// NOTE(review): [offset + 6, offset + 15] covers 10 values if both ends are
// inclusive, which does not agree with the "Length 8" remark below.
1764  for (int i = 0; i < 9; i++) {
1765  l1.Add(CharacterRange::Range(offset + 6, offset + 15), zone); // Length 8.
1766  l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), zone);
1767  offset += 22;
1768  }
1769  l1.Add(CharacterRange::Range(offset + 6, offset + 15), zone);
1770  l2.Add(CharacterRange::Range(offset + 6, offset + 15), zone);
1771  offset += 22;
1772  l1.Add(CharacterRange::Range(offset + 6, offset + 15), zone);
1773  l2.Add(CharacterRange::Range(offset + 4, offset + 17), zone);
1774  offset += 22;
1775 
1776  // Different kinds of multi-range overlap:
1777  // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
1778  // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
1779 
1780  l1.Add(CharacterRange::Range(offset, offset + 21), zone);
1781  l1.Add(CharacterRange::Range(offset + 31, offset + 52), zone);
1782  for (int i = 0; i < 6; i++) {
1783  l2.Add(CharacterRange::Range(offset + 2, offset + 5), zone);
1784  l2.Add(CharacterRange::Singleton(offset + 8), zone);
1785  offset += 9;
1786  }
1787 
1790 
1791  ZoneList<CharacterRange> first_only(4, Isolate::Current()->zone());
1792  ZoneList<CharacterRange> second_only(4, Isolate::Current()->zone());
1793  ZoneList<CharacterRange> both(4, Isolate::Current()->zone());
1794 }
1795 
1796 
// Runs the file's string-based Execute helper on the pattern \b\w+\b with the
// flag arguments (false, true, true); the helper's definition is outside this
// view, so the meaning of the flags is not visible here.
// NOTE(review): listing line 1798 is not present in this rendering.
1797 TEST(Graph) {
1799  Execute("\\b\\w+\\b", false, true, true);
1800 }
byte * Address
Definition: globals.h:172
#define CHECK_MIN_MAX(input, min, max)
Definition: test-regexp.cc:134
static bool ParseRegExp(FlatStringReader *input, bool multiline, RegExpCompileData *result)
Definition: parser.cc:6004
OutSet * Get(uc16 value)
Definition: jsregexp.cc:5555
static bool Initialize(Deserializer *des)
Definition: v8.cc:64
bool Find(const Key &key, Locator *locator)
#define CHECK_EQ(expected, value)
Definition: checks.h:219
static Vector< const int > GetWordBounds()
Definition: jsregexp.cc:5127
bool Insert(const Key &key, Locator *locator)
static Result Execute(Code *code, String *input, int start_offset, const byte *input_start, const byte *input_end, int *output, int output_size, Isolate *isolate)
void Sort(int(*cmp)(const T *, const T *))
Definition: utils.h:410
static int Convert(uchar c, uchar n, uchar *result, bool *allow_caching_ptr)
Definition: unicode.cc:1783
static Handle< T > cast(Handle< S > that)
Definition: handles.h:81
bool IsRegExpWord(uc16 c)
int32_t uc32
Definition: globals.h:274
void AddRange(CharacterRange range, int value, Zone *zone)
Definition: jsregexp.cc:5466
#define CHECK_PARSE_EQ(input, expected)
Definition: test-regexp.cc:132
SmartArrayPointer< const char > ToString(Zone *zone)
Definition: ast.cc:951
static CharacterRange Everything()
Definition: jsregexp.h:258
bool Get(unsigned value)
Definition: jsregexp.cc:5452
#define ASSERT(condition)
Definition: checks.h:270
static const int kMaxWidth
Definition: unicode.h:314
void clear_pending_exception()
Definition: isolate.h:548
#define CHECK(condition)
Definition: checks.h:56
Factory * factory()
Definition: isolate.h:977
#define CHECK_GE(a, b)
Definition: checks.h:228
int get(uchar c, uchar n, uchar *result)
Definition: unicode-inl.h:48
int foo
void Add(Vector< const char > format, Vector< FmtElm > elms)
static void Split(ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5162
uint8_t byte
Definition: globals.h:171
Handle< String > NewStringFromUtf8(Vector< const char > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:207
const Register sp
static int Convert(uchar c, uchar n, uchar *result, bool *allow_caching_ptr)
Definition: unicode.cc:1327
static int NoValue()
Definition: test-regexp.cc:546
#define CHECK_PARSE_ERROR(input)
Definition: test-regexp.cc:131
static CharacterRange Range(uc16 from, uc16 to)
Definition: jsregexp.h:254
static const int kInfinity
Definition: ast.h:2206
static const int kMaxWidth
Definition: unicode.h:293
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5247
void AddInverse(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5839
static int Compare(int a, int b)
Definition: test-regexp.cc:547
virtual int min_match()=0
void AddCaseEquivalents(ZoneList< CharacterRange > *ranges, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5181
#define CHECK_MAPS_EQUAL()
bool IsRegExpNewline(uc16 c)
static RegExpImpl::IrregexpResult Match(Isolate *isolate, Handle< ByteArray > code, Handle< String > subject, int *captures, int start_position)
Handle< String > NewStringFromTwoByte(Vector< const uc16 > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:216
int length() const
Definition: utils.h:383
#define CHECK_SIMPLE(input, simple)
Definition: test-regexp.cc:133
bool has_pending_exception()
Definition: isolate.h:554
bool FindLeastGreaterThan(const Key &key, Locator *locator)
Vector< const char > CStrVector(const char *data)
Definition: utils.h:525
static const int kNoKey
Definition: test-regexp.cc:545
int StrLength(const char *string)
Definition: utils.h:234
Handle< String > NewStringFromAscii(Vector< const char > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:199
bool is_null() const
Definition: handles.h:87
bool FindGreatestLessThan(const Key &key, Locator *locator)
static const int kMaxWidth
Definition: unicode.h:307
uint16_t uc16
Definition: globals.h:273
#define ASSERT_EQ(v1, v2)
Definition: checks.h:271
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination trace on stack replacement optimize closures functions with arguments object optimize functions containing for in loops profiler considers IC stability primitive functions trigger their own optimization re try self optimization if it failed insert an interrupt check at function exit execution budget before interrupt is triggered call count before self optimization self_optimization count_based_interrupts weighted_back_edges trace_opt emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 enable use of ARMv7 instructions if enable use of MIPS FPU instructions if NULL
Definition: flags.cc:274
static void DotPrint(const char *label, RegExpNode *node, bool ignore_case)
void USE(T)
Definition: globals.h:303
static bool Is(uchar c)
Definition: unicode.cc:724
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
static Persistent< Context > New(ExtensionConfiguration *extensions=NULL, Handle< ObjectTemplate > global_template=Handle< ObjectTemplate >(), Handle< Value > global_object=Handle< Value >())
Definition: api.cc:4308
SmartArrayPointer< const char > ToCString() const
virtual int max_match()=0
static const int kMaxWidth
Definition: unicode.h:300
static void AddClassEscape(uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5079
static bool Initialize()
Definition: api.cc:4204
static CharacterRange Singleton(uc16 value)
Definition: jsregexp.h:251
unsigned int uchar
Definition: unicode.h:40
static CompilationResult Compile(RegExpCompileData *input, bool ignore_case, bool global, bool multiline, Handle< String > pattern, Handle< String > sample_subject, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5890