v8  3.25.30(node0.11.13)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
test-regexp.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 
29 #include <stdlib.h>
30 
31 #include "v8.h"
32 
33 #include "ast.h"
34 #include "char-predicates-inl.h"
35 #include "cctest.h"
36 #include "jsregexp.h"
37 #include "parser.h"
38 #include "regexp-macro-assembler.h"
40 #include "string-stream.h"
41 #include "zone-inl.h"
42 #ifdef V8_INTERPRETED_REGEXP
43 #include "interpreter-irregexp.h"
44 #else // V8_INTERPRETED_REGEXP
45 #include "macro-assembler.h"
46 #include "code.h"
47 #if V8_TARGET_ARCH_ARM
48 #include "arm/assembler-arm.h"
51 #endif
52 #if V8_TARGET_ARCH_ARM64
53 #include "arm64/assembler-arm64.h"
56 #endif
57 #if V8_TARGET_ARCH_MIPS
58 #include "mips/assembler-mips.h"
61 #endif
62 #if V8_TARGET_ARCH_X64
63 #include "x64/assembler-x64.h"
66 #endif
67 #if V8_TARGET_ARCH_IA32
68 #include "ia32/assembler-ia32.h"
71 #endif
72 #endif // V8_INTERPRETED_REGEXP
73 
74 using namespace v8::internal;
75 
76 
// Parses `input` with RegExpParser::ParseRegExp (second argument `false`)
// and returns the parser's boolean result unchanged.
// NOTE(review): `reader` is used below but its declaration is not visible in
// this listing (listing lines 78, 79 and 81 are absent) -- confirm against the
// real source file.
77 static bool CheckParse(const char* input) {
80  Zone zone(CcTest::i_isolate());
82  RegExpCompileData result;
83  return v8::internal::RegExpParser::ParseRegExp(
84  &reader, false, &result, &zone);
85 }
86 
87 
// Parses `input`, asserts that parsing succeeded (non-NULL tree, null error)
// and returns the string produced by result.tree->ToString(&zone).
// NOTE(review): the declaration of `reader` is not visible in this listing
// (listing lines 89, 90 and 92 are absent) -- confirm against the real file.
88 static SmartArrayPointer<const char> Parse(const char* input) {
91  Zone zone(CcTest::i_isolate());
93  RegExpCompileData result;
94  CHECK(v8::internal::RegExpParser::ParseRegExp(
95  &reader, false, &result, &zone));
96  CHECK(result.tree != NULL);
97  CHECK(result.error.is_null());
98  SmartArrayPointer<const char> output = result.tree->ToString(&zone);
99  return output;
100 }
101 
102 
// Parses `input`, asserts that parsing succeeded, and returns the `simple`
// flag recorded in the RegExpCompileData.
// NOTE(review): the declaration of `reader` is not visible in this listing
// (listing lines 104, 105 and 107 are absent) -- confirm against the real file.
103 static bool CheckSimple(const char* input) {
106  Zone zone(CcTest::i_isolate());
108  RegExpCompileData result;
109  CHECK(v8::internal::RegExpParser::ParseRegExp(
110  &reader, false, &result, &zone));
111  CHECK(result.tree != NULL);
112  CHECK(result.error.is_null());
113  return result.simple;
114 }
115 
// Pair of match-length bounds returned by CheckMinMaxMatch().
// The field order matters: the struct is brace-initialised as
// { min_match, max_match } and read through these two names by CHECK_MIN_MAX.
struct MinMaxPair {
  int min_match;  // Value of tree->min_match() for the parsed pattern.
  int max_match;  // Value of tree->max_match() for the parsed pattern.
};
120 
121 
// Parses `input`, asserts that parsing succeeded, and returns the tree's
// min_match() and max_match() values packed into a MinMaxPair.
// NOTE(review): the declaration of `reader` is not visible in this listing
// (listing lines 123, 124 and 126 are absent) -- confirm against the real file.
122 static MinMaxPair CheckMinMaxMatch(const char* input) {
125  Zone zone(CcTest::i_isolate());
127  RegExpCompileData result;
128  CHECK(v8::internal::RegExpParser::ParseRegExp(
129  &reader, false, &result, &zone));
130  CHECK(result.tree != NULL);
131  CHECK(result.error.is_null());
132  int min_match = result.tree->min_match();
133  int max_match = result.tree->max_match();
     // Aggregate initialisation: the order must match the MinMaxPair fields.
134  MinMaxPair pair = { min_match, max_match };
135  return pair;
136 }
137 
138 
// Assertion helpers built on CheckParse/Parse/CheckSimple/CheckMinMaxMatch.
// Every macro expands to a single statement without a trailing semicolon, so
// each use must be written as `MACRO(...);`.

// Asserts that `input` fails to parse.
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
// Asserts that `input` parses and that its printed tree equals `expected`.
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, Parse(input).get())
// Asserts that the parser's `simple` flag for `input` equals `simple`.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))
// Asserts the minimum and maximum match lengths computed for `input`.
#define CHECK_MIN_MAX(input, min, max)                                  \
  do {                                                                  \
    MinMaxPair min_max = CheckMinMaxMatch(input);                       \
    CHECK_EQ(min, min_max.min_match);                                   \
    CHECK_EQ(max, min_max.max_match);                                   \
  } while (false)
147 
// Body of a parser test.
// NOTE(review): the opening TEST(...) line of this function is not visible in
// this listing (listing lines 148-149 are absent) -- confirm against the real
// source file.
150 
     // A lone "?" must be rejected by the parser.
151  CHECK_PARSE_ERROR("?");
152 
     // Each pattern below must parse; the second argument is the expected
     // string form of the parsed tree (as produced by Parse()).
153  CHECK_PARSE_EQ("abc", "'abc'");
154  CHECK_PARSE_EQ("", "%");
155  CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
156  CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
157  CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
158  CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
159  CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
160  CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
161  CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
162  CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
163  CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
164  CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
165  CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
166  CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
167  CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
168  CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
169  CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
170  CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
171  CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
172  CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
173  CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
174  CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
175  CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
176  CHECK_PARSE_EQ("(?:foo)", "'foo'");
177  CHECK_PARSE_EQ("(?: foo )", "' foo '");
178  CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
179  CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
180  CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
181  CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
182  CHECK_PARSE_EQ("()", "(^ %)");
183  CHECK_PARSE_EQ("(?=)", "(-> + %)");
184  CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
185  CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
186  CHECK_PARSE_EQ("[x]", "[x]");
187  CHECK_PARSE_EQ("[xyz]", "[x y z]");
188  CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
189  CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
190  CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
191  CHECK_PARSE_EQ("]", "']'");
192  CHECK_PARSE_EQ("}", "'}'");
193  CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
194  CHECK_PARSE_EQ("[\\d]", "[0-9]");
195  CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
196  CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
197  CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
198  CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");
199  // Control character outside character class.
200  CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
201  "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
202  CHECK_PARSE_EQ("\\c!", "'\\c!'");
203  CHECK_PARSE_EQ("\\c_", "'\\c_'");
204  CHECK_PARSE_EQ("\\c~", "'\\c~'");
205  CHECK_PARSE_EQ("\\c1", "'\\c1'");
206  // Control character inside character class.
207  CHECK_PARSE_EQ("[\\c!]", "[\\ c !]");
208  CHECK_PARSE_EQ("[\\c_]", "[\\x1f]");
209  CHECK_PARSE_EQ("[\\c~]", "[\\ c ~]");
210  CHECK_PARSE_EQ("[\\ca]", "[\\x01]");
211  CHECK_PARSE_EQ("[\\cz]", "[\\x1a]");
212  CHECK_PARSE_EQ("[\\cA]", "[\\x01]");
213  CHECK_PARSE_EQ("[\\cZ]", "[\\x1a]");
214  CHECK_PARSE_EQ("[\\c1]", "[\\x11]");
215 
     // Escaped punctuation, numeric escapes and back references.
216  CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
217  CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
218  CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
219  CHECK_PARSE_EQ("\\0", "'\\x00'");
220  CHECK_PARSE_EQ("\\8", "'8'");
221  CHECK_PARSE_EQ("\\9", "'9'");
222  CHECK_PARSE_EQ("\\11", "'\\x09'");
223  CHECK_PARSE_EQ("\\11a", "'\\x09a'");
224  CHECK_PARSE_EQ("\\011", "'\\x09'");
225  CHECK_PARSE_EQ("\\00011", "'\\x0011'");
226  CHECK_PARSE_EQ("\\118", "'\\x098'");
227  CHECK_PARSE_EQ("\\111", "'I'");
228  CHECK_PARSE_EQ("\\1111", "'I1'");
229  CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
230  CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
231  CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
232  CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
233  CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
234  " (# 0 - g (<- 1)))");
235  CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
236  " (# 0 - g (<- 2)))");
237  CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
238  " (# 0 - g (<- 3)))");
239  CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
240  " (# 0 - g '\\x04'))");
241  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
242  "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
243  " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
244  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
245  "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
246  " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
247  CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
248  CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
249  CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
250  CHECK_PARSE_EQ("(?=a)?a", "'a'");
251  CHECK_PARSE_EQ("(?=a){0,10}a", "'a'");
252  CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')");
253  CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')");
254  CHECK_PARSE_EQ("(?!a)?a", "'a'");
255  CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
256  CHECK_PARSE_EQ("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
257  CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
258  CHECK_PARSE_EQ("[\\0]", "[\\x00]");
259  CHECK_PARSE_EQ("[\\11]", "[\\x09]");
260  CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
261  CHECK_PARSE_EQ("[\\011]", "[\\x09]");
262  CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
263  CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
264  CHECK_PARSE_EQ("[\\111]", "[I]");
265  CHECK_PARSE_EQ("[\\1111]", "[I 1]");
266  CHECK_PARSE_EQ("\\x34", "'\x34'");
267  CHECK_PARSE_EQ("\\x60", "'\x60'");
268  CHECK_PARSE_EQ("\\x3z", "'x3z'");
269  CHECK_PARSE_EQ("\\c", "'\\c'");
270  CHECK_PARSE_EQ("\\u0034", "'\x34'");
271  CHECK_PARSE_EQ("\\u003z", "'u003z'");
272  CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
273 
     // Expected value of the parser's `simple` flag (see CheckSimple()).
274  CHECK_SIMPLE("", false);
275  CHECK_SIMPLE("a", true);
276  CHECK_SIMPLE("a|b", false);
277  CHECK_SIMPLE("a\\n", false);
278  CHECK_SIMPLE("^a", false);
279  CHECK_SIMPLE("a$", false);
280  CHECK_SIMPLE("a\\b!", false);
281  CHECK_SIMPLE("a\\Bb", false);
282  CHECK_SIMPLE("a*", false);
283  CHECK_SIMPLE("a*?", false);
284  CHECK_SIMPLE("a?", false);
285  CHECK_SIMPLE("a??", false);
286  CHECK_SIMPLE("a{0,1}?", false);
287  CHECK_SIMPLE("a{1,1}?", false);
288  CHECK_SIMPLE("a{1,2}?", false);
289  CHECK_SIMPLE("a+?", false);
290  CHECK_SIMPLE("(a)", false);
291  CHECK_SIMPLE("(a)\\1", false);
292  CHECK_SIMPLE("(\\1a)", false);
293  CHECK_SIMPLE("\\1(a)", false);
294  CHECK_SIMPLE("a\\s", false);
295  CHECK_SIMPLE("a\\S", false);
296  CHECK_SIMPLE("a\\d", false);
297  CHECK_SIMPLE("a\\D", false);
298  CHECK_SIMPLE("a\\w", false);
299  CHECK_SIMPLE("a\\W", false);
300  CHECK_SIMPLE("a.", false);
301  CHECK_SIMPLE("a\\q", false);
302  CHECK_SIMPLE("a[a]", false);
303  CHECK_SIMPLE("a[^a]", false);
304  CHECK_SIMPLE("a[a-z]", false);
305  CHECK_SIMPLE("a[\\q]", false);
306  CHECK_SIMPLE("a(?:b)", false);
307  CHECK_SIMPLE("a(?=b)", false);
308  CHECK_SIMPLE("a(?!b)", false);
309  CHECK_SIMPLE("\\x60", false);
310  CHECK_SIMPLE("\\u0060", false);
311  CHECK_SIMPLE("\\cA", false);
312  CHECK_SIMPLE("\\q", false);
313  CHECK_SIMPLE("\\1112", false);
314  CHECK_SIMPLE("\\0", false);
315  CHECK_SIMPLE("(a)\\1", false);
316  CHECK_SIMPLE("(?=a)?a", false);
317  CHECK_SIMPLE("(?!a)?a\\1", false);
318  CHECK_SIMPLE("(?:(?=a))a\\1", false);
319 
     // Incomplete or malformed brace quantifiers are expected to come back as
     // literal text.
320  CHECK_PARSE_EQ("a{}", "'a{}'");
321  CHECK_PARSE_EQ("a{,}", "'a{,}'");
322  CHECK_PARSE_EQ("a{", "'a{'");
323  CHECK_PARSE_EQ("a{z}", "'a{z}'");
324  CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
325  CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
326  CHECK_PARSE_EQ("a{12,", "'a{12,'");
327  CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
328  CHECK_PARSE_EQ("{}", "'{}'");
329  CHECK_PARSE_EQ("{,}", "'{,}'");
330  CHECK_PARSE_EQ("{", "'{'");
331  CHECK_PARSE_EQ("{z}", "'{z}'");
332  CHECK_PARSE_EQ("{1z}", "'{1z}'");
333  CHECK_PARSE_EQ("{12z}", "'{12z}'");
334  CHECK_PARSE_EQ("{12,", "'{12,'");
335  CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
336 
     // Expected min_match()/max_match() values for each pattern.
     // NOTE(review): listing lines 355-356, 358-359 and 372-375 are absent
     // from this listing; further checks may exist in the real file.
337  CHECK_MIN_MAX("a", 1, 1);
338  CHECK_MIN_MAX("abc", 3, 3);
339  CHECK_MIN_MAX("a[bc]d", 3, 3);
340  CHECK_MIN_MAX("a|bc", 1, 2);
341  CHECK_MIN_MAX("ab|c", 1, 2);
342  CHECK_MIN_MAX("a||bc", 0, 2);
343  CHECK_MIN_MAX("|", 0, 0);
344  CHECK_MIN_MAX("(?:ab)", 2, 2);
345  CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
346  CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
347  CHECK_MIN_MAX("(ab)", 2, 2);
348  CHECK_MIN_MAX("(ab|cde)", 2, 3);
349  CHECK_MIN_MAX("(ab)\\1", 2, 4);
350  CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
351  CHECK_MIN_MAX("(?:ab)?", 0, 2);
352  CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
353  CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
354  CHECK_MIN_MAX("a?", 0, 1);
357  CHECK_MIN_MAX("a??", 0, 1);
360  CHECK_MIN_MAX("(?:a?)?", 0, 1);
361  CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
362  CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
363  CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
364  CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
365  CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
366  CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
367  CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
368  CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
369  CHECK_MIN_MAX("a{0}", 0, 0);
370  CHECK_MIN_MAX("(?:a+){0}", 0, 0);
371  CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
376  CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
377  CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
378  CHECK_MIN_MAX("a\\bc", 2, 2);
379  CHECK_MIN_MAX("a\\Bc", 2, 2);
380  CHECK_MIN_MAX("a\\sc", 3, 3);
381  CHECK_MIN_MAX("a\\Sc", 3, 3);
382  CHECK_MIN_MAX("a(?=b)c", 2, 2);
383  CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
384  CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
385 }
386 
387 
// Additional parse checks: each pattern must parse and print as the given
// tree string.
388 TEST(ParserRegression) {
389  CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
     // "{3,4*}" is not a complete brace quantifier; the expected tree keeps
     // "a{3," and "}" as literal text around a starred '4'.
390  CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
391  CHECK_PARSE_EQ("{", "'{'");
     // A trailing "|" yields an alternation whose second branch prints as %.
392  CHECK_PARSE_EQ("a|", "(| 'a' %)");
393 }
394 
// Asserts that parsing `input` fails (ParseRegExp returns false, no tree is
// produced, an error is set) and that the error text equals `expected`.
// NOTE(review): the declaration of `reader` is not visible in this listing
// (listing lines 397, 398 and 400 are absent) -- confirm against the real file.
395 static void ExpectError(const char* input,
396  const char* expected) {
399  Zone zone(CcTest::i_isolate());
401  RegExpCompileData result;
402  CHECK(!v8::internal::RegExpParser::ParseRegExp(
403  &reader, false, &result, &zone));
404  CHECK(result.tree == NULL);
405  CHECK(!result.error.is_null());
     // Convert the error to a C string so it can be compared with `expected`.
406  SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
407  CHECK_EQ(expected, str.get());
408 }
409 
410 
411 TEST(Errors) {
412  const char* kEndBackslash = "\\ at end of pattern";
413  ExpectError("\\", kEndBackslash);
414  const char* kUnterminatedGroup = "Unterminated group";
415  ExpectError("(foo", kUnterminatedGroup);
416  const char* kInvalidGroup = "Invalid group";
417  ExpectError("(?", kInvalidGroup);
418  const char* kUnterminatedCharacterClass = "Unterminated character class";
419  ExpectError("[", kUnterminatedCharacterClass);
420  ExpectError("[a-", kUnterminatedCharacterClass);
421  const char* kNothingToRepeat = "Nothing to repeat";
422  ExpectError("*", kNothingToRepeat);
423  ExpectError("?", kNothingToRepeat);
424  ExpectError("+", kNothingToRepeat);
425  ExpectError("{1}", kNothingToRepeat);
426  ExpectError("{1,2}", kNothingToRepeat);
427  ExpectError("{1,}", kNothingToRepeat);
428 
429  // Check that we don't allow more than kMaxCapture captures
430  const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
431  const char* kTooManyCaptures = "Too many captures";
432  HeapStringAllocator allocator;
433  StringStream accumulator(&allocator);
434  for (int i = 0; i <= kMaxCaptures; i++) {
435  accumulator.Add("()");
436  }
437  SmartArrayPointer<const char> many_captures(accumulator.ToCString());
438  ExpectError(many_captures.get(), kTooManyCaptures);
439 }
440 
441 
442 static bool IsDigit(uc16 c) {
443  return ('0' <= c && c <= '9');
444 }
445 
446 
447 static bool NotDigit(uc16 c) {
448  return !IsDigit(c);
449 }
450 
451 
// Predicate used as the expected membership test for the 's' class escape.
// NOTE(review): the return statement is not visible in this listing (listing
// line 455 is absent) -- confirm the body against the real source file.
452 static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
453  // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
454  // WhiteSpace (7.2) and LineTerminator (7.3) values.
456 }
457 
458 
459 static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
460  return !IsWhiteSpaceOrLineTerminator(c);
461 }
462 
463 
464 static bool NotWord(uc16 c) {
465  return !IsRegExpWord(c);
466 }
467 
468 
469 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
470  Zone zone(CcTest::i_isolate());
471  ZoneList<CharacterRange>* ranges =
472  new(&zone) ZoneList<CharacterRange>(2, &zone);
473  CharacterRange::AddClassEscape(c, ranges, &zone);
474  for (unsigned i = 0; i < (1 << 16); i++) {
475  bool in_class = false;
476  for (int j = 0; !in_class && j < ranges->length(); j++) {
477  CharacterRange& range = ranges->at(j);
478  in_class = (range.from() <= i && i <= range.to());
479  }
480  CHECK_EQ(pred(i), in_class);
481  }
482 }
483 
484 
// Runs TestCharacterClassEscapes() for each class escape character, pairing
// it with the predicate that defines the expected membership.
// NOTE(review): listing line 486 is absent from this listing.
485 TEST(CharacterClassEscapes) {
487  TestCharacterClassEscapes('.', IsRegExpNewline);
488  TestCharacterClassEscapes('d', IsDigit);
489  TestCharacterClassEscapes('D', NotDigit);
490  TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
491  TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
492  TestCharacterClassEscapes('w', IsRegExpWord);
493  TestCharacterClassEscapes('W', NotWord);
494 }
495 
496 
// Parses `input` and, when parsing succeeds, runs RegExpEngine::Compile on
// the result. Returns compile_data.node, or NULL when parsing fails.
// `multiline` is forwarded to both the parser and the engine; `is_ascii` is
// forwarded to the engine; allocations use `zone`.
// NOTE(review): listing line 501 is absent from this listing.
497 static RegExpNode* Compile(const char* input,
498  bool multiline,
499  bool is_ascii,
500  Zone* zone) {
502  Isolate* isolate = CcTest::i_isolate();
503  FlatStringReader reader(isolate, CStrVector(input));
504  RegExpCompileData compile_data;
505  if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
506  &compile_data, zone))
507  return NULL;
508  Handle<String> pattern = isolate->factory()->
509  NewStringFromUtf8(CStrVector(input));
     // An empty string is passed as the sample subject.
510  Handle<String> sample_subject =
511  isolate->factory()->NewStringFromUtf8(CStrVector(""));
512  RegExpEngine::Compile(&compile_data,
513  false,
514  false,
515  multiline,
516  pattern,
517  sample_subject,
518  is_ascii,
519  zone);
520  return compile_data.node;
521 }
522 
523 
// Compiles `input` with Compile() in a fresh Zone. In DEBUG builds, when
// `dot_output` is true, the resulting node is passed to
// RegExpEngine::DotPrint. The node is otherwise unused.
// NOTE(review): listing line 528 is absent from this listing.
524 static void Execute(const char* input,
525  bool multiline,
526  bool is_ascii,
527  bool dot_output = false) {
529  Zone zone(CcTest::i_isolate());
530  RegExpNode* node = Compile(input, multiline, is_ascii, &zone);
     // Silences the unused-variable warning when DEBUG is not defined.
531  USE(node);
532 #ifdef DEBUG
533  if (dot_output) {
534  RegExpEngine::DotPrint(input, node, false);
535  }
536 #endif // DEBUG
537 }
538 
539 
// Configuration type supplied to ZoneSplayTree by the splay-tree test:
// int keys, int values, key 0 as the "no key" marker and 0 as "no value".
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static int NoValue() { return 0; }
  // Three-way comparison: -1 when a < b, 1 when a > b, 0 when equal.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
558 
559 
// Deterministic mixing of two ints: XOR of i * 781 and j * 329, returned as
// unsigned. Used by the tests to generate repeatable test data.
static unsigned PseudoRandom(int i, int j) {
  const int mixed = (i * 781) ^ (j * 329);
  return static_cast<unsigned>(mixed);
}
563 
564 
// Exercises ZoneSplayTree<TestConfig> with pseudo-random keys below kLimit,
// mirroring the expected contents in the `seen` array: a key already present
// is looked up, checked and removed; a new key is inserted with value 3 * key.
// NOTE(review): several lines are absent from this listing (566, 582, 588,
// 591, 597, 601, 608); the declarations of `loc` used below are not visible
// -- confirm against the real source file.
565 TEST(SplayTreeSimple) {
567  static const unsigned kLimit = 1000;
568  Zone zone(CcTest::i_isolate());
569  ZoneSplayTree<TestConfig> tree(&zone);
570  bool seen[kLimit];
571  for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
     // Checks that tree.Find() agrees with `seen` for every key below kLimit.
     // The macro expects a variable named `loc` to be in scope at the use site.
572 #define CHECK_MAPS_EQUAL() do { \
573  for (unsigned k = 0; k < kLimit; k++) \
574  CHECK_EQ(seen[k], tree.Find(k, &loc)); \
575  } while (false)
576  for (int i = 0; i < 50; i++) {
577  for (int j = 0; j < 50; j++) {
578  unsigned next = PseudoRandom(i, j) % kLimit;
579  if (seen[next]) {
580  // We've already seen this one. Check the value and remove
581  // it.
583  CHECK(tree.Find(next, &loc));
584  CHECK_EQ(next, loc.key());
585  CHECK_EQ(3 * next, loc.value());
586  tree.Remove(next);
587  seen[next] = false;
589  } else {
590  // Check that it wasn't there already and then add it.
592  CHECK(!tree.Find(next, &loc));
593  CHECK(tree.Insert(next, &loc));
594  CHECK_EQ(next, loc.key());
595  loc.set_value(3 * next);
596  seen[next] = true;
598  }
     // For a key known to be present, FindGreatestLessThan is expected to
     // return that same key; the inner loop then stops.
599  int val = PseudoRandom(j, i) % kLimit;
600  if (seen[val]) {
602  CHECK(tree.FindGreatestLessThan(val, &loc));
603  CHECK_EQ(loc.key(), val);
604  break;
605  }
     // Same check for FindLeastGreaterThan with a different pseudo-random key.
606  val = PseudoRandom(i + j, i - j) % kLimit;
607  if (seen[val]) {
609  CHECK(tree.FindLeastGreaterThan(val, &loc));
610  CHECK_EQ(loc.key(), val);
611  break;
612  }
613  }
614  }
615 }
616 
617 
// Builds kRangeCount sets of kRangeSize character ranges from sorted
// pseudo-random endpoints, enters them into a DispatchTable, and checks for
// every value below kLimit that the table's OutSet matches a brute-force
// membership test.
// NOTE(review): listing line 619 is absent from this listing.
618 TEST(DispatchTableConstruction) {
620  // Initialize test data.
621  static const int kLimit = 1000;
622  static const int kRangeCount = 8;
623  static const int kRangeSize = 16;
     // Each row holds 2 * kRangeSize endpoints; after sorting, consecutive
     // pairs (2k, 2k + 1) are used as the [from, to] bounds of one range.
624  uc16 ranges[kRangeCount][2 * kRangeSize];
625  for (int i = 0; i < kRangeCount; i++) {
626  Vector<uc16> range(ranges[i], 2 * kRangeSize);
627  for (int j = 0; j < 2 * kRangeSize; j++) {
628  range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
629  }
630  range.Sort();
631  for (int j = 1; j < 2 * kRangeSize; j++) {
632  CHECK(range[j-1] <= range[j]);
633  }
634  }
635  // Enter test data into dispatch table.
636  Zone zone(CcTest::i_isolate());
637  DispatchTable table(&zone);
638  for (int i = 0; i < kRangeCount; i++) {
639  uc16* range = ranges[i];
640  for (int j = 0; j < 2 * kRangeSize; j += 2)
641  table.AddRange(CharacterRange(range[j], range[j + 1]), i, &zone);
642  }
643  // Check that the table looks as we would expect
644  for (int p = 0; p < kLimit; p++) {
645  OutSet* outs = table.Get(p);
646  for (int j = 0; j < kRangeCount; j++) {
647  uc16* range = ranges[j];
     // Brute force: p belongs to set j if any of its ranges contains p.
648  bool is_on = false;
649  for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
650  is_on = (range[k] <= p && p <= range[k + 1]);
651  CHECK_EQ(is_on, outs->Get(j));
652  }
653  }
654 }
655 
656 
657 // Test of debug-only syntax.
658 #ifdef DEBUG
659 
// Checks that possessive quantifier syntax ("*+", "++", "?+", "{m,n}+")
// parses when FLAG_regexp_possessive_quantifier is set and is rejected when
// it is cleared. The flag's original value is saved first and restored last.
660 TEST(ParsePossessiveRepetition) {
661  bool old_flag_value = FLAG_regexp_possessive_quantifier;
662 
663  // Enable possessive quantifier syntax.
664  FLAG_regexp_possessive_quantifier = true;
665 
666  CHECK_PARSE_EQ("a*+", "(# 0 - p 'a')");
667  CHECK_PARSE_EQ("a++", "(# 1 - p 'a')");
668  CHECK_PARSE_EQ("a?+", "(# 0 1 p 'a')");
669  CHECK_PARSE_EQ("a{10,20}+", "(# 10 20 p 'a')");
670  CHECK_PARSE_EQ("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
671 
672  // Disable possessive quantifier syntax.
673  FLAG_regexp_possessive_quantifier = false;
674 
675  CHECK_PARSE_ERROR("a*+");
676  CHECK_PARSE_ERROR("a++");
677  CHECK_PARSE_ERROR("a?+");
678  CHECK_PARSE_ERROR("a{10,20}+");
679  CHECK_PARSE_ERROR("a{10,20}+b");
680 
     // Put the flag back so later tests see the value they started with.
681  FLAG_regexp_possessive_quantifier = old_flag_value;
682 }
683 
684 #endif
685 
686 // Tests of interpreter.
687 
688 
689 #ifndef V8_INTERPRETED_REGEXP
690 
// Selects the regexp macro assembler class for the target architecture and
// exposes it under the single name ArchRegExpMacroAssembler. No typedef is
// provided when none of the listed V8_TARGET_ARCH_* macros is set.
691 #if V8_TARGET_ARCH_IA32
692 typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
693 #elif V8_TARGET_ARCH_X64
694 typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
695 #elif V8_TARGET_ARCH_ARM
696 typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
697 #elif V8_TARGET_ARCH_ARM64
698 typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
699 #elif V8_TARGET_ARCH_MIPS
700 typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
701 #endif
702 
// Body of a helper class that creates a v8::Context and calls Enter() on it
// in one member function and Exit() in another.
// NOTE(review): the class header, the constructor and destructor signature
// lines and the `env_` member declaration are not visible in this listing
// (listing lines 703, 705, 710 and 715 are absent). Presumably this is the
// ContextInitializer type instantiated by the tests below -- confirm against
// the real source file.
704  public:
706  : scope_(CcTest::isolate()),
707  env_(v8::Context::New(CcTest::isolate())) {
708  env_->Enter();
709  }
711  env_->Exit();
712  }
713  private:
714  v8::HandleScope scope_;
716 };
717 
718 
// Forwards its arguments, followed by a literal 0, to a call whose callee is
// not visible here, and yields an ArchRegExpMacroAssembler::Result.
// `input_start` and `input_end` are byte pointers.
// NOTE(review): the callee and the final argument(s) are not visible in this
// listing (listing lines 725 and 733 are absent) -- confirm against the real
// source file.
719 static ArchRegExpMacroAssembler::Result Execute(Code* code,
720  String* input,
721  int start_offset,
722  const byte* input_start,
723  const byte* input_end,
724  int* captures) {
726  code,
727  input,
728  start_offset,
729  input_start,
730  input_end,
731  captures,
732  0,
734 }
735 
736 
// Assembles a program consisting only of Succeed(), runs it on "foofoo" and
// expects all four capture slots to end up as -1.
// NOTE(review): the declarations of `seq_input` and of the Execute() result,
// and any check on that result, are not visible in this listing (listing
// lines 738, 754, 758 and 766 are absent).
737 TEST(MacroAssemblerNativeSuccess) {
739  ContextInitializer initializer;
740  Isolate* isolate = CcTest::i_isolate();
741  Factory* factory = isolate->factory();
742  Zone zone(isolate);
743 
744  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone);
745 
746  m.Succeed();
747 
748  Handle<String> source = factory->NewStringFromAscii(CStrVector(""));
749  Handle<Object> code_object = m.GetCode(source);
750  Handle<Code> code = Handle<Code>::cast(code_object);
751 
     // Pre-filled with arbitrary values so the checks below can tell that the
     // run overwrote them.
752  int captures[4] = {42, 37, 87, 117};
753  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
755  const byte* start_adr =
756  reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
757 
759  Execute(*code,
760  *input,
761  0,
762  start_adr,
763  start_adr + seq_input->length(),
764  captures);
765 
767  CHECK_EQ(-1, captures[0]);
768  CHECK_EQ(-1, captures[1]);
769  CHECK_EQ(-1, captures[2]);
770  CHECK_EQ(-1, captures[3]);
771 }
772 
773 
// Hand-assembles a matcher labelled "^foo" in ASCII mode, runs it on "foofoo"
// and expects captures {0, 3, -1, -1}; then runs the same code on
// "barbarbar".
// NOTE(review): the declarations of `seq_input` and `result`, and the checks
// on `result`, are not visible in this listing (listing lines 775, 808, 811,
// 819 and 836 are absent).
774 TEST(MacroAssemblerNativeSimple) {
776  ContextInitializer initializer;
777  Isolate* isolate = CcTest::i_isolate();
778  Factory* factory = isolate->factory();
779  Zone zone(isolate);
780 
781  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone);
782 
     // Program: compare the characters at offsets 2, 1, 0 against 'o', 'o',
     // 'f'; then record registers 0 and 1, advance by 3 and succeed.
783  Label fail, backtrack;
784  m.PushBacktrack(&fail);
785  m.CheckNotAtStart(NULL);
786  m.LoadCurrentCharacter(2, NULL);
787  m.CheckNotCharacter('o', NULL);
788  m.LoadCurrentCharacter(1, NULL, false);
789  m.CheckNotCharacter('o', NULL);
790  m.LoadCurrentCharacter(0, NULL, false);
791  m.CheckNotCharacter('f', NULL);
792  m.WriteCurrentPositionToRegister(0, 0);
793  m.WriteCurrentPositionToRegister(1, 3);
794  m.AdvanceCurrentPosition(3);
795  m.PushBacktrack(&backtrack);
796  m.Succeed();
797  m.Bind(&backtrack);
798  m.Backtrack();
799  m.Bind(&fail);
800  m.Fail();
801 
802  Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo"));
803  Handle<Object> code_object = m.GetCode(source);
804  Handle<Code> code = Handle<Code>::cast(code_object);
805 
806  int captures[4] = {42, 37, 87, 117};
807  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
809  Address start_adr = seq_input->GetCharsAddress();
810 
812  Execute(*code,
813  *input,
814  0,
815  start_adr,
816  start_adr + input->length(),
817  captures);
818 
820  CHECK_EQ(0, captures[0]);
821  CHECK_EQ(3, captures[1]);
822  CHECK_EQ(-1, captures[2]);
823  CHECK_EQ(-1, captures[3]);
824 
     // Second run: same code, different subject.
825  input = factory->NewStringFromAscii(CStrVector("barbarbar"));
826  seq_input = Handle<SeqOneByteString>::cast(input);
827  start_adr = seq_input->GetCharsAddress();
828 
829  result = Execute(*code,
830  *input,
831  0,
832  start_adr,
833  start_adr + input->length(),
834  captures);
835 
837 }
838 
839 
840 TEST(MacroAssemblerNativeSimpleUC16) {
842  ContextInitializer initializer;
843  Isolate* isolate = CcTest::i_isolate();
844  Factory* factory = isolate->factory();
845  Zone zone(isolate);
846 
847  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);
848 
849  Label fail, backtrack;
850  m.PushBacktrack(&fail);
851  m.CheckNotAtStart(NULL);
852  m.LoadCurrentCharacter(2, NULL);
853  m.CheckNotCharacter('o', NULL);
854  m.LoadCurrentCharacter(1, NULL, false);
855  m.CheckNotCharacter('o', NULL);
856  m.LoadCurrentCharacter(0, NULL, false);
857  m.CheckNotCharacter('f', NULL);
858  m.WriteCurrentPositionToRegister(0, 0);
859  m.WriteCurrentPositionToRegister(1, 3);
860  m.AdvanceCurrentPosition(3);
861  m.PushBacktrack(&backtrack);
862  m.Succeed();
863  m.Bind(&backtrack);
864  m.Backtrack();
865  m.Bind(&fail);
866  m.Fail();
867 
868  Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo"));
869  Handle<Object> code_object = m.GetCode(source);
870  Handle<Code> code = Handle<Code>::cast(code_object);
871 
872  int captures[4] = {42, 37, 87, 117};
873  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
874  static_cast<uc16>(0x2603)};
875  Handle<String> input =
876  factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
878  Address start_adr = seq_input->GetCharsAddress();
879 
881  Execute(*code,
882  *input,
883  0,
884  start_adr,
885  start_adr + input->length(),
886  captures);
887 
889  CHECK_EQ(0, captures[0]);
890  CHECK_EQ(3, captures[1]);
891  CHECK_EQ(-1, captures[2]);
892  CHECK_EQ(-1, captures[3]);
893 
894  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
895  static_cast<uc16>(0x2603)};
896  input = factory->NewStringFromTwoByte(Vector<const uc16>(input_data2, 9));
897  seq_input = Handle<SeqTwoByteString>::cast(input);
898  start_adr = seq_input->GetCharsAddress();
899 
900  result = Execute(*code,
901  *input,
902  0,
903  start_adr,
904  start_adr + input->length() * 2,
905  captures);
906 
908 }
909 
910 
// Assembles a program that tries to load the character at offset 10 of a
// six-character subject ("foofoo"), with a backtrack target that calls
// Fail(). No capture array is supplied (NULL).
// NOTE(review): the declarations of `seq_input` and of the Execute() result,
// and the check on that result, are not visible in this listing (listing
// lines 912, 936, 939 and 947 are absent).
911 TEST(MacroAssemblerNativeBacktrack) {
913  ContextInitializer initializer;
914  Isolate* isolate = CcTest::i_isolate();
915  Factory* factory = isolate->factory();
916  Zone zone(isolate);
917 
918  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0, &zone);
919 
920  Label fail;
921  Label backtrack;
     // First load jumps to `fail` on failure; the second is given NULL as its
     // label, after `backtrack` has been pushed.
922  m.LoadCurrentCharacter(10, &fail);
923  m.Succeed();
924  m.Bind(&fail);
925  m.PushBacktrack(&backtrack);
926  m.LoadCurrentCharacter(10, NULL);
927  m.Succeed();
928  m.Bind(&backtrack);
929  m.Fail();
930 
931  Handle<String> source = factory->NewStringFromAscii(CStrVector(".........."));
932  Handle<Object> code_object = m.GetCode(source);
933  Handle<Code> code = Handle<Code>::cast(code_object);
934 
935  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
937  Address start_adr = seq_input->GetCharsAddress();
938 
940  Execute(*code,
941  *input,
942  0,
943  start_adr,
944  start_adr + input->length(),
945  NULL);
946 
948 }
949 
950 
// Hand-assembles a back-reference matcher in ASCII mode and runs it on
// "fooofo". Registers 0 and 1 delimit the first two characters; the program
// then tests a back reference to that capture twice and, on success, records
// the final position in register 2. Expected output: {0, 2, 6, -1}.
// NOTE(review): the declarations of `seq_input` and of the Execute() result,
// and the check on that result, are not visible in this listing (listing
// lines 952, 980, 984 and 992 are absent).
951 TEST(MacroAssemblerNativeBackReferenceASCII) {
953  ContextInitializer initializer;
954  Isolate* isolate = CcTest::i_isolate();
955  Factory* factory = isolate->factory();
956  Zone zone(isolate);
957 
958  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone);
959 
960  m.WriteCurrentPositionToRegister(0, 0);
961  m.AdvanceCurrentPosition(2);
962  m.WriteCurrentPositionToRegister(1, 0);
     // First attempt must take the `nomatch` branch; falling through fails.
963  Label nomatch;
964  m.CheckNotBackReference(0, &nomatch);
965  m.Fail();
966  m.Bind(&nomatch);
967  m.AdvanceCurrentPosition(2);
     // Second attempt must fall through; taking `missing_match` fails.
968  Label missing_match;
969  m.CheckNotBackReference(0, &missing_match);
970  m.WriteCurrentPositionToRegister(2, 0);
971  m.Succeed();
972  m.Bind(&missing_match);
973  m.Fail();
974 
     // "\\1" spells backslash-one; a bare "\1" would be the octal escape for
     // character 0x01.
975  Handle<String> source = factory->NewStringFromAscii(CStrVector("^(..)..\\1"));
976  Handle<Object> code_object = m.GetCode(source);
977  Handle<Code> code = Handle<Code>::cast(code_object);
978 
979  Handle<String> input = factory->NewStringFromAscii(CStrVector("fooofo"));
981  Address start_adr = seq_input->GetCharsAddress();
982 
983  int output[4];
985  Execute(*code,
986  *input,
987  0,
988  start_adr,
989  start_adr + input->length(),
990  output);
991 
993  CHECK_EQ(0, output[0]);
994  CHECK_EQ(2, output[1]);
995  CHECK_EQ(6, output[2]);
996  CHECK_EQ(-1, output[3]);
997 }
998 
999 
// Two-byte (UC16) variant of the back-reference matcher test. The subject is
// six two-byte units {'f', 0x2028, 'o', 'o', 'f', 0x2028}; registers 0 and 1
// delimit the first two units, the back reference is tested twice, and on
// success the final position is stored in register 2.
// Expected output: {0, 2, 6, -1}.
// NOTE(review): the declarations of `seq_input` and of the Execute() result,
// and the check on that result, are not visible in this listing (listing
// lines 1001, 1031, 1035 and 1043 are absent).
1000 TEST(MacroAssemblerNativeBackReferenceUC16) {
1002  ContextInitializer initializer;
1003  Isolate* isolate = CcTest::i_isolate();
1004  Factory* factory = isolate->factory();
1005  Zone zone(isolate);
1006 
1007  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);
1008 
1009  m.WriteCurrentPositionToRegister(0, 0);
1010  m.AdvanceCurrentPosition(2);
1011  m.WriteCurrentPositionToRegister(1, 0);
      // First attempt must take the `nomatch` branch; falling through fails.
1012  Label nomatch;
1013  m.CheckNotBackReference(0, &nomatch);
1014  m.Fail();
1015  m.Bind(&nomatch);
1016  m.AdvanceCurrentPosition(2);
      // Second attempt must fall through; taking `missing_match` fails.
1017  Label missing_match;
1018  m.CheckNotBackReference(0, &missing_match);
1019  m.WriteCurrentPositionToRegister(2, 0);
1020  m.Succeed();
1021  m.Bind(&missing_match);
1022  m.Fail();
1023 
      // "\\1" spells backslash-one; a bare "\1" would be the octal escape for
      // character 0x01.
1024  Handle<String> source = factory->NewStringFromAscii(CStrVector("^(..)..\\1"));
1025  Handle<Object> code_object = m.GetCode(source);
1026  Handle<Code> code = Handle<Code>::cast(code_object);
1027 
1028  const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
1029  Handle<String> input =
1030  factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
1032  Address start_adr = seq_input->GetCharsAddress();
1033 
1034  int output[4];
      // The end pointer is a byte address: length in two-byte units times 2.
1036  Execute(*code,
1037  *input,
1038  0,
1039  start_adr,
1040  start_adr + input->length() * 2,
1041  output);
1042 
1044  CHECK_EQ(0, output[0]);
1045  CHECK_EQ(2, output[1]);
1046  CHECK_EQ(6, output[2]);
1047  CHECK_EQ(-1, output[3]);
1048 }
1049 
1050 
1051 
// Hand-assembles a native ASCII program around CheckNotAtStart and runs it
// twice against "foobar": once from offset 0 and once from offset 3.
//
// The fall-through path of CheckNotAtStart requires CheckCharacter('\n') to
// branch and the current character to be 'f'; the `not_at_start` path
// requires CheckCharacter('o') to branch and the current character to be
// 'b'. Every other path ends in Fail().
//
// NOTE(review): this numbered listing skips some source lines; the
// declarations of `seq_input` and `result`, and any checks on the values
// returned by Execute, are not visible here.
1052 TEST(MacroAssemblernativeAtStart) {
1054  ContextInitializer initializer;
1055  Isolate* isolate = CcTest::i_isolate();
1056  Factory* factory = isolate->factory();
1057  Zone zone(isolate);
1058 
1059  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0, &zone);
1060 
1061  Label not_at_start, newline, fail;
1062  m.CheckNotAtStart(&not_at_start);
1063  // Check that prevchar = '\n' and current = 'f'.
1064  m.CheckCharacter('\n', &newline);
1065  m.Bind(&fail);
1066  m.Fail();
1067  m.Bind(&newline);
1068  m.LoadCurrentCharacter(0, &fail);
1069  m.CheckNotCharacter('f', &fail);
1070  m.Succeed();
1071 
1072  m.Bind(&not_at_start);
1073  // Check that prevchar = 'o' and current = 'b'.
1074  Label prevo;
1075  m.CheckCharacter('o', &prevo);
1076  m.Fail();
1077  m.Bind(&prevo);
1078  m.LoadCurrentCharacter(0, &fail);
1079  m.CheckNotCharacter('b', &fail);
1080  m.Succeed();
1081 
1082  Handle<String> source = factory->NewStringFromAscii(CStrVector("(^f|ob)"));
1083  Handle<Object> code_object = m.GetCode(source);
1084  Handle<Code> code = Handle<Code>::cast(code_object);
1085 
1086  Handle<String> input = factory->NewStringFromAscii(CStrVector("foobar"));
1088  Address start_adr = seq_input->GetCharsAddress();
1089 
       // First run: start offset 0, no output array.
1091  Execute(*code,
1092  *input,
1093  0,
1094  start_adr,
1095  start_adr + input->length(),
1096  NULL);
1097 
1099 
       // Second run: start offset 3, with the start address moved by 3 too.
1100  result = Execute(*code,
1101  *input,
1102  3,
1103  start_adr + 3,
1104  start_adr + input->length(),
1105  NULL);
1106 
1108 }
1109 
1110 
// Hand-assembles a native ASCII program that exercises
// CheckNotBackReferenceIgnoreCase and runs it against "aBcAbCABCxYzab".
//
// Registers 2 and 3 are written at positions 0 and 3. The first two
// ignore-case back-reference checks must fall through, the third must branch
// to `expected_fail`, and after skipping three more characters a fourth check
// must branch to `succ`, where the position is stored in register 1. The
// final CHECK_EQs pin the output registers to {0, 12, 0, 3}.
//
// NOTE(review): this numbered listing skips some source lines; the
// declaration of `seq_input` and whatever receives Execute's return value
// are not visible here.
1111 TEST(MacroAssemblerNativeBackRefNoCase) {
1113  ContextInitializer initializer;
1114  Isolate* isolate = CcTest::i_isolate();
1115  Factory* factory = isolate->factory();
1116  Zone zone(isolate);
1117 
1118  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone);
1119 
1120  Label fail, succ;
1121 
1122  m.WriteCurrentPositionToRegister(0, 0);
1123  m.WriteCurrentPositionToRegister(2, 0);
1124  m.AdvanceCurrentPosition(3);
1125  m.WriteCurrentPositionToRegister(3, 0);
1126  m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
1127  m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
1128  Label expected_fail;
1129  m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
1130  m.Bind(&fail);
1131  m.Fail();
1132 
1133  m.Bind(&expected_fail);
1134  m.AdvanceCurrentPosition(3); // Skip "xYz"
1135  m.CheckNotBackReferenceIgnoreCase(2, &succ);
1136  m.Fail();
1137 
1138  m.Bind(&succ);
1139  m.WriteCurrentPositionToRegister(1, 0);
1140  m.Succeed();
1141 
       // "\\1" spells a backslash followed by '1'. A bare "\1" in a C++ string
       // literal is the octal escape for the character with value 1, so the
       // pattern label would not contain the intended back references.
1142  Handle<String> source =
1143  factory->NewStringFromAscii(CStrVector("^(abc)\\1\\1(?!\\1)...(?!\\1)"));
1144  Handle<Object> code_object = m.GetCode(source);
1145  Handle<Code> code = Handle<Code>::cast(code_object);
1146 
1147  Handle<String> input =
1148  factory->NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
1150  Address start_adr = seq_input->GetCharsAddress();
1151 
1152  int output[4];
1154  Execute(*code,
1155  *input,
1156  0,
1157  start_adr,
1158  start_adr + input->length(),
1159  output);
1160 
1162  CHECK_EQ(0, output[0]);
1163  CHECK_EQ(12, output[1]);
1164  CHECK_EQ(0, output[2]);
1165  CHECK_EQ(3, output[3]);
1166 }
1167 
1168 
1169 
// Hand-assembles a native ASCII program that exercises register and
// backtrack-stack operations (PushRegister/PopRegister, PushBacktrack/
// Backtrack, Write/ReadStackPointer, SetRegister/AdvanceRegister,
// IfRegisterLT/IfRegisterGE, CheckGreedyLoop) and runs it against
// "foofoofoofoofoo". The final CHECK_EQs pin the six output registers to
// {0, 3, 6, 9, 9, -1}.
//
// NOTE(review): this numbered listing skips some source lines; the
// declaration of `seq_input` and whatever receives Execute's return value
// are not visible here. `foo_chars` and `foo` are not referenced in any
// visible line.
1170 TEST(MacroAssemblerNativeRegisters) {
1172  ContextInitializer initializer;
1173  Isolate* isolate = CcTest::i_isolate();
1174  Factory* factory = isolate->factory();
1175  Zone zone(isolate);
1176 
1177  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 6, &zone);
1178 
1179  uc16 foo_chars[3] = {'f', 'o', 'o'};
1180  Vector<const uc16> foo(foo_chars, 3);
1181 
       // out1..out6 are register indices 0..5, matching output[0..5] below;
       // sp and loop_cnt are indices 6 and 7.
1182  enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1183  Label fail;
1184  Label backtrack;
1185  m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
1186  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1187  m.PushBacktrack(&backtrack);
1188  m.WriteStackPointerToRegister(sp);
1189  // Fill stack and registers
1190  m.AdvanceCurrentPosition(2);
1191  m.WriteCurrentPositionToRegister(out1, 0);
1192  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1193  m.PushBacktrack(&fail);
1194  // Drop backtrack stack frames.
1195  m.ReadStackPointerFromRegister(sp);
1196  // And take the first backtrack (to &backtrack)
1197  m.Backtrack();
1198 
       // These three instructions are emitted directly after Backtrack() and
       // before the `backtrack` label is bound.
1199  m.PushCurrentPosition();
1200  m.AdvanceCurrentPosition(2);
1201  m.PopCurrentPosition();
1202 
1203  m.Bind(&backtrack);
1204  m.PopRegister(out1);
1205  m.ReadCurrentPositionFromRegister(out1);
1206  m.AdvanceCurrentPosition(3);
1207  m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
1208 
       // Counting-up loop: advance one character per iteration until
       // loop_cnt reaches 3.
1209  Label loop;
1210  m.SetRegister(loop_cnt, 0); // loop counter
1211  m.Bind(&loop);
1212  m.AdvanceRegister(loop_cnt, 1);
1213  m.AdvanceCurrentPosition(1);
1214  m.IfRegisterLT(loop_cnt, 3, &loop);
1215  m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
1216 
       // Counting-down loop: starts at 2 and repeats while loop_cnt >= 0.
1217  Label loop2;
1218  m.SetRegister(loop_cnt, 2); // loop counter
1219  m.Bind(&loop2);
1220  m.AdvanceRegister(loop_cnt, -1);
1221  m.AdvanceCurrentPosition(1);
1222  m.IfRegisterGE(loop_cnt, 0, &loop2);
1223  m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
1224 
       // Push out4 twice, rewind to the position stored in out3, then advance
       // one character at a time until CheckGreedyLoop branches to
       // exit_loop3.
1225  Label loop3;
1226  Label exit_loop3;
1227  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1228  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1229  m.ReadCurrentPositionFromRegister(out3);
1230  m.Bind(&loop3);
1231  m.AdvanceCurrentPosition(1);
1232  m.CheckGreedyLoop(&exit_loop3);
1233  m.GoTo(&loop3);
1234  m.Bind(&exit_loop3);
1235  m.PopCurrentPosition();
1236  m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1]
1237 
1238  m.Succeed();
1239 
1240  m.Bind(&fail);
1241  m.Fail();
1242 
1243  Handle<String> source =
1244  factory->NewStringFromAscii(CStrVector("<loop test>"));
1245  Handle<Object> code_object = m.GetCode(source);
1246  Handle<Code> code = Handle<Code>::cast(code_object);
1247 
1248  // String long enough for test (content doesn't matter).
1249  Handle<String> input =
1250  factory->NewStringFromAscii(CStrVector("foofoofoofoofoo"));
1252  Address start_adr = seq_input->GetCharsAddress();
1253 
1254  int output[6];
1256  Execute(*code,
1257  *input,
1258  0,
1259  start_adr,
1260  start_adr + input->length(),
1261  output);
1262 
1264  CHECK_EQ(0, output[0]);
1265  CHECK_EQ(3, output[1]);
1266  CHECK_EQ(6, output[2]);
1267  CHECK_EQ(9, output[3]);
1268  CHECK_EQ(9, output[4]);
1269  CHECK_EQ(-1, output[5]);
1270 }
1271 
1272 
// Hand-assembles a native ASCII program consisting of an endless loop that
// pushes a backtrack entry on every iteration, runs it against "dummy", and
// checks that the isolate has a pending exception afterwards. The pending
// exception is cleared before the test returns.
//
// NOTE(review): this numbered listing skips some source lines; the
// declaration of `seq_input` and whatever receives Execute's return value
// are not visible here.
1273 TEST(MacroAssemblerStackOverflow) {
1275  ContextInitializer initializer;
1276  Isolate* isolate = CcTest::i_isolate();
1277  Factory* factory = isolate->factory();
1278  Zone zone(isolate);
1279 
1280  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0, &zone);
1281 
       // Push a backtrack entry and jump back to the top, with no exit.
1282  Label loop;
1283  m.Bind(&loop);
1284  m.PushBacktrack(&loop);
1285  m.GoTo(&loop);
1286 
1287  Handle<String> source =
1288  factory->NewStringFromAscii(CStrVector("<stack overflow test>"));
1289  Handle<Object> code_object = m.GetCode(source);
1290  Handle<Code> code = Handle<Code>::cast(code_object);
1291 
1292  // String long enough for test (content doesn't matter).
1293  Handle<String> input =
1294  factory->NewStringFromAscii(CStrVector("dummy"));
1296  Address start_adr = seq_input->GetCharsAddress();
1297 
1299  Execute(*code,
1300  *input,
1301  0,
1302  start_adr,
1303  start_adr + input->length(),
1304  NULL);
1305 
1307  CHECK(isolate->has_pending_exception());
1308  isolate->clear_pending_exception();
1309 }
1310 
1311 
// Hand-assembles a native ASCII program that uses a very high register index
// (8000) alongside registers 0 and 1, runs it against "sample text", and
// checks that the two captures come back as {0, 42}.
//
// Register 8000 is written with offset 42, pushed, and popped into
// register 1, which is how 42 reaches captures[1].
//
// NOTE(review): this numbered listing skips some source lines; the
// declaration of `seq_input` and whatever receives Execute's return value
// are not visible here.
1312 TEST(MacroAssemblerNativeLotsOfRegisters) {
1314  ContextInitializer initializer;
1315  Isolate* isolate = CcTest::i_isolate();
1316  Factory* factory = isolate->factory();
1317  Zone zone(isolate);
1318 
1319  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 2, &zone);
1320 
1321  // At least 2048, to ensure the space allocated for registers
1322  // spans one full page.
1323  const int large_number = 8000;
1324  m.WriteCurrentPositionToRegister(large_number, 42);
1325  m.WriteCurrentPositionToRegister(0, 0);
1326  m.WriteCurrentPositionToRegister(1, 1);
1327  Label done;
1328  m.CheckNotBackReference(0, &done); // Performs a system-stack push.
1329  m.Bind(&done);
1330  m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1331  m.PopRegister(1);
1332  m.Succeed();
1333 
1334  Handle<String> source =
1335  factory->NewStringFromAscii(CStrVector("<huge register space test>"));
1336  Handle<Object> code_object = m.GetCode(source);
1337  Handle<Code> code = Handle<Code>::cast(code_object);
1338 
1339  // String long enough for test (content doesn't matter).
1340  Handle<String> input =
1341  factory->NewStringFromAscii(CStrVector("sample text"));
1343  Address start_adr = seq_input->GetCharsAddress();
1344 
1345  int captures[2];
1347  Execute(*code,
1348  *input,
1349  0,
1350  start_adr,
1351  start_adr + input->length(),
1352  captures);
1353 
1355  CHECK_EQ(0, captures[0]);
1356  CHECK_EQ(42, captures[1]);
1357 
1358  isolate->clear_pending_exception();
1359 }
1360 
1361 #else // V8_INTERPRETED_REGEXP
1362 
// Body of the interpreter-only test (the V8_INTERPRETED_REGEXP branch).
// NOTE(review): the TEST(...) header line of this block is missing from this
// numbered listing; only the body is visible.
//
// Assembles bytecode for "^f(o)o" into a 1024-byte buffer with
// RegExpMacroAssemblerIrregexp and runs it through
// IrregexpInterpreter::Match on two two-byte strings: "foobar", which must
// match with captures {0, 3, 1, 2, 84}, and "barfoo", which must not match
// and must leave captures[0] == 42.
1365  byte codes[1024];
1366  Zone zone(CcTest::i_isolate());
1367  RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024), &zone);
1368  // ^f(o)o.
1369  Label start, fail, backtrack;
1370 
       // Register 4 is set to 42, pushed, then advanced by 42; this accounts
       // for the 84 expected in captures[4] below.
1371  m.SetRegister(4, 42);
1372  m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1373  m.AdvanceRegister(4, 42);
1374  m.GoTo(&start);
1375  m.Fail();
1376  m.Bind(&start);
1377  m.PushBacktrack(&fail);
1378  m.CheckNotAtStart(NULL);
1379  m.LoadCurrentCharacter(0, NULL);
1380  m.CheckNotCharacter('f', NULL);
1381  m.LoadCurrentCharacter(1, NULL);
1382  m.CheckNotCharacter('o', NULL);
1383  m.LoadCurrentCharacter(2, NULL);
1384  m.CheckNotCharacter('o', NULL);
1385  m.WriteCurrentPositionToRegister(0, 0);
1386  m.WriteCurrentPositionToRegister(1, 3);
1387  m.WriteCurrentPositionToRegister(2, 1);
1388  m.WriteCurrentPositionToRegister(3, 2);
1389  m.AdvanceCurrentPosition(3);
1390  m.PushBacktrack(&backtrack);
1391  m.Succeed();
1392  m.Bind(&backtrack);
1393  m.ClearRegisters(2, 3);
1394  m.Backtrack();
1395  m.Bind(&fail);
       // Pops into register 0; the failing run below expects captures[0] == 42,
       // the value pushed from register 4 at the top.
1396  m.PopRegister(0);
1397  m.Fail();
1398 
1399  Isolate* isolate = CcTest::i_isolate();
1400  Factory* factory = isolate->factory();
1401  HandleScope scope(isolate);
1402 
1403  Handle<String> source = factory->NewStringFromAscii(CStrVector("^f(o)o"));
1404  Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1405  int captures[5];
1406 
1407  const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
1408  Handle<String> f1_16 =
1409  factory->NewStringFromTwoByte(Vector<const uc16>(str1, 6));
1410 
1411  CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
1412  CHECK_EQ(0, captures[0]);
1413  CHECK_EQ(3, captures[1]);
1414  CHECK_EQ(1, captures[2]);
1415  CHECK_EQ(2, captures[3]);
1416  CHECK_EQ(84, captures[4]);
1417 
1418  const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
1419  Handle<String> f2_16 =
1420  factory->NewStringFromTwoByte(Vector<const uc16>(str2, 6));
1421 
1422  CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
1423  CHECK_EQ(42, captures[0]);
1424 }
1425 
1426 #endif // V8_INTERPRETED_REGEXP
1427 
1428 
// Checks DispatchTableConstructor::AddInverse against a brute-force
// membership test.
//
// Ten rounds each build kRangeCount pseudo-random ranges below kLimit, add
// their inverse to a fresh DispatchTable under choice index 0, and verify
// for every value below kLimit that the table's bit 0 is clear exactly when
// the value lies in one of the ranges. A final case checks the top of the
// 16-bit range: with [0xFFF0, 0xFFFE] as input, 0xFFFE must be clear and
// 0xFFFF must be set.
//
// NOTE(review): this numbered listing skips the line after the TEST header.
1429 TEST(AddInverseToTable) {
1431  static const int kLimit = 1000;
1432  static const int kRangeCount = 16;
1433  for (int t = 0; t < 10; t++) {
1434  Zone zone(CcTest::i_isolate());
1435  ZoneList<CharacterRange>* ranges =
1436  new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
1437  for (int i = 0; i < kRangeCount; i++) {
1438  int from = PseudoRandom(t + 87, i + 25) % kLimit;
1439  int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1440  if (to > kLimit) to = kLimit;
1441  ranges->Add(CharacterRange(from, to), &zone);
1442  }
1443  DispatchTable table(&zone);
1444  DispatchTableConstructor cons(&table, false, &zone);
1445  cons.set_choice_index(0);
1446  cons.AddInverse(ranges);
1447  for (int i = 0; i < kLimit; i++) {
       // Brute-force: is i covered by any of the generated ranges?
1448  bool is_on = false;
1449  for (int j = 0; !is_on && j < kRangeCount; j++)
1450  is_on = ranges->at(j).Contains(i);
1451  OutSet* set = table.Get(i);
       // The table holds the inverse, so bit 0 must be clear iff i is covered.
1452  CHECK_EQ(is_on, set->Get(0) == false);
1453  }
1454  }
1455  Zone zone(CcTest::i_isolate());
1456  ZoneList<CharacterRange>* ranges =
1457  new(&zone) ZoneList<CharacterRange>(1, &zone);
1458  ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone);
1459  DispatchTable table(&zone);
1460  DispatchTableConstructor cons(&table, false, &zone);
1461  cons.set_choice_index(0);
1462  cons.AddInverse(ranges);
1463  CHECK(!table.Get(0xFFFE)->Get(0));
1464  CHECK(table.Get(0xFFFF)->Get(0));
1465 }
1466 
1467 
// Returns the result of unibrow::Ecma262Canonicalize::Convert for `c`, or
// `c` itself when Convert reports zero output characters. A non-zero count
// must be exactly 1 (enforced with CHECK_EQ).
//
// NOTE(review): the declaration of the `canon` output buffer is on a line
// this numbered listing skips.
1468 static uc32 canonicalize(uc32 c) {
1470  int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1471  if (count == 0) {
1472  return c;
1473  } else {
1474  CHECK_EQ(1, count);
1475  return canon[0];
1476  }
1477 }
1478 
1479 
// Checks canonicalize() in three passes:
//  1. for each 'a'..'z', the lower- and upper-case letters canonicalize to
//     the same value, and un_canonicalize yields exactly {upper, lower};
//  2. every code point from 128 up to (1 << 21) canonicalizes to a value
//     >= 128;
//  3. for every code point below (1 << 16), canonicalize() agrees with the
//     to_upper mapping, except that the character is kept unchanged when the
//     mapping has more than one output character or would map a value
//     >= 128 to one below 128.
//
// NOTE(review): the declarations of `un_canonicalize`, `uncanon`, `to_upper`
// and `upper` are on lines this numbered listing skips.
1480 TEST(LatinCanonicalize) {
1482  for (char lower = 'a'; lower <= 'z'; lower++) {
1483  char upper = lower + ('A' - 'a');
1484  CHECK_EQ(canonicalize(lower), canonicalize(upper));
1486  int length = un_canonicalize.get(lower, '\0', uncanon);
1487  CHECK_EQ(2, length);
1488  CHECK_EQ(upper, uncanon[0]);
1489  CHECK_EQ(lower, uncanon[1]);
1490  }
1491  for (uc32 c = 128; c < (1 << 21); c++)
1492  CHECK_GE(canonicalize(c), 128);
1494  // Canonicalization is only defined for the Basic Multilingual Plane.
1495  for (uc32 c = 0; c < (1 << 16); c++) {
1497  int length = to_upper.get(c, '\0', upper);
       // A zero length is treated as "maps to itself".
1498  if (length == 0) {
1499  length = 1;
1500  upper[0] = c;
1501  }
1502  uc32 u = upper[0];
1503  if (length > 1 || (c >= 128 && u < 128))
1504  u = c;
1505  CHECK_EQ(u, canonicalize(c));
1506  }
1507 }
1508 
1509 
// Returns the result of unibrow::CanonicalizationRange::Convert for `c`, or
// `c` itself when Convert reports zero output characters. A non-zero count
// must be exactly 1 (enforced with CHECK_EQ). The caller below treats the
// returned value as the last character of the block starting at `c`.
//
// NOTE(review): the declaration of the `canon` output buffer is on a line
// this numbered listing skips.
1510 static uc32 CanonRangeEnd(uc32 c) {
1512  int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1513  if (count == 0) {
1514  return c;
1515  } else {
1516  CHECK_EQ(1, count);
1517  return canon[0];
1518  }
1519 }
1520 
1521 
// Walks the range 0..0xFFFF block by block, where each block runs from
// block_start to CanonRangeEnd(block_start). For blocks longer than one
// character, every later character in the block must un-canonicalize to the
// same number of results as the first, with each result offset by the
// character's distance from block_start.
//
// NOTE(review): the declarations of `un_canonicalize`, `first` and `succ`
// are on lines this numbered listing skips.
1522 TEST(RangeCanonicalization) {
1523  // Check that we arrive at the same result when using the basic
1524  // range canonicalization primitives as when using immediate
1525  // canonicalization.
1527  int block_start = 0;
1528  while (block_start <= 0xFFFF) {
1529  uc32 block_end = CanonRangeEnd(block_start);
1530  unsigned block_length = block_end - block_start + 1;
1531  if (block_length > 1) {
1533  int first_length = un_canonicalize.get(block_start, '\0', first);
1534  for (unsigned i = 1; i < block_length; i++) {
1536  int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1537  CHECK_EQ(first_length, succ_length);
1538  for (int j = 0; j < succ_length; j++) {
1539  int calc = first[j] + i;
1540  int found = succ[j];
1541  CHECK_EQ(calc, found);
1542  }
1543  }
1544  }
       // Continue with the first character after this block.
1545  block_start = block_start + block_length;
1546  }
1547 }
1548 
1549 
// For every value below (1 << 16), un-canonicalizes it into `chars` and then
// checks that un-canonicalizing each of those results yields a list of the
// same length with the same entries in the same order.
//
// NOTE(review): the declarations of `un_canonicalize`, `chars` and `chars2`
// are on lines this numbered listing skips.
1550 TEST(UncanonicalizeEquivalence) {
1553  for (int i = 0; i < (1 << 16); i++) {
1554  int length = un_canonicalize.get(i, '\0', chars);
1555  for (int j = 0; j < length; j++) {
1557  int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1558  CHECK_EQ(length, length2);
1559  for (int k = 0; k < length; k++)
1560  CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1561  }
1562  }
1563 }
1564 
1565 
// Calls input.AddCaseEquivalents into a fresh zone-allocated list (with the
// second argument false) and checks that the list has exactly
// expected.length() entries whose from()/to() bounds equal those of
// `expected`, position by position.
1566 static void TestRangeCaseIndependence(CharacterRange input,
1567  Vector<CharacterRange> expected) {
1568  Zone zone(CcTest::i_isolate());
1569  int count = expected.length();
1570  ZoneList<CharacterRange>* list =
1571  new(&zone) ZoneList<CharacterRange>(count, &zone);
1572  input.AddCaseEquivalents(list, false, &zone);
1573  CHECK_EQ(count, list->length());
1574  for (int i = 0; i < list->length(); i++) {
1575  CHECK_EQ(expected[i].from(), list->at(i).from());
1576  CHECK_EQ(expected[i].to(), list->at(i).to());
1577  }
1578 }
1579 
1580 
// Convenience wrapper for the single-range case: stores `expected` in slot 0
// of `vector` and forwards to TestRangeCaseIndependence.
//
// NOTE(review): the declaration of `vector` is on a line this numbered
// listing skips.
1581 static void TestSimpleRangeCaseIndependence(CharacterRange input,
1582  CharacterRange expected) {
1584  vector[0] = expected;
1585  TestRangeCaseIndependence(input, vector);
1586 }
1587 
1588 
// Table of input range -> expected case-equivalent range pairs, each checked
// through TestSimpleRangeCaseIndependence.
//
// NOTE(review): this numbered listing skips several source lines here. The
// first two calls (Singleton('a') and Singleton('z')) are missing their
// second argument and closing parenthesis.
1589 TEST(CharacterRangeCaseIndependence) {
1591  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
1593  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
1595  TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
1596  CharacterRange('A', 'Z'));
1597  TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
1598  CharacterRange('C', 'F'));
1599  TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
1600  CharacterRange('A', 'B'));
1601  TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
1602  CharacterRange('Y', 'Z'));
       // Ranges that extend one character past the letters on both sides still
       // expect only the letter block.
1603  TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
1604  CharacterRange('A', 'Z'));
1605  TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
1606  CharacterRange('a', 'z'));
1607  TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
1608  CharacterRange('c', 'f'));
1609  TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
1610  CharacterRange('a', 'z'));
1611  // Here we need to add [l-z] to complete the case independence of
1612  // [A-Za-z] but we expect [a-z] to be added since we always add a
1613  // whole block at a time.
1614  TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
1615  CharacterRange('a', 'z'));
1616 }
1617 
1618 
// Returns true if `c` lies within any range in `ranges`, treating both
// from() and to() as inclusive bounds. A NULL list contains nothing.
1619 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1620  if (ranges == NULL)
1621  return false;
1622  for (int i = 0; i < ranges->length(); i++) {
1623  CharacterRange range = ranges->at(i);
1624  if (range.from() <= c && c <= range.to())
1625  return true;
1626  }
1627  return false;
1628 }
1629 
1630 
// Splits CharacterRange::Everything() against `overlay` with
// CharacterRange::Split and verifies the result for every value below
// (1 << 16): a value inside the overlay must be in `included` and not in
// `excluded`, and a value outside it must be the reverse.
//
// The overlay is read as consecutive pairs, each covering
// overlay[j] <= i < overlay[j+1].
//
// NOTE(review): the declaration of `overlay` is on a line this numbered
// listing skips.
1631 TEST(CharClassDifference) {
1633  Zone zone(CcTest::i_isolate());
1634  ZoneList<CharacterRange>* base =
1635  new(&zone) ZoneList<CharacterRange>(1, &zone);
1636  base->Add(CharacterRange::Everything(), &zone);
1638  ZoneList<CharacterRange>* included = NULL;
1639  ZoneList<CharacterRange>* excluded = NULL;
1640  CharacterRange::Split(base, overlay, &included, &excluded, &zone);
1641  for (int i = 0; i < (1 << 16); i++) {
1642  bool in_base = InClass(i, base);
1643  if (in_base) {
1644  bool in_overlay = false;
1645  for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
1646  if (overlay[j] <= i && i < overlay[j+1])
1647  in_overlay = true;
1648  }
1649  CHECK_EQ(in_overlay, InClass(i, included));
1650  CHECK_EQ(!in_overlay, InClass(i, excluded));
1651  } else {
1652  CHECK(!InClass(i, included));
1653  CHECK(!InClass(i, excluded));
1654  }
1655  }
1656 }
1657 
1658 
// Checks CharacterSet::Canonicalize on four inputs, reusing one list:
//  1. already sorted, disjoint ranges stay unchanged;
//  2. out-of-order disjoint ranges come back sorted;
//  3. five out-of-order disjoint ranges (including singletons) come back
//     sorted and unmerged;
//  4. three ranges that touch each other (10-19, 20-20, 21-30) come back as
//     the single range 10-30.
//
// The expectations use CHECK_EQ, like the other tests in this file, rather
// than ASSERT_EQ. NOTE(review): ASSERT_EQ is understood to be a debug-only
// assertion in this codebase, which would leave this test checking nothing
// in other build modes; confirm against checks.h.
//
// NOTE(review): this numbered listing skips the line after the TEST header.
1659 TEST(CanonicalizeCharacterSets) {
1661  Zone zone(CcTest::i_isolate());
1662  ZoneList<CharacterRange>* list =
1663  new(&zone) ZoneList<CharacterRange>(4, &zone);
1664  CharacterSet set(list);
1665 
1666  list->Add(CharacterRange(10, 20), &zone);
1667  list->Add(CharacterRange(30, 40), &zone);
1668  list->Add(CharacterRange(50, 60), &zone);
1669  set.Canonicalize();
1670  CHECK_EQ(3, list->length());
1671  CHECK_EQ(10, list->at(0).from());
1672  CHECK_EQ(20, list->at(0).to());
1673  CHECK_EQ(30, list->at(1).from());
1674  CHECK_EQ(40, list->at(1).to());
1675  CHECK_EQ(50, list->at(2).from());
1676  CHECK_EQ(60, list->at(2).to());
1677 
1678  list->Rewind(0);
1679  list->Add(CharacterRange(10, 20), &zone);
1680  list->Add(CharacterRange(50, 60), &zone);
1681  list->Add(CharacterRange(30, 40), &zone);
1682  set.Canonicalize();
1683  CHECK_EQ(3, list->length());
1684  CHECK_EQ(10, list->at(0).from());
1685  CHECK_EQ(20, list->at(0).to());
1686  CHECK_EQ(30, list->at(1).from());
1687  CHECK_EQ(40, list->at(1).to());
1688  CHECK_EQ(50, list->at(2).from());
1689  CHECK_EQ(60, list->at(2).to());
1690 
1691  list->Rewind(0);
1692  list->Add(CharacterRange(30, 40), &zone);
1693  list->Add(CharacterRange(10, 20), &zone);
1694  list->Add(CharacterRange(25, 25), &zone);
1695  list->Add(CharacterRange(100, 100), &zone);
1696  list->Add(CharacterRange(1, 1), &zone);
1697  set.Canonicalize();
1698  CHECK_EQ(5, list->length());
1699  CHECK_EQ(1, list->at(0).from());
1700  CHECK_EQ(1, list->at(0).to());
1701  CHECK_EQ(10, list->at(1).from());
1702  CHECK_EQ(20, list->at(1).to());
1703  CHECK_EQ(25, list->at(2).from());
1704  CHECK_EQ(25, list->at(2).to());
1705  CHECK_EQ(30, list->at(3).from());
1706  CHECK_EQ(40, list->at(3).to());
1707  CHECK_EQ(100, list->at(4).from());
1708  CHECK_EQ(100, list->at(4).to());
1709 
1710  list->Rewind(0);
1711  list->Add(CharacterRange(10, 19), &zone);
1712  list->Add(CharacterRange(21, 30), &zone);
1713  list->Add(CharacterRange(20, 20), &zone);
1714  set.Canonicalize();
1715  CHECK_EQ(1, list->length());
1716  CHECK_EQ(10, list->at(0).from());
1717  CHECK_EQ(30, list->at(0).to());
1718 }
1719 
1720 
// Builds two range lists, l1 (the X ranges) and l2 (the Y ranges), laid out
// so that each pair lands in its own window of `offset` and covers one of
// the overlap cases enumerated in the comments below. Three empty result
// lists are then declared.
//
// NOTE(review): this numbered listing skips some source lines, including two
// just before the result lists are declared, so any checks on l1/l2 are not
// visible here. No visible line uses first_only, second_only or both. The
// column alignment of the X/Y diagrams below was lost in extraction.
1721 TEST(CharacterRangeMerge) {
1723  Zone zone(CcTest::i_isolate());
1724  ZoneList<CharacterRange> l1(4, &zone);
1725  ZoneList<CharacterRange> l2(4, &zone);
1726  // Create all combinations of intersections of ranges, both singletons and
1727  // longer.
1728 
1729  int offset = 0;
1730 
1731  // The five kinds of singleton intersections:
1732  // X
1733  // Y - outside before
1734  // Y - outside touching start
1735  // Y - overlap
1736  // Y - outside touching end
1737  // Y - outside after
1738 
1739  for (int i = 0; i < 5; i++) {
1740  l1.Add(CharacterRange::Singleton(offset + 2), &zone);
1741  l2.Add(CharacterRange::Singleton(offset + i), &zone);
1742  offset += 6;
1743  }
1744 
1745  // The seven kinds of singleton/non-singleton intersections:
1746  // XXX
1747  // Y - outside before
1748  // Y - outside touching start
1749  // Y - inside touching start
1750  // Y - entirely inside
1751  // Y - inside touching end
1752  // Y - outside touching end
1753  // Y - disjoint after
1754 
1755  for (int i = 0; i < 7; i++) {
1756  l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
1757  l2.Add(CharacterRange::Singleton(offset + i), &zone);
1758  offset += 8;
1759  }
1760 
1761  // The eleven kinds of non-singleton intersections:
1762  //
1763  // XXXXXXXX
1764  // YYYY - outside before.
1765  // YYYY - outside touching start.
1766  // YYYY - overlapping start
1767  // YYYY - inside touching start
1768  // YYYY - entirely inside
1769  // YYYY - inside touching end
1770  // YYYY - overlapping end
1771  // YYYY - outside touching end
1772  // YYYY - outside after
1773  // YYYYYYYY - identical
1774  // YYYYYYYYYYYY - containing entirely.
1775 
       // The first nine cases slide a 4-wide Y range across X in steps of 2;
       // the "identical" and "containing" cases are added after the loop.
1776  for (int i = 0; i < 9; i++) {
1777  l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8 in the diagram; NOTE(review): offset+6..offset+15 covers 10 values if both bounds are inclusive.
1778  l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
1779  offset += 22;
1780  }
1781  l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1782  l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1783  offset += 22;
1784  l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1785  l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
1786  offset += 22;
1787 
1788  // Different kinds of multi-range overlap:
1789  // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
1790  // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
1791 
1792  l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
1793  l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
1794  for (int i = 0; i < 6; i++) {
1795  l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
1796  l2.Add(CharacterRange::Singleton(offset + 8), &zone);
1797  offset += 9;
1798  }
1799 
1802 
1803  ZoneList<CharacterRange> first_only(4, &zone);
1804  ZoneList<CharacterRange> second_only(4, &zone);
1805  ZoneList<CharacterRange> both(4, &zone);
1806 }
1807 
1808 
// Calls the pattern-taking Execute helper (defined elsewhere in this file)
// with the regexp \b\w+\b and the flags (false, true, true).
// NOTE(review): the meaning of the three flags is not visible in this chunk,
// and this numbered listing skips the line after the TEST header.
1809 TEST(Graph) {
1811  Execute("\\b\\w+\\b", false, true, true);
1812 }
byte * Address
Definition: globals.h:186
#define CHECK_MIN_MAX(input, min, max)
Definition: test-regexp.cc:142
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter NULL
Definition: flags.cc:269
OutSet * Get(uc16 value)
Definition: jsregexp.cc:5675
static bool Initialize(Deserializer *des)
Definition: v8.cc:61
bool Find(const Key &key, Locator *locator)
#define CHECK_EQ(expected, value)
Definition: checks.h:252
static Vector< const int > GetWordBounds()
Definition: jsregexp.cc:5247
bool Insert(const Key &key, Locator *locator)
static Result Execute(Code *code, String *input, int start_offset, const byte *input_start, const byte *input_end, int *output, int output_size, Isolate *isolate)
void Sort(int(*cmp)(const T *, const T *))
Definition: utils.h:447
static int Convert(uchar c, uchar n, uchar *result, bool *allow_caching_ptr)
Definition: unicode.cc:1763
static Handle< T > cast(Handle< S > that)
Definition: handles.h:75
bool IsRegExpWord(uc16 c)
Handle< String > NewStringFromAscii(Vector< const char > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.h:141
int32_t uc32
Definition: globals.h:310
void AddRange(CharacterRange range, int value, Zone *zone)
Definition: jsregexp.cc:5586
#define CHECK_PARSE_EQ(input, expected)
Definition: test-regexp.cc:140
SmartArrayPointer< const char > ToString(Zone *zone)
Definition: ast.cc:994
static CharacterRange Everything()
Definition: jsregexp.h:274
bool Get(unsigned value)
Definition: jsregexp.cc:5572
#define ASSERT(condition)
Definition: checks.h:329
static const int kMaxWidth
Definition: unicode.h:275
void clear_pending_exception()
Definition: isolate.h:579
#define CHECK(condition)
Definition: checks.h:75
Factory * factory()
Definition: isolate.h:995
#define CHECK_GE(a, b)
Definition: checks.h:261
int get(uchar c, uchar n, uchar *result)
Definition: unicode-inl.h:50
int foo
void Add(Vector< const char > format, Vector< FmtElm > elms)
static void Split(ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
Definition: jsregexp.cc:5282
uint8_t byte
Definition: globals.h:185
Handle< String > NewStringFromUtf8(Vector< const char > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:273
const Register sp
static int Convert(uchar c, uchar n, uchar *result, bool *allow_caching_ptr)
Definition: unicode.cc:1307
static int NoValue()
Definition: test-regexp.cc:545
#define CHECK_PARSE_ERROR(input)
Definition: test-regexp.cc:139
static CharacterRange Range(uc16 from, uc16 to)
Definition: jsregexp.h:270
static const int kInfinity
Definition: ast.h:2502
static const int kMaxWidth
Definition: unicode.h:253
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5367
void AddInverse(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5950
static int Compare(int a, int b)
Definition: test-regexp.cc:546
virtual int min_match()=0
void AddCaseEquivalents(ZoneList< CharacterRange > *ranges, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:5301
#define CHECK_MAPS_EQUAL()
bool IsRegExpNewline(uc16 c)
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter trace hydrogen to given file name trace inlining decisions trace store elimination trace all use positions trace global value numbering trace hydrogen escape analysis trace the tracking of allocation sites trace map generalization environment for every instruction deoptimize every n garbage collections put a break point before deoptimizing deoptimize uncommon cases use on stack replacement trace array bounds check elimination perform array index dehoisting use load elimination use store elimination use constant folding eliminate unreachable code number of stress runs when picking a function to watch for shared function not JSFunction itself flushes the cache of optimized code for closures on every GC functions with arguments object maximum number of escape analysis fix point iterations allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms concurrent on 
stack replacement do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes number of stack frames inspected by the profiler percentage of ICs that must have type info to allow optimization extra verbose compilation tracing generate extra code(assertions) for debugging") DEFINE_bool(code_comments
static RegExpImpl::IrregexpResult Match(Isolate *isolate, Handle< ByteArray > code, Handle< String > subject, int *captures, int start_position)
Handle< String > NewStringFromTwoByte(Vector< const uc16 > str, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:282
int length() const
Definition: utils.h:420
#define CHECK_SIMPLE(input, simple)
Definition: test-regexp.cc:141
static i::Isolate * i_isolate()
Definition: cctest.h:102
bool has_pending_exception()
Definition: isolate.h:587
bool FindLeastGreaterThan(const Key &key, Locator *locator)
Vector< const char > CStrVector(const char *data)
Definition: utils.h:574
static const int kNoKey
Definition: test-regexp.cc:544
bool is_null() const
Definition: handles.h:81
bool FindGreatestLessThan(const Key &key, Locator *locator)
static const int kMaxWidth
Definition: unicode.h:268
uint16_t uc16
Definition: globals.h:309
#define ASSERT_EQ(v1, v2)
Definition: checks.h:330
static void DotPrint(const char *label, RegExpNode *node, bool ignore_case)
void USE(T)
Definition: globals.h:341
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
SmartArrayPointer< const char > ToCString() const
virtual int max_match()=0
static const int kMaxWidth
Definition: unicode.h:261
static void AddClassEscape(uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5199
Definition: cctest.h:83
static bool Initialize()
Definition: api.cc:4967
static CharacterRange Singleton(uc16 value)
Definition: jsregexp.h:267
unsigned int uchar
Definition: unicode.h:40
static CompilationResult Compile(RegExpCompileData *input, bool ignore_case, bool global, bool multiline, Handle< String > pattern, Handle< String > sample_subject, bool is_ascii, Zone *zone)
Definition: jsregexp.cc:6001
static v8::Isolate * isolate()
Definition: cctest.h:96