28 #ifndef V8_STRING_SEARCH_H_
29 #define V8_STRING_SEARCH_H_
76 template <
typename PatternChar,
typename SubjectChar>
83 if (
sizeof(PatternChar) >
sizeof(SubjectChar)) {
85 strategy_ = &FailSearch;
89 int pattern_length = pattern_.
length();
91 if (pattern_length == 1) {
92 strategy_ = &SingleCharSearch;
95 strategy_ = &LinearSearch;
98 strategy_ = &InitialSearch;
102 return strategy_(
this, subject, index);
106 if (
sizeof(PatternChar) == 1) {
110 ASSERT(
sizeof(PatternChar) == 2);
117 typedef int (*SearchFunction)(
128 static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
129 Vector<const SubjectChar> subject,
132 static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
133 Vector<const SubjectChar> subject,
136 static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
137 Vector<const SubjectChar> subject,
140 static int BoyerMooreHorspoolSearch(
141 StringSearch<PatternChar, SubjectChar>* search,
142 Vector<const SubjectChar> subject,
145 static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
146 Vector<const SubjectChar> subject,
149 void PopulateBoyerMooreHorspoolTable();
151 void PopulateBoyerMooreTable();
153 static inline int CharOccurrence(
int* bad_char_occurrence,
154 SubjectChar char_code) {
155 if (
sizeof(SubjectChar) == 1) {
156 return bad_char_occurrence[
static_cast<int>(char_code)];
158 if (
sizeof(PatternChar) == 1) {
162 return bad_char_occurrence[
static_cast<unsigned int>(char_code)];
166 return bad_char_occurrence[equiv_class];
176 int* bad_char_table() {
177 return isolate_->bad_char_shift_table();
181 int* good_suffix_shift_table() {
184 return isolate_->good_suffix_shift_table() - start_;
189 int* suffix_table() {
192 return isolate_->suffix_table() - start_;
197 Vector<const PatternChar> pattern_;
199 SearchFunction strategy_;
209 template <
typename PatternChar,
typename SubjectChar>
210 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
211 StringSearch<PatternChar, SubjectChar>* search,
212 Vector<const SubjectChar> subject,
215 PatternChar pattern_first_char = search->pattern_[0];
217 if (
sizeof(SubjectChar) == 1 &&
sizeof(PatternChar) == 1) {
218 const SubjectChar* pos =
reinterpret_cast<const SubjectChar*
>(
219 memchr(subject.start() + i,
221 subject.length() - i));
222 if (pos ==
NULL)
return -1;
223 return static_cast<int>(pos - subject.start());
225 if (
sizeof(PatternChar) >
sizeof(SubjectChar)) {
230 SubjectChar search_char =
static_cast<SubjectChar
>(pattern_first_char);
231 int n = subject.length();
233 if (subject[i++] == search_char)
return i - 1;
244 template <
typename PatternChar,
typename SubjectChar>
246 const SubjectChar* subject,
251 if (pattern[pos] != subject[pos]) {
255 }
while (pos < length);
261 template <
typename PatternChar,
typename SubjectChar>
262 int StringSearch<PatternChar, SubjectChar>::LinearSearch(
263 StringSearch<PatternChar, SubjectChar>* search,
264 Vector<const SubjectChar> subject,
266 Vector<const PatternChar> pattern = search->pattern_;
267 ASSERT(pattern.length() > 1);
268 int pattern_length = pattern.length();
269 PatternChar pattern_first_char = pattern[0];
271 int n = subject.length() - pattern_length;
273 if (
sizeof(SubjectChar) == 1 &&
sizeof(PatternChar) == 1) {
274 const SubjectChar* pos =
reinterpret_cast<const SubjectChar*
>(
275 memchr(subject.start() + i,
278 if (pos ==
NULL)
return -1;
279 i =
static_cast<int>(pos - subject.start()) + 1;
281 if (subject[i++] != pattern_first_char)
continue;
287 pattern_length - 1)) {
298 template <
typename PatternChar,
typename SubjectChar>
299 int StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
300 StringSearch<PatternChar, SubjectChar>* search,
301 Vector<const SubjectChar> subject,
303 Vector<const PatternChar> pattern = search->pattern_;
304 int subject_length = subject.length();
305 int pattern_length = pattern.length();
307 int start = search->start_;
309 int* bad_char_occurence = search->bad_char_table();
310 int* good_suffix_shift = search->good_suffix_shift_table();
312 PatternChar last_char = pattern[pattern_length - 1];
313 int index = start_index;
315 while (index <= subject_length - pattern_length) {
316 int j = pattern_length - 1;
318 while (last_char != (c = subject[index + j])) {
320 j - CharOccurrence(bad_char_occurence, c);
322 if (index > subject_length - pattern_length) {
326 while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
329 }
else if (j < start) {
332 index += pattern_length - 1
333 - CharOccurrence(bad_char_occurence,
334 static_cast<SubjectChar>(last_char));
336 int gs_shift = good_suffix_shift[j + 1];
338 CharOccurrence(bad_char_occurence, c);
339 int shift = j - bc_occ;
340 if (gs_shift > shift) {
351 template <
typename PatternChar,
typename SubjectChar>
352 void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
353 int pattern_length = pattern_.length();
354 const PatternChar* pattern = pattern_.start();
358 int length = pattern_length - start;
362 int* shift_table = good_suffix_shift_table();
363 int* suffix_table = this->suffix_table();
366 for (
int i = start; i < pattern_length; i++) {
367 shift_table[i] = length;
369 shift_table[pattern_length] = 1;
370 suffix_table[pattern_length] = pattern_length + 1;
372 if (pattern_length <= start) {
377 PatternChar last_char = pattern[pattern_length - 1];
378 int suffix = pattern_length + 1;
380 int i = pattern_length;
382 PatternChar c = pattern[i - 1];
383 while (suffix <= pattern_length && c != pattern[suffix - 1]) {
384 if (shift_table[suffix] == length) {
385 shift_table[suffix] = suffix - i;
387 suffix = suffix_table[suffix];
389 suffix_table[--i] = --suffix;
390 if (suffix == pattern_length) {
392 while ((i > start) && (pattern[i - 1] != last_char)) {
393 if (shift_table[pattern_length] == length) {
394 shift_table[pattern_length] = pattern_length - i;
396 suffix_table[--i] = pattern_length;
399 suffix_table[--i] = --suffix;
405 if (suffix < pattern_length) {
406 for (
int i = start; i <= pattern_length; i++) {
407 if (shift_table[i] == length) {
408 shift_table[i] = suffix - start;
411 suffix = suffix_table[suffix];
421 template <
typename PatternChar,
typename SubjectChar>
422 int StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
423 StringSearch<PatternChar, SubjectChar>* search,
424 Vector<const SubjectChar> subject,
426 Vector<const PatternChar> pattern = search->pattern_;
427 int subject_length = subject.length();
428 int pattern_length = pattern.length();
429 int* char_occurrences = search->bad_char_table();
430 int badness = -pattern_length;
433 PatternChar last_char = pattern[pattern_length - 1];
434 int last_char_shift = pattern_length - 1 -
435 CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
437 int index = start_index;
438 while (index <= subject_length - pattern_length) {
439 int j = pattern_length - 1;
441 while (last_char != (subject_char = subject[index + j])) {
442 int bc_occ = CharOccurrence(char_occurrences, subject_char);
443 int shift = j - bc_occ;
445 badness += 1 - shift;
446 if (index > subject_length - pattern_length) {
451 while (j >= 0 && pattern[j] == (subject[index + j])) j--;
455 index += last_char_shift;
460 badness += (pattern_length - j) - last_char_shift;
462 search->PopulateBoyerMooreTable();
463 search->strategy_ = &BoyerMooreSearch;
464 return BoyerMooreSearch(search, subject, index);
472 template <
typename PatternChar,
typename SubjectChar>
473 void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
474 int pattern_length = pattern_.length();
476 int* bad_char_occurrence = bad_char_table();
483 int table_size = AlphabetSize();
485 memset(bad_char_occurrence,
487 table_size *
sizeof(*bad_char_occurrence));
489 for (
int i = 0; i < table_size; i++) {
490 bad_char_occurrence[i] = start - 1;
493 for (
int i = start; i < pattern_length - 1; i++) {
494 PatternChar c = pattern_[i];
495 int bucket = (
sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
496 bad_char_occurrence[bucket] = i;
506 template <
typename PatternChar,
typename SubjectChar>
507 int StringSearch<PatternChar, SubjectChar>::InitialSearch(
508 StringSearch<PatternChar, SubjectChar>* search,
509 Vector<const SubjectChar> subject,
511 Vector<const PatternChar> pattern = search->pattern_;
512 int pattern_length = pattern.length();
516 int badness = -10 - (pattern_length << 2);
520 PatternChar pattern_first_char = pattern[0];
521 for (
int i = index, n = subject.length() - pattern_length; i <= n; i++) {
524 if (
sizeof(SubjectChar) == 1 &&
sizeof(PatternChar) == 1) {
525 const SubjectChar* pos =
reinterpret_cast<const SubjectChar*
>(
526 memchr(subject.start() + i,
532 i =
static_cast<int>(pos - subject.start());
534 if (subject[i] != pattern_first_char)
continue;
538 if (pattern[j] != subject[i + j]) {
542 }
while (j < pattern_length);
543 if (j == pattern_length) {
548 search->PopulateBoyerMooreHorspoolTable();
549 search->strategy_ = &BoyerMooreHorspoolSearch;
550 return BoyerMooreHorspoolSearch(search, subject, i);
561 template <
typename SubjectChar,
typename PatternChar>
567 return search.
Search(subject, start_index);
572 #endif // V8_STRING_SEARCH_H_
static int AlphabetSize()
bool CharCompare(const PatternChar *pattern, const SubjectChar *subject, int length)
static const int kBMMaxShift
int Search(Vector< const SubjectChar > subject, int index)
#define ASSERT(condition)
static bool IsAscii(const char *chars, int length)
static const int kAsciiAlphabetSize
static const int kUC16AlphabetSize
StringSearch(Isolate *isolate, Vector< const PatternChar > pattern)
int SearchString(Isolate *isolate, Vector< const SubjectChar > subject, Vector< const PatternChar > pattern, int start_index)
#define ASSERT_EQ(v1, v2)
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination trace on stack replacement optimize closures functions with arguments object optimize functions containing for in loops profiler considers IC stability primitive functions trigger their own optimization re try self optimization if it failed insert an interrupt check at function exit execution budget before interrupt is triggered call count before self optimization self_optimization count_based_interrupts weighted_back_edges trace_opt emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 enable use of ARMv7 instructions if enable use of MIPS FPU instructions if NULL
static const int kBMMaxShift
static bool IsAsciiString(Vector< const char >)
static const unsigned kMaxAsciiCharCodeU
static const int kUC16AlphabetSize
static bool IsAsciiString(Vector< const uc16 > string)
static const int kBMMinPatternLength