28 #ifndef V8_STRING_SEARCH_H_
29 #define V8_STRING_SEARCH_H_
76 template <
typename PatternChar,
typename SubjectChar>
83 if (
sizeof(PatternChar) >
sizeof(SubjectChar)) {
85 strategy_ = &FailSearch;
89 int pattern_length = pattern_.
length();
91 if (pattern_length == 1) {
92 strategy_ = &SingleCharSearch;
95 strategy_ = &LinearSearch;
98 strategy_ = &InitialSearch;
102 return strategy_(
this, subject, index);
106 if (
sizeof(PatternChar) == 1) {
110 ASSERT(
sizeof(PatternChar) == 2);
117 typedef int (*SearchFunction)(
128 static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
129 Vector<const SubjectChar> subject,
132 static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
133 Vector<const SubjectChar> subject,
136 static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
137 Vector<const SubjectChar> subject,
140 static int BoyerMooreHorspoolSearch(
141 StringSearch<PatternChar, SubjectChar>* search,
142 Vector<const SubjectChar> subject,
145 static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
146 Vector<const SubjectChar> subject,
149 void PopulateBoyerMooreHorspoolTable();
151 void PopulateBoyerMooreTable();
153 static inline bool exceedsOneByte(uint8_t c) {
157 static inline bool exceedsOneByte(
uint16_t c) {
161 static inline int CharOccurrence(
int* bad_char_occurrence,
162 SubjectChar char_code) {
163 if (
sizeof(SubjectChar) == 1) {
164 return bad_char_occurrence[
static_cast<int>(char_code)];
166 if (
sizeof(PatternChar) == 1) {
167 if (exceedsOneByte(char_code)) {
170 return bad_char_occurrence[
static_cast<unsigned int>(char_code)];
174 return bad_char_occurrence[equiv_class];
184 int* bad_char_table() {
185 return isolate_->bad_char_shift_table();
189 int* good_suffix_shift_table() {
192 return isolate_->good_suffix_shift_table() - start_;
197 int* suffix_table() {
200 return isolate_->suffix_table() - start_;
205 Vector<const PatternChar> pattern_;
207 SearchFunction strategy_;
217 template <
typename PatternChar,
typename SubjectChar>
218 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
219 StringSearch<PatternChar, SubjectChar>* search,
220 Vector<const SubjectChar> subject,
223 PatternChar pattern_first_char = search->pattern_[0];
225 if (
sizeof(SubjectChar) == 1 &&
sizeof(PatternChar) == 1) {
226 const SubjectChar* pos =
reinterpret_cast<const SubjectChar*
>(
227 memchr(subject.start() + i,
229 subject.length() - i));
230 if (pos ==
NULL)
return -1;
231 return static_cast<int>(pos - subject.start());
233 if (
sizeof(PatternChar) >
sizeof(SubjectChar)) {
234 if (exceedsOneByte(pattern_first_char)) {
238 SubjectChar search_char =
static_cast<SubjectChar
>(pattern_first_char);
239 int n = subject.length();
241 if (subject[i++] == search_char)
return i - 1;
252 template <
typename PatternChar,
typename SubjectChar>
254 const SubjectChar* subject,
259 if (pattern[pos] != subject[pos]) {
263 }
while (pos < length);
269 template <
typename PatternChar,
typename SubjectChar>
271 StringSearch<PatternChar, SubjectChar>* search,
272 Vector<const SubjectChar> subject,
274 Vector<const PatternChar> pattern = search->pattern_;
275 ASSERT(pattern.length() > 1);
276 int pattern_length = pattern.length();
277 PatternChar pattern_first_char = pattern[0];
279 int n = subject.length() - pattern_length;
281 if (
sizeof(SubjectChar) == 1 &&
sizeof(PatternChar) == 1) {
282 const SubjectChar* pos =
reinterpret_cast<const SubjectChar*
>(
283 memchr(subject.start() + i,
286 if (pos ==
NULL)
return -1;
287 i =
static_cast<int>(pos - subject.start()) + 1;
289 if (subject[i++] != pattern_first_char)
continue;
295 pattern_length - 1)) {
306 template <
typename PatternChar,
typename SubjectChar>
307 int StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
308 StringSearch<PatternChar, SubjectChar>* search,
309 Vector<const SubjectChar> subject,
311 Vector<const PatternChar> pattern = search->pattern_;
312 int subject_length = subject.length();
313 int pattern_length = pattern.length();
315 int start = search->start_;
317 int* bad_char_occurence = search->bad_char_table();
318 int* good_suffix_shift = search->good_suffix_shift_table();
320 PatternChar last_char = pattern[pattern_length - 1];
321 int index = start_index;
323 while (index <= subject_length - pattern_length) {
324 int j = pattern_length - 1;
326 while (last_char != (c = subject[index + j])) {
328 j - CharOccurrence(bad_char_occurence, c);
330 if (index > subject_length - pattern_length) {
334 while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
337 }
else if (j < start) {
340 index += pattern_length - 1
341 - CharOccurrence(bad_char_occurence,
342 static_cast<SubjectChar>(last_char));
344 int gs_shift = good_suffix_shift[j + 1];
346 CharOccurrence(bad_char_occurence, c);
347 int shift = j - bc_occ;
348 if (gs_shift > shift) {
359 template <
typename PatternChar,
typename SubjectChar>
360 void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
361 int pattern_length = pattern_.length();
362 const PatternChar* pattern = pattern_.start();
366 int length = pattern_length - start;
370 int* shift_table = good_suffix_shift_table();
371 int* suffix_table = this->suffix_table();
374 for (
int i = start; i < pattern_length; i++) {
375 shift_table[i] = length;
377 shift_table[pattern_length] = 1;
378 suffix_table[pattern_length] = pattern_length + 1;
380 if (pattern_length <= start) {
385 PatternChar last_char = pattern[pattern_length - 1];
386 int suffix = pattern_length + 1;
388 int i = pattern_length;
390 PatternChar c = pattern[i - 1];
391 while (suffix <= pattern_length && c != pattern[suffix - 1]) {
392 if (shift_table[suffix] == length) {
393 shift_table[suffix] = suffix - i;
395 suffix = suffix_table[suffix];
397 suffix_table[--i] = --suffix;
398 if (suffix == pattern_length) {
400 while ((i > start) && (pattern[i - 1] != last_char)) {
401 if (shift_table[pattern_length] == length) {
402 shift_table[pattern_length] = pattern_length - i;
404 suffix_table[--i] = pattern_length;
407 suffix_table[--i] = --suffix;
413 if (suffix < pattern_length) {
414 for (
int i = start; i <= pattern_length; i++) {
415 if (shift_table[i] == length) {
416 shift_table[i] = suffix - start;
419 suffix = suffix_table[suffix];
429 template <
typename PatternChar,
typename SubjectChar>
430 int StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
431 StringSearch<PatternChar, SubjectChar>* search,
432 Vector<const SubjectChar> subject,
434 Vector<const PatternChar> pattern = search->pattern_;
435 int subject_length = subject.length();
436 int pattern_length = pattern.length();
437 int* char_occurrences = search->bad_char_table();
438 int badness = -pattern_length;
441 PatternChar last_char = pattern[pattern_length - 1];
442 int last_char_shift = pattern_length - 1 -
443 CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
445 int index = start_index;
446 while (index <= subject_length - pattern_length) {
447 int j = pattern_length - 1;
449 while (last_char != (subject_char = subject[index + j])) {
450 int bc_occ = CharOccurrence(char_occurrences, subject_char);
451 int shift = j - bc_occ;
453 badness += 1 -
shift;
454 if (index > subject_length - pattern_length) {
459 while (j >= 0 && pattern[j] == (subject[index + j])) j--;
463 index += last_char_shift;
468 badness += (pattern_length - j) - last_char_shift;
470 search->PopulateBoyerMooreTable();
471 search->strategy_ = &BoyerMooreSearch;
472 return BoyerMooreSearch(search, subject, index);
480 template <
typename PatternChar,
typename SubjectChar>
481 void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
482 int pattern_length = pattern_.length();
484 int* bad_char_occurrence = bad_char_table();
491 int table_size = AlphabetSize();
493 memset(bad_char_occurrence,
495 table_size *
sizeof(*bad_char_occurrence));
497 for (
int i = 0; i < table_size; i++) {
498 bad_char_occurrence[i] = start - 1;
501 for (
int i = start; i < pattern_length - 1; i++) {
502 PatternChar c = pattern_[i];
503 int bucket = (
sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
504 bad_char_occurrence[bucket] = i;
514 template <
typename PatternChar,
typename SubjectChar>
515 int StringSearch<PatternChar, SubjectChar>::InitialSearch(
516 StringSearch<PatternChar, SubjectChar>* search,
517 Vector<const SubjectChar> subject,
519 Vector<const PatternChar> pattern = search->pattern_;
520 int pattern_length = pattern.length();
524 int badness = -10 - (pattern_length << 2);
528 PatternChar pattern_first_char = pattern[0];
529 for (
int i = index, n = subject.length() - pattern_length; i <= n; i++) {
532 if (
sizeof(SubjectChar) == 1 &&
sizeof(PatternChar) == 1) {
533 const SubjectChar* pos =
reinterpret_cast<const SubjectChar*
>(
534 memchr(subject.start() + i,
540 i =
static_cast<int>(pos - subject.start());
542 if (subject[i] != pattern_first_char)
continue;
546 if (pattern[j] != subject[i + j]) {
550 }
while (j < pattern_length);
551 if (j == pattern_length) {
556 search->PopulateBoyerMooreHorspoolTable();
557 search->strategy_ = &BoyerMooreHorspoolSearch;
558 return BoyerMooreHorspoolSearch(search, subject, i);
569 template <
typename SubjectChar,
typename PatternChar>
575 return search.
Search(subject, start_index);
580 #endif // V8_STRING_SEARCH_H_
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter NULL
static int AlphabetSize()
bool CharCompare(const PatternChar *pattern, const SubjectChar *subject, int length)
static const int kBMMaxShift
static bool IsOneByteString(Vector< const uint8_t > string)
int Search(Vector< const SubjectChar > subject, int index)
#define ASSERT(condition)
static const uint32_t kMaxOneByteCharCodeU
static bool IsOneByte(const uc16 *chars, int length)
static const int kAsciiAlphabetSize
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array shift
static const int kUC16AlphabetSize
StringSearch(Isolate *isolate, Vector< const PatternChar > pattern)
static bool IsOneByteString(Vector< const uc16 > string)
int SearchString(Isolate *isolate, Vector< const SubjectChar > subject, Vector< const PatternChar > pattern, int start_index)
#define ASSERT_EQ(v1, v2)
static const int kBMMaxShift
static const int kUC16AlphabetSize
static const int kBMMinPatternLength
int LinearSearch(T *array, Name *name, int len, int valid_entries)