5 #ifndef SRC_STRING_SEARCH_H_ 6 #define SRC_STRING_SEARCH_H_ 8 #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS 14 namespace stringsearch {
24 static const uint32_t kMaxOneByteCharCodeU = 0xff;
29 Vector(T*
data,
size_t length,
bool isForward)
30 : start_(data), length_(length), is_forward_(isForward) {
31 CHECK(length > 0 && data !=
nullptr);
36 const T* start()
const {
return start_; }
39 size_t length()
const {
return length_; }
43 size_t forward()
const {
return is_forward_; }
46 T& operator[](
size_t index)
const {
47 CHECK(index < length_);
48 return start_[is_forward_ ? index : (length_ - index - 1)];
64 class StringSearchBase {
70 static const int kBMMaxShift = 250;
79 static const int kLatin1AlphabetSize = 256;
80 static const int kUC16AlphabetSize = 256;
85 static const int kBMMinPatternLength = 8;
88 static int kBadCharShiftTable[kUC16AlphabetSize];
90 static int kGoodSuffixShiftTable[kBMMaxShift + 1];
93 static int kSuffixTable[kBMMaxShift + 1];
96 template <
typename Char>
97 class StringSearch :
private StringSearchBase {
99 explicit StringSearch(Vector<const Char> pattern)
100 : pattern_(pattern), start_(0) {
101 if (pattern.length() >= kBMMaxShift) {
102 start_ = pattern.length() - kBMMaxShift;
105 size_t pattern_length = pattern_.length();
106 CHECK_GT(pattern_length, 0);
107 if (pattern_length < kBMMinPatternLength) {
108 if (pattern_length == 1) {
109 strategy_ = &SingleCharSearch;
112 strategy_ = &LinearSearch;
115 strategy_ = &InitialSearch;
118 size_t Search(Vector<const Char> subject,
size_t index) {
119 return strategy_(
this, subject, index);
122 static inline int AlphabetSize() {
123 if (
sizeof(Char) == 1) {
125 return kLatin1AlphabetSize;
128 return kUC16AlphabetSize;
131 static_assert(
sizeof(Char) ==
sizeof(uint8_t) ||
132 sizeof(Char) ==
sizeof(uint16_t),
133 "sizeof(Char) == sizeof(uint16_t) || sizeof(uint8_t)");
137 typedef size_t (*SearchFunction)(
142 static size_t SingleCharSearch(StringSearch<Char>* search,
143 Vector<const Char> subject,
146 static size_t LinearSearch(StringSearch<Char>* search,
147 Vector<const Char> subject,
150 static size_t InitialSearch(StringSearch<Char>* search,
151 Vector<const Char> subject,
154 static size_t BoyerMooreHorspoolSearch(
155 StringSearch<Char>* search,
156 Vector<const Char> subject,
159 static size_t BoyerMooreSearch(StringSearch<Char>* search,
160 Vector<const Char> subject,
163 void PopulateBoyerMooreHorspoolTable();
165 void PopulateBoyerMooreTable();
167 static inline int CharOccurrence(
int* bad_char_occurrence,
169 if (
sizeof(Char) == 1) {
170 return bad_char_occurrence[
static_cast<int>(char_code)];
173 int equiv_class = char_code % kUC16AlphabetSize;
174 return bad_char_occurrence[equiv_class];
180 int* bad_char_table() {
return kBadCharShiftTable; }
183 int* good_suffix_shift_table() {
186 return kGoodSuffixShiftTable - start_;
191 int* suffix_table() {
194 return kSuffixTable - start_;
198 Vector<const Char> pattern_;
200 SearchFunction strategy_;
206 template <
typename T,
typename U>
207 inline T AlignDown(T value, U alignment) {
208 return reinterpret_cast<T
>(
209 (
reinterpret_cast<uintptr_t
>(value) & ~(alignment - 1)));
213 inline uint8_t GetHighestValueByte(uint16_t character) {
214 return Max(static_cast<uint8_t>(character & 0xFF),
215 static_cast<uint8_t>(character >> 8));
219 inline uint8_t GetHighestValueByte(uint8_t character) {
return character; }
225 inline const void* MemrchrFill(
const void* haystack, uint8_t needle,
226 size_t haystack_len) {
228 return memrchr(haystack, needle, haystack_len);
230 const uint8_t* haystack8 =
static_cast<const uint8_t*
>(haystack);
231 for (
size_t i = haystack_len - 1; i !=
static_cast<size_t>(-1); i--) {
232 if (haystack8[i] == needle) {
233 return haystack8 + i;
243 template <
typename Char>
244 inline size_t FindFirstCharacter(Vector<const Char> pattern,
245 Vector<const Char> subject,
size_t index) {
246 const Char pattern_first_char = pattern[0];
247 const size_t max_n = (subject.length() - pattern.length() + 1);
251 const uint8_t search_byte = GetHighestValueByte(pattern_first_char);
254 const size_t bytes_to_search = (max_n - pos) *
sizeof(Char);
255 const void* void_pos;
256 if (subject.forward()) {
258 CHECK_LE(pos, max_n);
259 CHECK_LE(max_n - pos, SIZE_MAX /
sizeof(Char));
260 void_pos = memchr(subject.start() + pos, search_byte, bytes_to_search);
262 CHECK_LE(pos, subject.length());
263 CHECK_LE(subject.length() - pos, SIZE_MAX /
sizeof(Char));
264 void_pos = MemrchrFill(subject.start() + pattern.length() - 1,
268 const Char* char_pos =
static_cast<const Char*
>(void_pos);
269 if (char_pos ==
nullptr)
270 return subject.length();
273 char_pos = AlignDown(char_pos,
sizeof(Char));
274 size_t raw_pos =
static_cast<size_t>(char_pos - subject.start());
275 pos = subject.forward() ? raw_pos : (subject.length() - raw_pos - 1);
276 if (subject[pos] == pattern_first_char) {
281 }
while (++pos < max_n);
283 return subject.length();
290 inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,
291 Vector<const uint8_t> subject,
293 const uint8_t pattern_first_char = pattern[0];
294 const size_t subj_len = subject.length();
295 const size_t max_n = (subject.length() - pattern.length() + 1);
298 if (subject.forward()) {
299 pos = memchr(subject.start() + index, pattern_first_char, max_n - index);
301 pos = MemrchrFill(subject.start() + pattern.length() - 1,
305 const uint8_t* char_pos =
static_cast<const uint8_t*
>(pos);
306 if (char_pos ==
nullptr) {
310 size_t raw_pos =
static_cast<size_t>(char_pos - subject.start());
311 return subject.forward() ? raw_pos : (subj_len - raw_pos - 1);
318 template <
typename Char>
319 size_t StringSearch<Char>::SingleCharSearch(
320 StringSearch<Char>* search,
321 Vector<const Char> subject,
323 CHECK_EQ(1, search->pattern_.length());
324 return FindFirstCharacter(search->pattern_, subject, index);
332 template <
typename Char>
333 size_t StringSearch<Char>::LinearSearch(
334 StringSearch<Char>* search,
335 Vector<const Char> subject,
337 Vector<const Char> pattern = search->pattern_;
338 CHECK_GT(pattern.length(), 1);
339 const size_t pattern_length = pattern.length();
340 const size_t n = subject.length() - pattern_length;
341 for (
size_t i = index; i <=
n; i++) {
342 i = FindFirstCharacter(pattern, subject, i);
343 if (i == subject.length())
344 return subject.length();
348 for (
size_t j = 1; j < pattern_length; j++) {
349 if (pattern[j] != subject[i + j]) {
358 return subject.length();
365 template <
typename Char>
366 size_t StringSearch<Char>::BoyerMooreSearch(
367 StringSearch<Char>* search,
368 Vector<const Char> subject,
369 size_t start_index) {
370 Vector<const Char> pattern = search->pattern_;
371 const size_t subject_length = subject.length();
372 const size_t pattern_length = pattern.length();
374 size_t start = search->start_;
376 int* bad_char_occurrence = search->bad_char_table();
377 int* good_suffix_shift = search->good_suffix_shift_table();
379 Char last_char = pattern[pattern_length - 1];
380 size_t index = start_index;
382 while (index <= subject_length - pattern_length) {
383 size_t j = pattern_length - 1;
385 while (last_char != (c = subject[index + j])) {
386 int shift = j - CharOccurrence(bad_char_occurrence, c);
388 if (index > subject_length - pattern_length) {
389 return subject.length();
392 while (pattern[j] == (c = subject[index + j])) {
401 index += pattern_length - 1 -
402 CharOccurrence(bad_char_occurrence,
403 static_cast<Char>(last_char));
405 int gs_shift = good_suffix_shift[j + 1];
406 int bc_occ = CharOccurrence(bad_char_occurrence, c);
407 int shift = j - bc_occ;
408 if (gs_shift > shift) {
415 return subject.length();
418 template <
typename Char>
419 void StringSearch<Char>::PopulateBoyerMooreTable() {
420 const size_t pattern_length = pattern_.length();
421 Vector<const Char> pattern = pattern_;
424 const size_t start = start_;
425 const size_t length = pattern_length - start;
429 int* shift_table = good_suffix_shift_table();
430 int* suffix_table = this->suffix_table();
433 for (
size_t i = start; i < pattern_length; i++) {
434 shift_table[i] = length;
436 shift_table[pattern_length] = 1;
437 suffix_table[pattern_length] = pattern_length + 1;
439 if (pattern_length <= start) {
444 Char last_char = pattern_[pattern_length - 1];
445 size_t suffix = pattern_length + 1;
447 size_t i = pattern_length;
449 Char c = pattern[i - 1];
450 while (suffix <= pattern_length && c != pattern[suffix - 1]) {
451 if (static_cast<size_t>(shift_table[suffix]) == length) {
452 shift_table[suffix] = suffix - i;
454 suffix = suffix_table[suffix];
456 suffix_table[--i] = --suffix;
457 if (suffix == pattern_length) {
459 while ((i > start) && (pattern[i - 1] != last_char)) {
460 if (static_cast<size_t>(shift_table[pattern_length]) == length) {
461 shift_table[pattern_length] = pattern_length - i;
463 suffix_table[--i] = pattern_length;
466 suffix_table[--i] = --suffix;
472 if (suffix < pattern_length) {
473 for (
size_t i = start; i <= pattern_length; i++) {
474 if (static_cast<size_t>(shift_table[i]) == length) {
475 shift_table[i] = suffix - start;
478 suffix = suffix_table[suffix];
488 template <
typename Char>
489 size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
490 StringSearch<Char>* search,
491 Vector<const Char> subject,
492 size_t start_index) {
493 Vector<const Char> pattern = search->pattern_;
494 const size_t subject_length = subject.length();
495 const size_t pattern_length = pattern.length();
496 int* char_occurrences = search->bad_char_table();
497 int64_t badness = -pattern_length;
500 Char last_char = pattern[pattern_length - 1];
501 int last_char_shift =
503 CharOccurrence(char_occurrences, static_cast<Char>(last_char));
506 size_t index = start_index;
507 while (index <= subject_length - pattern_length) {
508 size_t j = pattern_length - 1;
510 while (last_char != (subject_char = subject[index + j])) {
511 int bc_occ = CharOccurrence(char_occurrences, subject_char);
512 int shift = j - bc_occ;
514 badness += 1 - shift;
515 if (index > subject_length - pattern_length) {
516 return subject_length;
520 while (pattern[j] == (subject[index + j])) {
526 index += last_char_shift;
531 badness += (pattern_length - j) - last_char_shift;
533 search->PopulateBoyerMooreTable();
534 search->strategy_ = &BoyerMooreSearch;
535 return BoyerMooreSearch(search, subject, index);
538 return subject.length();
541 template <
typename Char>
542 void StringSearch<Char>::PopulateBoyerMooreHorspoolTable() {
543 const size_t pattern_length = pattern_.length();
545 int* bad_char_occurrence = bad_char_table();
548 const size_t start = start_;
552 const size_t table_size = AlphabetSize();
555 memset(bad_char_occurrence, -1, table_size *
sizeof(*bad_char_occurrence));
557 for (
size_t i = 0; i < table_size; i++) {
558 bad_char_occurrence[i] = start - 1;
561 for (
size_t i = start; i < pattern_length - 1; i++) {
562 Char c = pattern_[i];
563 int bucket = (
sizeof(Char) == 1) ? c : c % AlphabetSize();
564 bad_char_occurrence[bucket] = i;
574 template <
typename Char>
575 size_t StringSearch<Char>::InitialSearch(
576 StringSearch<Char>* search,
577 Vector<const Char> subject,
579 Vector<const Char> pattern = search->pattern_;
580 const size_t pattern_length = pattern.length();
584 int64_t badness = -10 - (pattern_length << 2);
588 for (
size_t i = index, n = subject.length() - pattern_length; i <=
n; i++) {
591 i = FindFirstCharacter(pattern, subject, i);
592 if (i == subject.length())
593 return subject.length();
597 if (pattern[j] != subject[i + j]) {
601 }
while (j < pattern_length);
602 if (j == pattern_length) {
607 search->PopulateBoyerMooreHorspoolTable();
608 search->strategy_ = &BoyerMooreHorspoolSearch;
609 return BoyerMooreHorspoolSearch(search, subject, i);
612 return subject.length();
619 template <
typename Char>
620 size_t SearchString(Vector<const Char> subject,
621 Vector<const Char> pattern,
622 size_t start_index) {
623 StringSearch<Char> search(pattern);
624 return search.Search(subject, start_index);
630 using node::stringsearch::Vector;
632 template <
typename Char>
633 size_t SearchString(
const Char* haystack,
634 size_t haystack_length,
636 size_t needle_length,
643 Vector<const Char> v_needle = Vector<const Char>(
644 needle, needle_length, is_forward);
645 Vector<const Char> v_haystack = Vector<const Char>(
646 haystack, haystack_length, is_forward);
647 CHECK(haystack_length >= needle_length);
648 size_t diff = haystack_length - needle_length;
649 size_t relative_start_index;
651 relative_start_index = start_index;
652 }
else if (diff < start_index) {
653 relative_start_index = 0;
655 relative_start_index = diff - start_index;
657 size_t pos = node::stringsearch::SearchString(
658 v_haystack, v_needle, relative_start_index);
659 if (pos == haystack_length) {
663 return is_forward ? pos : (haystack_length - needle_length - pos);
667 #endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS 669 #endif // SRC_STRING_SEARCH_H_
union node::cares_wrap::@8::CaresAsyncData::@0 data