From 1cb68a06f8c0ea3ad29cab6efe5ac673817596ce Mon Sep 17 00:00:00 2001 From: pthier Date: Tue, 6 May 2025 10:28:03 +0200 Subject: [PATCH] Reland "[regexp] Simdify global atom match with single character pattern" This is a reland of commit 36f07e9a04484dd4b97713f8e821d3b83ade8f53 Changes since revert: Accumulate number of matches after a cache hit instead of overwriting them. Original change's description: > [regexp] Simdify global atom match with single character pattern > > Use highway to find matching characters for RegExp with a single > character atom pattern. > > Bug: 413411337 > Change-Id: I9bf686aca2da37025613a9227eb0ec69176a676f > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6487695 > Reviewed-by: Jakob Linke > Commit-Queue: Patrick Thier > Cr-Commit-Position: refs/heads/main@{#100006} Fixed: 414857029 Bug: 413411337 Change-Id: I3ebd72f3b91ce5e7b603e43540cd4e10090c1868 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6513551 Reviewed-by: Camillo Bruni Commit-Queue: Patrick Thier Cr-Commit-Position: refs/heads/main@{#100088} --- src/runtime/runtime-regexp.cc | 95 ++++++++++++++++++++++++++++++++--- test/mjsunit/regexp-global.js | 24 +++++++++ 2 files changed, 112 insertions(+), 7 deletions(-) diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc index 5689cdc8d1db..e5cee437d1c3 100644 --- a/v8/src/runtime/runtime-regexp.cc +++ b/v8/src/runtime/runtime-regexp.cc @@ -3,7 +3,9 @@ // found in the LICENSE file. #include +#include +#include "hwy/highway.h" #include "src/base/small-vector.h" #include "src/base/strings.h" #include "src/common/message-template.h" @@ -2164,13 +2166,92 @@ inline void RegExpMatchGlobalAtom_OneCharPattern( Isolate* isolate, base::Vector subject, const PChar pattern, int start_index, int* number_of_matches, int* last_match_index, const DisallowGarbageCollection& no_gc) { - for (int i = start_index; i < subject.length(); i++) { - // Subtle: the valid variants are {SChar,PChar} in: - // {uint8_t,uint8_t}, {uc16,uc16}, {uc16,uint8_t}. In the latter case, - // we cast the uint8_t pattern to uc16 for the comparison. - if (subject[i] != static_cast(pattern)) continue; - (*number_of_matches)++; - (*last_match_index) = i; + static_assert(std::is_unsigned_v); + static_assert(std::is_unsigned_v); + // We can utilize SIMD to check multiple characters at once. + // Since the pattern is a single char, we create a mask setting each lane in + // the vector to the pattern char. + // Since reductions from a vector to a general purpose register (i.e. + // ReduceSum in this algorithm) are expensive, we keep a count for each lane + // in a vector until the count could potentially overflow and only reduce to + // a general purpose register then. I.e. if SChar is uint8_t, we have a + // 16xuint8_t vector to count matches, which we reduce to an int every 255 + // blocks. + namespace hw = hwy::HWY_NAMESPACE; + hw::ScalableTag tag; + // We need a wider tag to avoid overflows on lanes when summing up submatches. + using WidenedTag = hw::RepartitionToWide; + WidenedTag sum_tag; + static constexpr size_t stride = hw::Lanes(tag); + // Subtle: the valid variants are {SChar,PChar} in: + // {uint8_t,uint8_t}, {uc16,uc16}, {uc16,uint8_t}. In the latter case, + // we cast the uint8_t pattern to uc16 for the comparison. + const auto mask = hw::Set(tag, static_cast(pattern)); + + int matches = 0; + auto submatches = hw::Zero(tag); + const SChar* last_match_block = nullptr; + hw::Mask last_match_vec; + + const SChar* block = subject.data() + start_index; + const SChar* end = subject.data() + subject.length(); + + // ReduceSum is expensive, so we gather matches into a vector. max_count is + // the maximum number of matches we can count in the vector before it + // overflows. + int max_count = std::numeric_limits::max(); + while (block + stride * max_count <= end) { + for (int i = 0; i < max_count; i++, block += stride) { + const auto input = hw::LoadU(tag, block); + const auto match = input == mask; + // Lanes with matches have all bits set, so we subtract to increase the + // count by 1. + submatches = hw::Sub(submatches, hw::VecFromMask(tag, match)); + if (!hw::AllFalse(tag, match)) { + last_match_block = block; + last_match_vec = match; + } + } + // SumsOf2 promotes the sum of 2 consecutive lanes into a wider lane. + auto promoted_submatches = hw::SumsOf2(submatches); + // Wider lane sums can be reduces without overflows. + matches += hw::ReduceSum(sum_tag, promoted_submatches); + submatches = hw::Zero(tag); + } + + // For blocks shorter than stride * max_count, lanes in submatches can't + // overflow. + DCHECK_LT(end - block, stride * max_count); + for (; block + stride <= end; block += stride) { + const auto input = hw::LoadU(tag, block); + const auto match = input == mask; + submatches = hw::Sub(submatches, hw::VecFromMask(tag, match)); + if (!hw::AllFalse(tag, match)) { + last_match_block = block; + last_match_vec = match; + } + } + auto promoted_submatches = hw::SumsOf2(submatches); + matches += hw::ReduceSum(sum_tag, promoted_submatches); + + // Handle remaining chars. + // last_match_block already contains the last match position, so use a special + // vector with lane 0 set to extract the last_match_index later. + const auto scalar_last_match_vec = hw::FirstN(tag, 1); + for (SChar c = *block; block < end; c = *(++block)) { + if (c != static_cast(pattern)) continue; + matches++; + last_match_block = block; + last_match_vec = scalar_last_match_vec; + } + + // Store results. + *number_of_matches += matches; + if (last_match_block != nullptr) { + DCHECK(!hw::AllFalse(tag, last_match_vec)); + *last_match_index = static_cast( + last_match_block + hw::FindKnownLastTrue(tag, last_match_vec) - + subject.data()); } }