forked from pool/nodejs-electron
OBS-URL: https://build.opensuse.org/package/show/devel:languages:nodejs/nodejs-electron?expand=0&rev=220
151 lines
6.1 KiB
Diff
151 lines
6.1 KiB
Diff
From 1cb68a06f8c0ea3ad29cab6efe5ac673817596ce Mon Sep 17 00:00:00 2001
|
|
From: pthier <pthier@chromium.org>
|
|
Date: Tue, 6 May 2025 10:28:03 +0200
|
|
Subject: [PATCH] Reland "[regexp] Simdify global atom match with single
|
|
character pattern"
|
|
|
|
This is a reland of commit 36f07e9a04484dd4b97713f8e821d3b83ade8f53
|
|
|
|
Changes since revert: Accumulate number of matches after a cache hit
|
|
instead of overwriting them.
|
|
|
|
Original change's description:
|
|
> [regexp] Simdify global atom match with single character pattern
|
|
>
|
|
> Use highway to find matching characters for RegExp with a single
|
|
> character atom pattern.
|
|
>
|
|
> Bug: 413411337
|
|
> Change-Id: I9bf686aca2da37025613a9227eb0ec69176a676f
|
|
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6487695
|
|
> Reviewed-by: Jakob Linke <jgruber@chromium.org>
|
|
> Commit-Queue: Patrick Thier <pthier@chromium.org>
|
|
> Cr-Commit-Position: refs/heads/main@{#100006}
|
|
|
|
Fixed: 414857029
|
|
Bug: 413411337
|
|
Change-Id: I3ebd72f3b91ce5e7b603e43540cd4e10090c1868
|
|
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6513551
|
|
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
|
|
Commit-Queue: Patrick Thier <pthier@chromium.org>
|
|
Cr-Commit-Position: refs/heads/main@{#100088}
|
|
---
|
|
src/runtime/runtime-regexp.cc | 95 ++++++++++++++++++++++++++++++++---
|
|
test/mjsunit/regexp-global.js | 24 +++++++++
|
|
2 files changed, 112 insertions(+), 7 deletions(-)
|
|
|
|
diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc
|
|
index 5689cdc8d1db..e5cee437d1c3 100644
|
|
--- a/v8/src/runtime/runtime-regexp.cc
|
|
+++ b/v8/src/runtime/runtime-regexp.cc
|
|
@@ -3,7 +3,9 @@
|
|
// found in the LICENSE file.
|
|
|
|
#include <functional>
|
|
+#include <type_traits>
|
|
|
|
+#include "hwy/highway.h"
|
|
#include "src/base/small-vector.h"
|
|
#include "src/base/strings.h"
|
|
#include "src/common/message-template.h"
|
|
@@ -2164,13 +2166,92 @@ inline void RegExpMatchGlobalAtom_OneCharPattern(
|
|
Isolate* isolate, base::Vector<const SChar> subject, const PChar pattern,
|
|
int start_index, int* number_of_matches, int* last_match_index,
|
|
const DisallowGarbageCollection& no_gc) {
|
|
- for (int i = start_index; i < subject.length(); i++) {
|
|
- // Subtle: the valid variants are {SChar,PChar} in:
|
|
- // {uint8_t,uint8_t}, {uc16,uc16}, {uc16,uint8_t}. In the latter case,
|
|
- // we cast the uint8_t pattern to uc16 for the comparison.
|
|
- if (subject[i] != static_cast<const SChar>(pattern)) continue;
|
|
- (*number_of_matches)++;
|
|
- (*last_match_index) = i;
|
|
+ static_assert(std::is_unsigned_v<SChar>);
|
|
+ static_assert(std::is_unsigned_v<PChar>);
|
|
+ // We can utilize SIMD to check multiple characters at once.
|
|
+ // Since the pattern is a single char, we create a mask setting each lane in
|
|
+ // the vector to the pattern char.
|
|
+ // Since reductions from a vector to a general purpose register (i.e.
|
|
+ // ReduceSum in this algorithm) are expensive, we keep a count for each lane
|
|
+ // in a vector until the count could potentially overflow and only reduce to
|
|
+ // a general purpose register then. I.e. if SChar is uint8_t, we have a
|
|
+ // 16xuint8_t vector to count matches, which we reduce to an int every 255
|
|
+ // blocks.
|
|
+ namespace hw = hwy::HWY_NAMESPACE;
|
|
+ hw::ScalableTag<SChar> tag;
|
|
+ // We need a wider tag to avoid overflows on lanes when summing up submatches.
|
|
+ using WidenedTag = hw::RepartitionToWide<decltype(tag)>;
|
|
+ WidenedTag sum_tag;
|
|
+ static constexpr size_t stride = hw::Lanes(tag);
|
|
+ // Subtle: the valid variants are {SChar,PChar} in:
|
|
+ // {uint8_t,uint8_t}, {uc16,uc16}, {uc16,uint8_t}. In the latter case,
|
|
+ // we cast the uint8_t pattern to uc16 for the comparison.
|
|
+ const auto mask = hw::Set(tag, static_cast<const SChar>(pattern));
|
|
+
|
|
+ int matches = 0;
|
|
+ auto submatches = hw::Zero(tag);
|
|
+ const SChar* last_match_block = nullptr;
|
|
+ hw::Mask<decltype(tag)> last_match_vec;
|
|
+
|
|
+ const SChar* block = subject.data() + start_index;
|
|
+ const SChar* end = subject.data() + subject.length();
|
|
+
|
|
+ // ReduceSum is expensive, so we gather matches into a vector. max_count is
|
|
+ // the maximum number of matches we can count in the vector before it
|
|
+ // overflows.
|
|
+ int max_count = std::numeric_limits<SChar>::max();
|
|
+ while (block + stride * max_count <= end) {
|
|
+ for (int i = 0; i < max_count; i++, block += stride) {
|
|
+ const auto input = hw::LoadU(tag, block);
|
|
+ const auto match = input == mask;
|
|
+ // Lanes with matches have all bits set, so we subtract to increase the
|
|
+ // count by 1.
|
|
+ submatches = hw::Sub(submatches, hw::VecFromMask(tag, match));
|
|
+ if (!hw::AllFalse(tag, match)) {
|
|
+ last_match_block = block;
|
|
+ last_match_vec = match;
|
|
+ }
|
|
+ }
|
|
+ // SumsOf2 promotes the sum of 2 consecutive lanes into a wider lane.
|
|
+ auto promoted_submatches = hw::SumsOf2(submatches);
|
|
+ // Wider lane sums can be reduced without overflows.
|
|
+ matches += hw::ReduceSum(sum_tag, promoted_submatches);
|
|
+ submatches = hw::Zero(tag);
|
|
+ }
|
|
+
|
|
+ // For blocks shorter than stride * max_count, lanes in submatches can't
|
|
+ // overflow.
|
|
+ DCHECK_LT(end - block, stride * max_count);
|
|
+ for (; block + stride <= end; block += stride) {
|
|
+ const auto input = hw::LoadU(tag, block);
|
|
+ const auto match = input == mask;
|
|
+ submatches = hw::Sub(submatches, hw::VecFromMask(tag, match));
|
|
+ if (!hw::AllFalse(tag, match)) {
|
|
+ last_match_block = block;
|
|
+ last_match_vec = match;
|
|
+ }
|
|
+ }
|
|
+ auto promoted_submatches = hw::SumsOf2(submatches);
|
|
+ matches += hw::ReduceSum(sum_tag, promoted_submatches);
|
|
+
|
|
+ // Handle remaining chars.
|
|
+ // last_match_block already contains the last match position, so use a special
|
|
+ // vector with lane 0 set to extract the last_match_index later.
|
|
+ const auto scalar_last_match_vec = hw::FirstN(tag, 1);
|
|
+ for (SChar c = *block; block < end; c = *(++block)) {
|
|
+ if (c != static_cast<const SChar>(pattern)) continue;
|
|
+ matches++;
|
|
+ last_match_block = block;
|
|
+ last_match_vec = scalar_last_match_vec;
|
|
+ }
|
|
+
|
|
+ // Store results.
|
|
+ *number_of_matches += matches;
|
|
+ if (last_match_block != nullptr) {
|
|
+ DCHECK(!hw::AllFalse(tag, last_match_vec));
|
|
+ *last_match_index = static_cast<int>(
|
|
+ last_match_block + hw::FindKnownLastTrue(tag, last_match_vec) -
|
|
+ subject.data());
|
|
}
|
|
}
|
|
|