Skip to content

Commit e3204d5

Browse files
pthierV8 LUCI CQ
pthier
authored and
V8 LUCI CQ
committed
[json] Simdify scanning for characters requiring escaping in Stringifier
Split AppendStringChecked() into separate versions for 1-byte and 2-byte strings, and use SIMD (via Highway) to vectorize the 1-byte version.

Bug: 380044242, 349594804
Change-Id: I69b3e574d9f4bcd92222e415605eb73829151e89
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6039761
Reviewed-by: Anton Bikineev <[email protected]>
Reviewed-by: Igor Sheludko <[email protected]>
Commit-Queue: Patrick Thier <[email protected]>
Cr-Commit-Position: refs/heads/main@{#98173}
1 parent fa6be46 commit e3204d5

File tree

1 file changed

+145
-53
lines changed

1 file changed

+145
-53
lines changed

src/json/json-stringifier.cc

+145-53
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include <string_view>
88

9+
#include "hwy/highway.h"
910
#include "src/base/strings.h"
1011
#include "src/common/assert-scope.h"
1112
#include "src/common/message-template.h"
@@ -1931,10 +1932,30 @@ class FastJsonStringifier {
19311932

19321933
V8_INLINE FastJsonStringifierResult
19331934
AppendStringChecked(Tagged<String> string);
1935+
1936+
template <typename SrcChar>
1937+
requires(sizeof(SrcChar) == sizeof(uint8_t))
1938+
V8_INLINE FastJsonStringifierResult
1939+
AppendStringChecked(const SrcChar* chars, uint32_t length,
1940+
const DisallowGarbageCollection& no_gc);
1941+
1942+
template <typename SrcChar>
1943+
requires(sizeof(SrcChar) == sizeof(uint8_t))
1944+
V8_INLINE FastJsonStringifierResult AppendStringCheckedScalar(
1945+
const SrcChar* chars, uint32_t length, uint32_t start,
1946+
uint32_t uncopied_src_index, const DisallowGarbageCollection& no_gc);
1947+
19341948
template <typename SrcChar>
1949+
requires(sizeof(SrcChar) == sizeof(uint8_t))
19351950
V8_INLINE FastJsonStringifierResult
1936-
AppendStringChecked(Tagged<String> string, const SrcChar* chars,
1937-
uint32_t length, const DisallowGarbageCollection& no_gc);
1951+
AppendStringCheckedSIMD(const SrcChar* chars, uint32_t length,
1952+
const DisallowGarbageCollection& no_gc);
1953+
1954+
template <typename SrcChar>
1955+
requires(sizeof(SrcChar) == sizeof(base::uc16))
1956+
V8_INLINE FastJsonStringifierResult
1957+
AppendStringChecked(const SrcChar* chars, uint32_t length,
1958+
const DisallowGarbageCollection& no_gc);
19381959

19391960
static constexpr uint32_t kGlobalInterruptBudget = 200000;
19401961
static constexpr uint32_t kArrayInterruptLength = 4000;
@@ -2092,7 +2113,7 @@ FastJsonStringifierResult FastJsonStringifier<Char>::SerializeObjectKey(
20922113
const uint32_t length = string->length();
20932114
Separator(comma);
20942115
AppendCharacter('"');
2095-
AppendStringChecked(string, chars, length, no_gc);
2116+
AppendStringChecked(chars, length, no_gc);
20962117
AppendCharacter('"');
20972118
AppendCharacter(':');
20982119
return SUCCESS;
@@ -2121,7 +2142,7 @@ FastJsonStringifierResult FastJsonStringifier<Char>::SerializeString(
21212142
if (V8_UNLIKELY(result != SUCCESS)) return result;
21222143
}
21232144
AppendCharacter('"');
2124-
AppendStringChecked(string, chars, length, no_gc);
2145+
AppendStringChecked(chars, length, no_gc);
21252146
AppendCharacter('"');
21262147
return SUCCESS;
21272148
}
@@ -2641,84 +2662,155 @@ FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringChecked(
26412662
Tagged<String> string) {
26422663
DisallowGarbageCollection no_gc;
26432664
DCHECK_EQ(string->map()->instance_type(), INTERNALIZED_ONE_BYTE_STRING_TYPE);
2644-
return AppendStringChecked(string,
2645-
Cast<SeqOneByteString>(string)->GetChars(no_gc),
2665+
return AppendStringChecked(Cast<SeqOneByteString>(string)->GetChars(no_gc),
26462666
string->length(), no_gc);
26472667
}
26482668

26492669
template <typename Char>
26502670
template <typename SrcChar>
2671+
requires(sizeof(SrcChar) == sizeof(uint8_t))
26512672
FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringChecked(
2652-
Tagged<String> string, const SrcChar* chars, uint32_t length,
2673+
const SrcChar* chars, uint32_t length,
26532674
const DisallowGarbageCollection& no_gc) {
2654-
int escape_char_idx = -1;
2675+
constexpr int kUseSimdLengthThreshold = 32;
2676+
if (length >= kUseSimdLengthThreshold) {
2677+
return AppendStringCheckedSIMD(chars, length, no_gc);
2678+
}
2679+
return AppendStringCheckedScalar(chars, length, 0, 0, no_gc);
2680+
}
2681+
2682+
template <typename Char>
2683+
template <typename SrcChar>
2684+
requires(sizeof(SrcChar) == sizeof(uint8_t))
2685+
FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringCheckedScalar(
2686+
const SrcChar* chars, uint32_t length, uint32_t start,
2687+
uint32_t uncopied_src_index, const DisallowGarbageCollection& no_gc) {
2688+
for (uint32_t i = start; i < length; i++) {
2689+
SrcChar c = chars[i];
2690+
if (V8_LIKELY(DoNotEscape(c))) continue;
2691+
buffer_.Append(chars + uncopied_src_index, i - uncopied_src_index);
2692+
AppendCString(&JsonEscapeTable[c * kJsonEscapeTableEntrySize]);
2693+
uncopied_src_index = i + 1;
2694+
}
2695+
if (uncopied_src_index < length) {
2696+
buffer_.Append(chars + uncopied_src_index, length - uncopied_src_index);
2697+
}
2698+
return SUCCESS;
2699+
}
2700+
2701+
template <typename Char>
2702+
template <typename SrcChar>
2703+
requires(sizeof(SrcChar) == sizeof(uint8_t))
2704+
FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringCheckedSIMD(
2705+
const SrcChar* chars, uint32_t length,
2706+
const DisallowGarbageCollection& no_gc) {
2707+
namespace hw = hwy::HWY_NAMESPACE;
2708+
2709+
uint32_t uncopied_src_index = 0; // Index of first char not copied yet.
2710+
const SrcChar* block = chars;
2711+
const SrcChar* end = chars + length;
2712+
hw::FixedTag<SrcChar, 16> tag;
2713+
static int stride = hw::Lanes(tag);
2714+
2715+
const auto mask_0x20 = hw::Set(tag, 0x20);
2716+
const auto mask_0x22 = hw::Set(tag, 0x22);
2717+
const auto mask_0x5c = hw::Set(tag, 0x5c);
2718+
2719+
for (; block + (stride - 1) < end; block += stride) {
2720+
const auto input = hw::LoadU(tag, block);
2721+
const auto has_lower_than_0x20 = input < mask_0x20;
2722+
const auto has_0x22 = input == mask_0x22;
2723+
const auto has_0x5c = input == mask_0x5c;
2724+
const auto result = hw::Or(hw::Or(has_lower_than_0x20, has_0x22), has_0x5c);
2725+
2726+
// No character that needs escaping found in block.
2727+
if (V8_LIKELY(hw::AllFalse(tag, result))) continue;
2728+
2729+
size_t index = hw::FindKnownFirstTrue(tag, result);
2730+
Char found_char = block[index];
2731+
const int char_index =
2732+
static_cast<int>(block - chars) + static_cast<int>(index);
2733+
const int copy_length = char_index - uncopied_src_index;
2734+
buffer_.Append(chars + uncopied_src_index, copy_length);
2735+
AppendCString(&JsonEscapeTable[found_char * kJsonEscapeTableEntrySize]);
2736+
uncopied_src_index = char_index + 1;
2737+
// Advance to character after the one that was found to need escaping.
2738+
// Subtract stride as it will be added again at the beginning of the loop.
2739+
block += static_cast<int>(index + 1 - stride);
2740+
}
2741+
2742+
// Handle remaining characters.
2743+
const uint32_t start_index = static_cast<uint32_t>(block - chars);
2744+
return AppendStringCheckedScalar(chars, length, start_index,
2745+
uncopied_src_index, no_gc);
2746+
}
2747+
2748+
template <typename Char>
2749+
template <typename SrcChar>
2750+
requires(sizeof(SrcChar) == sizeof(base::uc16))
2751+
FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringChecked(
2752+
const SrcChar* chars, uint32_t length,
2753+
const DisallowGarbageCollection& no_gc) {
2754+
uint32_t uncopied_src_index = 0; // Index of first char not copied yet.
26552755
// TODO(pthier): Add SIMD version.
26562756
for (uint32_t i = 0; i < length; i++) {
26572757
SrcChar c = chars[i];
26582758
if (V8_LIKELY(DoNotEscape(c))) continue;
2659-
if constexpr (!is_one_byte) {
2660-
if (sizeof(SrcChar) != 1 &&
2661-
base::IsInRange(c, static_cast<SrcChar>(0xD800),
2662-
static_cast<SrcChar>(0xDFFF))) {
2663-
// The current character is a surrogate.
2664-
buffer_.Append(chars + escape_char_idx + 1, i - (escape_char_idx + 1));
2665-
2666-
char double_to_radix_chars[kDoubleToRadixMaxChars];
2667-
base::Vector<char> double_to_radix_buffer =
2668-
base::ArrayVector(double_to_radix_chars);
2669-
if (c <= 0xDBFF) {
2670-
// The current character is a leading surrogate.
2671-
if (i + 1 < length) {
2672-
// There is a next character.
2673-
SrcChar next = chars[i + 1];
2674-
if (base::IsInRange(next, static_cast<SrcChar>(0xDC00),
2675-
static_cast<SrcChar>(0xDFFF))) {
2676-
// The next character is a trailing surrogate, meaning this is a
2677-
// surrogate pair.
2678-
AppendCharacter(c);
2679-
AppendCharacter(next);
2680-
i++;
2681-
} else {
2682-
// The next character is not a trailing surrogate. Thus, the
2683-
// current character is a lone leading surrogate.
2684-
AppendCStringLiteral("\\u");
2685-
std::string_view hex =
2686-
DoubleToRadixStringView(c, 16, double_to_radix_buffer);
2687-
AppendString(hex);
2688-
}
2759+
if (sizeof(SrcChar) != 1 && base::IsInRange(c, static_cast<SrcChar>(0xD800),
2760+
static_cast<SrcChar>(0xDFFF))) {
2761+
// The current character is a surrogate.
2762+
buffer_.Append(chars + uncopied_src_index, i - uncopied_src_index);
2763+
char double_to_radix_chars[kDoubleToRadixMaxChars];
2764+
base::Vector<char> double_to_radix_buffer =
2765+
base::ArrayVector(double_to_radix_chars);
2766+
if (c <= 0xDBFF) {
2767+
// The current character is a leading surrogate.
2768+
if (i + 1 < length) {
2769+
// There is a next character.
2770+
SrcChar next = chars[i + 1];
2771+
if (base::IsInRange(next, static_cast<SrcChar>(0xDC00),
2772+
static_cast<SrcChar>(0xDFFF))) {
2773+
// The next character is a trailing surrogate, meaning this is a
2774+
// surrogate pair.
2775+
AppendCharacter(c);
2776+
AppendCharacter(next);
2777+
i++;
26892778
} else {
2690-
// There is no next character. Thus, the current character is a lone
2691-
// leading surrogate.
2779+
// The next character is not a trailing surrogate. Thus, the
2780+
// current character is a lone leading surrogate.
26922781
AppendCStringLiteral("\\u");
26932782
std::string_view hex =
26942783
DoubleToRadixStringView(c, 16, double_to_radix_buffer);
26952784
AppendString(hex);
26962785
}
26972786
} else {
2698-
// The current character is a lone trailing surrogate. (If it had been
2699-
// preceded by a leading surrogate, we would've ended up in the other
2700-
// branch earlier on, and the current character would've been handled
2701-
// as part of the surrogate pair already.)
2787+
// There is no next character. Thus, the current character is a lone
2788+
// leading surrogate.
27022789
AppendCStringLiteral("\\u");
27032790
std::string_view hex =
27042791
DoubleToRadixStringView(c, 16, double_to_radix_buffer);
27052792
AppendString(hex);
27062793
}
2707-
escape_char_idx = i;
27082794
} else {
2709-
buffer_.Append(chars + escape_char_idx + 1, i - (escape_char_idx + 1));
2710-
DCHECK_LT(c, 0x60);
2711-
AppendCString(&JsonEscapeTable[c * kJsonEscapeTableEntrySize]);
2712-
escape_char_idx = i;
2795+
// The current character is a lone trailing surrogate. (If it had been
2796+
// preceded by a leading surrogate, we would've ended up in the other
2797+
// branch earlier on, and the current character would've been handled
2798+
// as part of the surrogate pair already.)
2799+
AppendCStringLiteral("\\u");
2800+
std::string_view hex =
2801+
DoubleToRadixStringView(c, 16, double_to_radix_buffer);
2802+
AppendString(hex);
27132803
}
2804+
uncopied_src_index = i + 1;
27142805
} else {
2715-
buffer_.Append(chars + escape_char_idx + 1, i - (escape_char_idx + 1));
2806+
buffer_.Append(chars + uncopied_src_index, i - uncopied_src_index);
2807+
DCHECK_LT(c, 0x60);
27162808
AppendCString(&JsonEscapeTable[c * kJsonEscapeTableEntrySize]);
2717-
escape_char_idx = i;
2809+
uncopied_src_index = i + 1;
27182810
}
27192811
}
2720-
if (static_cast<uint32_t>(escape_char_idx + 1) < length) {
2721-
buffer_.Append(chars + escape_char_idx + 1, length - (escape_char_idx + 1));
2812+
if (uncopied_src_index < length) {
2813+
buffer_.Append(chars + uncopied_src_index, length - uncopied_src_index);
27222814
}
27232815
return SUCCESS;
27242816
}

0 commit comments

Comments
 (0)