|
6 | 6 |
|
7 | 7 | #include <string_view>
|
8 | 8 |
|
| 9 | +#include "hwy/highway.h" |
9 | 10 | #include "src/base/strings.h"
|
10 | 11 | #include "src/common/assert-scope.h"
|
11 | 12 | #include "src/common/message-template.h"
|
@@ -1931,10 +1932,30 @@ class FastJsonStringifier {
|
1931 | 1932 |
|
1932 | 1933 | V8_INLINE FastJsonStringifierResult
|
1933 | 1934 | AppendStringChecked(Tagged<String> string);
|
| 1935 | + |
| 1936 | + template <typename SrcChar> |
| 1937 | + requires(sizeof(SrcChar) == sizeof(uint8_t)) |
| 1938 | + V8_INLINE FastJsonStringifierResult |
| 1939 | + AppendStringChecked(const SrcChar* chars, uint32_t length, |
| 1940 | + const DisallowGarbageCollection& no_gc); |
| 1941 | + |
| 1942 | + template <typename SrcChar> |
| 1943 | + requires(sizeof(SrcChar) == sizeof(uint8_t)) |
| 1944 | + V8_INLINE FastJsonStringifierResult AppendStringCheckedScalar( |
| 1945 | + const SrcChar* chars, uint32_t length, uint32_t start, |
| 1946 | + uint32_t uncopied_src_index, const DisallowGarbageCollection& no_gc); |
| 1947 | + |
1934 | 1948 | template <typename SrcChar>
|
| 1949 | + requires(sizeof(SrcChar) == sizeof(uint8_t)) |
1935 | 1950 | V8_INLINE FastJsonStringifierResult
|
1936 |
| - AppendStringChecked(Tagged<String> string, const SrcChar* chars, |
1937 |
| - uint32_t length, const DisallowGarbageCollection& no_gc); |
| 1951 | + AppendStringCheckedSIMD(const SrcChar* chars, uint32_t length, |
| 1952 | + const DisallowGarbageCollection& no_gc); |
| 1953 | + |
| 1954 | + template <typename SrcChar> |
| 1955 | + requires(sizeof(SrcChar) == sizeof(base::uc16)) |
| 1956 | + V8_INLINE FastJsonStringifierResult |
| 1957 | + AppendStringChecked(const SrcChar* chars, uint32_t length, |
| 1958 | + const DisallowGarbageCollection& no_gc); |
1938 | 1959 |
|
1939 | 1960 | static constexpr uint32_t kGlobalInterruptBudget = 200000;
|
1940 | 1961 | static constexpr uint32_t kArrayInterruptLength = 4000;
|
@@ -2092,7 +2113,7 @@ FastJsonStringifierResult FastJsonStringifier<Char>::SerializeObjectKey(
|
2092 | 2113 | const uint32_t length = string->length();
|
2093 | 2114 | Separator(comma);
|
2094 | 2115 | AppendCharacter('"');
|
2095 |
| - AppendStringChecked(string, chars, length, no_gc); |
| 2116 | + AppendStringChecked(chars, length, no_gc); |
2096 | 2117 | AppendCharacter('"');
|
2097 | 2118 | AppendCharacter(':');
|
2098 | 2119 | return SUCCESS;
|
@@ -2121,7 +2142,7 @@ FastJsonStringifierResult FastJsonStringifier<Char>::SerializeString(
|
2121 | 2142 | if (V8_UNLIKELY(result != SUCCESS)) return result;
|
2122 | 2143 | }
|
2123 | 2144 | AppendCharacter('"');
|
2124 |
| - AppendStringChecked(string, chars, length, no_gc); |
| 2145 | + AppendStringChecked(chars, length, no_gc); |
2125 | 2146 | AppendCharacter('"');
|
2126 | 2147 | return SUCCESS;
|
2127 | 2148 | }
|
@@ -2641,84 +2662,155 @@ FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringChecked(
|
2641 | 2662 | Tagged<String> string) {
|
2642 | 2663 | DisallowGarbageCollection no_gc;
|
2643 | 2664 | DCHECK_EQ(string->map()->instance_type(), INTERNALIZED_ONE_BYTE_STRING_TYPE);
|
2644 |
| - return AppendStringChecked(string, |
2645 |
| - Cast<SeqOneByteString>(string)->GetChars(no_gc), |
| 2665 | + return AppendStringChecked(Cast<SeqOneByteString>(string)->GetChars(no_gc), |
2646 | 2666 | string->length(), no_gc);
|
2647 | 2667 | }
|
2648 | 2668 |
|
2649 | 2669 | template <typename Char>
|
2650 | 2670 | template <typename SrcChar>
|
| 2671 | + requires(sizeof(SrcChar) == sizeof(uint8_t)) |
2651 | 2672 | FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringChecked(
|
2652 |
| - Tagged<String> string, const SrcChar* chars, uint32_t length, |
| 2673 | + const SrcChar* chars, uint32_t length, |
2653 | 2674 | const DisallowGarbageCollection& no_gc) {
|
2654 |
| - int escape_char_idx = -1; |
| 2675 | + constexpr int kUseSimdLengthThreshold = 32; |
| 2676 | + if (length >= kUseSimdLengthThreshold) { |
| 2677 | + return AppendStringCheckedSIMD(chars, length, no_gc); |
| 2678 | + } |
| 2679 | + return AppendStringCheckedScalar(chars, length, 0, 0, no_gc); |
| 2680 | +} |
| 2681 | + |
| 2682 | +template <typename Char> |
| 2683 | +template <typename SrcChar> |
| 2684 | + requires(sizeof(SrcChar) == sizeof(uint8_t)) |
| 2685 | +FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringCheckedScalar( |
| 2686 | + const SrcChar* chars, uint32_t length, uint32_t start, |
| 2687 | + uint32_t uncopied_src_index, const DisallowGarbageCollection& no_gc) { |
| 2688 | + for (uint32_t i = start; i < length; i++) { |
| 2689 | + SrcChar c = chars[i]; |
| 2690 | + if (V8_LIKELY(DoNotEscape(c))) continue; |
| 2691 | + buffer_.Append(chars + uncopied_src_index, i - uncopied_src_index); |
| 2692 | + AppendCString(&JsonEscapeTable[c * kJsonEscapeTableEntrySize]); |
| 2693 | + uncopied_src_index = i + 1; |
| 2694 | + } |
| 2695 | + if (uncopied_src_index < length) { |
| 2696 | + buffer_.Append(chars + uncopied_src_index, length - uncopied_src_index); |
| 2697 | + } |
| 2698 | + return SUCCESS; |
| 2699 | +} |
| 2700 | + |
| 2701 | +template <typename Char> |
| 2702 | +template <typename SrcChar> |
| 2703 | + requires(sizeof(SrcChar) == sizeof(uint8_t)) |
| 2704 | +FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringCheckedSIMD( |
| 2705 | + const SrcChar* chars, uint32_t length, |
| 2706 | + const DisallowGarbageCollection& no_gc) { |
| 2707 | + namespace hw = hwy::HWY_NAMESPACE; |
| 2708 | + |
| 2709 | + uint32_t uncopied_src_index = 0; // Index of first char not copied yet. |
| 2710 | + const SrcChar* block = chars; |
| 2711 | + const SrcChar* end = chars + length; |
| 2712 | + hw::FixedTag<SrcChar, 16> tag; |
| 2713 | + static int stride = hw::Lanes(tag); |
| 2714 | + |
| 2715 | + const auto mask_0x20 = hw::Set(tag, 0x20); |
| 2716 | + const auto mask_0x22 = hw::Set(tag, 0x22); |
| 2717 | + const auto mask_0x5c = hw::Set(tag, 0x5c); |
| 2718 | + |
| 2719 | + for (; block + (stride - 1) < end; block += stride) { |
| 2720 | + const auto input = hw::LoadU(tag, block); |
| 2721 | + const auto has_lower_than_0x20 = input < mask_0x20; |
| 2722 | + const auto has_0x22 = input == mask_0x22; |
| 2723 | + const auto has_0x5c = input == mask_0x5c; |
| 2724 | + const auto result = hw::Or(hw::Or(has_lower_than_0x20, has_0x22), has_0x5c); |
| 2725 | + |
| 2726 | + // No character that needs escaping found in block. |
| 2727 | + if (V8_LIKELY(hw::AllFalse(tag, result))) continue; |
| 2728 | + |
| 2729 | + size_t index = hw::FindKnownFirstTrue(tag, result); |
| 2730 | + Char found_char = block[index]; |
| 2731 | + const int char_index = |
| 2732 | + static_cast<int>(block - chars) + static_cast<int>(index); |
| 2733 | + const int copy_length = char_index - uncopied_src_index; |
| 2734 | + buffer_.Append(chars + uncopied_src_index, copy_length); |
| 2735 | + AppendCString(&JsonEscapeTable[found_char * kJsonEscapeTableEntrySize]); |
| 2736 | + uncopied_src_index = char_index + 1; |
| 2737 | + // Advance to character after the one that was found to need escaping. |
| 2738 | + // Subtract stride as it will be added again at the beginning of the loop. |
| 2739 | + block += static_cast<int>(index + 1 - stride); |
| 2740 | + } |
| 2741 | + |
| 2742 | + // Handle remaining characters. |
| 2743 | + const uint32_t start_index = static_cast<uint32_t>(block - chars); |
| 2744 | + return AppendStringCheckedScalar(chars, length, start_index, |
| 2745 | + uncopied_src_index, no_gc); |
| 2746 | +} |
| 2747 | + |
| 2748 | +template <typename Char> |
| 2749 | +template <typename SrcChar> |
| 2750 | + requires(sizeof(SrcChar) == sizeof(base::uc16)) |
| 2751 | +FastJsonStringifierResult FastJsonStringifier<Char>::AppendStringChecked( |
| 2752 | + const SrcChar* chars, uint32_t length, |
| 2753 | + const DisallowGarbageCollection& no_gc) { |
| 2754 | + uint32_t uncopied_src_index = 0; // Index of first char not copied yet. |
2655 | 2755 | // TODO(pthier): Add SIMD version.
|
2656 | 2756 | for (uint32_t i = 0; i < length; i++) {
|
2657 | 2757 | SrcChar c = chars[i];
|
2658 | 2758 | if (V8_LIKELY(DoNotEscape(c))) continue;
|
2659 |
| - if constexpr (!is_one_byte) { |
2660 |
| - if (sizeof(SrcChar) != 1 && |
2661 |
| - base::IsInRange(c, static_cast<SrcChar>(0xD800), |
2662 |
| - static_cast<SrcChar>(0xDFFF))) { |
2663 |
| - // The current character is a surrogate. |
2664 |
| - buffer_.Append(chars + escape_char_idx + 1, i - (escape_char_idx + 1)); |
2665 |
| - |
2666 |
| - char double_to_radix_chars[kDoubleToRadixMaxChars]; |
2667 |
| - base::Vector<char> double_to_radix_buffer = |
2668 |
| - base::ArrayVector(double_to_radix_chars); |
2669 |
| - if (c <= 0xDBFF) { |
2670 |
| - // The current character is a leading surrogate. |
2671 |
| - if (i + 1 < length) { |
2672 |
| - // There is a next character. |
2673 |
| - SrcChar next = chars[i + 1]; |
2674 |
| - if (base::IsInRange(next, static_cast<SrcChar>(0xDC00), |
2675 |
| - static_cast<SrcChar>(0xDFFF))) { |
2676 |
| - // The next character is a trailing surrogate, meaning this is a |
2677 |
| - // surrogate pair. |
2678 |
| - AppendCharacter(c); |
2679 |
| - AppendCharacter(next); |
2680 |
| - i++; |
2681 |
| - } else { |
2682 |
| - // The next character is not a trailing surrogate. Thus, the |
2683 |
| - // current character is a lone leading surrogate. |
2684 |
| - AppendCStringLiteral("\\u"); |
2685 |
| - std::string_view hex = |
2686 |
| - DoubleToRadixStringView(c, 16, double_to_radix_buffer); |
2687 |
| - AppendString(hex); |
2688 |
| - } |
| 2759 | + if (sizeof(SrcChar) != 1 && base::IsInRange(c, static_cast<SrcChar>(0xD800), |
| 2760 | + static_cast<SrcChar>(0xDFFF))) { |
| 2761 | + // The current character is a surrogate. |
| 2762 | + buffer_.Append(chars + uncopied_src_index, i - uncopied_src_index); |
| 2763 | + char double_to_radix_chars[kDoubleToRadixMaxChars]; |
| 2764 | + base::Vector<char> double_to_radix_buffer = |
| 2765 | + base::ArrayVector(double_to_radix_chars); |
| 2766 | + if (c <= 0xDBFF) { |
| 2767 | + // The current character is a leading surrogate. |
| 2768 | + if (i + 1 < length) { |
| 2769 | + // There is a next character. |
| 2770 | + SrcChar next = chars[i + 1]; |
| 2771 | + if (base::IsInRange(next, static_cast<SrcChar>(0xDC00), |
| 2772 | + static_cast<SrcChar>(0xDFFF))) { |
| 2773 | + // The next character is a trailing surrogate, meaning this is a |
| 2774 | + // surrogate pair. |
| 2775 | + AppendCharacter(c); |
| 2776 | + AppendCharacter(next); |
| 2777 | + i++; |
2689 | 2778 | } else {
|
2690 |
| - // There is no next character. Thus, the current character is a lone |
2691 |
| - // leading surrogate. |
| 2779 | + // The next character is not a trailing surrogate. Thus, the |
| 2780 | + // current character is a lone leading surrogate. |
2692 | 2781 | AppendCStringLiteral("\\u");
|
2693 | 2782 | std::string_view hex =
|
2694 | 2783 | DoubleToRadixStringView(c, 16, double_to_radix_buffer);
|
2695 | 2784 | AppendString(hex);
|
2696 | 2785 | }
|
2697 | 2786 | } else {
|
2698 |
| - // The current character is a lone trailing surrogate. (If it had been |
2699 |
| - // preceded by a leading surrogate, we would've ended up in the other |
2700 |
| - // branch earlier on, and the current character would've been handled |
2701 |
| - // as part of the surrogate pair already.) |
| 2787 | + // There is no next character. Thus, the current character is a lone |
| 2788 | + // leading surrogate. |
2702 | 2789 | AppendCStringLiteral("\\u");
|
2703 | 2790 | std::string_view hex =
|
2704 | 2791 | DoubleToRadixStringView(c, 16, double_to_radix_buffer);
|
2705 | 2792 | AppendString(hex);
|
2706 | 2793 | }
|
2707 |
| - escape_char_idx = i; |
2708 | 2794 | } else {
|
2709 |
| - buffer_.Append(chars + escape_char_idx + 1, i - (escape_char_idx + 1)); |
2710 |
| - DCHECK_LT(c, 0x60); |
2711 |
| - AppendCString(&JsonEscapeTable[c * kJsonEscapeTableEntrySize]); |
2712 |
| - escape_char_idx = i; |
| 2795 | + // The current character is a lone trailing surrogate. (If it had been |
| 2796 | + // preceded by a leading surrogate, we would've ended up in the other |
| 2797 | + // branch earlier on, and the current character would've been handled |
| 2798 | + // as part of the surrogate pair already.) |
| 2799 | + AppendCStringLiteral("\\u"); |
| 2800 | + std::string_view hex = |
| 2801 | + DoubleToRadixStringView(c, 16, double_to_radix_buffer); |
| 2802 | + AppendString(hex); |
2713 | 2803 | }
|
| 2804 | + uncopied_src_index = i + 1; |
2714 | 2805 | } else {
|
2715 |
| - buffer_.Append(chars + escape_char_idx + 1, i - (escape_char_idx + 1)); |
| 2806 | + buffer_.Append(chars + uncopied_src_index, i - uncopied_src_index); |
| 2807 | + DCHECK_LT(c, 0x60); |
2716 | 2808 | AppendCString(&JsonEscapeTable[c * kJsonEscapeTableEntrySize]);
|
2717 |
| - escape_char_idx = i; |
| 2809 | + uncopied_src_index = i + 1; |
2718 | 2810 | }
|
2719 | 2811 | }
|
2720 |
| - if (static_cast<uint32_t>(escape_char_idx + 1) < length) { |
2721 |
| - buffer_.Append(chars + escape_char_idx + 1, length - (escape_char_idx + 1)); |
| 2812 | + if (uncopied_src_index < length) { |
| 2813 | + buffer_.Append(chars + uncopied_src_index, length - uncopied_src_index); |
2722 | 2814 | }
|
2723 | 2815 | return SUCCESS;
|
2724 | 2816 | }
|
|
0 commit comments