Skip to content

Commit ec7a1d8

Browse files
committed
Fix character skipping after a surrogate pair
Previously, the lexer skipped the first character following a surrogate pair in a string, while the rest of the string was parsed as usual.
1 parent 3948630 commit ec7a1d8

File tree

3 files changed

+9
-4
lines changed

3 files changed

+9
-4
lines changed

src/json.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -6856,8 +6856,8 @@ class basic_json
68566856
auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
68576857
(i + 7), 4).c_str(), nullptr, 16);
68586858
result += to_unicode(codepoint, codepoint2);
6859-
// skip the next 11 characters (xxxx\uyyyy)
6860-
i += 11;
6859+
// skip the next 10 characters (xxxx\uyyyy)
6860+
i += 10;
68616861
}
68626862
else
68636863
{

src/json.hpp.re2c

+2-2
Original file line numberDiff line numberDiff line change
@@ -6162,8 +6162,8 @@ class basic_json
61626162
auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
61636163
(i + 7), 4).c_str(), nullptr, 16);
61646164
result += to_unicode(codepoint, codepoint2);
6165-
// skip the next 11 characters (xxxx\uyyyy)
6166-
i += 11;
6165+
// skip the next 10 characters (xxxx\uyyyy)
6166+
i += 10;
61676167
}
61686168
else
61696169
{

test/unit.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -10205,4 +10205,9 @@ TEST_CASE("regression tests")
1020510205
j["string"] = bytes;
1020610206
CHECK(j["string"] == "\u0007\u0007");
1020710207
}
10208+
10209+
SECTION("character following a surrogate pair is skipped")
10210+
{
10211+
CHECK(json::parse("\"\\ud80c\\udc60abc\"").get<json::string_t>() == u8"\U00013060abc");
10212+
}
1020810213
}

0 commit comments

Comments
 (0)