|
20 | 20 | #include <catch2/catch_message.hpp>
|
21 | 21 | #include <catch2/catch_test_macros.hpp>
|
22 | 22 |
|
| 23 | +#include <iostream> |
23 | 24 | #include <string_view>
|
24 | 25 | #include <variant>
|
25 | 26 |
|
// Optional diagnostic tracing for the tests in this file.
//
// Define LIBUNICODE_TRACE (or locally flip the `0` below to `1`) to print
// std::format-style messages to stdout; otherwise TRACE(...) expands to a
// no-op and its arguments are not evaluated.
//
// Both expansions are fully parenthesized void expressions, so a TRACE(...)
// call parses and type-checks the same way in either configuration.
#if 0 || defined(LIBUNICODE_TRACE)
    #include <format>
    #include <iostream>

    #define TRACE(...) ((void) (std::cout << std::format(__VA_ARGS__)))
#else
    #define TRACE(...) ((void) 0)
#endif
| 35 | + |
26 | 36 | using namespace std::string_view_literals;
|
27 | 37 | using namespace std::string_literals;
|
28 | 38 | using std::pair;
|
@@ -55,6 +65,31 @@ std::ostream& operator<<(std::ostream& os, expectation const& e)
|
55 | 65 | } // namespace std
|
56 | 66 | // }}}
|
57 | 67 |
|
| 68 | +namespace fmt |
| 69 | +{ |
| 70 | + |
| 71 | +template <> |
| 72 | +struct formatter<expectation>: formatter<std::string_view> |
| 73 | +{ |
| 74 | + template <typename FormatContext> |
| 75 | + auto format(expectation const& e, FormatContext& ctx) const |
| 76 | + { |
| 77 | + return format_to(ctx.out(), "{{ offset: {}, size: {}, width: {} }}", e.offset, e.size, e.width); |
| 78 | + } |
| 79 | +}; |
| 80 | + |
| 81 | +template <> |
| 82 | +struct formatter<std::pair<unicode::StopCondition, unsigned>>: formatter<std::string_view> |
| 83 | +{ |
| 84 | + template <typename FormatContext> |
| 85 | + auto format(std::pair<unicode::StopCondition, unsigned> const& v, FormatContext& ctx) const |
| 86 | + { |
| 87 | + return format_to(ctx.out(), "{{{}, {}}}", v.first, v.second); |
| 88 | + } |
| 89 | +}; |
| 90 | + |
| 91 | +} // namespace fmt |
| 92 | + |
58 | 93 | // {{{ helpers
|
59 | 94 | namespace
|
60 | 95 | {
|
@@ -120,12 +155,34 @@ struct complex_unicode_sequence
|
120 | 155 | return os << "{ value: \"" << e(seq.value) << "\", width: " << seq.width << " }";
|
121 | 156 | }
|
122 | 157 |
|
| 158 | +using Record = std::variant<invalid_sequence, ascii_sequence, complex_unicode_sequence>; |
| 159 | + |
123 | 160 | } // namespace
|
124 | 161 |
|
125 |
| -namespace |
| 162 | +namespace fmt |
| 163 | +{ |
| 164 | +template <> |
| 165 | +struct formatter<Record>: formatter<std::string_view> |
126 | 166 | {
|
| 167 | + template <typename FormatContext> |
| 168 | + auto format(Record const& r, FormatContext& ctx) const |
| 169 | + { |
| 170 | + if (std::holds_alternative<invalid_sequence>(r)) |
| 171 | + return fmt::format_to(ctx.out(), "invalid_sequence {{ value: \"{}\" }}", std::get<invalid_sequence>(r).value); |
| 172 | + else if (std::holds_alternative<ascii_sequence>(r)) |
| 173 | + return fmt::format_to(ctx.out(), "ascii_sequence {{ value: \"{}\" }}", std::get<ascii_sequence>(r).value); |
| 174 | + else |
| 175 | + return fmt::format_to(ctx.out(), |
| 176 | + "complex_unicode_sequence {{ value: \"{}\", width: {} }}", |
| 177 | + std::get<complex_unicode_sequence>(r).value, |
| 178 | + std::get<complex_unicode_sequence>(r).width); |
| 179 | + } |
| 180 | +}; |
127 | 181 |
|
128 |
| -using Record = std::variant<invalid_sequence, ascii_sequence, complex_unicode_sequence>; |
| 182 | +} // namespace fmt |
| 183 | + |
| 184 | +namespace |
| 185 | +{ |
129 | 186 |
|
130 | 187 | auto constexpr FamilyEmoji = U"\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466"sv;
|
131 | 188 | auto constexpr SmileyEmoji = U"\U0001F600"sv;
|
@@ -649,3 +706,90 @@ TEST_CASE("grapheme_line_segmenter.complex.sliced_calls")
|
649 | 706 | CHECK(result2.stop_condition == StopCondition::UnexpectedInput); // control character \033
|
650 | 707 | REQUIRE(e(result2.text) == e(u8(SmileyEmoji)));
|
651 | 708 | }
|
| 709 | + |
| 710 | +TEST_CASE("grapheme_utf8.0") |
| 711 | +{ |
| 712 | + auto constexpr text = "\xC3\xB6"sv; // 'ö' |
| 713 | + |
| 714 | + const auto* input = text.data(); |
| 715 | + const auto* const end = text.data() + text.size(); |
| 716 | + |
| 717 | + auto recorder = event_recorder { "single_utf8" }; |
| 718 | + auto segmenter = grapheme_line_segmenter { recorder, ""sv }; |
| 719 | + |
| 720 | + auto const chunk = std::string_view(input, end); |
| 721 | + TRACE("Processing {}...{} ({})\n", (void*) input, (void*) end, std::distance(input, end)); |
| 722 | + segmenter.reset(chunk); |
| 723 | + |
| 724 | + auto const result = segmenter.process(10); |
| 725 | + TRACE("result: [text: \"{}\", width: {}, stop: {}]\n", result.text, result.width, [](auto val) { |
| 726 | + switch (val) |
| 727 | + { |
| 728 | + case unicode::StopCondition::UnexpectedInput: return "UnexpectedInput"; |
| 729 | + case unicode::StopCondition::EndOfWidth: return "EndOfWidth"; |
| 730 | + case unicode::StopCondition::EndOfInput: return "EndOfInput"; |
| 731 | + } |
| 732 | + return "Unknown"; |
| 733 | + }(result.stop_condition)); |
| 734 | + |
| 735 | + CHECK(result.text == text); |
| 736 | + CHECK(result.width == 0); |
| 737 | +} |
| 738 | + |
| 739 | +TEST_CASE("grapheme_utf8.1") |
| 740 | +{ |
| 741 | + auto constexpr text = "\xC3\xB6 "sv; // 'ö ' |
| 742 | + |
| 743 | + const auto* input = text.data(); |
| 744 | + const auto* const end = text.data() + text.size(); |
| 745 | + |
| 746 | + auto recorder = event_recorder { "single_utf8" }; |
| 747 | + auto segmenter = grapheme_line_segmenter { recorder, ""sv }; |
| 748 | + |
| 749 | + auto const chunk = std::string_view(input, end); |
| 750 | + TRACE("Processing {}...{} ({})\n", (void*) input, (void*) end, std::distance(input, end)); |
| 751 | + segmenter.reset(chunk); |
| 752 | + |
| 753 | + auto const result = segmenter.process(10); |
| 754 | + TRACE("result: [text: \"{}\", width: {}, stop: {}]\n", result.text, result.width, [](auto val) { |
| 755 | + switch (val) |
| 756 | + { |
| 757 | + case unicode::StopCondition::UnexpectedInput: return "UnexpectedInput"; |
| 758 | + case unicode::StopCondition::EndOfWidth: return "EndOfWidth"; |
| 759 | + case unicode::StopCondition::EndOfInput: return "EndOfInput"; |
| 760 | + } |
| 761 | + return "Unknown"; |
| 762 | + }(result.stop_condition)); |
| 763 | + |
| 764 | + CHECK(result.text == text); |
| 765 | + CHECK(result.width == 2); |
| 766 | +} |
| 767 | + |
| 768 | +TEST_CASE("grapheme_utf8.2") |
| 769 | +{ |
| 770 | + auto constexpr text = "a\xC3\xB6a"sv; // 'aöa' |
| 771 | + |
| 772 | + const auto* input = text.data(); |
| 773 | + const auto* const end = text.data() + text.size(); |
| 774 | + |
| 775 | + auto recorder = event_recorder { "single_utf8" }; |
| 776 | + auto segmenter = grapheme_line_segmenter { recorder, ""sv }; |
| 777 | + |
| 778 | + auto const chunk = std::string_view(input, end); |
| 779 | + TRACE("Processing {}...{} ({})\n", (void*) input, (void*) end, std::distance(input, end)); |
| 780 | + segmenter.reset(chunk); |
| 781 | + |
| 782 | + auto const result = segmenter.process(10); |
| 783 | + TRACE("result: [text: \"{}\", width: {}, stop: {}]\n", result.text, result.width, [](auto val) { |
| 784 | + switch (val) |
| 785 | + { |
| 786 | + case unicode::StopCondition::UnexpectedInput: return "UnexpectedInput"; |
| 787 | + case unicode::StopCondition::EndOfWidth: return "EndOfWidth"; |
| 788 | + case unicode::StopCondition::EndOfInput: return "EndOfInput"; |
| 789 | + } |
| 790 | + return "Unknown"; |
| 791 | + }(result.stop_condition)); |
| 792 | + |
| 793 | + CHECK(result.text == text); |
| 794 | + CHECK(result.width == 3); |
| 795 | +} |
0 commit comments