contour-terminal
diff --git a/‎src/libunicode/CMakeLists.txt
+3 b/‎src/libunicode/CMakeLists.txt
+3
diff --git a/‎src/libunicode/grapheme_line_segmenter.cpp
+313 b/‎src/libunicode/grapheme_line_segmenter.cpp
+313
@@ -86,6 +86,7 @@ add_library(unicode ${LIBUNICODE_LIB_MODE}
     capi.cpp
     codepoint_properties.cpp
     emoji_segmenter.cpp
+    grapheme_line_segmenter.cpp
     grapheme_segmenter.cpp
     scan.cpp
     script_segmenter.cpp
@@ -103,6 +104,7 @@ set(public_headers
     codepoint_properties.h
     convert.h
     emoji_segmenter.h
+    grapheme_line_segmenter.h
     grapheme_segmenter.h
     intrinsics.h
     multistage_table_view.h
@@ -186,6 +188,7 @@ if(LIBUNICODE_TESTING)
         capi_test.cpp
         convert_test.cpp
         emoji_segmenter_test.cpp
+        grapheme_line_segmenter_test.cpp
         grapheme_segmenter_test.cpp
         run_segmenter_test.cpp
         scan_test.cpp
 
@@ -0,0 +1,313 @@
+/**
+ * This file is part of the "libunicode" project
+ *   Copyright (c) 2023 Christian Parpart <[email protected]>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <libunicode/grapheme_line_segmenter.h>
+#include <libunicode/grapheme_segmenter.h>
+#include <libunicode/intrinsics.h>
+#include <libunicode/utf8.h>
+#include <libunicode/width.h>
+
+#include <algorithm>
+#include <cassert>
+
+namespace unicode
+{
+
+namespace
+{
+    /// Counts the consecutive zero bits of @p value, starting at the least significant bit.
+    ///
+    /// @param value bit mask to inspect. Must not be zero: __builtin_ctz is undefined for 0.
+    /// @return zero-based index of the lowest set bit.
+    [[maybe_unused]] int countTrailingZeroBits(unsigned int value) noexcept
+    {
+        // Select the intrinsic by compiler, not by target OS: GCC-based Windows toolchains
+        // (MinGW) define _WIN32 too, but only offer _tzcnt_u32 when BMI code generation is
+        // enabled, whereas __builtin_ctz is always available there.
+#if defined(_MSC_VER)
+        return static_cast<int>(_tzcnt_u32(value));
+#else
+        return __builtin_ctz(value);
+#endif
+    }
+
+    /// Tests if the given byte lies in the range 0x00 .. 0x1F (the C0 control codes).
+    constexpr bool is_control(char ch) noexcept
+    {
+        return static_cast<uint8_t>(ch) < 0x20;
+    }
+
+    /// Tests if the given UTF-8 byte has its most significant bit set, that is, whether it
+    /// belongs to the encoding of a codepoint greater than U+7F.
+    constexpr bool is_complex(char ch) noexcept
+    {
+        return (static_cast<uint8_t>(ch) & 0x80) != 0;
+    }
+
+    /// Tests if the given UTF-8 byte is a single US-ASCII text codepoint, i.e. neither a
+    /// control code (below 0x20) nor part of a multi-byte sequence. Note that 0x7F passes.
+    constexpr bool is_ascii(char ch) noexcept
+    {
+        return !(is_control(ch) || is_complex(ch));
+    }
+} // namespace
+
+/// Counts how many bytes at the front of the buffer are plain US-ASCII text.
+///
+/// The scan stops at the first byte for which is_ascii() is false, at the end of the buffer,
+/// or after @p maxWidth bytes, whichever comes first. The buffer itself is not modified.
+///
+/// @param maxWidth upper bound for the number of bytes to look at.
+/// @return number of leading ASCII text bytes; never more than @p maxWidth.
+unsigned grapheme_line_segmenter::process_ascii(unsigned maxWidth) noexcept
+{
+    char const* const begin = _buffer.data();
+
+    // Compare in size_t so that a very large buffer size is not truncated before taking the minimum.
+    auto const limit = std::min(_buffer.size(), static_cast<size_t>(maxWidth));
+    char const* const end = begin + limit;
+    char const* input = begin;
+
+    constexpr size_t BatchSize = sizeof(intrinsics::m128i);
+    intrinsics::m128i const ControlCodeMax = intrinsics::set1_epi8(0x20); // 0..0x1F
+    intrinsics::m128i const Complex = intrinsics::set1_epi8(-128);        // equals to 0x80 (0b1000'0000)
+
+    // Batched scan. The loop condition is phrased as "remaining bytes" so that no pointer in
+    // front of the buffer is ever formed when fewer than BatchSize bytes are available.
+    while (static_cast<size_t>(end - input) >= BatchSize)
+    {
+        intrinsics::m128i batch = intrinsics::load_unaligned((intrinsics::m128i*) input);
+        intrinsics::m128i isControl = intrinsics::compare_less(batch, ControlCodeMax);
+        intrinsics::m128i isComplex = intrinsics::and128(batch, Complex);
+        intrinsics::m128i testPack = intrinsics::or128(isControl, isComplex);
+        if (int const check = intrinsics::movemask_epi8(testPack); check != 0)
+        {
+            // The lowest set bit of the mask marks the first byte that is not ASCII text.
+            input += countTrailingZeroBits(static_cast<unsigned>(check));
+            break;
+        }
+        input += BatchSize;
+    }
+
+    // Scalar scan for the remaining tail. It stops immediately if the batched scan above
+    // already found a non-ASCII byte.
+    while (input != end && is_ascii(*input))
+        ++input;
+
+    return static_cast<unsigned>(input - begin);
+}
+
+/// Starts over on a new input buffer.
+///
+/// Clears the pending UTF-8 decoder state, the last-codepoint hint and the width of the
+/// current grapheme cluster, then adopts @p buffer as the text to process.
+void grapheme_line_segmenter::reset(std::string_view buffer) noexcept
+{
+    _currentClusterWidth = 0;
+    _lastCodepointHint = 0;
+    _utf8 = {};
+
+    _buffer = buffer;
+}
+
+/// Extends the end of the current buffer view by @p count bytes; its start stays where it is.
+///
+/// NOTE: nothing here verifies that the additional bytes are readable memory. That is
+/// presumably the caller's responsibility.
+void grapheme_line_segmenter::expand_buffer_by(size_t count) noexcept
+{
+    auto const expandedSize = _buffer.size() + count;
+    _buffer = std::string_view(_buffer.data(), expandedSize);
+}
+
+/// Drops everything in front of @p pos from the buffer and clears all scanning state
+/// (UTF-8 decoder state, last-codepoint hint and current cluster width).
+///
+/// @param pos new start of the buffer; asserted to lie within [begin, end] of the current buffer.
+void grapheme_line_segmenter::move_forward_to(char const* pos) noexcept
+{
+    char const* const bufferBegin = _buffer.data();
+    assert(bufferBegin <= pos && pos <= bufferBegin + _buffer.size());
+
+    _buffer.remove_prefix(static_cast<size_t>(pos - bufferBegin));
+
+    _utf8 = {};
+    _currentClusterWidth = 0;
+    _lastCodepointHint = 0;
+}
+
+/// Consumes text from the front of the buffer until @p maxWidth columns are used up, the
+/// buffer is exhausted, or a sub-scan makes no progress.
+///
+/// Each round is dispatched either to process_ascii() or to process_complex_unicode(),
+/// depending on whether a UTF-8 sequence is pending or the next byte has its high bit set.
+/// ASCII runs are reported through on_ascii(). All consumed bytes are removed from the front
+/// of the buffer.
+///
+/// @param maxWidth number of columns available.
+/// @return view over the consumed bytes and the number of columns they occupy; an empty
+///         view with width 0 if the buffer is empty.
+grapheme_line_segmenter::result_type grapheme_line_segmenter::process(unsigned maxWidth) noexcept
+{
+    if (_buffer.empty())
+        return result_type { .text = _buffer.substr(0, 0), .width = 0 };
+
+    // With a partially decoded UTF-8 sequence pending, the result starts _utf8.currentLength
+    // bytes in front of the buffer (assumed to be the bytes already fed into the decoder).
+    char const* const bufferStart = _buffer.data();
+    char const* const resultStart =
+        _utf8.expectedLength ? bufferStart - _utf8.currentLength : bufferStart;
+
+    // Number of bytes used in the current line.
+    size_t totalByteCountProcessed = 0;
+
+    // Number of columns used in the current line.
+    unsigned totalWidthProcessed = 0;
+
+    auto const makeResult = [&]() -> result_type {
+        return result_type { .text = std::string_view(resultStart, totalByteCountProcessed),
+                             .width = totalWidthProcessed };
+    };
+
+    while (maxWidth > 0 && !_buffer.empty())
+    {
+        bool const needsUnicodeScan = _utf8.expectedLength != 0 || is_complex(_buffer.front());
+
+        if (!needsUnicodeScan)
+        {
+            // process_ascii() never returns more than maxWidth, so the subtraction is safe.
+            auto const count = process_ascii(maxWidth);
+            if (count == 0)
+                return makeResult(); // next byte is not ASCII text (e.g. a control code)
+            _events.on_ascii(_buffer.substr(0, count));
+            maxWidth -= count;
+            totalWidthProcessed += count;
+            totalByteCountProcessed += count;
+            _buffer.remove_prefix(count);
+            continue;
+        }
+
+        auto const sub = process_complex_unicode(maxWidth);
+        if (sub.width == 0)
+            return makeResult();
+
+        // The Unicode scan may report more columns than were available (invalid bytes are
+        // counted before the limit is re-checked). Clamp so the unsigned budget cannot wrap
+        // around and turn into an effectively unlimited width.
+        maxWidth -= std::min(maxWidth, static_cast<unsigned>(sub.width));
+        totalWidthProcessed += sub.width;
+        totalByteCountProcessed += sub.text.size();
+        _buffer.remove_prefix(sub.text.size());
+    }
+
+    return makeResult();
+}
+
+/// Scans the front of the buffer byte by byte through the UTF-8 decoder, groups the decoded
+/// codepoints into grapheme clusters and sums up their column widths.
+///
+/// The scan ends at the end of the buffer, at the first byte that is a control code or has
+/// no high bit set, when a newly started or VS16-widened cluster would not fit anymore, or
+/// once the consumed width exceeds @p maxWidth. The buffer itself is not modified here.
+///
+/// @param maxWidth number of columns available. NOTE(review): this local copy is also
+///                 decremented inside the loop, in addition to consumedWidth being increased.
+/// @return the byte range [lastClusterStart, lastClusterEnd) and the width consumed.
+///
+/// NOTE(review): the printf() calls below look like debug tracing; <cstdio> is not among the
+/// includes visible in this file.
+grapheme_line_segmenter::result_type grapheme_line_segmenter::process_complex_unicode(
+    unsigned maxWidth) noexcept
+{
+    char const* const start = _buffer.data();
+    char const* const end = start + _buffer.size();
+
+    char const* input = start;              // current input processing position
+    char const* clusterStart = start;       // start position of current grapheme cluster
+    char const* lastCodepointStart = start; // start position of last codepoint
+    unsigned consumedWidth = 0;             // width consumed for the current line
+    unsigned currentCodepointLength = 0;    // bytes consumed for the current codepoint
+
+    // If a UTF-8 sequence is still pending, the result range begins _utf8.currentLength bytes
+    // in front of the buffer. NOTE(review): presumably those are the bytes already fed into
+    // the decoder by an earlier call - confirm that they are still addressable.
+    char const* const lastClusterStart =
+        _utf8.expectedLength ? start - _utf8.currentLength : start; // start position of last grapheme cluster
+
+    char const* lastClusterEnd = lastClusterStart; // end position of last grapheme cluster
+
+    printf("process_complex_unicode: start at %p\n", (void*) lastClusterStart);
+    int iteration = 0; // only used for the trace output below
+    // NOTE(review): the condition is `<=`, so the loop is still entered when consumedWidth
+    // already equals maxWidth.
+    while (input != end && consumedWidth <= maxWidth)
+    {
+        ++iteration;
+        if (is_control(*input))
+            printf("Terminating, because control character 0x%02X.\n", static_cast<uint8_t>(*input));
+        else if (!is_complex(*input))
+            printf("Terminating, because single US-ASCII text codepoint. '%c'\n", *input);
+        if (is_control(*input) || !is_complex(*input))
+        {
+            // ASCII control character or single US-ASCII text codepoint.
+
+            if (_utf8.expectedLength)
+            {
+                // Incomplete UTF-8 sequence hit. That's invalid as well.
+                // NOTE(review): the view handed to on_invalid() covers the terminating byte at
+                // `input`, not the bytes of the incomplete sequence in front of it.
+                ++consumedWidth;
+                _events.on_invalid(std::string_view(input, input + 1));
+                _utf8 = {};
+            }
+
+            // The terminating byte itself is not consumed: the result ends right before it.
+            _lastCodepointHint = 0;
+            lastClusterEnd = input;
+            currentCodepointLength = 0;
+            break;
+        }
+
+        printf("complex input (%d): 0x%02X\n", iteration, static_cast<uint8_t>(*input));
+        // Feed one byte into the decoder. The result holds one of Incomplete, Success or Invalid.
+        auto const result = from_utf8(_utf8, static_cast<uint8_t>(*input++));
+        ++currentCodepointLength;
+
+        // More bytes are needed before a codepoint is available.
+        if (holds_alternative<Incomplete>(result))
+            continue;
+
+        if (holds_alternative<Success>(result))
+        {
+            auto const prevCodepoint = _lastCodepointHint;
+            auto const nextCodepoint = std::get<Success>(result).value;
+            // Width of the cluster if this codepoint were part of it: the larger of both widths.
+            auto const nextWidth =
+                std::max(_currentClusterWidth, static_cast<unsigned>(unicode::width(nextCodepoint)));
+            _lastCodepointHint = nextCodepoint;
+            if (grapheme_segmenter::breakable(prevCodepoint, nextCodepoint))
+            {
+                // Flush out current grapheme cluster's East Asian Width.
+                // NOTE(review): the width is added to consumedWidth AND subtracted from
+                // maxWidth, and both are then compared against each other - confirm that this
+                // double accounting (and a possible unsigned wrap of maxWidth) is intended.
+                consumedWidth += _currentClusterWidth;
+                maxWidth -= _currentClusterWidth;
+
+                if (consumedWidth + nextWidth > maxWidth)
+                {
+                    // Currently scanned grapheme cluster won't fit. Break at start.
+                    // `input` is rewound, but it is not read again after the loop; the
+                    // returned range is determined by lastClusterEnd alone.
+                    _currentClusterWidth = 0;
+                    input -= currentCodepointLength;
+                    break;
+                }
+
+                // NOTE(review): the reported view starts at clusterStart but its length is
+                // currentCodepointLength, and nextWidth already includes the previous
+                // cluster's width via std::max - verify both against the intended semantics.
+                _events.on_grapheme_cluster(std::string_view(clusterStart, currentCodepointLength),
+                                            _currentClusterWidth);
+
+                // And start a new grapheme cluster.
+                _currentClusterWidth = nextWidth;
+                clusterStart = lastCodepointStart;
+                lastCodepointStart = input - currentCodepointLength;
+                currentCodepointLength = 0;
+                lastClusterEnd = input;
+            }
+            else
+            {
+                // The codepoint extends the current cluster. currentCodepointLength is not
+                // reset on this path, so it keeps accumulating across these codepoints.
+                lastClusterEnd = input;
+                // Increase width on VS16 but do not decrease on VS15.
+                if (nextCodepoint == 0xFE0F) // VS16
+                {
+                    _currentClusterWidth = 2;
+                    if (consumedWidth + _currentClusterWidth > maxWidth)
+                    {
+                        // Rewinding by {currentCodepointLength} bytes (overflow due to VS16).
+                        // NOTE(review): lastClusterEnd was already advanced above and is not
+                        // rewound here; only `input` is reset, to clusterStart.
+                        _currentClusterWidth = 0;
+                        input = clusterStart;
+                        break;
+                    }
+                }
+
+                // Consumed {currentCodepointLength} bytes for grapheme cluster.
+                lastCodepointStart = input - currentCodepointLength;
+            }
+        }
+        else
+        {
+            // Invalid UTF-8: count one column, report it, and restart cluster tracking
+            // right behind the offending byte.
+            assert(holds_alternative<Invalid>(result));
+            consumedWidth++;
+            _events.on_invalid(std::string_view(clusterStart, currentCodepointLength));
+            _currentClusterWidth = 0;
+            _lastCodepointHint = 0;
+            _utf8.expectedLength = 0;
+            currentCodepointLength = 0;
+            clusterStart = input;
+            lastClusterEnd = input;
+        }
+    }
+
+    // Account for the cluster that was still open when the loop ended (0 after an early break
+    // that reset it).
+    consumedWidth += _currentClusterWidth;
+
+    _currentClusterWidth = 0;
+
+    // if (currentCodepointLength <= _buffer.size())
+    //     _buffer.remove_prefix(currentCodepointLength);
+    // else
+    //     abort();
+
+    assert(lastClusterStart <= lastClusterEnd);
+
+    auto const resultLength = static_cast<size_t>(std::distance(lastClusterStart, lastClusterEnd));
+    printf("lastClusterEnd: %p, size: %zu\n", (void*) lastClusterEnd, resultLength);
+    return result_type { .text = std::string_view(lastClusterStart, resultLength), .width = consumedWidth };
+}
+
+/// Feeds a single byte into this segmenter's UTF-8 decoder state and hands back the
+/// decoder's result unchanged.
+ConvertResult grapheme_line_segmenter::process_single_byte(uint8_t byte) noexcept
+{
+    auto& decoderState = _utf8;
+    return from_utf8(decoderState, byte);
+}
+
+} // namespace unicode