@@ -55,6 +55,7 @@ namespace
55
55
void grapheme_line_segmenter::reset (std::string_view buffer) noexcept
56
56
{
57
57
_buffer = buffer;
58
+ _next = buffer.data ();
58
59
59
60
_utf8 = {};
60
61
_lastCodepointHint = 0 ;
@@ -70,7 +71,7 @@ void grapheme_line_segmenter::move_forward_to(char const* pos) noexcept
70
71
{
71
72
assert (_buffer.data () <= pos && pos <= _buffer.data () + _buffer.size ());
72
73
auto const skippedBytesCount = static_cast <size_t >(pos - _buffer.data ());
73
- _buffer. remove_prefix ( skippedBytesCount) ;
74
+ _next += skippedBytesCount;
74
75
_lastCodepointHint = 0 ;
75
76
_currentClusterWidth = 0 ;
76
77
_utf8 = {};
@@ -83,7 +84,7 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process(unsigned m
83
84
if (_buffer.empty ())
84
85
return result_type { .text = _buffer.substr (0 , 0 ), .width = 0 };
85
86
86
- char const * start = _buffer. data () ;
87
+ char const * start = _next ;
87
88
char const * const resultStart = _utf8.expectedLength ? start - _utf8.currentLength : start;
88
89
89
90
// Number of bytes used in the current line.
@@ -118,17 +119,20 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process(unsigned m
118
119
maxWidth -= count;
119
120
totalWidthProcessed += count;
120
121
totalByteCountProcessed += count;
121
- _buffer. remove_prefix ( count) ;
122
+ _next += count;
122
123
break ;
123
124
}
124
125
case State::ComplexUnicode: {
125
126
auto const sub = process_complex_unicode (maxWidth);
126
- if (sub.width == 0 )
127
+ if (sub.graphemeClusterCount == 0 )
128
+ {
129
+ _next += sub.byteCount ;
127
130
return makeResult ();
128
- maxWidth -= sub.width ;
129
- totalWidthProcessed += sub.width ;
130
- totalByteCountProcessed += sub.text .size ();
131
- _buffer.remove_prefix (sub.text .size ());
131
+ }
132
+ maxWidth -= sub.graphemeClusterCount ;
133
+ totalWidthProcessed += sub.graphemeClusterCount ;
134
+ totalByteCountProcessed += sub.byteCount ;
135
+ _next += sub.byteCount ;
132
136
break ;
133
137
}
134
138
}
@@ -137,7 +141,7 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process(unsigned m
137
141
return makeResult ();
138
142
}
139
143
140
- unsigned grapheme_line_segmenter::process_ascii (unsigned maxWidth) noexcept
144
+ unsigned grapheme_line_segmenter::process_ascii (unsigned maxWidth) const noexcept
141
145
{
142
146
auto input = _buffer.data ();
143
147
auto const end = _buffer.data () + std::min (static_cast <unsigned >(_buffer.size ()), maxWidth);
@@ -173,20 +177,19 @@ unsigned grapheme_line_segmenter::process_ascii(unsigned maxWidth) noexcept
173
177
return static_cast <unsigned >(std::distance (_buffer.data (), input));
174
178
}
175
179
176
- grapheme_line_segmenter::result_type grapheme_line_segmenter::process_complex_unicode (
177
- unsigned maxWidth) noexcept
180
+ auto grapheme_line_segmenter::process_complex_unicode (unsigned maxWidth) noexcept -> unicode_process_result
178
181
{
179
182
char const * const start = _buffer.data ();
180
183
char const * const end = start + _buffer.size ();
181
184
182
- char const * input = start ; // current input processing position
185
+ char const * input = _next ; // current input processing position
183
186
char const * clusterStart = start; // start position of current grapheme cluster
184
187
char const * lastCodepointStart = start; // start position of last codepoint
185
188
unsigned consumedWidth = 0 ; // width consumed for the current line
186
189
unsigned currentCodepointLength = 0 ; // bytes consumed for the current codepoint
187
190
188
191
char const * const lastClusterStart =
189
- _utf8.expectedLength ? start - _utf8.currentLength : start ; // start position of last grapheme cluster
192
+ _utf8.expectedLength ? input - _utf8.currentLength : input ; // start position of last grapheme cluster
190
193
191
194
char const * lastClusterEnd = lastClusterStart; // end position of last grapheme cluster
192
195
@@ -228,8 +231,7 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process_complex_un
228
231
{
229
232
auto const prevCodepoint = _lastCodepointHint;
230
233
auto const nextCodepoint = std::get<Success>(result).value ;
231
- auto const nextWidth =
232
- std::max (_currentClusterWidth, static_cast <unsigned >(unicode::width (nextCodepoint)));
234
+ auto const nextWidth = std::max (_currentClusterWidth, static_cast <unsigned >(unicode::width (nextCodepoint)));
233
235
_lastCodepointHint = nextCodepoint;
234
236
if (grapheme_segmenter::breakable (prevCodepoint, nextCodepoint))
235
237
{
@@ -293,16 +295,15 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process_complex_un
293
295
294
296
_currentClusterWidth = 0 ;
295
297
296
- // if (currentCodepointLength <= _buffer.size())
297
- // _buffer.remove_prefix(currentCodepointLength);
298
- // else
299
- // abort();
298
+ _next = input;
300
299
301
300
assert (lastClusterStart <= lastClusterEnd);
302
301
303
302
auto const resultLength = static_cast <size_t >(std::distance (lastClusterStart, lastClusterEnd));
304
303
printf (" lastClusterEnd: %p, size: %zu\n " , (void *) lastClusterEnd, resultLength);
305
- return result_type { .text = std::string_view (lastClusterStart, resultLength), .width = consumedWidth };
304
+ return unicode_process_result { .graphemeClusterCount = consumedWidth, .byteCount = resultLength };
305
+ // return result_type { .text = std::string_view(lastClusterStart, resultLength), .width = consumedWidth
306
+ // };
306
307
}
307
308
308
309
ConvertResult grapheme_line_segmenter::process_single_byte (uint8_t byte) noexcept
0 commit comments