Skip to content

Commit ba9775e

Browse files
and3md and mikke89 authored
Fix word break when the first character of a token is multi-byte (#753)
Co-authored-by: Michael Ragazzon <[email protected]>
1 parent 4747606 commit ba9775e

File tree

2 files changed

+20
-19
lines changed

2 files changed

+20
-19
lines changed

Source/Core/ElementText.cpp

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -250,32 +250,33 @@ bool ElementText::GenerateLine(String& line, int& line_length, float& line_width
250250
// Try to break up the word
251251
max_token_width = int(maximum_line_width - line_width);
252252
const int token_max_size = int(next_token_begin - token_begin);
253-
bool force_loop_break_after_next = false;
253+
const char* partial_string_end = token_begin + token_max_size;
254254

255255
// @performance: Can be made much faster. Use string width heuristics and logarithmic search.
256-
for (int i = token_max_size - 1; i > 0; --i)
256+
while (true)
257257
{
258+
partial_string_end = StringUtilities::SeekBackwardUTF8(partial_string_end - 1, token_begin);
259+
260+
bool force_loop_break_at_end = false;
261+
if (partial_string_end == token_begin)
262+
{
263+
// Not even the first character of the token fits. Let it overflow onto the next line if we can.
264+
if (allow_empty || !line.empty())
265+
return false;
266+
267+
// Continue by forcing the first character to be consumed, even though it will overflow.
268+
partial_string_end = StringUtilities::SeekForwardUTF8(token_begin + 1, token_begin + token_max_size);
269+
force_loop_break_at_end = true;
270+
}
271+
258272
token.clear();
259273
next_token_begin = token_begin;
260-
const char* partial_string_end = StringUtilities::SeekBackwardUTF8(token_begin + i, token_begin);
261274
BuildToken(token, next_token_begin, partial_string_end, line.empty() && trim_whitespace_prefix, collapse_white_space,
262275
break_at_endline, text_transform_property, decode_escape_characters);
263276
token_width = font_engine_interface->GetStringWidth(font_face_handle, token, text_shaping_context, previous_codepoint);
264277

265-
if (force_loop_break_after_next || token_width <= max_token_width)
266-
{
278+
if (force_loop_break_at_end || token_width <= max_token_width)
267279
break;
268-
}
269-
else if (next_token_begin == token_begin)
270-
{
271-
// This means the first character of the token doesn't fit. Let it overflow into the next line if we can.
272-
if (allow_empty || !line.empty())
273-
return false;
274-
275-
// Not even the first character of the line fits. Go back to consume the first character even though it will overflow.
276-
i += 2;
277-
force_loop_break_after_next = true;
278-
}
279280
}
280281

281282
break_line = true;

Tests/Data/VisualTests/word_break.rml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,10 @@
4444
<hr/>
4545
<h1>Zero-width box</h1>
4646
<p>word-break: normal</p>
47-
<div class="box zero">A WORD</div>
47+
<div class="box zero"> WORD</div>
4848
<p>word-break: break-all</p>
49-
<div class="box zero break-all">A WORD</div>
49+
<div class="box zero break-all"> WORD</div>
5050
<p>word-break: break-word</p>
51-
<div class="box zero break-word">A WORD</div>
51+
<div class="box zero break-word"> WORD</div>
5252
</body>
5353
</rml>

0 commit comments

Comments
 (0)