@@ -125,7 +125,13 @@ static bool isContentChar(UChar32 c) {
125
125
|| inRange (c, 0xE000 , 0x10FFFF );
126
126
}
127
127
128
- // See `s` in the MessageFormat 2 grammar
128
+ // See `bidi` in the MF2 grammar.
// Returns true when `c` is U+061C (Arabic letter mark), U+200E (left-to-right
// mark), U+200F (right-to-left mark), or lies in U+2066..U+2069 (the
// directional isolate controls), using the bounds passed to inRange().
// Returns false for every other code point.
129
+ static bool isBidi (UChar32 c) {
130
+ return (c == 0x061C || c == 0x200E || c == 0x200F ||
131
+ inRange (c, 0x2066 , 0x2069 ));
132
+ }
133
+
134
+ // See `ws` in the MessageFormat 2 grammar
129
135
inline bool isWhitespace (UChar32 c) {
130
136
switch (c) {
131
137
case SPACE:
@@ -153,8 +159,8 @@ static bool isDigit(UChar32 c) { return inRange(c, 0x0030, 0x0039); }
153
159
154
160
// Returns true when `c` may begin a name (`name-start` in the MF2 grammar):
// an alphabetic character per isAlpha(), UNDERSCORE, or a code point in one
// of the ranges listed below.
// U+061C is a bidi character (see isBidi()) and is deliberately carved out of
// the accepted ranges. The range ending at U+1FFF and the separate
// U+200C..U+200D range are kept distinct, as on the removed lines, so that
// U+2000..U+200B are NOT accepted as name-start characters.
static bool isNameStart (UChar32 c) {
155
161
return isAlpha (c) || c == UNDERSCORE || inRange (c, 0x00C0 , 0x00D6 ) || inRange (c, 0x00D8 , 0x00F6 ) ||
156
- inRange (c, 0x00F8 , 0x02FF ) || inRange (c, 0x0370 , 0x037D ) || inRange (c, 0x037F , 0x1FFF ) ||
157
- inRange (c, 0x200C , 0x200D ) || inRange (c, 0x2070 , 0x218F ) || inRange (c, 0x2C00 , 0x2FEF ) ||
162
+ inRange (c, 0x00F8 , 0x02FF ) || inRange (c, 0x0370 , 0x037D ) || inRange (c, 0x037F , 0x061B ) ||
163
+ inRange (c, 0x061D , 0x1FFF ) || inRange (c, 0x200C , 0x200D ) || inRange (c, 0x2070 , 0x218F ) || inRange (c, 0x2C00 , 0x2FEF ) ||
158
164
inRange (c, 0x3001 , 0xD7FF ) || inRange (c, 0xF900 , 0xFDCF ) || inRange (c, 0xFDF0 , 0xFFFD ) ||
159
165
inRange (c, 0x10000 , 0xEFFFF );
160
166
}
@@ -347,7 +353,7 @@ option, or the optional space before an attribute.
347
353
No pre, no post.
348
354
A message may end with whitespace, so `index` may equal `len()` on exit.
349
355
*/
350
- void Parser::parseWhitespaceMaybeRequired ( bool required, UErrorCode& errorCode) {
356
+ void Parser::parseRequiredWS ( UErrorCode& errorCode) {
351
357
bool sawWhitespace = false ;
352
358
353
359
// The loop exits either when we consume all the input,
@@ -358,7 +364,7 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
358
364
// If we already saw whitespace,
359
365
// then the caller is responsible for checking this case and
360
366
// setting an error if necessary.
361
- if (!required || sawWhitespace) {
367
+ if (sawWhitespace) {
362
368
// Not an error.
363
369
return ;
364
370
}
@@ -380,24 +386,51 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
380
386
}
381
387
}
382
388
383
- if (!sawWhitespace && required ) {
389
+ if (!sawWhitespace) {
384
390
ERROR (errorCode);
385
391
}
386
392
}
387
393
394
// Skips a (possibly empty) run of consecutive bidi characters starting at the
// current position: calls next() for as long as inBounds() holds and
// isBidi(peek()) is true. Takes no error code and reports no error; it stops
// at the end of the input or at the first non-bidi character.
+ void Parser::parseOptionalBidi () {
395
+ while (true ) {
396
+ if (!inBounds ()) {
397
+ return ;
398
+ }
399
+ if (isBidi (peek ())) {
400
+ next ();
401
+ } else {
402
+ break ;
403
+ }
404
+ }
405
+ }
406
+
388
407
/*
389
- No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
408
+ No pre, no post, because a message may end with whitespace
409
+ Matches `s` in the MF2 grammar: consumes optional bidi characters,
  then required whitespace (parseRequiredWS() invokes ERROR(errorCode)
  when it saw no whitespace), then any further optional whitespace or
  bidi characters. A single SPACE is appended to `normalizedInput`
  unconditionally, however much input was consumed.
390
410
*/
391
411
void Parser::parseRequiredWhitespace (UErrorCode& errorCode) {
392
- parseWhitespaceMaybeRequired (true , errorCode);
412
+ parseOptionalBidi ();
413
+ parseRequiredWS (errorCode);
414
+ parseOptionalWhitespace ();
393
415
normalizedInput += SPACE;
394
416
}
395
417
396
418
/*
397
419
No pre, no post, because a message may end with whitespace.
Consumes a (possibly empty) run of whitespace and bidi characters, calling
maybeAdvanceLine() before each next(). Takes no error code and reports no
error; returns as soon as the input is exhausted or the next character is
neither whitespace nor bidi.
398
420
*/
399
- void Parser::parseOptionalWhitespace (UErrorCode& errorCode) {
400
- parseWhitespaceMaybeRequired (false , errorCode);
421
+ void Parser::parseOptionalWhitespace () {
422
+ while (true ) {
423
+ if (!inBounds ()) {
424
+ return ;
425
+ }
426
+ auto cp = peek ();
427
+ if (isWhitespace (cp) || isBidi (cp)) {
428
+ maybeAdvanceLine ();
429
+ next ();
430
+ } else {
431
+ break ;
432
+ }
433
+ }
401
434
}
402
435
403
436
// Consumes a single character, signaling an error if `peek()` != `c`
@@ -442,11 +475,11 @@ void Parser::parseToken(const std::u16string_view& token, UErrorCode& errorCode)
442
475
*/
443
476
// Consumes optional whitespace/bidi characters, then `token` via parseToken(),
// then optional whitespace/bidi characters again. CHECK_BOUNDS(errorCode) runs
// once before parseToken() and once more before returning.
// NOTE(review): CHECK_BOUNDS is a macro defined elsewhere; presumably it flags
// an error and returns when the input is exhausted -- confirm.
void Parser::parseTokenWithWhitespace (const std::u16string_view& token, UErrorCode& errorCode) {
444
477
// No need for error check or bounds check before parseOptionalWhitespace:
// it takes no error code and returns immediately when out of bounds
445
- parseOptionalWhitespace (errorCode );
478
+ parseOptionalWhitespace ();
446
479
// Establish precondition
447
480
CHECK_BOUNDS (errorCode);
448
481
parseToken (token, errorCode);
449
- parseOptionalWhitespace (errorCode );
482
+ parseOptionalWhitespace ();
450
483
// Guarantee postcondition
451
484
CHECK_BOUNDS (errorCode);
452
485
}
@@ -458,12 +491,12 @@ void Parser::parseTokenWithWhitespace(const std::u16string_view& token, UErrorCo
458
491
then consumes optional whitespace again
459
492
*/
460
493
// Single-character overload: consumes optional whitespace/bidi characters,
// then the character `c` via parseToken(), then optional whitespace/bidi
// characters again. CHECK_BOUNDS(errorCode) runs once before parseToken() and
// once more before returning.
// NOTE(review): CHECK_BOUNDS is a macro defined elsewhere; presumably it flags
// an error and returns when the input is exhausted -- confirm.
void Parser::parseTokenWithWhitespace (UChar32 c, UErrorCode& errorCode) {
461
- // No need for error check or bounds check before parseOptionalWhitespace(errorCode )
462
- parseOptionalWhitespace (errorCode );
494
+ // No need for error check or bounds check before parseOptionalWhitespace():
// it takes no error code and returns immediately when out of bounds
495
+ parseOptionalWhitespace ();
463
496
// Establish precondition
464
497
CHECK_BOUNDS (errorCode);
465
498
parseToken (c, errorCode);
466
- parseOptionalWhitespace (errorCode );
499
+ parseOptionalWhitespace ();
467
500
// Guarantee postcondition
468
501
CHECK_BOUNDS (errorCode);
469
502
}
@@ -482,11 +515,17 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
482
515
483
516
U_ASSERT (inBounds ());
484
517
485
- if (!isNameStart (peek ())) {
518
+ if (!( isNameStart (peek ()) || isBidi ( peek () ))) {
486
519
ERROR (errorCode);
487
520
return name;
488
521
}
489
522
523
+ // name = [bidi] name-start *name-char [bidi]
524
+
525
+ // [bidi]
526
+ parseOptionalBidi ();
527
+
528
+ // name-start *name-char
490
529
while (isNameChar (peek ())) {
491
530
UChar32 c = peek ();
492
531
name += c;
@@ -497,6 +536,10 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
497
536
break ;
498
537
}
499
538
}
539
+
540
+ // [bidi]
541
+ parseOptionalBidi ();
542
+
500
543
return name;
501
544
}
502
545
@@ -845,7 +888,7 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
845
888
// about whether whitespace precedes another
846
889
// attribute, or the '=' sign
847
890
int32_t savedIndex = index;
848
- parseOptionalWhitespace (errorCode );
891
+ parseOptionalWhitespace ();
849
892
850
893
Operand rand;
851
894
if (peek () == EQUALS) {
@@ -1131,7 +1174,7 @@ the comment in `parseOptions()` for details.
1131
1174
// (the character is either the required space before an annotation, or optional
1132
1175
// trailing space after the literal or variable). It's still ambiguous which
1133
1176
// one does apply.
1134
- parseOptionalWhitespace (status );
1177
+ parseOptionalWhitespace ();
1135
1178
// Restore precondition
1136
1179
CHECK_BOUNDS (status);
1137
1180
@@ -1202,7 +1245,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
1202
1245
// Parse opening brace
1203
1246
parseToken (LEFT_CURLY_BRACE, status);
1204
1247
// Optional whitespace after opening brace
1205
- parseOptionalWhitespace (status );
1248
+ parseOptionalWhitespace ();
1206
1249
1207
1250
Expression::Builder exprBuilder (status);
1208
1251
// Restore precondition
@@ -1245,7 +1288,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
1245
1288
1246
1289
// Parse optional space
1247
1290
// (the last [s] in e.g. "{" [s] literal [s annotation] *(s attribute) [s] "}")
1248
- parseOptionalWhitespace (status );
1291
+ parseOptionalWhitespace ();
1249
1292
1250
1293
// Either an operand or operator (or both) must have been set already,
1251
1294
// so there can't be an error
@@ -1321,7 +1364,7 @@ void Parser::parseInputDeclaration(UErrorCode& status) {
1321
1364
CHECK_BOUNDS (status);
1322
1365
1323
1366
parseToken (ID_INPUT, status);
1324
- parseOptionalWhitespace (status );
1367
+ parseOptionalWhitespace ();
1325
1368
1326
1369
// Restore precondition before calling parseExpression()
1327
1370
CHECK_BOUNDS (status);
@@ -1382,7 +1425,7 @@ void Parser::parseDeclarations(UErrorCode& status) {
1382
1425
// Avoid looping infinitely
1383
1426
CHECK_ERROR (status);
1384
1427
1385
- parseOptionalWhitespace (status );
1428
+ parseOptionalWhitespace ();
1386
1429
// Restore precondition
1387
1430
CHECK_BOUNDS (status);
1388
1431
}
@@ -1492,8 +1535,8 @@ This is addressed using "backtracking" (similarly to `parseOptions()`).
1492
1535
1493
1536
// We've seen at least one whitespace-key pair, so now we can parse
1494
1537
// *(s key) [s]
1495
- while (peek () != LEFT_CURLY_BRACE || isWhitespace (peek ())) { // Try to recover from errors
1496
- bool wasWhitespace = isWhitespace (peek ());
1538
+ while (peek () != LEFT_CURLY_BRACE || isWhitespace (peek ()) || isBidi ( peek ())) {
1539
+ bool wasWhitespace = isWhitespace (peek ()) || isBidi ( peek ()) ;
1497
1540
parseRequiredWhitespace (status);
1498
1541
if (!wasWhitespace) {
1499
1542
// Avoid infinite loop when parsing something like:
@@ -1551,7 +1594,7 @@ Markup Parser::parseMarkup(UErrorCode& status) {
1551
1594
// Consume the '{'
1552
1595
next ();
1553
1596
normalizedInput += LEFT_CURLY_BRACE;
1554
- parseOptionalWhitespace (status );
1597
+ parseOptionalWhitespace ();
1555
1598
bool closing = false ;
1556
1599
switch (peek ()) {
1557
1600
case NUMBER_SIGN: {
@@ -1578,19 +1621,19 @@ Markup Parser::parseMarkup(UErrorCode& status) {
1578
1621
1579
1622
// Parse the options, which must begin with whitespace or a bidi character
1580
1623
// if present
1581
- if (inBounds () && isWhitespace (peek ())) {
1624
+ if (inBounds () && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
1582
1625
OptionAdder<Markup::Builder> optionAdder (builder);
1583
1626
parseOptions (optionAdder, status);
1584
1627
}
1585
1628
1586
1629
// Parse the attributes, which also must begin
1587
1630
// with whitespace or a bidi character
1588
- if (inBounds () && isWhitespace (peek ())) {
1631
+ if (inBounds () && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
1589
1632
AttributeAdder<Markup::Builder> attrAdder (builder);
1590
1633
parseAttributes (attrAdder, status);
1591
1634
}
1592
1635
1593
- parseOptionalWhitespace (status );
1636
+ parseOptionalWhitespace ();
1594
1637
1595
1638
bool standalone = false ;
1596
1639
// Check if this is a standalone or not
@@ -1638,7 +1681,7 @@ std::variant<Expression, Markup> Parser::parsePlaceholder(UErrorCode& status) {
1638
1681
isMarkup = true ;
1639
1682
break ;
1640
1683
}
1641
- if (!isWhitespace (c)) {
1684
+ if (!( isWhitespace (c) || isBidi (c))) {
1642
1685
break ;
1643
1686
}
1644
1687
tempIndex++;
@@ -1738,8 +1781,7 @@ void Parser::parseSelectors(UErrorCode& status) {
1738
1781
// "Backtracking" is required here. It's not clear if whitespace is
1739
1782
// (`[s]` selector) or (`[s]` variant)
1740
1783
while (isWhitespace (peek ()) || peek () == DOLLAR) {
1741
- int32_t whitespaceStart = index;
1742
- parseRequiredWhitespace (status);
1784
+ parseOptionalWhitespace ();
1743
1785
// Restore precondition
1744
1786
CHECK_BOUNDS (status);
1745
1787
if (peek () != DOLLAR) {
@@ -1771,24 +1813,9 @@ void Parser::parseSelectors(UErrorCode& status) {
1771
1813
} \
1772
1814
1773
1815
// Parse variants
1774
- // matcher = match-statement s variant *(o variant)
1775
-
1776
- // Parse first variant
1777
- parseRequiredWhitespace (status);
1778
- if (!inBounds ()) {
1779
- ERROR (status);
1780
- return ;
1781
- }
1782
- parseVariant (status);
1783
- if (!inBounds ()) {
1784
- // Not an error; there might be only one variant
1785
- return ;
1786
- }
1787
-
1788
- while (isWhitespace (peek ()) || isKeyStart (peek ())) {
1789
- parseOptionalWhitespace (status);
1790
- // Restore the precondition.
1791
- // Trailing whitespace is allowed.
1816
+ while (isWhitespace (peek ()) || isBidi (peek ()) || isKeyStart (peek ())) {
1817
+ // Trailing whitespace is allowed
1818
+ parseOptionalWhitespace ();
1792
1819
if (!inBounds ()) {
1793
1820
return ;
1794
1821
}
@@ -1874,7 +1901,7 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
1874
1901
bool complex = false ;
1875
1902
// First, "look ahead" to determine if this is a simple or complex
1876
1903
// message. To do that, check the first non-whitespace character.
1877
- while (inBounds (index) && isWhitespace (peek ())) {
1904
+ while (inBounds (index) && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
1878
1905
next ();
1879
1906
}
1880
1907
@@ -1894,10 +1921,10 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
1894
1921
// Message can be empty, so we need to only look ahead
1895
1922
// if we know it's non-empty
1896
1923
if (complex) {
1897
- parseOptionalWhitespace (status );
1924
+ parseOptionalWhitespace ();
1898
1925
parseDeclarations (status);
1899
1926
parseBody (status);
1900
- parseOptionalWhitespace (status );
1927
+ parseOptionalWhitespace ();
1901
1928
} else {
1902
1929
// Simple message
1903
1930
// For normalization, quote the pattern
0 commit comments