@@ -125,7 +125,13 @@ static bool isContentChar(UChar32 c) {
125
125
|| inRange (c, 0xE000 , 0x10FFFF );
126
126
}
127
127
128
- // See `s` in the MessageFormat 2 grammar
128
+ // See `bidi` in the MF2 grammar
129
+ static bool isBidi (UChar32 c) {
130
+ return (c == 0x061C || c == 0x200E || c == 0x200F ||
131
+ inRange (c, 0x2066 , 0x2069 ));
132
+ }
133
+
134
+ // See `ws` in the MessageFormat 2 grammar
129
135
inline bool isWhitespace (UChar32 c) {
130
136
switch (c) {
131
137
case SPACE:
@@ -153,8 +159,8 @@ static bool isDigit(UChar32 c) { return inRange(c, 0x0030, 0x0039); }
153
159
154
160
// Returns true iff `c` may begin a name (see `name-start` in the MF2 grammar).
//
// U+061C is a `bidi` character (see isBidi()), so the former
// 0x037F..0x1FFF range is split around it. The upper half must still stop
// at 0x1FFF: extending it to 0x200D would wrongly admit U+2000..U+200B,
// which were never name-start characters. 0x200C..0x200D stays a separate
// range, exactly as before U+061C was carved out.
static bool isNameStart(UChar32 c) {
    return isAlpha(c) || c == UNDERSCORE || inRange(c, 0x00C0, 0x00D6) || inRange(c, 0x00D8, 0x00F6) ||
           inRange(c, 0x00F8, 0x02FF) || inRange(c, 0x0370, 0x037D) || inRange(c, 0x037F, 0x061B) ||
           inRange(c, 0x061D, 0x1FFF) || inRange(c, 0x200C, 0x200D) || inRange(c, 0x2070, 0x218F) ||
           inRange(c, 0x2C00, 0x2FEF) || inRange(c, 0x3001, 0xD7FF) || inRange(c, 0xF900, 0xFDCF) ||
           inRange(c, 0xFDF0, 0xFFFD) || inRange(c, 0x10000, 0xEFFFF);
}
@@ -347,7 +353,7 @@ option, or the optional space before an attribute.
347
353
No pre, no post.
348
354
A message may end with whitespace, so `index` may equal `len()` on exit.
349
355
*/
350
- void Parser::parseWhitespaceMaybeRequired ( bool required, UErrorCode& errorCode) {
356
+ void Parser::parseRequiredWS ( UErrorCode& errorCode) {
351
357
bool sawWhitespace = false ;
352
358
353
359
// The loop exits either when we consume all the input,
@@ -358,7 +364,7 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
358
364
// If we saw whitespace already,
359
365
// then the caller is responsible for checking this case and
360
366
// setting an error if necessary.
361
- if (!required || sawWhitespace) {
367
+ if (sawWhitespace) {
362
368
// Not an error.
363
369
return ;
364
370
}
@@ -380,24 +386,51 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
380
386
}
381
387
}
382
388
383
- if (!sawWhitespace && required ) {
389
+ if (!sawWhitespace) {
384
390
ERROR (errorCode);
385
391
}
386
392
}
387
393
394
+ void Parser::parseOptionalBidi () {
395
+ while (true ) {
396
+ if (!inBounds ()) {
397
+ return ;
398
+ }
399
+ if (isBidi (peek ())) {
400
+ next ();
401
+ } else {
402
+ break ;
403
+ }
404
+ }
405
+ }
406
+
388
407
/*
389
- No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
408
+ No pre, no post, because a message may end with whitespace
409
+ Matches `s` in the MF2 grammar
390
410
*/
391
411
void Parser::parseRequiredWhitespace (UErrorCode& errorCode) {
392
- parseWhitespaceMaybeRequired (true , errorCode);
412
+ parseOptionalBidi ();
413
+ parseRequiredWS (errorCode);
414
+ parseOptionalWhitespace ();
393
415
normalizedInput += SPACE;
394
416
}
395
417
396
418
/*
397
419
No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
398
420
*/
399
- void Parser::parseOptionalWhitespace (UErrorCode& errorCode) {
400
- parseWhitespaceMaybeRequired (false , errorCode);
421
+ void Parser::parseOptionalWhitespace () {
422
+ while (true ) {
423
+ if (!inBounds ()) {
424
+ return ;
425
+ }
426
+ auto cp = peek ();
427
+ if (isWhitespace (cp) || isBidi (cp)) {
428
+ maybeAdvanceLine ();
429
+ next ();
430
+ } else {
431
+ break ;
432
+ }
433
+ }
401
434
}
402
435
403
436
// Consumes a single character, signaling an error if `peek()` != `c`
@@ -442,11 +475,11 @@ void Parser::parseToken(const std::u16string_view& token, UErrorCode& errorCode)
442
475
*/
443
476
void Parser::parseTokenWithWhitespace (const std::u16string_view& token, UErrorCode& errorCode) {
444
477
// No need for error check or bounds check before parseOptionalWhitespace
445
- parseOptionalWhitespace (errorCode );
478
+ parseOptionalWhitespace ();
446
479
// Establish precondition
447
480
CHECK_BOUNDS (errorCode);
448
481
parseToken (token, errorCode);
449
- parseOptionalWhitespace (errorCode );
482
+ parseOptionalWhitespace ();
450
483
// Guarantee postcondition
451
484
CHECK_BOUNDS (errorCode);
452
485
}
@@ -458,12 +491,12 @@ void Parser::parseTokenWithWhitespace(const std::u16string_view& token, UErrorCo
458
491
then consumes optional whitespace again
459
492
*/
460
493
void Parser::parseTokenWithWhitespace(UChar32 c, UErrorCode& errorCode) {
    // parseOptionalWhitespace() does its own bounds check and cannot fail,
    // so nothing needs to be checked before calling it
    parseOptionalWhitespace();

    // parseToken() needs in-bounds input: establish its precondition
    CHECK_BOUNDS(errorCode);
    parseToken(c, errorCode);

    // Skip optional whitespace after the character
    parseOptionalWhitespace();

    // Guarantee this function's postcondition
    CHECK_BOUNDS(errorCode);
}
@@ -482,11 +515,17 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
482
515
483
516
U_ASSERT (inBounds ());
484
517
485
- if (!isNameStart (peek ())) {
518
+ if (!( isNameStart (peek ()) || isBidi ( peek () ))) {
486
519
ERROR (errorCode);
487
520
return name;
488
521
}
489
522
523
+ // name = [bidi] name-start *name-char [bidi]
524
+
525
+ // [bidi]
526
+ parseOptionalBidi ();
527
+
528
+ // name-start *name-char
490
529
while (isNameChar (peek ())) {
491
530
UChar32 c = peek ();
492
531
name += c;
@@ -497,6 +536,10 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
497
536
break ;
498
537
}
499
538
}
539
+
540
+ // [bidi]
541
+ parseOptionalBidi ();
542
+
500
543
return name;
501
544
}
502
545
@@ -853,7 +896,7 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
853
896
// about whether whitespace precedes another
854
897
// attribute, or the '=' sign
855
898
int32_t savedIndex = index;
856
- parseOptionalWhitespace (errorCode );
899
+ parseOptionalWhitespace ();
857
900
858
901
Operand rand;
859
902
if (peek () == EQUALS) {
@@ -1149,7 +1192,7 @@ the comment in `parseOptions()` for details.
1149
1192
// (the character is either the required space before an annotation, or optional
1150
1193
// trailing space after the literal or variable). It's still ambiguous which
1151
1194
// one does apply.
1152
- parseOptionalWhitespace (status );
1195
+ parseOptionalWhitespace ();
1153
1196
// Restore precondition
1154
1197
CHECK_BOUNDS (status);
1155
1198
@@ -1220,7 +1263,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
1220
1263
// Parse opening brace
1221
1264
parseToken (LEFT_CURLY_BRACE, status);
1222
1265
// Optional whitespace after opening brace
1223
- parseOptionalWhitespace (status );
1266
+ parseOptionalWhitespace ();
1224
1267
1225
1268
Expression::Builder exprBuilder (status);
1226
1269
// Restore precondition
@@ -1263,7 +1306,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
1263
1306
1264
1307
// Parse optional space
1265
1308
// (the last [s] in e.g. "{" [s] literal [s annotation] *(s attribute) [s] "}")
1266
- parseOptionalWhitespace (status );
1309
+ parseOptionalWhitespace ();
1267
1310
1268
1311
// Either an operand or operator (or both) must have been set already,
1269
1312
// so there can't be an error
@@ -1339,7 +1382,7 @@ void Parser::parseInputDeclaration(UErrorCode& status) {
1339
1382
CHECK_BOUNDS (status);
1340
1383
1341
1384
parseToken (ID_INPUT, status);
1342
- parseOptionalWhitespace (status );
1385
+ parseOptionalWhitespace ();
1343
1386
1344
1387
// Restore precondition before calling parseExpression()
1345
1388
CHECK_BOUNDS (status);
@@ -1400,7 +1443,7 @@ void Parser::parseDeclarations(UErrorCode& status) {
1400
1443
// Avoid looping infinitely
1401
1444
CHECK_ERROR (status);
1402
1445
1403
- parseOptionalWhitespace (status );
1446
+ parseOptionalWhitespace ();
1404
1447
// Restore precondition
1405
1448
CHECK_BOUNDS (status);
1406
1449
}
@@ -1510,8 +1553,8 @@ This is addressed using "backtracking" (similarly to `parseOptions()`).
1510
1553
1511
1554
// We've seen at least one whitespace-key pair, so now we can parse
1512
1555
// *(s key) [s]
1513
- while (peek () != LEFT_CURLY_BRACE || isWhitespace (peek ())) { // Try to recover from errors
1514
- bool wasWhitespace = isWhitespace (peek ());
1556
+ while (peek () != LEFT_CURLY_BRACE || isWhitespace (peek ()) || isBidi ( peek ())) {
1557
+ bool wasWhitespace = isWhitespace (peek ()) || isBidi ( peek ()) ;
1515
1558
parseRequiredWhitespace (status);
1516
1559
if (!wasWhitespace) {
1517
1560
// Avoid infinite loop when parsing something like:
@@ -1569,7 +1612,7 @@ Markup Parser::parseMarkup(UErrorCode& status) {
1569
1612
// Consume the '{'
1570
1613
next ();
1571
1614
normalizedInput += LEFT_CURLY_BRACE;
1572
- parseOptionalWhitespace (status );
1615
+ parseOptionalWhitespace ();
1573
1616
bool closing = false ;
1574
1617
switch (peek ()) {
1575
1618
case NUMBER_SIGN: {
@@ -1596,19 +1639,19 @@ Markup Parser::parseMarkup(UErrorCode& status) {
1596
1639
1597
1640
// Parse the options, which must begin with a ' '
1598
1641
// if present
1599
- if (inBounds () && isWhitespace (peek ())) {
1642
+ if (inBounds () && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
1600
1643
OptionAdder<Markup::Builder> optionAdder (builder);
1601
1644
parseOptions (optionAdder, status);
1602
1645
}
1603
1646
1604
1647
// Parse the attributes, which also must begin
1605
1648
// with a ' '
1606
- if (inBounds () && isWhitespace (peek ())) {
1649
+ if (inBounds () && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
1607
1650
AttributeAdder<Markup::Builder> attrAdder (builder);
1608
1651
parseAttributes (attrAdder, status);
1609
1652
}
1610
1653
1611
- parseOptionalWhitespace (status );
1654
+ parseOptionalWhitespace ();
1612
1655
1613
1656
bool standalone = false ;
1614
1657
// Check if this is a standalone or not
@@ -1656,7 +1699,7 @@ std::variant<Expression, Markup> Parser::parsePlaceholder(UErrorCode& status) {
1656
1699
isMarkup = true ;
1657
1700
break ;
1658
1701
}
1659
- if (!isWhitespace (c)) {
1702
+ if (!( isWhitespace (c) || isBidi (c))) {
1660
1703
break ;
1661
1704
}
1662
1705
tempIndex++;
@@ -1740,7 +1783,7 @@ void Parser::parseSelectors(UErrorCode& status) {
1740
1783
// "Backtracking" is required here. It's not clear if whitespace is
1741
1784
// (`[s]` selector) or (`[s]` variant)
1742
1785
while (isWhitespace (peek ()) || peek () == LEFT_CURLY_BRACE) {
1743
- parseOptionalWhitespace (status );
1786
+ parseOptionalWhitespace ();
1744
1787
// Restore precondition
1745
1788
CHECK_BOUNDS (status);
1746
1789
if (peek () != LEFT_CURLY_BRACE) {
@@ -1770,9 +1813,9 @@ void Parser::parseSelectors(UErrorCode& status) {
1770
1813
} \
1771
1814
1772
1815
// Parse variants
1773
- while (isWhitespace (peek ()) || isKeyStart (peek ())) {
1816
+ while (isWhitespace (peek ()) || isBidi ( peek ()) || isKeyStart (peek ())) {
1774
1817
// Trailing whitespace is allowed
1775
- parseOptionalWhitespace (status );
1818
+ parseOptionalWhitespace ();
1776
1819
if (!inBounds ()) {
1777
1820
return ;
1778
1821
}
@@ -1871,7 +1914,7 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
1871
1914
bool complex = false ;
1872
1915
// First, "look ahead" to determine if this is a simple or complex
1873
1916
// message. To do that, check the first non-whitespace character.
1874
- while (inBounds (index) && isWhitespace (peek ())) {
1917
+ while (inBounds (index) && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
1875
1918
next ();
1876
1919
}
1877
1920
@@ -1891,10 +1934,10 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
1891
1934
// Message can be empty, so we need to only look ahead
1892
1935
// if we know it's non-empty
1893
1936
if (complex) {
1894
- parseOptionalWhitespace (status );
1937
+ parseOptionalWhitespace ();
1895
1938
parseDeclarations (status);
1896
1939
parseBody (status);
1897
- parseOptionalWhitespace (status );
1940
+ parseOptionalWhitespace ();
1898
1941
} else {
1899
1942
// Simple message
1900
1943
// For normalization, quote the pattern
0 commit comments