Skip to content

Commit 62b95bb

Browse files
committed
ICU-22908 MF2: Update spec tests and update implementation for recent spec changes
Updating the spec tests requires two implementation changes: * Match on variables rather than expressions -- see unicode-org/message-format-wg#877 * Require attribute values to be literals (if present) -- see unicode-org/message-format-wg#894 This updates the spec tests to commit 6c3704f41a9c24427126429fb43992b03609dfc8 in https://github.com/unicode-org/message-format-wg/ . Any changes following that commit will be addressed in a future PR.
1 parent 2e57f07 commit 62b95bb

32 files changed

+551
-500
lines changed

icu4c/source/i18n/messageformat2.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -336,13 +336,13 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme
336336
CHECK_ERROR(status);
337337
U_ASSERT(!dataModel.hasPattern());
338338

339-
const Expression* selectors = dataModel.getSelectorsInternal();
339+
const VariableName* selectors = dataModel.getSelectorsInternal();
340340
// 1. Let res be a new empty list of resolved values that support selection.
341341
// (Implicit, since `res` is an out-parameter)
342342
// 2. For each expression exp of the message's selectors
343343
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
344344
// 2i. Let rv be the resolved value of exp.
345-
ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status);
345+
ResolvedSelector rv = formatSelector(env, selectors[i], context, status);
346346
if (rv.hasSelector()) {
347347
// 2ii. If selection is supported for rv:
348348
// (True if this code has been reached)
@@ -614,7 +614,10 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo
614614

615615

616616
// Evaluate the operand
617-
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const {
617+
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
618+
const Operand& rand,
619+
MessageContext& context,
620+
UErrorCode &status) const {
618621
if (U_FAILURE(status)) {
619622
return {};
620623
}
@@ -628,7 +631,13 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, cons
628631
}
629632

630633
// Must be variable
631-
const VariableName& var = rand.asVariable();
634+
return resolveVariables(env, rand.asVariable(), context, status);
635+
}
636+
637+
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
638+
const VariableName& var,
639+
MessageContext& context,
640+
UErrorCode &status) const {
632641
// Resolve the variable
633642
if (env.has(var)) {
634643
const Closure& referent = env.lookup(var);
@@ -691,13 +700,16 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
691700
}
692701
}
693702

694-
ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const {
703+
ResolvedSelector MessageFormatter::formatSelector(const Environment& globalEnv,
704+
const VariableName& var,
705+
MessageContext& context,
706+
UErrorCode &status) const {
695707
if (U_FAILURE(status)) {
696708
return {};
697709
}
698710

699711
// Resolve expression to determine if it's a function call
700-
ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status);
712+
ResolvedSelector exprResult = resolveVariables(globalEnv, var, context, status);
701713

702714
DynamicErrors& err = context.getErrors();
703715

icu4c/source/i18n/messageformat2_checker.cpp

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,12 @@ TypeEnvironment::~TypeEnvironment() {}
108108

109109
// ---------------------
110110

111-
UnicodeString Checker::normalizeNFC(const Key& k) const {
111+
Key Checker::normalizeNFC(const Key& k) const {
112112
if (k.isWildcard()) {
113-
return UnicodeString("*");
113+
return k;
114114
}
115-
return context.normalizeNFC(k.asLiteral().unquoted());
115+
return Key(Literal(k.asLiteral().isQuoted(),
116+
context.normalizeNFC(k.asLiteral().unquoted())));
116117
}
117118

118119
static bool areDefaultKeys(const Key* keys, int32_t len) {
@@ -216,18 +217,14 @@ void Checker::checkVariants(UErrorCode& status) {
216217
}
217218
}
218219

219-
void Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selectorExpr, UErrorCode& status) {
220+
void Checker::requireAnnotated(const TypeEnvironment& t,
221+
const VariableName& selectorVar,
222+
UErrorCode& status) {
220223
CHECK_ERROR(status);
221224

222-
if (selectorExpr.isFunctionCall()) {
225+
if (t.get(selectorVar) == TypeEnvironment::Type::Annotated) {
223226
return; // No error
224227
}
225-
const Operand& rand = selectorExpr.getOperand();
226-
if (rand.isVariable()) {
227-
if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) {
228-
return; // No error
229-
}
230-
}
231228
// If this code is reached, an error was detected
232229
errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
233230
}
@@ -237,7 +234,7 @@ void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {
237234

238235
// Check each selector; if it's not annotated, emit a
239236
// "missing selector annotation" error
240-
const Expression* selectors = dataModel.getSelectorsInternal();
237+
const VariableName* selectors = dataModel.getSelectorsInternal();
241238
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
242239
requireAnnotated(t, selectors[i], status);
243240
}

icu4c/source/i18n/messageformat2_checker.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@ namespace message2 {
6969
: dataModel(d), errors(e), context(mf) {}
7070
private:
7171

72-
UnicodeString normalizeNFC(const Key&) const;
72+
Key normalizeNFC(const Key&) const;
7373

74-
void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
74+
void requireAnnotated(const TypeEnvironment&, const VariableName&, UErrorCode&);
7575
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
7676
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
7777
void addFreeVars(TypeEnvironment& t, const OptionMap&, UErrorCode&);

icu4c/source/i18n/messageformat2_data_model.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -693,9 +693,9 @@ Matcher::Matcher(const Matcher& other) {
693693
numSelectors = other.numSelectors;
694694
numVariants = other.numVariants;
695695
UErrorCode localErrorCode = U_ZERO_ERROR;
696-
selectors.adoptInstead(copyArray<Expression>(other.selectors.getAlias(),
697-
numSelectors,
698-
localErrorCode));
696+
selectors.adoptInstead(copyArray<VariableName>(other.selectors.getAlias(),
697+
numSelectors,
698+
localErrorCode));
699699
variants.adoptInstead(copyArray<Variant>(other.variants.getAlias(),
700700
numVariants,
701701
localErrorCode));
@@ -704,7 +704,7 @@ Matcher::Matcher(const Matcher& other) {
704704
}
705705
}
706706

707-
Matcher::Matcher(Expression* ss, int32_t ns, Variant* vs, int32_t nv)
707+
Matcher::Matcher(VariableName* ss, int32_t ns, Variant* vs, int32_t nv)
708708
: selectors(ss), numSelectors(ns), variants(vs), numVariants(nv) {}
709709

710710
Matcher::~Matcher() {}
@@ -726,7 +726,7 @@ const Binding* MFDataModel::getLocalVariablesInternal() const {
726726
return bindings.getAlias();
727727
}
728728

729-
const Expression* MFDataModel::getSelectorsInternal() const {
729+
const VariableName* MFDataModel::getSelectorsInternal() const {
730730
U_ASSERT(!bogus);
731731
U_ASSERT(!hasPattern());
732732
return std::get_if<Matcher>(&body)->selectors.getAlias();
@@ -788,15 +788,13 @@ MFDataModel::Builder& MFDataModel::Builder::addBinding(Binding&& b, UErrorCode&
788788
return *this;
789789
}
790790

791-
/*
792-
selector must be non-null
793-
*/
794-
MFDataModel::Builder& MFDataModel::Builder::addSelector(Expression&& selector, UErrorCode& status) noexcept {
791+
MFDataModel::Builder& MFDataModel::Builder::addSelector(VariableName&& selector,
792+
UErrorCode& status) {
795793
THIS_ON_ERROR(status);
796794

797795
buildSelectorsMessage(status);
798796
U_ASSERT(selectors != nullptr);
799-
selectors->adoptElement(create<Expression>(std::move(selector), status), status);
797+
selectors->adoptElement(create<VariableName>(std::move(selector), status), status);
800798

801799
return *this;
802800
}
@@ -832,11 +830,11 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) {
832830
if (other.hasPattern()) {
833831
body = *std::get_if<Pattern>(&other.body);
834832
} else {
835-
const Expression* otherSelectors = other.getSelectorsInternal();
833+
const VariableName* otherSelectors = other.getSelectorsInternal();
836834
const Variant* otherVariants = other.getVariantsInternal();
837835
int32_t numSelectors = other.numSelectors();
838836
int32_t numVariants = other.numVariants();
839-
Expression* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
837+
VariableName* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
840838
Variant* copiedVariants = copyArray(otherVariants, numVariants, localErrorCode);
841839
if (U_FAILURE(localErrorCode)) {
842840
bogus = true;
@@ -865,7 +863,9 @@ MFDataModel::MFDataModel(const MFDataModel::Builder& builder, UErrorCode& errorC
865863
int32_t numVariants = builder.variants->size();
866864
int32_t numSelectors = builder.selectors->size();
867865
LocalArray<Variant> variants(copyVectorToArray<Variant>(*builder.variants, errorCode), errorCode);
868-
LocalArray<Expression> selectors(copyVectorToArray<Expression>(*builder.selectors, errorCode), errorCode);
866+
LocalArray<VariableName> selectors(copyVectorToArray<VariableName>(*builder.selectors,
867+
errorCode),
868+
errorCode);
869869
if (U_FAILURE(errorCode)) {
870870
bogus = true;
871871
return;

icu4c/source/i18n/messageformat2_parser.cpp

Lines changed: 49 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -512,21 +512,13 @@ VariableName Parser::parseVariableName(UErrorCode& errorCode) {
512512
VariableName result;
513513

514514
U_ASSERT(inBounds());
515-
// If the '$' is missing, we don't want a binding
516-
// for this variable to be created.
517-
bool valid = peek() == DOLLAR;
515+
518516
parseToken(DOLLAR, errorCode);
519517
if (!inBounds()) {
520518
ERROR(errorCode);
521519
return result;
522520
}
523-
UnicodeString varName = parseName(errorCode);
524-
// Set the name to "" if the variable wasn't
525-
// declared correctly
526-
if (!valid) {
527-
varName.remove();
528-
}
529-
return VariableName(varName);
521+
return VariableName(parseName(errorCode));
530522
}
531523

532524
/*
@@ -863,27 +855,17 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
863855
parseTokenWithWhitespace(EQUALS, errorCode);
864856

865857
UnicodeString rhsStr;
866-
// Parse RHS, which is either a literal or variable
867-
switch (peek()) {
868-
case DOLLAR: {
869-
rand = Operand(parseVariableName(errorCode));
870-
break;
871-
}
872-
default: {
873-
// Must be a literal
874-
rand = Operand(parseLiteral(errorCode));
875-
break;
876-
}
877-
}
878-
U_ASSERT(!rand.isNull());
858+
// Parse RHS, which must be a literal
859+
// attribute = "@" identifier [o "=" o literal]
860+
rand = Operand(parseLiteral(errorCode));
879861
} else {
880862
// attribute -> "@" identifier [[s] "=" [s]]
881863
// Use null operand, which `rand` is already set to
882864
// "Backtrack" by restoring the whitespace (if there was any)
883865
index = savedIndex;
884866
}
885867

886-
attrAdder.addAttribute(lhs, std::move(rand), errorCode);
868+
attrAdder.addAttribute(lhs, std::move(Operand(rand)), errorCode);
887869
}
888870

889871
/*
@@ -1722,6 +1704,22 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
17221704
return result.build(status);
17231705
}
17241706

1707+
void Parser::parseVariant(UErrorCode& status) {
1708+
CHECK_ERROR(status);
1709+
1710+
// At least one key is required
1711+
SelectorKeys keyList(parseNonEmptyKeys(status));
1712+
1713+
// parseNonEmptyKeys() consumes any trailing whitespace,
1714+
// so the pattern can be consumed next.
1715+
1716+
// Restore precondition before calling parsePattern()
1717+
// (which must return a non-null value)
1718+
CHECK_BOUNDS(status);
1719+
Pattern rhs = parseQuotedPattern(status);
1720+
1721+
dataModel.addVariant(std::move(keyList), std::move(rhs), status);
1722+
}
17251723

17261724
/*
17271725
Consume a `selectors` (matching the nonterminal in the grammar),
@@ -1741,22 +1739,25 @@ void Parser::parseSelectors(UErrorCode& status) {
17411739
// Parse selectors
17421740
// "Backtracking" is required here. It's not clear if whitespace is
17431741
// (`[s]` selector) or (`[s]` variant)
1744-
while (isWhitespace(peek()) || peek() == LEFT_CURLY_BRACE) {
1745-
parseOptionalWhitespace(status);
1742+
while (isWhitespace(peek()) || peek() == DOLLAR) {
1743+
int32_t whitespaceStart = index;
1744+
parseRequiredWhitespace(status);
17461745
// Restore precondition
17471746
CHECK_BOUNDS(status);
1748-
if (peek() != LEFT_CURLY_BRACE) {
1747+
if (peek() != DOLLAR) {
17491748
// This is not necessarily an error, but rather,
17501749
// means the whitespace we parsed was the optional
17511750
// whitespace preceding the first variant, not the
1752-
// optional whitespace preceding a subsequent expression.
1751+
// required whitespace preceding a subsequent variable.
1752+
// In that case, "push back" the whitespace.
1753+
normalizedInput.truncate(normalizedInput.length() - 1);
1754+
index = whitespaceStart;
17531755
break;
17541756
}
1755-
Expression expression;
1756-
expression = parseExpression(status);
1757+
VariableName var = parseVariableName(status);
17571758
empty = false;
17581759

1759-
dataModel.addSelector(std::move(expression), status);
1760+
dataModel.addSelector(std::move(var), status);
17601761
CHECK_ERROR(status);
17611762
}
17621763

@@ -1772,27 +1773,29 @@ void Parser::parseSelectors(UErrorCode& status) {
17721773
} \
17731774

17741775
// Parse variants
1776+
// matcher = match-statement s variant *(o variant)
1777+
1778+
// Parse first variant
1779+
parseRequiredWhitespace(status);
1780+
if (!inBounds()) {
1781+
ERROR(status);
1782+
return;
1783+
}
1784+
parseVariant(status);
1785+
if (!inBounds()) {
1786+
// Not an error; there might be only one variant
1787+
return;
1788+
}
1789+
17751790
while (isWhitespace(peek()) || isKeyStart(peek())) {
1776-
// Trailing whitespace is allowed
17771791
parseOptionalWhitespace(status);
1792+
// Restore the precondition.
1793+
// Trailing whitespace is allowed.
17781794
if (!inBounds()) {
17791795
return;
17801796
}
17811797

1782-
// At least one key is required
1783-
SelectorKeys keyList(parseNonEmptyKeys(status));
1784-
1785-
CHECK_ERROR(status);
1786-
1787-
// parseNonEmptyKeys() consumes any trailing whitespace,
1788-
// so the pattern can be consumed next.
1789-
1790-
// Restore precondition before calling parsePattern()
1791-
// (which must return a non-null value)
1792-
CHECK_BOUNDS(status);
1793-
Pattern rhs = parseQuotedPattern(status);
1794-
1795-
dataModel.addVariant(std::move(keyList), std::move(rhs), status);
1798+
parseVariant(status);
17961799

17971800
// Restore the precondition, *without* erroring out if we've
17981801
// reached the end of input. That's because it's valid for the

icu4c/source/i18n/messageformat2_parser.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ namespace message2 {
102102
void parseUnsupportedStatement(UErrorCode&);
103103
void parseLocalDeclaration(UErrorCode&);
104104
void parseInputDeclaration(UErrorCode&);
105-
void parseSelectors(UErrorCode&);
105+
void parseSelectors(UErrorCode&);
106+
void parseVariant(UErrorCode&);
106107

107108
void parseWhitespaceMaybeRequired(bool, UErrorCode&);
108109
void parseRequiredWhitespace(UErrorCode&);

icu4c/source/i18n/messageformat2_serializer.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,18 +246,20 @@ void Serializer::serializeDeclarations() {
246246

247247
void Serializer::serializeSelectors() {
248248
U_ASSERT(!dataModel.hasPattern());
249-
const Expression* selectors = dataModel.getSelectorsInternal();
249+
const VariableName* selectors = dataModel.getSelectorsInternal();
250250

251251
emit(ID_MATCH);
252252
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
253-
// No whitespace needed here -- see `selectors` in the grammar
253+
whitespace();
254+
emit(DOLLAR);
254255
emit(selectors[i]);
255256
}
256257
}
257258

258259
void Serializer::serializeVariants() {
259260
U_ASSERT(!dataModel.hasPattern());
260261
const Variant* variants = dataModel.getVariantsInternal();
262+
whitespace();
261263
for (int32_t i = 0; i < dataModel.numVariants(); i++) {
262264
const Variant& v = variants[i];
263265
emit(v.getKeys());

0 commit comments

Comments
 (0)