Skip to content

Commit 1b33f5e

Browse files
committed
ICU-22889 Implemented a recursion limit in the RBNF parsing code to match the one already present in the RBNF
formatting code.
1 parent 17608b6 commit 1b33f5e

File tree

14 files changed

+138
-33
lines changed

14 files changed

+138
-33
lines changed

icu4c/source/i18n/nfrs.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -689,14 +689,20 @@ static void dumpUS(FILE* f, const UnicodeString& us) {
689689
#endif
690690

691691
UBool
692-
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const
692+
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const
693693
{
694694
// try matching each rule in the rule set against the text being
695695
// parsed. Whichever one matches the most characters is the one
696696
// that determines the value we return.
697697

698698
result.setLong(0);
699699

700+
// dump out if we've reached the recursion limit
701+
if (recursionCount >= RECURSION_LIMIT) {
702+
// stop recursion
703+
return false;
704+
}
705+
700706
// dump out if there's no text to parse
701707
if (text.length() == 0) {
702708
return 0;
@@ -720,7 +726,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
720726
nonNumericalExecutedRuleMask |= 1 << i;
721727

722728
Formattable tempResult;
723-
UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
729+
UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
724730
if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
725731
result = tempResult;
726732
highWaterMark = workingPos;
@@ -759,7 +765,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
759765
continue;
760766
}
761767
Formattable tempResult;
762-
UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
768+
UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
763769
if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
764770
result = tempResult;
765771
highWaterMark = workingPos;

icu4c/source/i18n/nfrs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class NFRuleSet : public UMemory {
5555
void format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
5656
void format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
5757

58-
UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const;
58+
UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const;
5959

6060
void appendRules(UnicodeString& result) const; // toString
6161

icu4c/source/i18n/nfrule.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,7 @@ NFRule::doParse(const UnicodeString& text,
918918
UBool isFractionRule,
919919
double upperBound,
920920
uint32_t nonNumericalExecutedRuleMask,
921+
int32_t recursionCount,
921922
Formattable& resVal) const
922923
{
923924
// internally we operate on a copy of the string being parsed
@@ -1021,6 +1022,7 @@ NFRule::doParse(const UnicodeString& text,
10211022
double partialResult = matchToDelimiter(workText, start, tempBaseValue,
10221023
temp, pp, sub1,
10231024
nonNumericalExecutedRuleMask,
1025+
recursionCount,
10241026
upperBound);
10251027

10261028
// if we got a successful match (or were trying to match a
@@ -1042,6 +1044,7 @@ NFRule::doParse(const UnicodeString& text,
10421044
partialResult = matchToDelimiter(workText2, 0, partialResult,
10431045
temp, pp2, sub2,
10441046
nonNumericalExecutedRuleMask,
1047+
recursionCount,
10451048
upperBound);
10461049

10471050
// if we got a successful match on this second
@@ -1179,6 +1182,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
11791182
ParsePosition& pp,
11801183
const NFSubstitution* sub,
11811184
uint32_t nonNumericalExecutedRuleMask,
1185+
int32_t recursionCount,
11821186
double upperBound) const
11831187
{
11841188
UErrorCode status = U_ZERO_ERROR;
@@ -1213,6 +1217,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
12131217
formatter->isLenient(),
12141218
#endif
12151219
nonNumericalExecutedRuleMask,
1220+
recursionCount,
12161221
result);
12171222

12181223
// if the substitution could match all the text up to
@@ -1267,6 +1272,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
12671272
formatter->isLenient(),
12681273
#endif
12691274
nonNumericalExecutedRuleMask,
1275+
recursionCount,
12701276
result);
12711277
if (success && (tempPP.getIndex() != 0)) {
12721278
// if there's a successful match (or it's a null

icu4c/source/i18n/nfrule.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class NFRule : public UMemory {
7777
UBool isFractional,
7878
double upperBound,
7979
uint32_t nonNumericalExecutedRuleMask,
80+
int32_t recursionCount,
8081
Formattable& result) const;
8182

8283
UBool shouldRollBack(int64_t number) const;
@@ -98,6 +99,7 @@ class NFRule : public UMemory {
9899
double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue,
99100
const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub,
100101
uint32_t nonNumericalExecutedRuleMask,
102+
int32_t recursionCount,
101103
double upperBound) const;
102104
void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const;
103105

icu4c/source/i18n/nfsubs.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ class ModulusSubstitution : public NFSubstitution {
175175
double upperBound,
176176
UBool lenientParse,
177177
uint32_t nonNumericalExecutedRuleMask,
178+
int32_t recursionCount,
178179
Formattable& result) const override;
179180

180181
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override {
@@ -242,6 +243,7 @@ class FractionalPartSubstitution : public NFSubstitution {
242243
double upperBound,
243244
UBool lenientParse,
244245
uint32_t nonNumericalExecutedRuleMask,
246+
int32_t recursionCount,
245247
Formattable& result) const override;
246248

247249
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override { return newRuleValue + oldRuleValue; }
@@ -314,6 +316,7 @@ class NumeratorSubstitution : public NFSubstitution {
314316
double upperBound,
315317
UBool /*lenientParse*/,
316318
uint32_t nonNumericalExecutedRuleMask,
319+
int32_t recursionCount,
317320
Formattable& result) const override;
318321

319322
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override { return newRuleValue / oldRuleValue; }
@@ -706,6 +709,7 @@ NFSubstitution::doParse(const UnicodeString& text,
706709
double upperBound,
707710
UBool lenientParse,
708711
uint32_t nonNumericalExecutedRuleMask,
712+
int32_t recursionCount,
709713
Formattable& result) const
710714
{
711715
#ifdef RBNF_DEBUG
@@ -726,7 +730,7 @@ NFSubstitution::doParse(const UnicodeString& text,
726730
// on), then also try parsing the text using a default-
727731
// constructed NumberFormat
728732
if (ruleSet != nullptr) {
729-
ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, result);
733+
ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, recursionCount, result);
730734
if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
731735
UErrorCode status = U_ZERO_ERROR;
732736
NumberFormat* fmt = NumberFormat::createInstance(status);
@@ -949,18 +953,19 @@ ModulusSubstitution::doParse(const UnicodeString& text,
949953
double upperBound,
950954
UBool lenientParse,
951955
uint32_t nonNumericalExecutedRuleMask,
956+
int32_t recursionCount,
952957
Formattable& result) const
953958
{
954959
// if this isn't a >>> substitution, we can just use the
955960
// inherited parse() routine to do the parsing
956961
if (ruleToUse == nullptr) {
957-
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result);
962+
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, recursionCount, result);
958963

959964
// but if it IS a >>> substitution, we have to do it here: we
960965
// use the specific rule's doParse() method, and then we have to
961966
// do some of the other work of NFRuleSet.parse()
962967
} else {
963-
ruleToUse->doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask, result);
968+
ruleToUse->doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask, recursionCount, result);
964969

965970
if (parsePosition.getIndex() != 0) {
966971
UErrorCode status = U_ZERO_ERROR;
@@ -1136,12 +1141,13 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
11361141
double /*upperBound*/,
11371142
UBool lenientParse,
11381143
uint32_t nonNumericalExecutedRuleMask,
1144+
int32_t recursionCount,
11391145
Formattable& resVal) const
11401146
{
11411147
// if we're not in byDigits mode, we can just use the inherited
11421148
// doParse()
11431149
if (!byDigits) {
1144-
return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal);
1150+
return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, recursionCount, resVal);
11451151

11461152
// if we ARE in byDigits mode, parse the text one digit at a time
11471153
// using this substitution's owning rule set (we do this by setting
@@ -1160,7 +1166,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
11601166
while (workText.length() > 0 && workPos.getIndex() != 0) {
11611167
workPos.setIndex(0);
11621168
Formattable temp;
1163-
getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp);
1169+
getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, recursionCount, temp);
11641170
UErrorCode status = U_ZERO_ERROR;
11651171
digit = temp.getLong(status);
11661172
// digit = temp.getType() == Formattable::kLong ?
@@ -1271,6 +1277,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
12711277
double upperBound,
12721278
UBool /*lenientParse*/,
12731279
uint32_t nonNumericalExecutedRuleMask,
1280+
int32_t recursionCount,
12741281
Formattable& result) const
12751282
{
12761283
// we don't have to do anything special to do the parsing here,
@@ -1289,7 +1296,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
12891296

12901297
while (workText.length() > 0 && workPos.getIndex() != 0) {
12911298
workPos.setIndex(0);
1292-
getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all
1299+
getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, recursionCount, temp); // parse zero or nothing at all
12931300
if (workPos.getIndex() == 0) {
12941301
// we failed, either there were no more zeros, or the number was formatted with digits
12951302
// either way, we're done
@@ -1311,7 +1318,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
13111318
}
13121319

13131320
// we've parsed off the zeros, now let's parse the rest from our current position
1314-
NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, false, nonNumericalExecutedRuleMask, result);
1321+
NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, false, nonNumericalExecutedRuleMask, recursionCount, result);
13151322

13161323
if (withZeros) {
13171324
// any base value will do in this case. is there a way to

icu4c/source/i18n/nfsubs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ class NFSubstitution : public UObject {
192192
double upperBound,
193193
UBool lenientParse,
194194
uint32_t nonNumericalExecutedRuleMask,
195+
int32_t recursionCount,
195196
Formattable& result) const;
196197

197198
/**

icu4c/source/i18n/rbnf.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1362,7 +1362,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text,
13621362
ParsePosition working_pp(0);
13631363
Formattable working_result;
13641364

1365-
rp->parse(workingText, working_pp, kMaxDouble, 0, working_result);
1365+
rp->parse(workingText, working_pp, kMaxDouble, 0, 0, working_result);
13661366
if (working_pp.getIndex() > high_pp.getIndex()) {
13671367
high_pp = working_pp;
13681368
high_result = working_result;

icu4c/source/test/intltest/itrbnf.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
8181
TESTCASE(29, TestNumberingSystem);
8282
TESTCASE(30, TestDFRounding);
8383
TESTCASE(31, TestMemoryLeak22899);
84+
TESTCASE(32, TestInfiniteRecursion);
8485
#else
8586
TESTCASE(0, TestRBNFDisabled);
8687
#endif
@@ -2614,6 +2615,39 @@ IntlTestRBNF::TestNumberingSystem() {
26142615
}
26152616
}
26162617

2618+
void
2619+
IntlTestRBNF::TestInfiniteRecursion() {
2620+
UnicodeString badRules[] = {
2621+
">>",
2622+
"<<",
2623+
"<<<",
2624+
">>>",
2625+
"%foo: x=%foo=",
2626+
"%one: x>%two>; %two: y>%one>;"
2627+
};
2628+
2629+
for (int32_t i = 0; i < UPRV_LENGTHOF(badRules); i++) {
2630+
UErrorCode err = U_ZERO_ERROR;
2631+
UParseError parseErr;
2632+
RuleBasedNumberFormat rbnf(badRules[i], parseErr, err);
2633+
2634+
if (U_SUCCESS(err)) {
2635+
UnicodeString result;
2636+
rbnf.format(5, result);
2637+
// we don't actually care about the result and the function doesn't return an error code;
2638+
// we just want to make sure the function returns
2639+
2640+
Formattable pResult;
2641+
rbnf.parse("foo", pResult, err);
2642+
assertTrue("rbnf.parse() didn't return U_INVALID_FORMAT_ERROR!", err == U_INVALID_FORMAT_ERROR);
2643+
} else {
2644+
// eventually it'd be nice to statically analyze the rules for (at least) the most common
2645+
// causes of infinite recursion, in which case we'd end up down here and need to check
2646+
// the error code. But for now, we probably won't end up here and don't care if we do
2647+
}
2648+
}
2649+
}
2650+
26172651
/* U_HAVE_RBNF */
26182652
#else
26192653

icu4c/source/test/intltest/itrbnf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ class IntlTestRBNF : public IntlTest {
162162
void TestMinMaxIntegerDigitsIgnored();
163163
void TestNumberingSystem();
164164
void TestMemoryLeak22899();
165+
void TestInfiniteRecursion();
165166

166167
protected:
167168
virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);

icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/RbnfTest.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1908,4 +1908,47 @@ public void TestNumberingSystem() {
19081908
rbnf.setDefaultRuleSet("%ethiopic");
19091909
assertEquals("Wrong result with Ethiopic rule set", "፻፳፫", rbnf.format(123));
19101910
}
1911+
1912+
@Test
1913+
public void TestInfiniteRecursion() {
1914+
final String[] badRules = {
1915+
">>",
1916+
"<<",
1917+
"<<<",
1918+
">>>",
1919+
"%foo: x=%foo=",
1920+
"%one: x>%two>; %two: y>%one>;"
1921+
};
1922+
1923+
for (String badRule : badRules) {
1924+
try {
1925+
RuleBasedNumberFormat rbnf = new RuleBasedNumberFormat(badRule);
1926+
try {
1927+
rbnf.format(5);
1928+
// we don't actually care about the result; we just want to make sure the function returns
1929+
} catch (IllegalStateException e) {
1930+
// we're supposed to get an IllegalStateException; swallow it and continue
1931+
}
1932+
1933+
try {
1934+
rbnf.parse("foo");
1935+
errln("Parse test didn't throw an exception!");
1936+
} catch (IllegalStateException e) {
1937+
// we're supposed to get an IllegalStateException; swallow it and continue
1938+
} catch (ParseException e) {
1939+
// if we don't hit the recursion limit, we'll still fail to parse the number,
1940+
// so also swallow this exception and continue
1941+
}
1942+
} catch (IllegalArgumentException e) {
1943+
// ">>>" generates a parse exception when you try to create the formatter (so we expect that rather
1944+
// than re-throwing and triggering a test failure)
1945+
// (eventually it'd be nice to statically analyze the rules for (at least) the most common
1946+
// causes of infinite recursion, in which case we'd end up down here and need to check
1947+
// the error code. But for now, we probably won't end up here and don't care if we do)
1948+
if (!badRule.equals("<<<")) {
1949+
throw e;
1950+
}
1951+
}
1952+
}
1953+
}
19111954
}

0 commit comments

Comments
 (0)