Skip to content

Commit c394686

Browse files
milesziemermtdowling
authored andcommitted
Assert trees are valid, fill in missing tests (#1808)
All TreeTypeTests now expect the entire tree to be valid. This helps us find ambiguity and inconsistencies in the parser for cases when parsing a child tree is invalid only due to the context of the parent. Fixes minor bugs that caused trees to be invalid: - enumMember test didn't have a BR after VALUE_ASSIGNMENT Fills in missing tests for: - Operations - Block apply statements - Entity type name - Aggregate type name Other minor changes and improvements: - Adds util methods for parsing optional for-resource, mixins, and value assignment productions. - Adds util method for parsing any shape type and name. - Updates peekPastSpaces to not skip the current token, in case it is not SP. This simplifies parsing value assignments, and has more intuitive behavior. - Removes peekPastWs, which was only used for TRAIT_BODY_VALUE, inlining the implementation. peekPastWs skipped the current token like peekPastSpaces, which is necessary for TRAIT_BODY_VALUE. - Add ENTITY_TYPE_NAME production. - Updates name of some productions to be consistent with grammar. - Fixed a few places where TOKEN was parsed as IDENTIFIER.
1 parent d60237d commit c394686

File tree

3 files changed

+439
-102
lines changed

3 files changed

+439
-102
lines changed

smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturingTokenizer.java

+5-5
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,15 @@ public IdlToken next() {
137137
}
138138

139139
CapturedToken peekPastSpaces() {
140-
return peekWhile(1, token -> token == IdlToken.SPACE);
141-
}
142-
143-
CapturedToken peekPastWs() {
144-
return peekWhile(1, token -> token.isWhitespace() || token == IdlToken.DOC_COMMENT);
140+
return peekWhile(0, token -> token == IdlToken.SPACE);
145141
}
146142

147143
CapturedToken peekWhile(int offsetFromPosition, Predicate<IdlToken> predicate) {
148144
int position = cursor + offsetFromPosition;
145+
// If the start position is out of bounds, return the EOF token.
146+
if (position >= tokens.size()) {
147+
return tokens.get(tokens.size() - 1);
148+
}
149149
CapturedToken token = tokens.get(position);
150150
while (token.getIdlToken() != IdlToken.EOF && predicate.test(token.getIdlToken())) {
151151
token = tokens.get(++position);

smithy-syntax/src/main/java/software/amazon/smithy/syntax/TreeType.java

+62-56
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ void parse(CapturingTokenizer tokenizer) {
152152
@Override
153153
void parse(CapturingTokenizer tokenizer) {
154154
tokenizer.withState(this, () -> {
155-
tokenizer.next();
155+
tokenizer.next(); // Skip "use"
156156
SP.parse(tokenizer);
157157
ABSOLUTE_ROOT_SHAPE_ID.parse(tokenizer);
158158
BR.parse(tokenizer);
@@ -236,22 +236,16 @@ void parse(CapturingTokenizer tokenizer) {
236236
@Override
237237
void parse(CapturingTokenizer tokenizer) {
238238
tokenizer.withState(this, () -> {
239-
SIMPLE_TYPE_NAME.parse(tokenizer);
240-
optionalSpaces(tokenizer);
241-
IDENTIFIER.parse(tokenizer);
242-
optionalSpaces(tokenizer);
243-
244-
if (tokenizer.isCurrentLexeme("with")) {
245-
SHAPE_MIXINS.parse(tokenizer);
246-
}
239+
parseShapeTypeAndName(tokenizer, SIMPLE_TYPE_NAME);
240+
parseOptionalMixins(tokenizer);
247241
});
248242
}
249243
},
250244

251245
SIMPLE_TYPE_NAME {
252246
@Override
253247
void parse(CapturingTokenizer tokenizer) {
254-
// Assumes that the current token is a valid simple type name validated by SHAPE_BODY.
248+
// Assumes that the current token is a valid simple type name validated by SHAPE.
255249
tokenizer.withState(this, tokenizer::next);
256250
}
257251
},
@@ -260,14 +254,8 @@ void parse(CapturingTokenizer tokenizer) {
260254
@Override
261255
void parse(CapturingTokenizer tokenizer) {
262256
tokenizer.withState(this, () -> {
263-
ENUM_TYPE_NAME.parse(tokenizer);
264-
optionalSpaces(tokenizer);
265-
IDENTIFIER.parse(tokenizer);
266-
optionalSpaces(tokenizer);
267-
268-
if (tokenizer.isCurrentLexeme("with")) {
269-
SHAPE_MIXINS.parse(tokenizer);
270-
}
257+
parseShapeTypeAndName(tokenizer, ENUM_TYPE_NAME);
258+
parseOptionalMixins(tokenizer);
271259

272260
optionalWs(tokenizer);
273261
ENUM_SHAPE_MEMBERS.parse(tokenizer);
@@ -278,7 +266,7 @@ void parse(CapturingTokenizer tokenizer) {
278266
ENUM_TYPE_NAME {
279267
@Override
280268
void parse(CapturingTokenizer tokenizer) {
281-
// Assumes that the current token is a valid enum type name validated by SHAPE_BODY.
269+
// Assumes that the current token is a valid enum type name validated by SHAPE.
282270
tokenizer.withState(this, tokenizer::next);
283271
}
284272
},
@@ -308,10 +296,7 @@ void parse(CapturingTokenizer tokenizer) {
308296
tokenizer.withState(this, () -> {
309297
TRAIT_STATEMENTS.parse(tokenizer);
310298
IDENTIFIER.parse(tokenizer);
311-
if (tokenizer.hasNext() && (tokenizer.getCurrentToken() == IdlToken.EQUAL
312-
|| tokenizer.peekPastSpaces().getIdlToken() == IdlToken.EQUAL)) {
313-
VALUE_ASSIGNMENT.parse(tokenizer);
314-
}
299+
parseOptionalValueAssignment(tokenizer);
315300
});
316301
}
317302
},
@@ -320,10 +305,7 @@ void parse(CapturingTokenizer tokenizer) {
320305
@Override
321306
void parse(CapturingTokenizer tokenizer) {
322307
tokenizer.withState(this, () -> {
323-
AGGREGATE_TYPE_NAME.parse(tokenizer);
324-
optionalSpaces(tokenizer);
325-
IDENTIFIER.parse(tokenizer);
326-
optionalSpaces(tokenizer);
308+
parseShapeTypeAndName(tokenizer, AGGREGATE_TYPE_NAME);
327309
parseSharedStructureBodyWithinInline(tokenizer);
328310
});
329311
}
@@ -332,16 +314,17 @@ void parse(CapturingTokenizer tokenizer) {
332314
AGGREGATE_TYPE_NAME {
333315
@Override
334316
void parse(CapturingTokenizer tokenizer) {
335-
// Assumes that the current token is a valid simple type name validated by SHAPE_BODY.
317+
// Assumes that the current token is a valid simple type name validated by SHAPE.
336318
tokenizer.withState(this, tokenizer::next);
337319
}
338320
},
339321

340-
AGGREGATE_SHAPE_RESOURCE {
322+
// Don't use this directly. Instead, use parseOptionalForResource
323+
FOR_RESOURCE {
341324
@Override
342325
void parse(CapturingTokenizer tokenizer) {
343326
tokenizer.withState(this, () -> {
344-
IDENTIFIER.parse(tokenizer);
327+
tokenizer.next(); // Skip "for"
345328
SP.parse(tokenizer);
346329
SHAPE_ID.parse(tokenizer);
347330
});
@@ -377,11 +360,7 @@ void parse(CapturingTokenizer tokenizer) {
377360
} else {
378361
EXPLICIT_SHAPE_MEMBER.parse(tokenizer);
379362
}
380-
381-
optionalSpaces(tokenizer);
382-
if (tokenizer.getCurrentToken() == IdlToken.EQUAL) {
383-
VALUE_ASSIGNMENT.parse(tokenizer);
384-
}
363+
parseOptionalValueAssignment(tokenizer);
385364
});
386365
}
387366
},
@@ -416,11 +395,9 @@ void parse(CapturingTokenizer tokenizer) {
416395
void parse(CapturingTokenizer tokenizer) {
417396
// Assumes that the shape type is a valid "service" or "resource".
418397
tokenizer.withState(this, () -> {
419-
parseShapeTypeAndName(tokenizer);
398+
parseShapeTypeAndName(tokenizer, ENTITY_TYPE_NAME);
420399

421-
if (tokenizer.isCurrentLexeme("with")) {
422-
SHAPE_MIXINS.parse(tokenizer);
423-
}
400+
parseOptionalMixins(tokenizer);
424401

425402
optionalWs(tokenizer);
426403
tokenizer.expect(IdlToken.LBRACE);
@@ -429,15 +406,21 @@ void parse(CapturingTokenizer tokenizer) {
429406
}
430407
},
431408

409+
ENTITY_TYPE_NAME {
410+
@Override
411+
void parse(CapturingTokenizer tokenizer) {
412+
// Assumes that the current token is a valid entity type name validated by SHAPE.
413+
tokenizer.withState(this, tokenizer::next);
414+
}
415+
},
416+
432417
OPERATION_SHAPE {
433418
@Override
434419
void parse(CapturingTokenizer tokenizer) {
435420
tokenizer.withState(this, () -> {
436421
parseShapeTypeAndName(tokenizer);
437422

438-
if (tokenizer.isCurrentLexeme("with")) {
439-
SHAPE_MIXINS.parse(tokenizer);
440-
}
423+
parseOptionalMixins(tokenizer);
441424

442425
optionalWs(tokenizer);
443426
OPERATION_BODY.parse(tokenizer);
@@ -544,11 +527,12 @@ void parse(CapturingTokenizer tokenizer) {
544527

545528
// Mixins =
546529
// [SP] %s"with" [WS] "[" [WS] 1*(ShapeId [WS]) "]"
547-
SHAPE_MIXINS {
530+
// Don't use this directly. Instead, use parseOptionalMixins
531+
MIXINS {
548532
@Override
549533
void parse(CapturingTokenizer tokenizer) {
550534
tokenizer.withState(this, () -> {
551-
IDENTIFIER.parse(tokenizer); // 'with'
535+
tokenizer.next(); // Skip "with"
552536
optionalWs(tokenizer);
553537

554538
tokenizer.expect(IdlToken.LBRACKET);
@@ -567,6 +551,7 @@ void parse(CapturingTokenizer tokenizer) {
567551
}
568552
},
569553

554+
// Don't use this directly. Instead, use parseOptionalValueAssignment
570555
VALUE_ASSIGNMENT {
571556
@Override
572557
void parse(CapturingTokenizer tokenizer) {
@@ -634,7 +619,9 @@ void parse(CapturingTokenizer tokenizer) {
634619
case STRING:
635620
case IDENTIFIER:
636621
default:
637-
if (tokenizer.peekPastWs().getIdlToken() == IdlToken.COLON) {
622+
CapturedToken nextPastWs = tokenizer.peekWhile(1, token ->
623+
token.isWhitespace() || token == IdlToken.DOC_COMMENT);
624+
if (nextPastWs.getIdlToken() == IdlToken.COLON) {
638625
TRAIT_STRUCTURE.parse(tokenizer);
639626
} else {
640627
TRAIT_NODE.parse(tokenizer);
@@ -677,7 +664,7 @@ void parse(CapturingTokenizer tokenizer) {
677664
tokenizer.withState(this, () -> {
678665
// Try to see if this is a singular or block apply statement.
679666
IdlToken peek = tokenizer
680-
.peekWhile(1, t -> t != IdlToken.EOF && t != IdlToken.AT && t != IdlToken.LBRACE)
667+
.peekWhile(1, t -> t != IdlToken.AT && t != IdlToken.LBRACE)
681668
.getIdlToken();
682669
if (peek == IdlToken.LBRACE) {
683670
APPLY_STATEMENT_BLOCK.parse(tokenizer);
@@ -692,7 +679,7 @@ void parse(CapturingTokenizer tokenizer) {
692679
@Override
693680
void parse(CapturingTokenizer tokenizer) {
694681
tokenizer.withState(this, () -> {
695-
tokenizer.next();
682+
tokenizer.next(); // Skip "apply"
696683
SP.parse(tokenizer);
697684
SHAPE_ID.parse(tokenizer);
698685
WS.parse(tokenizer);
@@ -705,7 +692,7 @@ void parse(CapturingTokenizer tokenizer) {
705692
@Override
706693
void parse(CapturingTokenizer tokenizer) {
707694
tokenizer.withState(this, () -> {
708-
tokenizer.next();
695+
tokenizer.next(); // Skip "apply"
709696
SP.parse(tokenizer);
710697
SHAPE_ID.parse(tokenizer);
711698
WS.parse(tokenizer);
@@ -1089,27 +1076,46 @@ protected static void optionalSpaces(CapturingTokenizer tokenizer) {
10891076
}
10901077

10911078
protected static void parseShapeTypeAndName(CapturingTokenizer tokenizer) {
1092-
tokenizer.expect(IdlToken.IDENTIFIER);
1093-
tokenizer.next(); // skip the shape type
1079+
parseShapeTypeAndName(tokenizer, null);
1080+
}
1081+
1082+
protected static void parseShapeTypeAndName(CapturingTokenizer tokenizer, TreeType typeName) {
1083+
if (typeName == null) {
1084+
tokenizer.next();
1085+
} else {
1086+
typeName.parse(tokenizer); // Skip the shape type
1087+
}
10941088
optionalSpaces(tokenizer);
10951089
IDENTIFIER.parse(tokenizer); // shape name
10961090
optionalSpaces(tokenizer);
10971091
}
10981092

10991093
protected static void parseSharedStructureBodyWithinInline(CapturingTokenizer tokenizer) {
1100-
optionalSpaces(tokenizer);
1094+
parseOptionalForResource(tokenizer);
1095+
parseOptionalMixins(tokenizer);
11011096

1097+
optionalWs(tokenizer);
1098+
SHAPE_MEMBERS.parse(tokenizer);
1099+
}
1100+
1101+
protected static void parseOptionalForResource(CapturingTokenizer tokenizer) {
1102+
optionalSpaces(tokenizer);
11021103
if (tokenizer.isCurrentLexeme("for")) {
1103-
AGGREGATE_SHAPE_RESOURCE.parse(tokenizer);
1104-
optionalSpaces(tokenizer);
1104+
FOR_RESOURCE.parse(tokenizer);
11051105
}
1106+
}
11061107

1108+
protected static void parseOptionalMixins(CapturingTokenizer tokenizer) {
1109+
optionalSpaces(tokenizer);
11071110
if (tokenizer.isCurrentLexeme("with")) {
1108-
SHAPE_MIXINS.parse(tokenizer);
1111+
MIXINS.parse(tokenizer);
11091112
}
1113+
}
11101114

1111-
optionalWs(tokenizer);
1112-
SHAPE_MEMBERS.parse(tokenizer);
1115+
protected static void parseOptionalValueAssignment(CapturingTokenizer tokenizer) {
1116+
if (tokenizer.peekPastSpaces().getIdlToken() == IdlToken.EQUAL) {
1117+
VALUE_ASSIGNMENT.parse(tokenizer);
1118+
}
11131119
}
11141120

11151121
protected static void operationInputOutputDefinition(CapturingTokenizer tokenizer) {

0 commit comments

Comments
 (0)