Skip to content

Commit f1144bc

Browse files
committed
Assert trees are valid, fill in missing tests
All TreeTypeTests now expect the entire tree to be valid. This helps us find ambiguity and inconsistencies in the parser for cases when parsing a child tree is invalid only due to the context of the parent. Fixes minor bugs that caused trees to be invalid: - enumMember test didn't have a BR after VALUE_ASSIGNMENT Fills in missing tests for: - Operations - Block apply statements - Entity type name - Aggregate type name Other minor changes and improvements: - Adds util methods for parsing optional for-resource, mixins, and value assignment productions. - Adds util method for parsing any shape type and name. - Updates peekPastSpaces to not skip the current token, in case it is not SP. This simplifies parsing value assignments, and has more intuitive behavior. - Removes peekPastWs, which was only used for TRAIT_BODY_VALUE, inlining the implementation. peekPastWs skipped the current token like peekPastSpaces, which is necessary for TRAIT_BODY_VALUE. - Add ENTITY_TYPE_NAME production. - Updates name of some productions to be consistent with grammar. - Fixed a few places where TOKEN was parsed as IDENTIFIER.
1 parent 2fbdeb8 commit f1144bc

File tree

3 files changed

+442
-108
lines changed

3 files changed

+442
-108
lines changed

smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturingTokenizer.java

+5-5
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,15 @@ public IdlToken next() {
137137
}
138138

139139
CapturedToken peekPastSpaces() {
140-
return peekWhile(1, token -> token == IdlToken.SPACE);
141-
}
142-
143-
CapturedToken peekPastWs() {
144-
return peekWhile(1, token -> token.isWhitespace() || token == IdlToken.DOC_COMMENT);
140+
return peekWhile(0, token -> token == IdlToken.SPACE);
145141
}
146142

147143
CapturedToken peekWhile(int offsetFromPosition, Predicate<IdlToken> predicate) {
148144
int position = cursor + offsetFromPosition;
145+
// If the start position is out of bounds, return the EOF token.
146+
if (position >= tokens.size()) {
147+
return tokens.get(tokens.size() - 1);
148+
}
149149
CapturedToken token = tokens.get(position);
150150
while (token.getIdlToken() != IdlToken.EOF && predicate.test(token.getIdlToken())) {
151151
token = tokens.get(++position);

smithy-syntax/src/main/java/software/amazon/smithy/syntax/TreeType.java

+65-62
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ void parse(CapturingTokenizer tokenizer) {
152152
@Override
153153
void parse(CapturingTokenizer tokenizer) {
154154
tokenizer.withState(this, () -> {
155-
tokenizer.next();
155+
tokenizer.next(); // Skip "use"
156156
SP.parse(tokenizer);
157157
ABSOLUTE_ROOT_SHAPE_ID.parse(tokenizer);
158158
BR.parse(tokenizer);
@@ -236,22 +236,16 @@ void parse(CapturingTokenizer tokenizer) {
236236
@Override
237237
void parse(CapturingTokenizer tokenizer) {
238238
tokenizer.withState(this, () -> {
239-
SIMPLE_TYPE_NAME.parse(tokenizer);
240-
optionalSpaces(tokenizer);
241-
IDENTIFIER.parse(tokenizer);
242-
optionalSpaces(tokenizer);
243-
244-
if (tokenizer.isCurrentLexeme("with")) {
245-
SHAPE_MIXINS.parse(tokenizer);
246-
}
239+
parseShapeTypeAndName(tokenizer, SIMPLE_TYPE_NAME);
240+
parseOptionalMixins(tokenizer);
247241
});
248242
}
249243
},
250244

251245
SIMPLE_TYPE_NAME {
252246
@Override
253247
void parse(CapturingTokenizer tokenizer) {
254-
// Assumes that the current token is a valid simple type name validated by SHAPE_BODY.
248+
// Assumes that the current token is a valid simple type name validated by SHAPE.
255249
tokenizer.withState(this, tokenizer::next);
256250
}
257251
},
@@ -260,14 +254,8 @@ void parse(CapturingTokenizer tokenizer) {
260254
@Override
261255
void parse(CapturingTokenizer tokenizer) {
262256
tokenizer.withState(this, () -> {
263-
ENUM_TYPE_NAME.parse(tokenizer);
264-
optionalSpaces(tokenizer);
265-
IDENTIFIER.parse(tokenizer);
266-
optionalSpaces(tokenizer);
267-
268-
if (tokenizer.isCurrentLexeme("with")) {
269-
SHAPE_MIXINS.parse(tokenizer);
270-
}
257+
parseShapeTypeAndName(tokenizer, ENUM_TYPE_NAME);
258+
parseOptionalMixins(tokenizer);
271259

272260
optionalWs(tokenizer);
273261
ENUM_SHAPE_MEMBERS.parse(tokenizer);
@@ -278,7 +266,7 @@ void parse(CapturingTokenizer tokenizer) {
278266
ENUM_TYPE_NAME {
279267
@Override
280268
void parse(CapturingTokenizer tokenizer) {
281-
// Assumes that the current token is a valid enum type name validated by SHAPE_BODY.
269+
// Assumes that the current token is a valid enum type name validated by SHAPE.
282270
tokenizer.withState(this, tokenizer::next);
283271
}
284272
},
@@ -308,10 +296,7 @@ void parse(CapturingTokenizer tokenizer) {
308296
tokenizer.withState(this, () -> {
309297
TRAIT_STATEMENTS.parse(tokenizer);
310298
IDENTIFIER.parse(tokenizer);
311-
if (tokenizer.hasNext() && (tokenizer.getCurrentToken() == IdlToken.EQUAL
312-
|| tokenizer.peekPastSpaces().getIdlToken() == IdlToken.EQUAL)) {
313-
VALUE_ASSIGNMENT.parse(tokenizer);
314-
}
299+
parseOptionalValueAssignment(tokenizer);
315300
});
316301
}
317302
},
@@ -320,10 +305,7 @@ void parse(CapturingTokenizer tokenizer) {
320305
@Override
321306
void parse(CapturingTokenizer tokenizer) {
322307
tokenizer.withState(this, () -> {
323-
AGGREGATE_TYPE_NAME.parse(tokenizer);
324-
optionalSpaces(tokenizer);
325-
IDENTIFIER.parse(tokenizer);
326-
optionalSpaces(tokenizer);
308+
parseShapeTypeAndName(tokenizer, AGGREGATE_TYPE_NAME);
327309
parseSharedStructureBodyWithinInline(tokenizer);
328310
});
329311
}
@@ -332,16 +314,17 @@ void parse(CapturingTokenizer tokenizer) {
332314
AGGREGATE_TYPE_NAME {
333315
@Override
334316
void parse(CapturingTokenizer tokenizer) {
335-
// Assumes that the current token is a valid simple type name validated by SHAPE_BODY.
317+
// Assumes that the current token is a valid simple type name validated by SHAPE.
336318
tokenizer.withState(this, tokenizer::next);
337319
}
338320
},
339321

340-
AGGREGATE_SHAPE_RESOURCE {
322+
// Don't use this directly. Instead, use parseOptionalForResource
323+
FOR_RESOURCE {
341324
@Override
342325
void parse(CapturingTokenizer tokenizer) {
343326
tokenizer.withState(this, () -> {
344-
IDENTIFIER.parse(tokenizer);
327+
tokenizer.next(); // Skip "for"
345328
SP.parse(tokenizer);
346329
SHAPE_ID.parse(tokenizer);
347330
});
@@ -377,11 +360,7 @@ void parse(CapturingTokenizer tokenizer) {
377360
} else {
378361
EXPLICIT_SHAPE_MEMBER.parse(tokenizer);
379362
}
380-
381-
optionalSpaces(tokenizer);
382-
if (tokenizer.getCurrentToken() == IdlToken.EQUAL) {
383-
VALUE_ASSIGNMENT.parse(tokenizer);
384-
}
363+
parseOptionalValueAssignment(tokenizer);
385364
});
386365
}
387366
},
@@ -416,11 +395,9 @@ void parse(CapturingTokenizer tokenizer) {
416395
void parse(CapturingTokenizer tokenizer) {
417396
// Assumes that the shape type is a valid "service" or "resource".
418397
tokenizer.withState(this, () -> {
419-
parseShapeTypeAndName(tokenizer);
398+
parseShapeTypeAndName(tokenizer, ENTITY_TYPE_NAME);
420399

421-
if (tokenizer.isCurrentLexeme("with")) {
422-
SHAPE_MIXINS.parse(tokenizer);
423-
}
400+
parseOptionalMixins(tokenizer);
424401

425402
optionalWs(tokenizer);
426403
tokenizer.expect(IdlToken.LBRACE);
@@ -429,15 +406,21 @@ void parse(CapturingTokenizer tokenizer) {
429406
}
430407
},
431408

409+
ENTITY_TYPE_NAME {
410+
@Override
411+
void parse(CapturingTokenizer tokenizer) {
412+
// Assumes that the current token is a valid entity type name validated by SHAPE.
413+
tokenizer.withState(this, tokenizer::next);
414+
}
415+
},
416+
432417
OPERATION_SHAPE {
433418
@Override
434419
void parse(CapturingTokenizer tokenizer) {
435420
tokenizer.withState(this, () -> {
436421
parseShapeTypeAndName(tokenizer);
437422

438-
if (tokenizer.isCurrentLexeme("with")) {
439-
SHAPE_MIXINS.parse(tokenizer);
440-
}
423+
parseOptionalMixins(tokenizer);
441424

442425
optionalWs(tokenizer);
443426
OPERATION_BODY.parse(tokenizer);
@@ -486,8 +469,7 @@ void parse(CapturingTokenizer tokenizer) {
486469
@Override
487470
void parse(CapturingTokenizer tokenizer) {
488471
tokenizer.withState(this, () -> {
489-
tokenizer.expect(IdlToken.IDENTIFIER);
490-
tokenizer.next(); // skip "input"
472+
IDENTIFIER.parse(tokenizer); // skip "input"
491473
optionalWs(tokenizer);
492474
operationInputOutputDefinition(tokenizer);
493475
});
@@ -498,8 +480,7 @@ void parse(CapturingTokenizer tokenizer) {
498480
@Override
499481
void parse(CapturingTokenizer tokenizer) {
500482
tokenizer.withState(this, () -> {
501-
tokenizer.expect(IdlToken.IDENTIFIER);
502-
tokenizer.next(); // skip "output"
483+
IDENTIFIER.parse(tokenizer); // skip "output"
503484
optionalWs(tokenizer);
504485
operationInputOutputDefinition(tokenizer);
505486
});
@@ -523,8 +504,7 @@ void parse(CapturingTokenizer tokenizer) {
523504
@Override
524505
void parse(CapturingTokenizer tokenizer) {
525506
tokenizer.withState(this, () -> {
526-
tokenizer.expect(IdlToken.IDENTIFIER);
527-
tokenizer.next(); // skip "errors"
507+
IDENTIFIER.parse(tokenizer); // skip "errors"
528508
optionalWs(tokenizer);
529509
tokenizer.expect(IdlToken.COLON);
530510
tokenizer.next();
@@ -544,11 +524,12 @@ void parse(CapturingTokenizer tokenizer) {
544524

545525
// Mixins =
546526
// [SP] %s"with" [WS] "[" [WS] 1*(ShapeId [WS]) "]"
547-
SHAPE_MIXINS {
527+
// Don't use this directly. Instead, use parseOptionalMixins
528+
MIXINS {
548529
@Override
549530
void parse(CapturingTokenizer tokenizer) {
550531
tokenizer.withState(this, () -> {
551-
IDENTIFIER.parse(tokenizer); // 'with'
532+
tokenizer.next(); // Skip "with"
552533
optionalWs(tokenizer);
553534

554535
tokenizer.expect(IdlToken.LBRACKET);
@@ -567,6 +548,7 @@ void parse(CapturingTokenizer tokenizer) {
567548
}
568549
},
569550

551+
// Don't use this directly. Instead, use parseOptionalValueAssignment
570552
VALUE_ASSIGNMENT {
571553
@Override
572554
void parse(CapturingTokenizer tokenizer) {
@@ -634,7 +616,9 @@ void parse(CapturingTokenizer tokenizer) {
634616
case STRING:
635617
case IDENTIFIER:
636618
default:
637-
if (tokenizer.peekPastWs().getIdlToken() == IdlToken.COLON) {
619+
CapturedToken nextPastWs = tokenizer.peekWhile(1, token ->
620+
token.isWhitespace() || token == IdlToken.DOC_COMMENT);
621+
if (nextPastWs.getIdlToken() == IdlToken.COLON) {
638622
TRAIT_STRUCTURE.parse(tokenizer);
639623
} else {
640624
TRAIT_NODE.parse(tokenizer);
@@ -677,7 +661,7 @@ void parse(CapturingTokenizer tokenizer) {
677661
tokenizer.withState(this, () -> {
678662
// Try to see if this is a singular or block apply statement.
679663
IdlToken peek = tokenizer
680-
.peekWhile(1, t -> t != IdlToken.EOF && t != IdlToken.AT && t != IdlToken.LBRACE)
664+
.peekWhile(1, t -> t != IdlToken.AT && t != IdlToken.LBRACE)
681665
.getIdlToken();
682666
if (peek == IdlToken.LBRACE) {
683667
APPLY_STATEMENT_BLOCK.parse(tokenizer);
@@ -692,7 +676,7 @@ void parse(CapturingTokenizer tokenizer) {
692676
@Override
693677
void parse(CapturingTokenizer tokenizer) {
694678
tokenizer.withState(this, () -> {
695-
tokenizer.next();
679+
tokenizer.next(); // Skip "apply"
696680
SP.parse(tokenizer);
697681
SHAPE_ID.parse(tokenizer);
698682
WS.parse(tokenizer);
@@ -705,7 +689,7 @@ void parse(CapturingTokenizer tokenizer) {
705689
@Override
706690
void parse(CapturingTokenizer tokenizer) {
707691
tokenizer.withState(this, () -> {
708-
tokenizer.next();
692+
tokenizer.next(); // Skip "apply"
709693
SP.parse(tokenizer);
710694
SHAPE_ID.parse(tokenizer);
711695
WS.parse(tokenizer);
@@ -1089,27 +1073,46 @@ protected static void optionalSpaces(CapturingTokenizer tokenizer) {
10891073
}
10901074

10911075
protected static void parseShapeTypeAndName(CapturingTokenizer tokenizer) {
1092-
tokenizer.expect(IdlToken.IDENTIFIER);
1093-
tokenizer.next(); // skip the shape type
1076+
parseShapeTypeAndName(tokenizer, null);
1077+
}
1078+
1079+
protected static void parseShapeTypeAndName(CapturingTokenizer tokenizer, TreeType typeName) {
1080+
if (typeName == null) {
1081+
tokenizer.next();
1082+
} else {
1083+
typeName.parse(tokenizer); // Skip the shape type
1084+
}
10941085
optionalSpaces(tokenizer);
10951086
IDENTIFIER.parse(tokenizer); // shape name
10961087
optionalSpaces(tokenizer);
10971088
}
10981089

10991090
protected static void parseSharedStructureBodyWithinInline(CapturingTokenizer tokenizer) {
1100-
optionalSpaces(tokenizer);
1091+
parseOptionalForResource(tokenizer);
1092+
parseOptionalMixins(tokenizer);
11011093

1094+
optionalWs(tokenizer);
1095+
SHAPE_MEMBERS.parse(tokenizer);
1096+
}
1097+
1098+
protected static void parseOptionalForResource(CapturingTokenizer tokenizer) {
1099+
optionalSpaces(tokenizer);
11021100
if (tokenizer.isCurrentLexeme("for")) {
1103-
AGGREGATE_SHAPE_RESOURCE.parse(tokenizer);
1104-
optionalSpaces(tokenizer);
1101+
FOR_RESOURCE.parse(tokenizer);
11051102
}
1103+
}
11061104

1105+
protected static void parseOptionalMixins(CapturingTokenizer tokenizer) {
1106+
optionalSpaces(tokenizer);
11071107
if (tokenizer.isCurrentLexeme("with")) {
1108-
SHAPE_MIXINS.parse(tokenizer);
1108+
MIXINS.parse(tokenizer);
11091109
}
1110+
}
11101111

1111-
optionalWs(tokenizer);
1112-
SHAPE_MEMBERS.parse(tokenizer);
1112+
protected static void parseOptionalValueAssignment(CapturingTokenizer tokenizer) {
1113+
if (tokenizer.peekPastSpaces().getIdlToken() == IdlToken.EQUAL) {
1114+
VALUE_ASSIGNMENT.parse(tokenizer);
1115+
}
11131116
}
11141117

11151118
protected static void operationInputOutputDefinition(CapturingTokenizer tokenizer) {

0 commit comments

Comments
 (0)