Skip to content

Commit d9bf4be

Browse files
chqrlie authored and bvdberg committed
syntax: fix multiple issues with escape sequences and character constants
* accept 8-bit octal escape sequences * reject malformed hex escape sequences * make 8-bit character literal values consistent with type `char` * fix output of 8-bit octal and hex literals in `CharLiteral.printLiteral` * fix formatting bug in tools/c2cat.c2 * add tests for various issues
1 parent 4563d83 commit d9bf4be

13 files changed

+60
-11
lines changed

analyser_utils/ctv_analyser.c2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public fn Value get_value(const Expr* e) {
8888
break;
8989
case CharLiteral:
9090
const CharLiteral* c = cast<CharLiteral*>(e);
91-
result.uvalue = c.getValue();
91+
result.uvalue = cast<u64>(c.getValue()); // cast required if char is signed
9292
break;
9393
case StringLiteral:
9494
assert(0);

ast/char_literal.c2

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ public fn CharLiteral* CharLiteral.create(ast_context.Context* c, SrcLoc loc, u8
4141
return e;
4242
}
4343

44-
public fn u8 CharLiteral.getValue(const CharLiteral* e) {
45-
return cast<u8>(e.parent.parent.charLiteralBits.value);
44+
public fn char CharLiteral.getValue(const CharLiteral* e) {
45+
return cast<char>(e.parent.parent.charLiteralBits.value);
4646
}
4747

4848
fn void CharLiteral.print(const CharLiteral* e, string_buffer.Buf* out, u32 indent) {
@@ -55,14 +55,14 @@ fn void CharLiteral.print(const CharLiteral* e, string_buffer.Buf* out, u32 inde
5555
}
5656

5757
public fn void CharLiteral.printLiteral(const CharLiteral* e, string_buffer.Buf* out) {
58-
char c = cast<char>(e.parent.parent.charLiteralBits.value);
58+
u8 c = cast<u8>(e.parent.parent.charLiteralBits.value);
5959

6060
switch (e.parent.parent.charLiteralBits.radix) {
6161
case 8:
6262
out.print("'\\%o'", c);
6363
return;
6464
case 16:
65-
out.print("'\\x%x'", c);
65+
out.print("'\\x%02x'", c);
6666
return;
6767
default:
6868
break;

generator/c_generator_pure_call.c2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ fn Value Evaluator.get_value(Evaluator* eval, const Expr* e) {
6969
break;
7070
case CharLiteral:
7171
const CharLiteral* c = cast<CharLiteral*>(e);
72-
result.uvalue = c.getValue();
72+
result.uvalue = cast<u64>(c.getValue()); // cast required if char is signed
7373
break;
7474
case StringLiteral:
7575
assert(0);

parser/c2_tokenizer.c2

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,25 +1005,37 @@ fn u32 Tokenizer.lex_escaped_char(Tokenizer* t, Token* result) {
10051005
result.char_value = '\v';
10061006
break;
10071007
case 'x':
1008-
if (!isxdigit(input[1]) || !isxdigit(input[2])) {
1008+
if (!isxdigit(input[1])) {
10091009
t.cur++;
10101010
t.error(result, "expect hexadecimal number after '\\x'");
10111011
return 0;
10121012
}
1013+
// C consumes all hex digits after \x (at least one)
1014+
// C2 requires 2 hex digits, but rejects extra digits to simplify C generation
1015+
if (!isxdigit(input[2])) {
1016+
t.cur += 2;
1017+
t.error(result, "expect 2 hexadecimal digits after '\\x'");
1018+
return 0;
1019+
}
1020+
if (isxdigit(input[3])) {
1021+
t.cur += 3;
1022+
t.error(result, "too many digits in hexadecimal escape sequence '\\x'");
1023+
return 0;
1024+
}
10131025
result.char_value = hex2val(input[1]) * 16 + hex2val(input[2]);
10141026
result.radix = 16;
10151027
return 3;
10161028
default:
10171029
if (is_octal(input[0])) {
10181030
u32 offset = 0;
10191031
u32 value = 0;
1020-
while (is_octal(input[offset]) && offset <= 2) {
1032+
while (is_octal(input[offset]) && offset < 3) {
10211033
value *= 8;
10221034
value += cast<u32>(input[offset] - '0');
10231035
offset++;
10241036
}
10251037

1026-
if (value > 127) {
1038+
if (value > 255) {
10271039
t.cur++;
10281040
t.error(result, "octal escape sequence out of range");
10291041
return 0;

test/parser/char_escape_sequence.c2

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ char o1 = '\0';
1717
char o2 = '\07';
1818
char o3 = '\077';
1919
char o4 = '\177';
20+
char o5 = '\00';
21+
char o6 = '\000';
22+
char o7 = '\200';
23+
char o8 = '\377';
2024

2125
char h1 = '\x1e';
26+
char h2 = '\x00';
27+
char h3 = '\x80';
28+
char h4 = '\xFF';
2229

test/parser/char_hex_error1.c2

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// @warnings{no-unused}
2+
module test;
3+
4+
char c = '\x'; // @error{expect hexadecimal number after '\x'}
5+

test/parser/char_hex_error2.c2

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// @warnings{no-unused}
2+
module test;
3+
4+
char c = '\xx'; // @error{expect hexadecimal number after '\x'}
5+

test/parser/char_hex_error3.c2

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// @warnings{no-unused}
2+
module test;
3+
4+
char c = '\x1'; // @error{expect 2 hexadecimal digits after '\x'}
5+

test/parser/char_hex_error4.c2

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// @warnings{no-unused}
2+
module test;
3+
4+
char c = '\x1x'; // @error{expect 2 hexadecimal digits after '\x'}
5+

test/parser/char_hex_error5.c2

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// @warnings{no-unused}
2+
module test;
3+
4+
char c = '\x012'; // @error{too many digits in hexadecimal escape sequence '\x'}
5+

test/parser/char_hex_error6.c2

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// @warnings{no-unused}
2+
module test;
3+
4+
char c = '\x123'; // @error{too many digits in hexadecimal escape sequence '\x'}
5+
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// @warnings{no-unused}
22
module test;
33

4-
char c = '\200'; // @error{octal escape sequence out of range}
4+
char c = '\400'; // @error{octal escape sequence out of range}
55

tools/c2cat.c2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ fn void print_token(const Token* tok) {
175175
len = sprintf(tmp, "'\\%o'", tok.char_value);
176176
break;
177177
case 16:
178-
len = sprintf(tmp, "'\\x%%%x'", tok.char_value);
178+
len = sprintf(tmp, "'\\x%02x'", tok.char_value);
179179
break;
180180
default:
181181
if (ctype.isprint(tok.char_value)) {

0 commit comments

Comments
 (0)