c2lang
diff --git a/‎analyser/module_analyser_switch.c2
Lines changed: 29 additions & 7 deletions b/‎analyser/module_analyser_switch.c2
Lines changed: 29 additions & 7 deletions
diff --git a/‎ast/string_literal.c2
Lines changed: 9 additions & 3 deletions b/‎ast/string_literal.c2
Lines changed: 9 additions & 3 deletions
diff --git a/‎ast_utils/string_buffer.c2
Lines changed: 58 additions & 4 deletions b/‎ast_utils/string_buffer.c2
Lines changed: 58 additions & 4 deletions
diff --git a/‎common/utf8.c2
Lines changed: 97 additions & 0 deletions b/‎common/utf8.c2
Lines changed: 97 additions & 0 deletions
diff --git a/‎generator/c_generator_call.c2
Lines changed: 3 additions & 1 deletion b/‎generator/c_generator_call.c2
Lines changed: 3 additions & 1 deletion
diff --git a/‎generator/c_generator_expr.c2
Lines changed: 1 addition & 1 deletion b/‎generator/c_generator_expr.c2
Lines changed: 1 addition & 1 deletion
@@ -21,6 +21,7 @@ import init_checker;
 import src_loc local;
 import scope;
 import string_buffer;
+import string;
 
 fn void Analyser.analyseSwitchStmt(Analyser* ma, Stmt* s) {
     SwitchStmt* sw = cast<SwitchStmt*>(s);
@@ -58,6 +59,7 @@ fn void Analyser.analyseSwitchStmt(Analyser* ma, Stmt* s) {
 
     init_checker.Checker checker = init_checker.Checker.create(numCases);
 
+    bool ok = true;
     for (u32 i=0; i<numCases; i++) {
         SwitchCase* c = cases[i];
         bool is_last = (i+1 == numCases);
@@ -79,11 +81,12 @@ fn void Analyser.analyseSwitchStmt(Analyser* ma, Stmt* s) {
             }
         }
 
-        bool ok = ma.analyseCase(c, &checker, etd, is_string);
+        ok &= ma.analyseCase(c, &checker, etd, is_string);
         ma.scope.exit(ma.has_error);
-        if (!ok) return;
     }
 
+    if (!ok) return;
+
     ma.scope.exit(ma.has_error);
 
     if (etd) {
@@ -202,25 +205,44 @@ fn bool Analyser.analyseCaseCondition(Analyser* ma,
     } else {
         Expr* orig = c.getCond();
         QualType qt = ma.analyseExpr(c.getCond2(), true, RHS);
+
         if (qt.isInvalid()) return false;
         cond.setType(qt);
 
         if (is_string) {
+            u32 index;
             if (orig.isNil()) {
-                // TODO: check for duplicate nil
+                index = 0;
+                SrcLoc duplicate = checker.find(index);
+                if (duplicate) {
+                    ma.errorRange(cond.getLoc(), cond.getRange(), "duplicate case value nil");
+                    ma.note(duplicate, "previous case is here");
+                    return false;
+                }
             } else
             if (orig.isStringLiteral()) {
                 StringLiteral* lit = cast<StringLiteral*>(orig);
-                if (lit.getSize() > 255) {
-                    ma.error(cond.getLoc(), "string switch case string is loo long (max 254 bytes)");
+                u32 len = lit.getSize() - 1;
+                if (len > 255) {
+                    ma.error(cond.getLoc(), "string switch case string is loo long (max 255 bytes)");
+                    return false;
+                }
+                if (string.memchr(lit.getText(), 0, len)) {
+                    ma.error(cond.getLoc(), "case string value has embedded null byte");
+                    return false;
+                }
+                index = lit.getTextIndex();
+                SrcLoc duplicate = checker.find(index);
+                if (duplicate) {
+                    ma.errorRange(cond.getLoc(), cond.getRange(), "duplicate case string");
+                    ma.note(duplicate, "previous case is here");
                     return false;
                 }
-                // TODO: check for embedded null bytes
-                // TODO: check for duplicate string
             } else {
                 ma.error(cond.getLoc(), "string switch case can only have a string literal or nil as condition");
                 return false;
             }
+            checker.add(index, cond.getLoc());
         } else {
             if (!cond.isCtv()) {
                 ma.error(cond.getLoc(), "case condition is not compile-time constant");
 
@@ -29,24 +29,30 @@ public fn StringLiteral* StringLiteral.create(ast_context.Context* c, SrcLoc loc
     StringLiteral* e = c.alloc(sizeof(StringLiteral));
     e.base.init(ExprKind.StringLiteral, loc, 0, 1, 0, ValType.LValue);
     e.value = value;
-    e.size = len;   // len includes the null terminator
+    e.size = len + 1;   // size includes the null terminator
 #if AstStatistics
     Stats.addExpr(ExprKind.StringLiteral, sizeof(StringLiteral));
 #endif
-    e.base.setType(getStringType(len));
+    e.base.setType(getStringType(len + 1));
     return e;
 }
 
+// Returns the literal's text, obtained by passing e.value to idx2name().
 public fn const char* StringLiteral.getText(const StringLiteral* e) {
     return idx2name(e.value);
 }
 
+// Returns the raw index stored in e.value, i.e. the same value that
+// getText() hands to idx2name().
+public fn u32 StringLiteral.getTextIndex(const StringLiteral* e) {
+    u32 text_index = e.value;
+    return text_index;
+}
+
+// Returns e.size as recorded by StringLiteral.create(); that value counts
+// the null terminator.
 public fn u32 StringLiteral.getSize(const StringLiteral* e) {
     return e.size;
 }
 
+// Writes the literal to `out` as a double-quoted string: an opening quote,
+// then e.size - 1 bytes of the text passed through out.encodeBytes() with
+// '"' as the separator argument, then a closing quote.
 public fn void StringLiteral.printLiteral(const StringLiteral* e, string_buffer.Buf* out) {
-    out.print("\"%s\"", idx2name(e.value));
+    out.add1('"');
+    out.encodeBytes(idx2name(e.value), e.size - 1, '"');
+    out.add1('"');
 }
 
 fn void StringLiteral.print(const StringLiteral* e, string_buffer.Buf* out, u32 indent) {
 
@@ -19,6 +19,7 @@ import stdio local;
 import stdarg local;
 import stdlib local;
 import string local;
+import utf8;
 
 public type Buf struct @(opaque) {
     u32 capacity;
@@ -78,10 +79,7 @@ public fn void Buf.clear(Buf* buf) {
 }
 
+// Appends the `color` string to the buffer via buf.add(), but only when
+// buf.colors is set; otherwise the call does nothing.
 public fn void Buf.color(Buf* buf, const char* color) {
-    if (!buf.colors) return;
-
-    u32 len = cast<u32>(strlen(color));
-    buf.add2(color, len);
+    if (buf.colors) buf.add(color);
 }
 
 public fn void Buf.add1(Buf* buf, char c) {
@@ -191,3 +189,59 @@ public fn void Buf.stripNewline(Buf* buf) {
     }
 }
 
+// Encodes code point `cc` with utf8.encode() into a 4-byte scratch array and
+// appends the produced bytes to the buffer.
+// Returns the byte count reported by utf8.encode() (0 when it encodes nothing).
+public fn u32 Buf.add_utf8(Buf* buf, u32 cc) {
+    char[4] encoded;
+    u32 num_bytes = utf8.encode(encoded, elemsof(encoded), cc);
+    buf.add2(encoded, num_bytes);
+    return num_bytes;
+}
+
+// Appends `len` bytes starting at `p` to the buffer, replacing bytes that
+// need it with backslash escape sequences:
+//  - \a \b \f \n \r \t \v become a backslash followed by the matching letter
+//  - a backslash becomes two backslashes
+//  - '"' and '\'' are escaped when `sep` is 0 or equals that quote character,
+//    and copied unchanged otherwise
+//  - any other byte below ' ' or at/above 0x7F becomes a 3-digit octal escape
+//    (a zero byte is shortened to \0 when no decimal digit follows it)
+// Returns the difference in buf.size_ between exit and entry.
+public fn u32 Buf.encodeBytes(Buf* buf, const char *p, u32 len, char sep) {
+    u32 size = buf.size_;
+    // number of consecutive bytes already scanned that need no escaping and
+    // have not been appended yet; they are flushed in one add2() call
+    u32 copy = 0;
+    const char* end = p + len;
+    while (p < end) {
+        u8 c = *p++;
+        switch (c) {
+        case '\a':  c = 'a'; goto add_char;
+        case '\b':  c = 'b'; goto add_char;
+        case '\f':  c = 'f'; goto add_char;
+        case '\n':  c = 'n'; goto add_char;
+        case '\r':  c = 'r'; goto add_char;
+        case '\t':  c = 't'; goto add_char;
+        case '\v':  c = 'v'; goto add_char;
+        case '"':
+        case '\'':
+            // a quote that differs from a non-zero sep is copied verbatim
+            if (sep && sep != c) goto normal;
+            fallthrough;
+        case '\\':
+        add_char:
+            // p was already advanced past c, so the pending run starts at
+            // p - copy - 1
+            if (copy) buf.add2(p - copy - 1, copy);
+            buf.add1('\\');
+            buf.add1(c);
+            copy = 0;
+            break;
+        default:
+            if (c < ' ' || c >= 0x7F) {
+                char[4] arr;
+                if (copy) buf.add2(p - copy - 1, copy);
+                // backslash followed by three octal digits of c
+                arr[0] = '\\';
+                arr[1] = '0' + ((c >> 6) & 7);
+                arr[2] = '0' + ((c >> 3) & 7);
+                arr[3] = '0' + (c & 7);
+                u32 esc_len = 4;
+                // special case \0 not followed by another digit
+                if (c == 0 && (p == end || !(*p >= '0' && *p <= '9')))
+                    esc_len = 2;
+                buf.add2(arr, esc_len);
+                copy = 0;
+                break;
+            }
+        normal:
+            copy++;
+            break;
+        }
+    }
+    // flush the trailing run; here p == end, so it starts at p - copy
+    if (copy) buf.add2(p - copy, copy);
+    return buf.size_ - size;
+}
@@ -0,0 +1,97 @@
+/* Copyright 2022-2025 Charlie Gordon
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+module utf8;
+
+public const u32 MB_CUR_MAX = 6;    // UTF-8 uses just 4
+
+// Encodes code point `cc` as UTF-8 into `dest`, which has room for `max_len`
+// bytes. Returns the number of bytes written (1 to 4), or 0 when `cc` is
+// 0x110000 or above or when `max_len` is too small for the encoding.
+public fn u32 encode(char *dest, u32 max_len, u32 cc) {
+    // 1 byte: 0xxxxxxx
+    if (cc < 0x80) {
+        if (max_len < 1) return 0;
+        dest[0] = cast<char>(cc);
+        return 1;
+    }
+    // 2 bytes: 110xxxxx 10xxxxxx
+    if (cc < 0x800) {
+        if (max_len < 2) return 0;
+        dest[0] = cast<char>(0xC0 + (cc >> 6));
+        dest[1] = cast<char>(0x80 + (cc & 0x3F));
+        return 2;
+    }
+    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+    if (cc < 0x10000) {
+        if (max_len < 3) return 0;
+        dest[0] = cast<char>(0xE0 + (cc >> 12));
+        dest[1] = cast<char>(0x80 + ((cc >> 6) & 0x3F));
+        dest[2] = cast<char>(0x80 + (cc & 0x3F));
+        return 3;
+    }
+    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    if (cc < 0x110000) {
+        if (max_len < 4) return 0;
+        dest[0] = cast<char>(0xF0 + (cc >> 18));
+        dest[1] = cast<char>(0x80 + ((cc >> 12) & 0x3F));
+        dest[2] = cast<char>(0x80 + ((cc >> 6) & 0x3F));
+        dest[3] = cast<char>(0x80 + (cc & 0x3F));
+        return 4;
+    }
+    // value out of range
+    return 0;
+}
+
+// Tells whether `b` lies in the continuation-byte range 0x80..0xBF.
+fn bool isTrailByte(char b) {
+    return b >= 0x80 && b <= 0xBF;
+}
+
+// Decodes one UTF-8 sequence from `p`, reading at most `max_len` bytes.
+// On success stores the code point in `*pc` and returns the sequence length
+// (1 to 4). Returns 0, leaving `*pc` untouched, when `max_len` is 0, the
+// sequence is truncated or malformed, a 3- or 4-byte form is overlong, or
+// the value is 0x110000 or above.
+public fn u32 decode(const char *p, u32 max_len, u32* pc) {
+    if (max_len == 0) return 0;
+
+    u32 lead = cast<u8>(p[0]);
+    const char* tail = p + 1;
+
+    // single byte
+    if (lead < 0x80) {
+        *pc = lead;
+        return 1;
+    }
+
+    // invalid prefix byte or naked trailing byte
+    if (lead < 0xC2) return 0;
+
+    // two-byte sequence
+    if (lead < 0xE0) {
+        if (max_len < 2) return 0;
+        if (!isTrailByte(tail[0])) return 0;
+        *pc = ((lead - 0xC0) << 6) + (tail[0] - 0x80);
+        return 2;
+    }
+
+    // three-byte sequence
+    if (lead < 0xF0) {
+        if (max_len < 3) return 0;
+        if (!isTrailByte(tail[0]) || !isTrailByte(tail[1])) return 0;
+        u32 cp = ((lead - 0xE0) << 12) + ((tail[0] - 0x80) << 6) + (tail[1] - 0x80);
+        // values below 0x800 fit in a shorter form
+        if (cp < 0x800) return 0;
+        *pc = cp;
+        return 3;
+    }
+
+    // four-byte sequence
+    if (lead <= 0xF4) {
+        if (max_len < 4) return 0;
+        if (!isTrailByte(tail[0]) || !isTrailByte(tail[1]) || !isTrailByte(tail[2])) return 0;
+        u32 cp = ((lead - 0xF0) << 18) + ((tail[0] - 0x80) << 12) +
+                 ((tail[1] - 0x80) << 6) + (tail[2] - 0x80);
+        // reject values that fit in a shorter form and values past 0x10FFFF
+        if (cp < 0x10000 || cp >= 0x110000) return 0;
+        *pc = cp;
+        return 4;
+    }
+
+    // lead byte above 0xF4
+    return 0;
+}
@@ -20,6 +20,7 @@ import printf_utils;
 import source_mgr;
 import src_loc local;
 import string_buffer;
+import string;
 
 fn void Generator.emitCall(Generator* gen, string_buffer.Buf* out, Expr* e) {
     CallExpr* call = cast<CallExpr*>(e);
@@ -135,7 +136,8 @@ fn void Generator.emitCall(Generator* gen, string_buffer.Buf* out, Expr* e) {
                 FormatChanger fc = { format_text, &args[call_index+1], 0, 0, out }
                 out.add1('"');
                 printf_utils.parseFormat(format_text, on_format_specifier, &fc);
-                out.add(format_text + fc.last_offset);
+                out.encodeBytes(format_text + fc.last_offset,
+                                cast<u32>(string.strlen(format_text + fc.last_offset)), '"');
                 out.add1('"');
             } else {
                 gen.emitExpr(out, args[call_index]);
 
@@ -376,7 +376,7 @@ fn bool on_format_specifier(void* context, printf_utils.Specifier specifier, u32
     FormatChanger* fc = context;
 
     /* copy optional flags, width and precision */
-    fc.out.add2(fc.format + fc.last_offset, offset - fc.last_offset);
+    fc.out.encodeBytes(fc.format + fc.last_offset, offset - fc.last_offset, '"');
 
     fc.idx += stars;
     QualType qt = fc.args[fc.idx].getType();