From 5afbc65414b31d16aa32036a4ad3d6aa3eac24bc Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 17:04:41 +0300 Subject: [PATCH 01/11] fix(backend): float `!=` must be UNORDERED so `nan != nan` is true [F0.9] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit emitCmpNe lowered float `!=` to `LLVMRealONE` (ordered not-equal), which is false when either operand is NaN. That made `nan != nan` false in native code — breaking the canonical `x != x` NaN test, making `!=` non-complementary with `==` for NaN, and disagreeing with the interpreter. Change the float predicate to `LLVMRealUNE` (unordered not-equal): true if either operand is NaN OR they are unequal. For all non-NaN operands `UNE` ≡ `ONE`, so only NaN-involving comparisons change (toward correct). The integer predicate (`LLVMIntNE`) and `emitCmpEq` (`OEQ`) are unchanged, so `nan == nan` stays false and `!=` is now the exact complement of `==`. - Regression: examples/0150-types-float-ne-unordered-nan.sx (fails before, passes after; also pins #run/comptime == runtime agreement). - specs.md: documents float comparison / NaN semantics (Operators). - Resolves issue 0091 (issues/0091-float-ne-ordered-nan.md). 
--- examples/0150-types-float-ne-unordered-nan.sx | 35 ++++++++ .../0150-types-float-ne-unordered-nan.exit | 1 + .../0150-types-float-ne-unordered-nan.stderr | 1 + .../0150-types-float-ne-unordered-nan.stdout | 8 ++ issues/0091-float-ne-ordered-nan.md | 79 +++++++++++++++++++ specs.md | 9 +++ src/backend/llvm/ops.zig | 5 +- 7 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 examples/0150-types-float-ne-unordered-nan.sx create mode 100644 examples/expected/0150-types-float-ne-unordered-nan.exit create mode 100644 examples/expected/0150-types-float-ne-unordered-nan.stderr create mode 100644 examples/expected/0150-types-float-ne-unordered-nan.stdout create mode 100644 issues/0091-float-ne-ordered-nan.md diff --git a/examples/0150-types-float-ne-unordered-nan.sx b/examples/0150-types-float-ne-unordered-nan.sx new file mode 100644 index 0000000..0e8f98b --- /dev/null +++ b/examples/0150-types-float-ne-unordered-nan.sx @@ -0,0 +1,35 @@ +// Float `!=` is UNORDERED not-equal: `nan != nan` is true (the canonical +// `x != x` NaN idiom), and `!=` is the exact complement of `==` for every +// float input — including NaN, where `nan == nan` is false (ordered `==`). +// For all non-NaN operands unordered `!=` matches ordered `!=`, so finite +// comparisons are unchanged. The native backend agrees with the interpreter. +// +// Regression (issue 0091): the LLVM backend lowered float `!=` to ordered +// not-equal (LLVMRealONE), so `nan != nan` was false in native code. +#import "modules/std.sx"; + +main :: () { + // Produce a genuine NaN without any numeric-limit accessor: 0.0 / 0.0. + z := 0.0; + nan := z / z; + + // The fix: `!=` is unordered, `==` is ordered. + print("nan != nan: {}\n", nan != nan); // true + print("nan == nan: {}\n", nan == nan); // false + print("nan != 1.0: {}\n", nan != 1.0); // true + print("nan == 1.0: {}\n", nan == 1.0); // false + + // Complementarity holds for finite operands too (unchanged behavior). 
+ print("1.0 != 2.0: {}\n", 1.0 != 2.0); // true + print("1.0 != 1.0: {}\n", 1.0 != 1.0); // false + print("2.0 != 2.0: {}\n", 2.0 != 2.0); // false + + // Native codegen converges with the comptime interpreter. + print("comptime nan != nan: {}\n", #run nan_ne_nan()); +} + +nan_ne_nan :: () -> bool { + z := 0.0; + n := z / z; + return n != n; +} diff --git a/examples/expected/0150-types-float-ne-unordered-nan.exit b/examples/expected/0150-types-float-ne-unordered-nan.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0150-types-float-ne-unordered-nan.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0150-types-float-ne-unordered-nan.stderr b/examples/expected/0150-types-float-ne-unordered-nan.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0150-types-float-ne-unordered-nan.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0150-types-float-ne-unordered-nan.stdout b/examples/expected/0150-types-float-ne-unordered-nan.stdout new file mode 100644 index 0000000..051f37d --- /dev/null +++ b/examples/expected/0150-types-float-ne-unordered-nan.stdout @@ -0,0 +1,8 @@ +nan != nan: true +nan == nan: false +nan != 1.0: true +nan == 1.0: false +1.0 != 2.0: true +1.0 != 1.0: false +2.0 != 2.0: false +comptime nan != nan: true diff --git a/issues/0091-float-ne-ordered-nan.md b/issues/0091-float-ne-ordered-nan.md new file mode 100644 index 0000000..fcfab78 --- /dev/null +++ b/issues/0091-float-ne-ordered-nan.md @@ -0,0 +1,79 @@ +# 0091 — float `!=` lowers to ORDERED not-equal, so `nan != nan` is false in native code + +> **RESOLVED** (F0.9). Root cause: `emitCmpNe` in `src/backend/llvm/ops.zig` +> passed `c.LLVMRealONE` (ordered not-equal) as the float predicate. Fix: +> `c.LLVMRealONE` → `c.LLVMRealUNE` (unordered not-equal). The integer predicate +> `LLVMIntNE` and `emitCmpEq` (`OEQ`) are unchanged. 
For all non-NaN operands +> `UNE` ≡ `ONE`, so only NaN-involving float `!=` changes (toward correct). +> Regression test: `examples/0150-types-float-ne-unordered-nan.sx`. Spec note +> added to `specs.md` (Operators → "Float comparison and NaN"). + +## Symptom + +The LLVM backend lowers float `!=` to `LLVMRealONE` (ordered not-equal), which +returns **false** when either operand is NaN. Consequences: + +- Observed: `nan != nan` evaluates to **false** (via `sx run`). +- Expected: **true** — `!=` must be the logical complement of `==`, and the + canonical NaN-detection idiom `x != x` must be true for a NaN. + +This makes `==` and `!=` non-complementary for NaN: `nan == nan` is false +(correct, `OEQ`) AND `nan != nan` is also false (wrong, `ONE`). It silently +breaks the standard NaN check used throughout numerical code +(`if x != x { /* NaN */ }`): NaN is never detected at runtime. + +## Reproduction (accessor-free) + +NaN is produced as `0.0 / 0.0` — no numeric-limit accessor required: + +```sx +#import "modules/std.sx"; +main :: () { + z := 0.0; + n := z / z; // NaN + print("ne={} eq={}\n", n != n, n == n); // observed: ne=false eq=false +} // correct: ne=true eq=false +``` + +`./zig-out/bin/sx run repro.sx` (the snippet above saved as `repro.sx`) printed `ne=false eq=false` before the fix. +After the fix it prints `ne=true eq=false`. Non-NaN comparisons are unchanged +(`1.0 != 2.0` true, `1.0 != 1.0` false). The `#run`/comptime path (JIT-compiled +through the same backend) and the native runtime path agree in both states. + +## Root cause + +`src/backend/llvm/ops.zig`, `emitCmpNe`: + +```zig +pub fn emitCmpNe(self: Ops, instruction: *const Inst, bin: BinOp) void { + self.e.emitCmp(bin, instruction.ty, c.LLVMIntNE, c.LLVMRealONE); + // ^^^^^^^^^^^^^^^ ordered +} +``` + +`LLVMRealONE` = ordered not-equal (false if either operand is NaN). The IEEE/C +`!=` is `LLVMRealUNE` (unordered not-equal → true if either is NaN). 
For all +NON-NaN operands `UNE` and `ONE` are identical, so the fix changes behavior only +for the NaN case — bringing native codegen in line with `==` (`OEQ`) and with +the interpreter's `evalCmp` (`.ne => lf != rf`, which is unordered in Zig). + +`emitCmpNe` is the sole float-`!=` lowering site (dispatched from +`src/ir/emit_llvm.zig` `cmp_ne` → `ops().emitCmpNe`). There is no second backend +path (no `fcmp one` appears in any `.ir` snapshot; `src/codegen.zig` has no +float-`!=` lowering). + +## Fix + +```zig +pub fn emitCmpNe(self: Ops, instruction: *const Inst, bin: BinOp) void { + self.e.emitCmp(bin, instruction.ty, c.LLVMIntNE, c.LLVMRealUNE); +} +``` + +## Regression test + +`examples/0150-types-float-ne-unordered-nan.sx` asserts (runtime, exit 0): +`nan != nan` true, `nan == nan` false, `nan != 1.0` true, `nan == 1.0` false, +the finite cases (`1.0 != 2.0` true, `1.0 != 1.0` false, `2.0 != 2.0` false), +and that the `#run` comptime `nan != nan` matches the runtime one. It fails on +the pre-fix compiler (`nan != nan: false`) and passes after. diff --git a/specs.md b/specs.md index 93e175d..3b703db 100644 --- a/specs.md +++ b/specs.md @@ -85,6 +85,15 @@ GLSL; | `<<=` | left shift assign | | `>>=` | right shift assign | +**Float comparison and NaN.** Float `==` is *ordered* and `!=` is *unordered*, +matching IEEE 754: `==` is false whenever either operand is NaN (`nan == x` is +false for every `x`, including `nan`), and `!=` is true whenever either operand +is NaN (`nan != x` is true for every `x`, including `nan`). So `!=` is the exact +complement of `==` for all float inputs, and the canonical NaN test `x != x` is +true exactly when `x` is NaN. The ordered relations `<`, `<=`, `>`, `>=` are all +false when either operand is NaN. For all non-NaN operands these reduce to the +ordinary comparisons. Native codegen and the comptime interpreter agree on this. 
+ ### Delimiters and Punctuation | Token | Meaning | diff --git a/src/backend/llvm/ops.zig b/src/backend/llvm/ops.zig index 6542735..df80e84 100644 --- a/src/backend/llvm/ops.zig +++ b/src/backend/llvm/ops.zig @@ -274,7 +274,10 @@ pub const Ops = struct { } pub fn emitCmpNe(self: Ops, instruction: *const Inst, bin: BinOp) void { - self.e.emitCmp(bin, instruction.ty, c.LLVMIntNE, c.LLVMRealONE); + // Float `!=` is UNORDERED not-equal: true if either operand is NaN, so + // `nan != nan` is true (IEEE 754 / the `x != x` NaN idiom) and `!=` stays + // the exact complement of `==` (OEQ). UNE == ONE for all non-NaN operands. + self.e.emitCmp(bin, instruction.ty, c.LLVMIntNE, c.LLVMRealUNE); } pub fn emitCmpLt(self: Ops, instruction: *const Inst, bin: BinOp) void { From 0dbdc530baddd408e141f00bae7427652b5bd4b9 Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 17:40:42 +0300 Subject: [PATCH 02/11] feat(lang): backtick raw-identifier escape + #import c foreign-name exemption [F0.6] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reserved type-name spellings (s1, s2, u8, …) can now be used as value identifiers two ways, resolving issue 0089: 1. Backtick raw identifier: a leading backtick (`s2) lexes to an .identifier token carrying a new Token.is_raw flag, with the backtick excluded from the text. A raw identifier is never type-classified — the parser skips Type.fromName for it — so it is always a value identifier. The flag threads to VarDecl.is_raw / Param.is_raw at binding sites, and the reserved-type-name check (UnknownTypeChecker) skips raw bindings. Because the token tag stays .identifier, the escape works in every position (local, global, param, field, fn name, struct member, later reference) with no per-site parser change. 2. #import c exemption: c_import.zig synthesizes foreign decls with Param.is_raw = true, so generated C param names that collide with reserved type names (s1, s2) import unedited. 
A bare reserved-name binding in sx still errors (issue 0076 preserved): the is_raw-gated skip only fires for backtick / foreign names, and a raw binding's address-of / autoref lowering stays correct because every occurrence is an .identifier, never a .type_expr. Tests: examples/0151 (backtick, every position), examples/1220 (foreign exemption, compiled+run), lexer unit tests. 1119 (bare-binding rejection) stays green. specs.md + readme.md updated. --- .../0151-types-backtick-raw-identifier.sx | 33 +++++++ .../1220-ffi-c-import-reserved-name-params.c | 9 ++ .../1220-ffi-c-import-reserved-name-params.h | 5 + .../1220-ffi-c-import-reserved-name-params.sx | 20 ++++ .../0151-types-backtick-raw-identifier.exit | 1 + .../0151-types-backtick-raw-identifier.stderr | 1 + .../0151-types-backtick-raw-identifier.stdout | 4 + ...220-ffi-c-import-reserved-name-params.exit | 1 + ...0-ffi-c-import-reserved-name-params.stderr | 1 + ...0-ffi-c-import-reserved-name-params.stdout | 3 + issues/0089-backtick-raw-identifier.md | 91 +++++++++++++++++++ readme.md | 13 +++ specs.md | 44 +++++++++ src/ast.zig | 8 ++ src/c_import.zig | 4 + src/ir/semantic_diagnostics.zig | 8 +- src/lexer.zig | 50 ++++++++++ src/parser.zig | 29 +++--- src/token.zig | 6 ++ 19 files changed, 317 insertions(+), 14 deletions(-) create mode 100644 examples/0151-types-backtick-raw-identifier.sx create mode 100644 examples/1220-ffi-c-import-reserved-name-params.c create mode 100644 examples/1220-ffi-c-import-reserved-name-params.h create mode 100644 examples/1220-ffi-c-import-reserved-name-params.sx create mode 100644 examples/expected/0151-types-backtick-raw-identifier.exit create mode 100644 examples/expected/0151-types-backtick-raw-identifier.stderr create mode 100644 examples/expected/0151-types-backtick-raw-identifier.stdout create mode 100644 examples/expected/1220-ffi-c-import-reserved-name-params.exit create mode 100644 examples/expected/1220-ffi-c-import-reserved-name-params.stderr create mode 100644 
examples/expected/1220-ffi-c-import-reserved-name-params.stdout create mode 100644 issues/0089-backtick-raw-identifier.md diff --git a/examples/0151-types-backtick-raw-identifier.sx b/examples/0151-types-backtick-raw-identifier.sx new file mode 100644 index 0000000..186bfbe --- /dev/null +++ b/examples/0151-types-backtick-raw-identifier.sx @@ -0,0 +1,33 @@ +// Backtick raw-identifier escape: a leading backtick makes the following +// identifier RAW — its text excludes the backtick and it is NEVER +// type-classified, so a reserved type-name spelling (`s2`, `u8`, …) can be +// used as a value identifier. Exercised in every position: global, local, +// param, struct field + member access, function name + call, and a later +// reference. A *bare* `s2` is still the reserved type name (see +// examples/1119), so the escape is the only way to spell these as values. +// Regression (issue 0089). +#import "modules/std.sx"; + +// Global named with a reserved type spelling. +`u8 := 100; + +// Function whose name is a reserved type spelling, with a reserved-name param. +`s2 :: (`s1: s64) -> s64 { return `s1 * 2; } + +Point :: struct { + `s2: f64; // field name is a reserved type spelling + `u16: s64; +} + +main :: () { + // Local with a reserved type spelling; later reference resolves to it. + `s64 := 7; + `s64 = `s64 + 1; + print("local = {}\n", `s64); + + print("global = {}\n", `u8); + print("fn = {}\n", `s2(21)); // calls the `s2 function + + p := Point.{ `s2 = 2.5, `u16 = 9 }; + print("field = {} {}\n", p.`s2, p.`u16); +} diff --git a/examples/1220-ffi-c-import-reserved-name-params.c b/examples/1220-ffi-c-import-reserved-name-params.c new file mode 100644 index 0000000..560ab3b --- /dev/null +++ b/examples/1220-ffi-c-import-reserved-name-params.c @@ -0,0 +1,9 @@ +#include "1220-ffi-c-import-reserved-name-params.h" + +int ffi_pick(int s1, int s2, int which) { + return which == 0 ? 
s1 : s2; +} + +int ffi_sum(int s1, int s2) { + return s1 + s2; +} diff --git a/examples/1220-ffi-c-import-reserved-name-params.h b/examples/1220-ffi-c-import-reserved-name-params.h new file mode 100644 index 0000000..33929c6 --- /dev/null +++ b/examples/1220-ffi-c-import-reserved-name-params.h @@ -0,0 +1,5 @@ +/* Foreign C declarations whose parameter names (`s1`, `s2`) collide with + sx's reserved signed-int type spellings. The `#import c` exemption must + accept these generated names unedited (issue 0089). */ +int ffi_pick(int s1, int s2, int which); +int ffi_sum(int s1, int s2); diff --git a/examples/1220-ffi-c-import-reserved-name-params.sx b/examples/1220-ffi-c-import-reserved-name-params.sx new file mode 100644 index 0000000..dd25659 --- /dev/null +++ b/examples/1220-ffi-c-import-reserved-name-params.sx @@ -0,0 +1,20 @@ +// `#import c` foreign-name exemption: a C header's parameter names `s1`/`s2` +// collide with sx's reserved signed-int type spellings. Foreign decls are +// treated as RAW — their names are never type-classified nor reserved-checked +// — so the generated `#foreign` bindings import and call without hand-edits +// (no backticks needed). Before issue 0089 this errored with "'s1' is a +// reserved type name and cannot be used as an identifier". +// Regression (issue 0089). 
+#import "modules/std.sx"; + +#import c { + #include "1220-ffi-c-import-reserved-name-params.h"; + #source "1220-ffi-c-import-reserved-name-params.c"; +}; + +main :: () -> s32 { + print("pick(10,20,0) = {}\n", ffi_pick(10, 20, 0)); + print("pick(10,20,1) = {}\n", ffi_pick(10, 20, 1)); + print("sum(10,20) = {}\n", ffi_sum(10, 20)); + 0 +} diff --git a/examples/expected/0151-types-backtick-raw-identifier.exit b/examples/expected/0151-types-backtick-raw-identifier.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0151-types-backtick-raw-identifier.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0151-types-backtick-raw-identifier.stderr b/examples/expected/0151-types-backtick-raw-identifier.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0151-types-backtick-raw-identifier.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0151-types-backtick-raw-identifier.stdout b/examples/expected/0151-types-backtick-raw-identifier.stdout new file mode 100644 index 0000000..6ca57e4 --- /dev/null +++ b/examples/expected/0151-types-backtick-raw-identifier.stdout @@ -0,0 +1,4 @@ +local = 8 +global = 100 +fn = 42 +field = 2.500000 9 diff --git a/examples/expected/1220-ffi-c-import-reserved-name-params.exit b/examples/expected/1220-ffi-c-import-reserved-name-params.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/1220-ffi-c-import-reserved-name-params.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/1220-ffi-c-import-reserved-name-params.stderr b/examples/expected/1220-ffi-c-import-reserved-name-params.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/1220-ffi-c-import-reserved-name-params.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/1220-ffi-c-import-reserved-name-params.stdout b/examples/expected/1220-ffi-c-import-reserved-name-params.stdout new file mode 100644 index 0000000..78b2f72 --- /dev/null +++ 
b/examples/expected/1220-ffi-c-import-reserved-name-params.stdout @@ -0,0 +1,3 @@ +pick(10,20,0) = 10 +pick(10,20,1) = 20 +sum(10,20) = 30 diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md new file mode 100644 index 0000000..25304fe --- /dev/null +++ b/issues/0089-backtick-raw-identifier.md @@ -0,0 +1,91 @@ +# 0089 — backtick raw-identifier escape + `#import c` foreign-name exemption from the reserved-type-name rule + +> **✅ RESOLVED** (foundation step F0.6). Two mechanisms, per Agra's design ruling: +> +> 1. **Backtick raw identifier.** The lexer recognises a leading backtick +> (`` `s2 ``) and emits an `.identifier` token whose span excludes the backtick, +> carrying a new `Token.is_raw` flag ([src/lexer.zig], [src/token.zig]). A raw +> identifier is NEVER type-classified — the parser skips `Type.fromName` for it +> in expression position ([src/parser.zig] `parsePrimary`), so it is always a +> value identifier. The flag threads to `VarDecl.is_raw` / `Param.is_raw` +> ([src/ast.zig]) at the binding sites, and `UnknownTypeChecker` skips the +> reserved-name check for raw bindings ([src/ir/semantic_diagnostics.zig]). +> Because the token tag stays `.identifier`, the escape works in every position +> (local, global, param, field, function name, struct member, later reference) +> with no per-site parser change. +> 2. **`#import c` foreign-name exemption.** `c_import.zig` synthesizes foreign +> `#foreign` decls with `Param.is_raw = true`, so generated C param names that +> collide with reserved type names (`s1`, `s2`) import unedited. +> +> A *bare* reserved-name binding in sx still errors (issue 0076 preserved): the +> `is_raw`-gated skip only fires for backtick / foreign names. 
Regression tests: +> `examples/0151-types-backtick-raw-identifier.sx` (backtick, every position), +> `examples/1220-ffi-c-import-reserved-name-params.{sx,h,c}` (foreign exemption), +> `examples/1119-diagnostics-reserved-type-name-as-identifier.sx` (negative — +> bare binding still rejected). Backtick lexer unit tests in `src/lexer.zig`. +> +> The original report is preserved below. + +--- + +## Symptom + +Importing non-sx source whose names collide with sx reserved type names is +rejected. `library/modules/stb_truetype.sx` is a `#import c { ... }` block over a +vendored C header (`vendors/stb_truetype/stb_truetype.h`); C identifiers `s1`, +`s2` (which collide with sx's signed-int type keywords `s1`..`sN`) produce: + +``` +error: 's1' is a reserved type name and cannot be used as an identifier +error: 's2' is a reserved type name and cannot be used as an identifier +``` + +The user cannot hand-edit these — they are generated from the vendored C header. +Separately, sx-authored code has NO way to deliberately use a reserved-name-spelled +identifier even when it wants to. + +## Root cause + +The parser classifies any reserved-type-name spelling (`s2`, `u8`, `f64`, …) as a +`.type_expr` via `name_class.Type.fromName`, never as an `.identifier`. The F0.1 / +issue-0076 fix added `UnknownTypeChecker.checkBindingName` +(`src/ir/semantic_diagnostics.zig`) to reject a value binding / param spelled as +a reserved type name (the `.type_expr`-vs-`.identifier` mismatch otherwise breaks +address-of / autoref lowering). F0.1 deliberately extended this check to imported +declarations — which is what now fires on the C-imported `s1`/`s2`. + +## Desired behaviour (Agra ruling) + +External / imported source does NOT need to conform to sx naming standards. Two +mechanisms: + +1. 
**Auto-exempt imports.** `#import c` (and other foreign) declarations are + treated as RAW identifiers: foreign names are never type-classified and never + reserved-checked, so generated bindings "just work" with zero user edits. +2. **Backtick raw-identifier for sx code.** A leading backtick makes the following + identifier raw — an identifier that is NEVER type-classified, so it bypasses the + reserved-name rule: + + ```sx + `s2 := 2.5; // OK — identifier "s2", distinct from the s2 signed-int type + s2 := 2.5; // ERROR — bare s2 is still the reserved type name + ``` + + Prefix form (single leading backtick on the identifier). The raw identifier's + TEXT is `s2` (the backtick is not part of the name). A bare `s2` used as a TYPE + remains the signed-int type. + +## Reproduction + +sx-side (minimal): + +```sx +#import "modules/std.sx"; +main :: () { + `s2 := 2.5; // must compile: identifier s2 = 2.5 + print("{}\n", `s2); // 2.5 +} +``` + +Import-side: a `#import c` block over a header declaring `int s1, s2;` (or +`stb_truetype.sx`) must NOT emit the reserved-type-name error. diff --git a/readme.md b/readme.md index 16d0632..a1ec6dd 100644 --- a/readme.md +++ b/readme.md @@ -105,6 +105,19 @@ y : s32 = 0; // explicit type z : s32 = ---; // uninitialized ``` +Builtin type names (`s2`, `u8`, `bool`, `string`, …) are reserved and can't be used +as bare value identifiers. A leading backtick escapes one into a raw identifier — its +text drops the backtick and it's never read as a type — so reserved spellings (and +keywords) work as ordinary names: + +```sx +`s2 := 2.5; // value identifier "s2", distinct from the s2 type +print("{}\n", `s2); // 2.5 +``` + +Foreign declarations from `#import c { … }` are exempt automatically: C names that +collide with reserved type names (e.g. `s1`, `s2`) import unedited. 
+ ### Structs ```sx diff --git a/specs.md b/specs.md index 3b703db..fe0290b 100644 --- a/specs.md +++ b/specs.md @@ -13,6 +13,50 @@ Line comments start with `//` and extend to end of line. - UPPER_SNAKE_CASE for constants: `SOME_INT`, `SOME_STR` - PascalCase for types: `Foo` +#### Reserved type names + +A spelling that names a builtin type — the arbitrary-width integers `s1`..`s64` / +`u1`..`u64`, plus `bool`, `string`, `void`, `f32`, `f64`, `usize`, `isize`, `Any` — +is reserved. A bare value binding (`:=` / typed local / parameter name) spelled as +one of these is rejected: such a spelling parses as a *type*, not a value, so the +address-of / autoref paths would mis-lower it. + +```sx +s2 := 2.5; // ERROR: 's2' is a reserved type name and cannot be used as an identifier +``` + +#### Backtick raw-identifier escape + +A leading backtick makes the following identifier **raw**: its text excludes the +backtick and it is never type-classified, so a reserved-type-name spelling can be +used as an ordinary value identifier. The backtick is required at every occurrence +of that identifier (declaration and each reference); a *bare* `s2` is still the +signed-int type. + +```sx +`s2 := 2.5; // OK — value identifier "s2", distinct from the s2 type +print("{}\n", `s2); // 2.5 +``` + +The escape works in every identifier position — local, global, parameter, struct +field, function name, and a later reference: + +```sx +`u8 := 100; // global +`s2 :: (`s1: s64) -> s64 { `s1 } // function name + parameter +P :: struct { `s2: f64; } // struct field +``` + +A backtick may also escape a keyword spelling (`` `for ``, `` `struct ``), yielding +an identifier with that text. + +**`#import c` exemption.** Foreign declarations synthesized by an `#import c { … }` +block are treated as raw automatically: a generated C parameter or name that +collides with a reserved type name (e.g. `s1`, `s2`) imports unedited, with no +backticks and no reserved-name error. 
The exemption is scoped to the foreign decls — +it does not make a foreign `s2` usable as the sx `s2` type, nor relax the rule for +hand-written sx code. + ### Literals | Kind | Examples | Type | diff --git a/src/ast.zig b/src/ast.zig index fa085e5..f6c7251 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -148,6 +148,10 @@ pub const Param = struct { /// Optional default value expression. When the caller omits this /// parameter, lowering substitutes this expression in its place. default_expr: ?*Node = null, + /// True when the param name was written as a backtick raw identifier + /// (`` `s2 ``) or synthesized by a `#import c` foreign decl. A raw name is + /// exempt from the reserved-type-name binding check (issue 0089). + is_raw: bool = false, }; pub const Block = struct { @@ -303,6 +307,10 @@ pub const VarDecl = struct { is_foreign: bool = false, foreign_lib: ?[]const u8 = null, foreign_name: ?[]const u8 = null, + /// True when the binding name was written as a backtick raw identifier + /// (`` `s2 := … ``). A raw name is exempt from the reserved-type-name + /// binding check (issue 0089). + is_raw: bool = false, }; pub const Assignment = struct { diff --git a/src/c_import.zig b/src/c_import.zig index 2ce1769..29e21a2 100644 --- a/src/c_import.zig +++ b/src/c_import.zig @@ -127,6 +127,10 @@ pub fn processCImport( .name = pname, .name_span = .{ .start = 0, .end = 0 }, .type_expr = ptype_node, + // Foreign C param names (`s1`, `s2`, …) are RAW — exempt from + // the reserved-type-name binding check; generated bindings + // must import without hand-edits (issue 0089). + .is_raw = true, }); } diff --git a/src/ir/semantic_diagnostics.zig b/src/ir/semantic_diagnostics.zig index f0f5bb3..03814df 100644 --- a/src/ir/semantic_diagnostics.zig +++ b/src/ir/semantic_diagnostics.zig @@ -117,7 +117,7 @@ pub const UnknownTypeChecker = struct { switch (node.data) { // ── Binding-introducing nodes: check the name(s), then recurse. 
── .var_decl => |vd| { - self.checkBindingName(vd.name, vd.name_span); + if (!vd.is_raw) self.checkBindingName(vd.name, vd.name_span); if (vd.value) |v| self.checkBindingNames(v); }, .destructure_decl => |dd| { @@ -133,7 +133,7 @@ pub const UnknownTypeChecker = struct { self.checkBindingNames(lm.body); }, .param => |p| { - self.checkBindingName(p.name, p.name_span); + if (!p.is_raw) self.checkBindingName(p.name, p.name_span); if (p.default_expr) |de| self.checkBindingNames(de); }, .if_expr => |ie| { @@ -316,7 +316,9 @@ pub const UnknownTypeChecker = struct { /// (a lambda default), so recurse into it. fn checkParamNames(self: UnknownTypeChecker, params: []const ast.Param) void { for (params) |p| { - self.checkBindingName(p.name, p.name_span); + // A backtick raw param (`` (`s2: T) ``) or a `#import c` foreign + // param is exempt from the reserved-type-name rule (issue 0089). + if (!p.is_raw) self.checkBindingName(p.name, p.name_span); if (p.default_expr) |de| self.checkBindingNames(de); } } diff --git a/src/lexer.zig b/src/lexer.zig index 1cbc901..1f2cffd 100644 --- a/src/lexer.zig +++ b/src/lexer.zig @@ -50,6 +50,24 @@ pub const Lexer = struct { return self.lexString(start); } + // Raw-identifier escape: `ident — a leading backtick forces the + // following identifier to be RAW (never type-classified, never + // reserved-checked). The emitted token's span excludes the backtick, so + // its text is the bare name, and a backticked keyword spelling + // (`` `for ``, `` `struct ``) is still an `.identifier`, never a keyword. 
+ if (c == '`') { + const id_start = start + 1; + if (id_start < self.source.len and isIdentStart(self.source[id_start])) { + self.index = id_start; + var tok = self.lexIdentifier(id_start); + tok.tag = .identifier; + tok.is_raw = true; + return tok; + } + self.index += 1; + return self.makeToken(.invalid, start, self.index); + } + // Directives: #import, #insert, #run, #builtin, #foreign, #library, #string if (c == '#') { @@ -485,6 +503,38 @@ test "lex type-like identifiers" { } } +test "lex backtick raw identifier" { + const source: [:0]const u8 = "`s2 `string `for"; + var lex = Lexer.init(source); + // Each is an `.identifier` carrying `is_raw`, even a keyword spelling + // (`for`), with text that excludes the leading backtick. + const t1 = lex.next(); + try std.testing.expectEqual(Tag.identifier, t1.tag); + try std.testing.expect(t1.is_raw); + try std.testing.expectEqualStrings("s2", t1.slice(source)); + const t2 = lex.next(); + try std.testing.expectEqual(Tag.identifier, t2.tag); + try std.testing.expect(t2.is_raw); + try std.testing.expectEqualStrings("string", t2.slice(source)); + const t3 = lex.next(); + try std.testing.expectEqual(Tag.identifier, t3.tag); + try std.testing.expect(t3.is_raw); + try std.testing.expectEqualStrings("for", t3.slice(source)); + try std.testing.expectEqual(Tag.eof, lex.next().tag); +} + +test "lex bare identifier is not raw" { + var lex = Lexer.init("s2"); + const tok = lex.next(); + try std.testing.expectEqual(Tag.identifier, tok.tag); + try std.testing.expect(!tok.is_raw); +} + +test "lex lone backtick is invalid" { + var lex = Lexer.init("` 5"); + try std.testing.expectEqual(Tag.invalid, lex.next().tag); +} + test "lex hash_run" { var lex = Lexer.init("#run"); try std.testing.expectEqual(Tag.hash_run, lex.next().tag); diff --git a/src/parser.zig b/src/parser.zig index 1aa35d3..a18a7e9 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -146,6 +146,7 @@ pub const Parser = struct { } const name = self.tokenSlice(self.current); 
const name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end }; + const name_is_raw = self.current.is_raw; self.advance(); // IDENT :: ... @@ -158,7 +159,7 @@ pub const Parser = struct { // IDENT : type = value; (typed variable) if (self.current.tag == .colon) { self.advance(); - return self.parseTypedBinding(name, name_span, start); + return self.parseTypedBinding(name, name_span, start, name_is_raw); } // IDENT := value; (variable) @@ -166,7 +167,7 @@ pub const Parser = struct { self.advance(); const value = try self.parseExpr(); try self.expectSemicolonAfter(value); - return try self.createNode(start, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = null, .value = value } }); + return try self.createNode(start, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = null, .value = value, .is_raw = name_is_raw } }); } return self.fail("expected '::', ':=', or ':' after identifier"); @@ -383,7 +384,7 @@ pub const Parser = struct { } }); } - fn parseTypedBinding(self: *Parser, name: []const u8, name_span: ast.Span, start_pos: u32) anyerror!*Node { + fn parseTypedBinding(self: *Parser, name: []const u8, name_span: ast.Span, start_pos: u32, name_is_raw: bool) anyerror!*Node { // After `name :` // Parse type const type_node = try self.parseTypeExpr(); @@ -401,13 +402,13 @@ pub const Parser = struct { self.advance(); const value = try self.parseExpr(); try self.expectSemicolonAfter(value); - return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = type_node, .value = value } }); + return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = type_node, .value = value, .is_raw = name_is_raw } }); } if (self.current.tag == .semicolon) { // name : type; (default-initialized variable) self.advance(); - return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .name_span = name_span, 
.type_annotation = type_node, .value = null } }); + return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = type_node, .value = null, .is_raw = name_is_raw } }); } if (self.current.tag == .hash_foreign) { @@ -433,6 +434,7 @@ pub const Parser = struct { .is_foreign = true, .foreign_lib = lib_ref, .foreign_name = c_name, + .is_raw = name_is_raw, } }); } @@ -1778,11 +1780,12 @@ pub const Parser = struct { } const param_name = self.tokenSlice(self.current); const param_name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end }; + const param_is_raw = self.current.is_raw; self.advance(); // Optional type annotation: if no ':', infer type from context if (self.current.tag != .colon) { const inferred_node = try self.createNode(param_name_span.start, .{ .inferred_type = {} }); - try params.append(self.allocator, .{ .name = param_name, .name_span = param_name_span, .type_expr = inferred_node, .is_variadic = is_variadic, .is_comptime = is_ct_param }); + try params.append(self.allocator, .{ .name = param_name, .name_span = param_name_span, .type_expr = inferred_node, .is_variadic = is_variadic, .is_comptime = is_ct_param, .is_raw = param_is_raw }); continue; } self.advance(); // consume ':' @@ -1822,7 +1825,7 @@ pub const Parser = struct { .type_expr, .parameterized_type_expr => true, else => false, }; - try params.append(self.allocator, .{ .name = param_name, .name_span = param_name_span, .type_expr = param_type, .is_variadic = is_variadic, .is_comptime = is_comptime_param, .is_pack = is_pack, .default_expr = default_expr }); + try params.append(self.allocator, .{ .name = param_name, .name_span = param_name_span, .type_expr = param_type, .is_variadic = is_variadic, .is_comptime = is_comptime_param, .is_pack = is_pack, .default_expr = default_expr, .is_raw = param_is_raw }); } for (params.items, 0..) 
|param, i| { if (param.is_variadic and i != params.items.len - 1) { @@ -2023,6 +2026,7 @@ pub const Parser = struct { const start = self.current.loc.start; const name = self.tokenSlice(self.current); const name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end }; + const name_is_raw = self.current.is_raw; self.advance(); if (self.current.tag == .colon_colon) { @@ -2033,11 +2037,11 @@ pub const Parser = struct { self.advance(); const value = try self.parseExpr(); try self.expectSemicolonAfter(value); - return try self.createNode(start, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = null, .value = value } }); + return try self.createNode(start, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = null, .value = value, .is_raw = name_is_raw } }); } if (self.current.tag == .colon) { self.advance(); - return self.parseTypedBinding(name, name_span, start); + return self.parseTypedBinding(name, name_span, start, name_is_raw); } // Multi-target assignment: ident, expr, ... = expr, expr, ...; @@ -2686,8 +2690,11 @@ pub const Parser = struct { }, .identifier => { const name = self.tokenSlice(self.current); - // Check if this identifier is a type name (e.g. s32, u8, s128) - if (Type.fromName(name) != null) { + // A backtick raw identifier (`` `s2 ``) is NEVER type-classified — + // it is always a value identifier, bypassing the reserved-type-name + // rule (issue 0089). Only a bare spelling is checked for a type name + // (e.g. s32, u8, s128). 
+ if (!self.current.is_raw and Type.fromName(name) != null) { self.advance(); return try self.createNode(start, .{ .type_expr = .{ .name = name } }); } diff --git a/src/token.zig b/src/token.zig index ae5ac02..109a52f 100644 --- a/src/token.zig +++ b/src/token.zig @@ -210,6 +210,12 @@ pub const Tag = enum { pub const Token = struct { tag: Tag, loc: Loc, + /// True when an `.identifier` was introduced by a leading backtick + /// (`` `s2 ``): a RAW identifier whose text excludes the backtick and which + /// the parser must NEVER type-classify (it bypasses the reserved-type-name + /// rule). `loc` already spans only the un-backticked name, so `slice` returns + /// the bare text. + is_raw: bool = false, pub const Loc = struct { start: u32, From 640f59dc54061d2636159de9bebac3e6f3ba7f24 Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 18:31:08 +0300 Subject: [PATCH 03/11] feat(lang): backtick raw identifier in every binding form + raw-not-a-type + foreign reserved-name fn bare-call [F0.6] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the issue-0089 backtick raw-identifier / `#import c` exemption across all remaining identifier positions and closes three boundary gaps the F0.6 review found. 1. Exhaustive raw-binding coverage. The `is_raw` bit now threads through `ast.Identifier` and EVERY binding/capture form — `IfExpr`/`WhileExpr` optional bindings, `ForExpr` capture + index, `MatchArm` capture, `CatchExpr`/`OnFailStmt` tag bindings, `DestructureDecl` per-name, and the protocol-default-body / foreign-class method param lists — not just `var_decl`/`param`. `UnknownTypeChecker` skips the reserved-name check at each arm when raw, so a backtick works in every identifier position while a bare reserved spelling still errors (issue 0076 preserved). 2. Raw identifier is never a type. 
`parseTypeExpr`'s atom rejects a raw identifier in type position (`x : `s2 = 1`, `List(`s2)`) with an accurate diagnostic instead of silently type-classifying it. 3. Reserved-name function bare-callable. A bare `s2(4)` parses its callee as a `.type_expr` (reserved spelling); `lowerCall` now rewrites a type_expr callee to an identifier when a function of that name is in scope, so a backtick-declared sx fn and a `#import c` foreign fn whose C name collides with a reserved type spelling both resolve by their bare name. (`TypeName(val)` is not a cast, so there is no ambiguity.) Tests: examples/0152 (every control-flow/capture form + bare ref/call/member access), examples/1054 (catch/onfail tag bindings), examples/1139 (raw in type position rejected), examples/1220 extended (foreign reserved-name function bare-call). 0076 negatives 1119/1121/1122/1123/1124/1125 stay green. Gate: zig build + zig build test + 422 examples pass. specs.md + readme.md updated; issues/0089 RESOLVED banner refreshed. 
--- examples/0152-types-backtick-control-flow.sx | 57 +++++++++++++++++++ .../1054-errors-backtick-reserved-binding.sx | 40 +++++++++++++ ...139-diagnostics-backtick-raw-not-a-type.sx | 12 ++++ .../1220-ffi-c-import-reserved-name-params.c | 4 ++ .../1220-ffi-c-import-reserved-name-params.h | 8 ++- .../1220-ffi-c-import-reserved-name-params.sx | 16 ++++-- .../0152-types-backtick-control-flow.exit | 1 + .../0152-types-backtick-control-flow.stderr | 1 + .../0152-types-backtick-control-flow.stdout | 8 +++ ...1054-errors-backtick-reserved-binding.exit | 1 + ...54-errors-backtick-reserved-binding.stderr | 1 + ...54-errors-backtick-reserved-binding.stdout | 5 ++ ...9-diagnostics-backtick-raw-not-a-type.exit | 1 + ...diagnostics-backtick-raw-not-a-type.stderr | 5 ++ ...diagnostics-backtick-raw-not-a-type.stdout | 1 + ...0-ffi-c-import-reserved-name-params.stdout | 1 + issues/0089-backtick-raw-identifier.md | 38 +++++++++---- readme.md | 19 +++++-- specs.md | 38 +++++++++---- src/ast.zig | 37 ++++++++++++ src/ir/lower.zig | 22 ++++++- src/ir/semantic_diagnostics.zig | 44 ++++++++++---- src/parser.zig | 52 ++++++++++++++--- 23 files changed, 356 insertions(+), 56 deletions(-) create mode 100644 examples/0152-types-backtick-control-flow.sx create mode 100644 examples/1054-errors-backtick-reserved-binding.sx create mode 100644 examples/1139-diagnostics-backtick-raw-not-a-type.sx create mode 100644 examples/expected/0152-types-backtick-control-flow.exit create mode 100644 examples/expected/0152-types-backtick-control-flow.stderr create mode 100644 examples/expected/0152-types-backtick-control-flow.stdout create mode 100644 examples/expected/1054-errors-backtick-reserved-binding.exit create mode 100644 examples/expected/1054-errors-backtick-reserved-binding.stderr create mode 100644 examples/expected/1054-errors-backtick-reserved-binding.stdout create mode 100644 examples/expected/1139-diagnostics-backtick-raw-not-a-type.exit create mode 100644 
examples/expected/1139-diagnostics-backtick-raw-not-a-type.stderr create mode 100644 examples/expected/1139-diagnostics-backtick-raw-not-a-type.stdout diff --git a/examples/0152-types-backtick-control-flow.sx b/examples/0152-types-backtick-control-flow.sx new file mode 100644 index 0000000..7b75860 --- /dev/null +++ b/examples/0152-types-backtick-control-flow.sx @@ -0,0 +1,57 @@ +// Backtick raw identifier across every control-flow / capture / binding form, +// plus bare later uses. A reserved type-name spelling (`s2`, `u8`, …) works as a +// binding name in a destructure, an `if`/`while` optional binding, a `for` +// capture + index, and a match-arm capture; a backtick-named function is +// bare-callable; and a backtick struct field is bare- or backtick-accessible. +// The escape is needed only at the binding site — a later BARE reference / call +// / member access resolves to the binding. A *bare* binding name is still the +// reserved type (see examples/1121), so the escape is the only way to spell +// these as values. +// Regression (issue 0089 — attempt-2 completeness across binding forms). +#import "modules/std.sx"; + +pair :: () -> (s64, s64) { (1, 2) } +maybe :: () -> ?s64 { return 42; } + +// Function named with a reserved spelling — bare-callable (no backtick at call). 
+`s2 :: (n: s64) -> s64 { return n + 1; } + +Quad :: struct { `s1: s32; `s2: s32; } + +main :: () -> s32 { + // destructure binding names + `u8, rest := pair(); + print("dstr = {} {}\n", `u8, rest); + + // if optional binding + bare-position reference inside the branch + if `s16 := maybe() { + print("if = {}\n", `s16); + } + + // while optional binding (name only — the while binding isn't body-exposed) + while `s32 := maybe() { + break; + } + + // for capture + index names + xs := [3]s64.{ 10, 20, 30 }; + for xs: (`bool, `u16) { + print("for = {} @ {}\n", `bool, `u16); + } + + // match-arm capture + opt: ?s64 = 5; + m := if opt == { + case .some: (`string) { `string * 2 } + case .none: { 0 } + }; + print("match = {}\n", m); + + // backtick function called BARE and via backtick — both resolve to the fn + print("call = {} {}\n", s2(10), `s2(10)); + + // struct field named with a reserved spelling: bare + backtick member access + q := Quad.{ `s1 = 7, `s2 = 9 }; + print("field = {} {} | {} {}\n", q.s1, q.s2, q.`s1, q.`s2); + return 0; +} diff --git a/examples/1054-errors-backtick-reserved-binding.sx b/examples/1054-errors-backtick-reserved-binding.sx new file mode 100644 index 0000000..d323ada --- /dev/null +++ b/examples/1054-errors-backtick-reserved-binding.sx @@ -0,0 +1,40 @@ +// Backtick raw identifier as the error-tag binding of `catch` and `onfail`. A +// reserved type-name spelling (`s2`, `u8`) is a value name when backticked, so +// it is accepted as the tag binding and a later reference resolves to it. A +// *bare* reserved spelling in the same position is still rejected (see +// examples/1123), so the backtick escape is the only way to spell these tags. +// Regression (issue 0089 — attempt-2 catch/onfail coverage). +#import "modules/std.sx"; + +E :: error { Bad, Empty } + +parse :: (n: s32) -> (s32, !E) { + if n < 0 { raise error.Bad; } + if n == 0 { raise error.Empty; } + return n * 2; +} + +// `catch` tag binding spelled `s2`, referenced in the match body. 
+classify :: (n: s32) -> s32 { + return parse(n) catch `s2 == { + case .Bad: 1; + case .Empty: 2; + else: 3 + }; +} + +// `onfail` tag binding spelled `u8`, referenced in the cleanup body. +cleanup :: (n: s32) -> !E { + onfail `u8 { if `u8 == error.Bad { print("cleanup: bad\n"); } } + if n < 0 { raise error.Bad; } + return; +} + +main :: () -> s32 { + print("classify(-1) = {}\n", classify(-1)); + print("classify(0) = {}\n", classify(0)); + print("classify(5) = {}\n", classify(5)); + c := cleanup(-1); + print("done\n"); + return 0; +} diff --git a/examples/1139-diagnostics-backtick-raw-not-a-type.sx b/examples/1139-diagnostics-backtick-raw-not-a-type.sx new file mode 100644 index 0000000..bfdb943 --- /dev/null +++ b/examples/1139-diagnostics-backtick-raw-not-a-type.sx @@ -0,0 +1,12 @@ +// A backtick raw identifier is a VALUE-name escape; it is never a type. Using +// one in type position (`x : `s2 = 1`) is a clean parse error, not a silent +// type-classification — reserved type names are the lowercase `sN`/`uN`/`fNN` +// spellings, and a real type never needs a backtick. A *bare* `s2` in type +// position remains the reserved signed-int type. +// Regression (issue 0089 — attempt-2: raw identifier rejected in type position). 
+#import "modules/std.sx"; + +main :: () -> s32 { + x : `s2 = 1; + return 0; +} diff --git a/examples/1220-ffi-c-import-reserved-name-params.c b/examples/1220-ffi-c-import-reserved-name-params.c index 560ab3b..120a0dd 100644 --- a/examples/1220-ffi-c-import-reserved-name-params.c +++ b/examples/1220-ffi-c-import-reserved-name-params.c @@ -7,3 +7,7 @@ int ffi_pick(int s1, int s2, int which) { int ffi_sum(int s1, int s2) { return s1 + s2; } + +int s2(int u8) { + return u8 + 100; +} diff --git a/examples/1220-ffi-c-import-reserved-name-params.h b/examples/1220-ffi-c-import-reserved-name-params.h index 33929c6..7c1bdea 100644 --- a/examples/1220-ffi-c-import-reserved-name-params.h +++ b/examples/1220-ffi-c-import-reserved-name-params.h @@ -1,5 +1,7 @@ -/* Foreign C declarations whose parameter names (`s1`, `s2`) collide with - sx's reserved signed-int type spellings. The `#import c` exemption must - accept these generated names unedited (issue 0089). */ +/* Foreign C declarations whose names collide with sx's reserved type spellings. + The `#import c` exemption must accept these generated names unedited, both as + parameter names (`s1`, `s2`) and as a FUNCTION name (`s2`) — and a foreign + reserved-name function must be bare-callable (issue 0089). */ int ffi_pick(int s1, int s2, int which); int ffi_sum(int s1, int s2); +int s2(int u8); diff --git a/examples/1220-ffi-c-import-reserved-name-params.sx b/examples/1220-ffi-c-import-reserved-name-params.sx index dd25659..f9f3284 100644 --- a/examples/1220-ffi-c-import-reserved-name-params.sx +++ b/examples/1220-ffi-c-import-reserved-name-params.sx @@ -1,9 +1,12 @@ -// `#import c` foreign-name exemption: a C header's parameter names `s1`/`s2` -// collide with sx's reserved signed-int type spellings. Foreign decls are -// treated as RAW — their names are never type-classified nor reserved-checked -// — so the generated `#foreign` bindings import and call without hand-edits -// (no backticks needed). 
Before issue 0089 this errored with "'s1' is a -// reserved type name and cannot be used as an identifier". +// `#import c` foreign-name exemption: C names that collide with sx's reserved +// type spellings import unedited. Foreign decls are treated as RAW — their names +// are never type-classified nor reserved-checked — so the generated `#foreign` +// bindings import and call without hand-edits (no backticks needed). This covers +// parameter names (`s1`/`s2`), a function whose own NAME is a reserved spelling +// (`s2`), and bare-calling that function (its callee spelling parses as a type +// but resolves to the foreign fn). Before issue 0089 the params errored with +// "'s1' is a reserved type name and cannot be used as an identifier", and the +// bare call errored with "unresolved 's2'". // Regression (issue 0089). #import "modules/std.sx"; @@ -16,5 +19,6 @@ main :: () -> s32 { print("pick(10,20,0) = {}\n", ffi_pick(10, 20, 0)); print("pick(10,20,1) = {}\n", ffi_pick(10, 20, 1)); print("sum(10,20) = {}\n", ffi_sum(10, 20)); + print("s2(4) bare = {}\n", s2(4)); 0 } diff --git a/examples/expected/0152-types-backtick-control-flow.exit b/examples/expected/0152-types-backtick-control-flow.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0152-types-backtick-control-flow.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0152-types-backtick-control-flow.stderr b/examples/expected/0152-types-backtick-control-flow.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0152-types-backtick-control-flow.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0152-types-backtick-control-flow.stdout b/examples/expected/0152-types-backtick-control-flow.stdout new file mode 100644 index 0000000..834e758 --- /dev/null +++ b/examples/expected/0152-types-backtick-control-flow.stdout @@ -0,0 +1,8 @@ +dstr = 1 2 +if = 42 +for = 10 @ 0 +for = 20 @ 1 +for = 30 @ 2 +match = 10 +call = 11 11 +field = 7 9 | 7 9 diff 
--git a/examples/expected/1054-errors-backtick-reserved-binding.exit b/examples/expected/1054-errors-backtick-reserved-binding.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/1054-errors-backtick-reserved-binding.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/1054-errors-backtick-reserved-binding.stderr b/examples/expected/1054-errors-backtick-reserved-binding.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/1054-errors-backtick-reserved-binding.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/1054-errors-backtick-reserved-binding.stdout b/examples/expected/1054-errors-backtick-reserved-binding.stdout new file mode 100644 index 0000000..4a9a95f --- /dev/null +++ b/examples/expected/1054-errors-backtick-reserved-binding.stdout @@ -0,0 +1,5 @@ +classify(-1) = 1 +classify(0) = 2 +classify(5) = 10 +cleanup: bad +done diff --git a/examples/expected/1139-diagnostics-backtick-raw-not-a-type.exit b/examples/expected/1139-diagnostics-backtick-raw-not-a-type.exit new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/examples/expected/1139-diagnostics-backtick-raw-not-a-type.exit @@ -0,0 +1 @@ +1 diff --git a/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stderr b/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stderr new file mode 100644 index 0000000..8608dcb --- /dev/null +++ b/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stderr @@ -0,0 +1,5 @@ +error: `s2` is a raw identifier, not a type — the backtick escape names a value, never a type + --> examples/1139-diagnostics-backtick-raw-not-a-type.sx:10:10 + | +10 | x : `s2 = 1; + | ^^ diff --git a/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stdout b/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stdout new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stdout @@ -0,0 +1 @@ + diff --git 
a/examples/expected/1220-ffi-c-import-reserved-name-params.stdout b/examples/expected/1220-ffi-c-import-reserved-name-params.stdout index 78b2f72..7c90d43 100644 --- a/examples/expected/1220-ffi-c-import-reserved-name-params.stdout +++ b/examples/expected/1220-ffi-c-import-reserved-name-params.stdout @@ -1,3 +1,4 @@ pick(10,20,0) = 10 pick(10,20,1) = 20 sum(10,20) = 30 +s2(4) bare = 104 diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md index 25304fe..ff0e275 100644 --- a/issues/0089-backtick-raw-identifier.md +++ b/issues/0089-backtick-raw-identifier.md @@ -4,25 +4,41 @@ > > 1. **Backtick raw identifier.** The lexer recognises a leading backtick > (`` `s2 ``) and emits an `.identifier` token whose span excludes the backtick, -> carrying a new `Token.is_raw` flag ([src/lexer.zig], [src/token.zig]). A raw +> carrying a `Token.is_raw` flag ([src/lexer.zig], [src/token.zig]). A raw > identifier is NEVER type-classified — the parser skips `Type.fromName` for it > in expression position ([src/parser.zig] `parsePrimary`), so it is always a -> value identifier. The flag threads to `VarDecl.is_raw` / `Param.is_raw` -> ([src/ast.zig]) at the binding sites, and `UnknownTypeChecker` skips the -> reserved-name check for raw bindings ([src/ir/semantic_diagnostics.zig]). -> Because the token tag stays `.identifier`, the escape works in every position -> (local, global, param, field, function name, struct member, later reference) -> with no per-site parser change. +> value identifier. The `is_raw` flag threads through `ast.Identifier` and EVERY +> binding/capture form ([src/ast.zig]): `VarDecl` / `Param` plus `IfExpr` / +> `WhileExpr` optional bindings, `ForExpr` capture + index, `MatchArm` capture, +> `CatchExpr` / `OnFailStmt` tag bindings, `DestructureDecl` per-name, and the +> protocol-default-body / foreign-class method param lists. 
`UnknownTypeChecker` +> skips the reserved-name check at each of those arms when raw +> ([src/ir/semantic_diagnostics.zig]). The backtick works in every identifier +> position (local, global, param, field, function name, struct member, later +> reference, and all the control-flow/capture/binding forms). > 2. **`#import c` foreign-name exemption.** `c_import.zig` synthesizes foreign > `#foreign` decls with `Param.is_raw = true`, so generated C param names that > collide with reserved type names (`s1`, `s2`) import unedited. > +> **Boundary rules.** A raw identifier is a value name and is NEVER a type: using +> one in type position (`x : `s2 = 1`) is a clean parse error ([src/parser.zig] +> `parseTypeExpr` atom). A reserved-spelled FUNCTION (backtick-declared or +> `#import c` foreign) is bare-callable: `lowerCall` rewrites a `.type_expr` callee +> to an identifier when a function of that name is in scope ([src/ir/lower.zig]), +> so `s2(4)` resolves to the function (`TypeName(val)` is not a cast). A later BARE +> reference in value position resolves to the binding; a bare `s2` in type position +> is still the type. +> > A *bare* reserved-name binding in sx still errors (issue 0076 preserved): the > `is_raw`-gated skip only fires for backtick / foreign names. Regression tests: -> `examples/0151-types-backtick-raw-identifier.sx` (backtick, every position), -> `examples/1220-ffi-c-import-reserved-name-params.{sx,h,c}` (foreign exemption), -> `examples/1119-diagnostics-reserved-type-name-as-identifier.sx` (negative — -> bare binding still rejected). Backtick lexer unit tests in `src/lexer.zig`. 
+> `examples/0151-types-backtick-raw-identifier.sx` (backtick, decl positions), +> `examples/0152-types-backtick-control-flow.sx` (every control-flow/capture form +> + bare ref/call/member access), `examples/1054-errors-backtick-reserved-binding.sx` +> (`catch`/`onfail` tag bindings), `examples/1220-ffi-c-import-reserved-name-params.{sx,h,c}` +> (foreign param + function-name exemption, bare-callable foreign fn), +> `examples/1139-diagnostics-backtick-raw-not-a-type.sx` (negative — raw in type +> position), `examples/1119`/`1121`/`1123` (negative — bare reserved binding still +> rejected across all forms). Backtick lexer unit tests in `src/lexer.zig`. > > The original report is preserved below. diff --git a/readme.md b/readme.md index a1ec6dd..79b91d8 100644 --- a/readme.md +++ b/readme.md @@ -106,17 +106,26 @@ z : s32 = ---; // uninitialized ``` Builtin type names (`s2`, `u8`, `bool`, `string`, …) are reserved and can't be used -as bare value identifiers. A leading backtick escapes one into a raw identifier — its -text drops the backtick and it's never read as a type — so reserved spellings (and -keywords) work as ordinary names: +as bare value identifiers. A leading backtick at the **binding site** escapes one +into a raw identifier — its text drops the backtick and it's never read as a type — +so reserved spellings (and keywords) work as ordinary names. The backtick is needed +only where the name is declared; a later bare reference in value position resolves +to the binding, while a bare `s2` in type position is still the type. 
It works in +every identifier position (local, global, parameter, field, function name, and the +control-flow / capture / binding forms — destructure, `if`/`while` binding, `for` +capture, match capture, `catch`/`onfail` tag), and a reserved-spelled function is +bare-callable: ```sx `s2 := 2.5; // value identifier "s2", distinct from the s2 type -print("{}\n", `s2); // 2.5 +print("{}\n", `s2); // 2.5 (or bare `s2`) ``` +A raw identifier is a value name, never a type — `x : `s2 = 1` is an error. + Foreign declarations from `#import c { … }` are exempt automatically: C names that -collide with reserved type names (e.g. `s1`, `s2`) import unedited. +collide with reserved type names (e.g. `s1`, `s2`) import unedited, and a foreign +reserved-name function is bare-callable by its C name. ### Structs diff --git a/specs.md b/specs.md index fe0290b..864b0e2 100644 --- a/specs.md +++ b/specs.md @@ -29,32 +29,50 @@ s2 := 2.5; // ERROR: 's2' is a reserved type name and cannot be used as an ide A leading backtick makes the following identifier **raw**: its text excludes the backtick and it is never type-classified, so a reserved-type-name spelling can be -used as an ordinary value identifier. The backtick is required at every occurrence -of that identifier (declaration and each reference); a *bare* `s2` is still the -signed-int type. +used as an ordinary value identifier. The backtick is required at the **binding +site** — the declaration that introduces the name — to escape the reserved-name +rule. A later reference is resolved by position: in **value** position a bare `s2` +resolves to the binding; in **type** position a bare `s2` is still the signed-int +type. 
```sx `s2 := 2.5; // OK — value identifier "s2", distinct from the s2 type -print("{}\n", `s2); // 2.5 +print("{}\n", `s2); // 2.5 (backtick reference) +print("{}\n", s2); // 2.5 (bare reference, resolves to the binding) +x : s2 = 3; // bare `s2` in TYPE position is still the s2 type ``` -The escape works in every identifier position — local, global, parameter, struct -field, function name, and a later reference: +A raw identifier is a value name and is **never a type**: using one in type +position (`x : `s2 = 1`) is a parse error. + +The escape works in **every identifier position** — local, global, parameter, +struct field, function name, a later reference, and every control-flow / capture / +binding form: a destructure name, an `if` / `while` optional binding, a `for` +capture and index, a match-arm capture, and a `catch` / `onfail` tag binding: ```sx `u8 := 100; // global `s2 :: (`s1: s64) -> s64 { `s1 } // function name + parameter P :: struct { `s2: f64; } // struct field +`u8, rest := pair(); // destructure name +if `s16 := maybe() { } // optional binding +for xs: (`bool, `u16) { } // for capture + index +x catch `s2 { } // catch tag binding ``` +A reserved-spelled **function** is bare-callable: `` `s2 :: (n: s64) -> s64 { … } `` +can be invoked as `s2(10)` (the callee spelling parses as a type but resolves to +the function when one of that name is in scope; `TypeName(val)` is not a cast). + A backtick may also escape a keyword spelling (`` `for ``, `` `struct ``), yielding an identifier with that text. **`#import c` exemption.** Foreign declarations synthesized by an `#import c { … }` -block are treated as raw automatically: a generated C parameter or name that -collides with a reserved type name (e.g. `s1`, `s2`) imports unedited, with no -backticks and no reserved-name error. 
The exemption is scoped to the foreign decls — -it does not make a foreign `s2` usable as the sx `s2` type, nor relax the rule for +block are treated as raw automatically: a generated C parameter or function name +that collides with a reserved type name (e.g. `s1`, `s2`) imports unedited, with no +backticks and no reserved-name error, and a foreign reserved-name function is +bare-callable by its C name. The exemption is scoped to the foreign decls — it does +not make a foreign `s2` usable as the sx `s2` type, nor relax the rule for hand-written sx code. ### Literals diff --git a/src/ast.zig b/src/ast.zig index f6c7251..23b7213 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -188,6 +188,10 @@ pub const StringLiteral = struct { pub const Identifier = struct { name: []const u8, + /// True when written as a backtick raw identifier (`` `s2 ``). Carried so a + /// destructure target (`` `s2, b := … ``) can be recognised as raw and + /// exempted from the reserved-type-name binding check (issue 0089). + is_raw: bool = false, }; pub const EnumLiteral = struct { @@ -277,6 +281,9 @@ pub const IfExpr = struct { is_comptime: bool = false, // true for `inline if` — compile-time branch elimination binding_name: ?[]const u8 = null, // for `if val := expr { ... }` optional binding binding_span: ?Span = null, // span of `binding_name` (set iff `binding_name` is) + /// True when the optional binding was a backtick raw identifier + /// (`` if `s2 := … ``) — exempt from the reserved-type-name check (issue 0089). + binding_is_raw: bool = false, }; pub const MatchExpr = struct { @@ -291,6 +298,9 @@ pub const MatchArm = struct { is_break: bool, capture: ?[]const u8 = null, // payload binding name: case .variant: (name) { ... } capture_span: ?Span = null, // span of `capture` (set iff `capture` is) + /// True when the capture was a backtick raw identifier + /// (`` case .v: (`s2) ``) — exempt from the reserved-type-name check (issue 0089). 
+ capture_is_raw: bool = false, }; pub const ConstDecl = struct { @@ -341,6 +351,10 @@ pub const MultiAssign = struct { pub const DestructureDecl = struct { names: []const []const u8, name_spans: []const Span, // one per entry in `names`, same order + /// One per entry in `names`, same order: true when that target was a + /// backtick raw identifier (`` `s2, b := … ``) — exempt from the + /// reserved-type-name binding check (issue 0089). + name_is_raw: []const bool, value: *Node, }; @@ -462,6 +476,9 @@ pub const CatchExpr = struct { operand: *Node, binding: ?[]const u8 = null, binding_span: ?Span = null, // span of `binding` (set iff `binding` is) + /// True when the binding was a backtick raw identifier + /// (`` x catch `s2 { … } ``) — exempt from the reserved-type-name check (issue 0089). + binding_is_raw: bool = false, body: *Node, is_match_body: bool = false, }; @@ -472,6 +489,9 @@ pub const CatchExpr = struct { pub const OnFailStmt = struct { binding: ?[]const u8 = null, binding_span: ?Span = null, // span of `binding` (set iff `binding` is) + /// True when the binding was a backtick raw identifier + /// (`` onfail `s2 { … } ``) — exempt from the reserved-type-name check (issue 0089). + binding_is_raw: bool = false, body: *Node, }; @@ -566,6 +586,9 @@ pub const WhileExpr = struct { body: *Node, binding_name: ?[]const u8 = null, // for `while val := expr { ... }` optional binding binding_span: ?Span = null, // span of `binding_name` (set iff `binding_name` is) + /// True when the optional binding was a backtick raw identifier + /// (`` while `s2 := … ``) — exempt from the reserved-type-name check (issue 0089). + binding_is_raw: bool = false, }; pub const ForExpr = struct { @@ -573,8 +596,14 @@ pub const ForExpr = struct { body: *Node, capture_name: []const u8, capture_span: ?Span = null, // span of `capture_name` (null when omitted, e.g. 
`for 0..N { }`) + /// True when `capture_name` was a backtick raw identifier + /// (`` for xs: (`s2) ``) — exempt from the reserved-type-name check (issue 0089). + capture_is_raw: bool = false, index_name: ?[]const u8 = null, index_span: ?Span = null, // span of `index_name` (set iff `index_name` is) + /// True when `index_name` was a backtick raw identifier + /// (`` for xs: (x, `s2) ``) — exempt from the reserved-type-name check (issue 0089). + index_is_raw: bool = false, /// Range form `for start..end (i) { }`: `iterable` is the start, `range_end` /// the (exclusive) end. Null for the iterate-a-collection form /// (`for coll : (x) { }`). For the range form `capture_name` is the cursor @@ -663,6 +692,10 @@ pub const ProtocolMethodDecl = struct { params: []const *Node, // type_expr nodes for parameter types (excluding implicit self) param_names: []const []const u8, // parameter names (excluding implicit self) param_name_spans: []const Span = &.{}, // one per `param_names` entry; empty for synthesized methods + /// One per `param_names` entry: true when written as a backtick raw + /// identifier — exempt from the reserved-type-name check (issue 0089). + /// Empty for synthesized methods (treated as all-false). + param_name_is_raw: []const bool = &.{}, return_type: ?*Node, // null = void return default_body: ?*Node, // null = required method, non-null = default implementation }; @@ -689,6 +722,10 @@ pub const ForeignMethodDecl = struct { params: []const *Node, // type_expr nodes — first is `*Self` for instance methods param_names: []const []const u8, param_name_spans: []const Span = &.{}, // one per `param_names` entry; empty for synthesized methods + /// One per `param_names` entry: true when written as a backtick raw + /// identifier — exempt from the reserved-type-name check (issue 0089). + /// Empty for synthesized methods (treated as all-false). 
+ param_name_is_raw: []const bool = &.{}, return_type: ?*Node, // null = void is_static: bool = false, // true for `static name :: ...` jni_descriptor_override: ?[]const u8 = null, // `#jni_method_descriptor("(Sig)Ret")` — JNI runtime only diff --git a/src/ir/lower.zig b/src/ir/lower.zig index a83feb0..56c0b05 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -6621,10 +6621,30 @@ pub const Lowering = struct { // ── Calls ─────────────────────────────────────────────────────── fn lowerCall(self: *Lowering, c_in: *const ast.Call) Ref { + var c = c_in; + // A bare reserved-type-name spelling in call position parses as a + // `.type_expr` (e.g. `s2(4)`), but if a function of that name is in + // scope — a backtick-declared sx fn or a `#import c` foreign fn whose C + // name collides with a reserved type spelling — it is a CALL to that + // function. `TypeName(val)` is not a cast (casts are `cast(T, val)`), so + // there is no ambiguity. Rewrite the callee to an identifier so the + // normal call machinery resolves it, symmetric to the bare-value + // reference that already resolves via scope/globals (issue 0089). + if (c.callee.data == .type_expr) { + const tname = c.callee.data.type_expr.name; + const is_fn = self.program_index.fn_ast_map.contains(tname) or + (if (self.scope) |scope| scope.lookupFn(tname) != null else false); + if (is_fn) { + const id_node = self.alloc.create(Node) catch unreachable; + id_node.* = .{ .span = c.callee.span, .data = .{ .identifier = .{ .name = tname, .is_raw = true } } }; + const rewritten = self.alloc.create(ast.Call) catch unreachable; + rewritten.* = .{ .callee = id_node, .args = c.args }; + c = rewritten; + } + } // Expand default parameter values for bare identifier callees: // when the caller omits trailing positional args, fill them in // from the callee's `param: T = expr` declarations. 
- var c = c_in; if (self.expandCallDefaults(c)) |expanded| c = expanded; // Check reflection builtins first (before lowering args — some args are type names, not values) if (c.callee.data == .identifier) { diff --git a/src/ir/semantic_diagnostics.zig b/src/ir/semantic_diagnostics.zig index 03814df..33bee66 100644 --- a/src/ir/semantic_diagnostics.zig +++ b/src/ir/semantic_diagnostics.zig @@ -121,7 +121,9 @@ pub const UnknownTypeChecker = struct { if (vd.value) |v| self.checkBindingNames(v); }, .destructure_decl => |dd| { - for (dd.names, dd.name_spans) |n, sp| self.checkBindingName(n, sp); + for (dd.names, dd.name_spans, dd.name_is_raw) |n, sp, raw| { + if (!raw) self.checkBindingName(n, sp); + } self.checkBindingNames(dd.value); }, .fn_decl => |fd| { @@ -137,19 +139,25 @@ pub const UnknownTypeChecker = struct { if (p.default_expr) |de| self.checkBindingNames(de); }, .if_expr => |ie| { - if (ie.binding_name) |bn| self.checkBindingName(bn, ie.binding_span); + if (ie.binding_name) |bn| { + if (!ie.binding_is_raw) self.checkBindingName(bn, ie.binding_span); + } self.checkBindingNames(ie.condition); self.checkBindingNames(ie.then_branch); if (ie.else_branch) |e| self.checkBindingNames(e); }, .while_expr => |we| { - if (we.binding_name) |bn| self.checkBindingName(bn, we.binding_span); + if (we.binding_name) |bn| { + if (!we.binding_is_raw) self.checkBindingName(bn, we.binding_span); + } self.checkBindingNames(we.condition); self.checkBindingNames(we.body); }, .for_expr => |fe| { - if (fe.capture_name.len != 0) self.checkBindingName(fe.capture_name, fe.capture_span); - if (fe.index_name) |idx| self.checkBindingName(idx, fe.index_span); + if (fe.capture_name.len != 0 and !fe.capture_is_raw) self.checkBindingName(fe.capture_name, fe.capture_span); + if (fe.index_name) |idx| { + if (!fe.index_is_raw) self.checkBindingName(idx, fe.index_span); + } self.checkBindingNames(fe.iterable); if (fe.range_end) |re| self.checkBindingNames(re); self.checkBindingNames(fe.body); @@ 
-157,23 +165,31 @@ pub const UnknownTypeChecker = struct { .match_expr => |me| { self.checkBindingNames(me.subject); for (me.arms) |arm| { - if (arm.capture) |cap| self.checkBindingName(cap, arm.capture_span); + if (arm.capture) |cap| { + if (!arm.capture_is_raw) self.checkBindingName(cap, arm.capture_span); + } if (arm.pattern) |p| self.checkBindingNames(p); self.checkBindingNames(arm.body); } }, .match_arm => |arm| { - if (arm.capture) |cap| self.checkBindingName(cap, arm.capture_span); + if (arm.capture) |cap| { + if (!arm.capture_is_raw) self.checkBindingName(cap, arm.capture_span); + } if (arm.pattern) |p| self.checkBindingNames(p); self.checkBindingNames(arm.body); }, .catch_expr => |ce| { - if (ce.binding) |b| self.checkBindingName(b, ce.binding_span); + if (ce.binding) |b| { + if (!ce.binding_is_raw) self.checkBindingName(b, ce.binding_span); + } self.checkBindingNames(ce.operand); self.checkBindingNames(ce.body); }, .onfail_stmt => |os| { - if (os.binding) |b| self.checkBindingName(b, os.binding_span); + if (os.binding) |b| { + if (!os.binding_is_raw) self.checkBindingName(b, os.binding_span); + } self.checkBindingNames(os.body); }, // impl / protocol-default / foreign-class method bodies: each @@ -183,13 +199,19 @@ pub const UnknownTypeChecker = struct { .impl_block => |ib| for (ib.methods) |m| self.checkBindingNames(m), .protocol_decl => |pd| for (pd.methods) |m| { if (m.default_body) |body| { - for (m.param_names, m.param_name_spans) |pn, sp| self.checkBindingName(pn, sp); + for (m.param_names, m.param_name_spans, 0..) |pn, sp, i| { + if (i < m.param_name_is_raw.len and m.param_name_is_raw[i]) continue; + self.checkBindingName(pn, sp); + } self.checkBindingNames(body); } }, .foreign_class_decl => |fcd| for (fcd.members) |member| switch (member) { .method => |m| if (m.body) |body| { - for (m.param_names, m.param_name_spans) |pn, sp| self.checkBindingName(pn, sp); + for (m.param_names, m.param_name_spans, 0..) 
|pn, sp, i| { + if (i < m.param_name_is_raw.len and m.param_name_is_raw[i]) continue; + self.checkBindingName(pn, sp); + } self.checkBindingNames(body); }, .field, .extends, .implements => {}, diff --git a/src/parser.zig b/src/parser.zig index a18a7e9..ef4d279 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -629,6 +629,12 @@ pub const Parser = struct { } if (self.current.tag.isTypeKeyword() or self.isIdentLike()) { + // A backtick raw identifier (`` `s2 ``) is a VALUE-name escape; it is + // never a type. Reject it in type position rather than silently + // type-classifying it (issue 0089). + if (self.current.is_raw) { + return self.failFmt("`{s}` is a raw identifier, not a type — the backtick escape names a value, never a type", .{self.tokenSlice(self.current)}); + } var name = self.tokenSlice(self.current); self.advance(); @@ -1186,6 +1192,7 @@ pub const Parser = struct { var param_types = std.ArrayList(*Node).empty; var param_names = std.ArrayList([]const u8).empty; var param_name_spans = std.ArrayList(ast.Span).empty; + var param_name_is_raw = std.ArrayList(bool).empty; while (self.current.tag != .r_paren and self.current.tag != .eof) { if (param_types.items.len > 0) { @@ -1198,6 +1205,7 @@ pub const Parser = struct { } const pname = self.tokenSlice(self.current); try param_name_spans.append(self.allocator, .{ .start = self.current.loc.start, .end = self.current.loc.end }); + try param_name_is_raw.append(self.allocator, self.current.is_raw); self.advance(); try self.expect(.colon); const ptype = try self.parseTypeExpr(); @@ -1226,6 +1234,7 @@ pub const Parser = struct { .params = try param_types.toOwnedSlice(self.allocator), .param_names = try param_names.toOwnedSlice(self.allocator), .param_name_spans = try param_name_spans.toOwnedSlice(self.allocator), + .param_name_is_raw = try param_name_is_raw.toOwnedSlice(self.allocator), .return_type = return_type, .default_body = default_body, }); @@ -1454,6 +1463,7 @@ pub const Parser = struct { var param_types = 
std.ArrayList(*Node).empty; var param_names = std.ArrayList([]const u8).empty; var param_name_spans = std.ArrayList(ast.Span).empty; + var param_name_is_raw = std.ArrayList(bool).empty; while (self.current.tag != .r_paren and self.current.tag != .eof) { if (param_types.items.len > 0) { try self.expect(.comma); @@ -1464,6 +1474,7 @@ pub const Parser = struct { } const pname = self.tokenSlice(self.current); try param_name_spans.append(self.allocator, .{ .start = self.current.loc.start, .end = self.current.loc.end }); + try param_name_is_raw.append(self.allocator, self.current.is_raw); self.advance(); try self.expect(.colon); const ptype = try self.parseTypeExpr(); @@ -1546,6 +1557,7 @@ pub const Parser = struct { .params = try param_types.toOwnedSlice(self.allocator), .param_names = try param_names.toOwnedSlice(self.allocator), .param_name_spans = try param_name_spans.toOwnedSlice(self.allocator), + .param_name_is_raw = try param_name_is_raw.toOwnedSlice(self.allocator), .return_type = return_type, .is_static = is_static, .jni_descriptor_override = desc_override, @@ -2046,7 +2058,7 @@ pub const Parser = struct { // Multi-target assignment: ident, expr, ... 
= expr, expr, ...; if (self.current.tag == .comma) { - const first_target = try self.createNode(start, .{ .identifier = .{ .name = name } }); + const first_target = try self.createNode(start, .{ .identifier = .{ .name = name, .is_raw = name_is_raw } }); return try self.parseMultiAssign(first_target, start); } @@ -2056,7 +2068,7 @@ pub const Parser = struct { self.advance(); const value = try self.parseExpr(); try self.expect(.semicolon); - const target = try self.createNode(start, .{ .identifier = .{ .name = name } }); + const target = try self.createNode(start, .{ .identifier = .{ .name = name, .is_raw = name_is_raw } }); return try self.createNode(start, .{ .assignment = .{ .target = target, .op = op, .value = value } }); } @@ -2123,9 +2135,11 @@ pub const Parser = struct { self.advance(); var binding: ?[]const u8 = null; var binding_span: ?ast.Span = null; + var binding_is_raw = false; if (self.current.tag == .identifier and self.peekNext() == .l_brace) { binding = self.tokenSlice(self.current); binding_span = .{ .start = self.current.loc.start, .end = self.current.loc.end }; + binding_is_raw = self.current.is_raw; self.advance(); } const saved_onfail = self.in_onfail_body; @@ -2138,7 +2152,7 @@ pub const Parser = struct { try self.expect(.semicolon); break :blk e; }; - return try self.createNode(start, .{ .onfail_stmt = .{ .binding = binding, .binding_span = binding_span, .body = body } }); + return try self.createNode(start, .{ .onfail_stmt = .{ .binding = binding, .binding_span = binding_span, .binding_is_raw = binding_is_raw, .body = body } }); } // Break statement: break; @@ -2570,9 +2584,11 @@ pub const Parser = struct { self.advance(); // consume 'catch' var binding: ?[]const u8 = null; var binding_span: ?ast.Span = null; + var binding_is_raw = false; if (self.current.tag == .identifier) { binding = self.tokenSlice(self.current); binding_span = .{ .start = self.current.loc.start, .end = self.current.loc.end }; + binding_is_raw = self.current.is_raw; 
self.advance(); } var is_match_body = false; @@ -2582,7 +2598,7 @@ pub const Parser = struct { const m_start = self.current.loc.start; self.advance(); // consume '==' is_match_body = true; - const subject = try self.createNode(m_start, .{ .identifier = .{ .name = binding.? } }); + const subject = try self.createNode(m_start, .{ .identifier = .{ .name = binding.?, .is_raw = binding_is_raw } }); break :blk try self.parseMatchBody(subject, m_start); } else if (binding != null) try self.parseExpr() @@ -2592,6 +2608,7 @@ pub const Parser = struct { .operand = expr, .binding = binding, .binding_span = binding_span, + .binding_is_raw = binding_is_raw, .body = body, .is_match_body = is_match_body, } }); @@ -2690,16 +2707,17 @@ pub const Parser = struct { }, .identifier => { const name = self.tokenSlice(self.current); + const is_raw = self.current.is_raw; // A backtick raw identifier (`` `s2 ``) is NEVER type-classified — // it is always a value identifier, bypassing the reserved-type-name // rule (issue 0089). Only a bare spelling is checked for a type name // (e.g. s32, u8, s128). 
- if (!self.current.is_raw and Type.fromName(name) != null) { + if (!is_raw and Type.fromName(name) != null) { self.advance(); return try self.createNode(start, .{ .type_expr = .{ .name = name } }); } self.advance(); - return try self.createNode(start, .{ .identifier = .{ .name = name } }); + return try self.createNode(start, .{ .identifier = .{ .name = name, .is_raw = is_raw } }); }, .kw_closure, .kw_protocol, .kw_impl, .kw_ufcs => { // Contextual keywords used as identifiers in expressions @@ -2943,6 +2961,7 @@ pub const Parser = struct { if (self.current.tag == .identifier and self.peekNext() == .colon_equal) { const binding_name = self.tokenSlice(self.current); const binding_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end }; + const binding_is_raw = self.current.is_raw; self.advance(); // skip identifier self.advance(); // skip := const source_expr = try self.parseExpr(); @@ -2963,6 +2982,7 @@ pub const Parser = struct { .is_inline = false, .binding_name = binding_name, .binding_span = binding_span, + .binding_is_raw = binding_is_raw, } }); } @@ -3065,6 +3085,7 @@ pub const Parser = struct { if (self.current.tag == .identifier and self.peekNext() == .colon_equal) { const binding_name = self.tokenSlice(self.current); const binding_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end }; + const binding_is_raw = self.current.is_raw; self.advance(); // skip identifier self.advance(); // skip := const source_expr = try self.parseExpr(); @@ -3074,6 +3095,7 @@ pub const Parser = struct { .body = body, .binding_name = binding_name, .binding_span = binding_span, + .binding_is_raw = binding_is_raw, } }); } @@ -3128,8 +3150,10 @@ pub const Parser = struct { var capture_name: []const u8 = ""; var capture_span: ?ast.Span = null; + var capture_is_raw = false; var index_name: ?[]const u8 = null; var index_span: ?ast.Span = null; + var index_is_raw = false; var capture_by_ref = false; if (range_end != null) { @@ -3142,6 
+3166,7 @@ pub const Parser = struct { if (self.current.tag != .identifier) return self.fail("expected cursor variable name"); capture_name = self.tokenSlice(self.current); capture_span = .{ .start = self.current.loc.start, .end = self.current.loc.end }; + capture_is_raw = self.current.is_raw; self.advance(); try self.expect(.r_paren); } @@ -3157,12 +3182,14 @@ pub const Parser = struct { if (self.current.tag != .identifier) return self.fail("expected capture variable name"); capture_name = self.tokenSlice(self.current); capture_span = .{ .start = self.current.loc.start, .end = self.current.loc.end }; + capture_is_raw = self.current.is_raw; self.advance(); if (self.current.tag == .comma) { self.advance(); if (self.current.tag != .identifier) return self.fail("expected index variable name"); index_name = self.tokenSlice(self.current); index_span = .{ .start = self.current.loc.start, .end = self.current.loc.end }; + index_is_raw = self.current.is_raw; self.advance(); } try self.expect(.r_paren); @@ -3175,8 +3202,10 @@ pub const Parser = struct { .body = body, .capture_name = capture_name, .capture_span = capture_span, + .capture_is_raw = capture_is_raw, .index_name = index_name, .index_span = index_span, + .index_is_raw = index_is_raw, .range_end = range_end, .capture_by_ref = capture_by_ref, } }); @@ -3202,10 +3231,12 @@ pub const Parser = struct { // arm body (an expression) and is left for the body parse below. 
var capture: ?[]const u8 = null; var capture_span: ?ast.Span = null; + var capture_is_raw = false; if (self.current.tag == .l_paren and self.isLoneIdentParen()) { self.advance(); // '(' capture = self.tokenSlice(self.current); capture_span = .{ .start = self.current.loc.start, .end = self.current.loc.end }; + capture_is_raw = self.current.is_raw; self.advance(); // ident try self.expect(.r_paren); } @@ -3214,7 +3245,7 @@ pub const Parser = struct { self.advance(); try self.expect(.semicolon); const body = try self.createNode(arm_start, .{ .block = .{ .stmts = &.{} } }); - try arms.append(self.allocator, .{ .pattern = pattern, .body = body, .is_break = true, .capture = capture, .capture_span = capture_span }); + try arms.append(self.allocator, .{ .pattern = pattern, .body = body, .is_break = true, .capture = capture, .capture_span = capture_span, .capture_is_raw = capture_is_raw }); } else if (self.current.tag == .fat_arrow) { // Short form: (ident) => expr; self.advance(); @@ -3224,7 +3255,7 @@ pub const Parser = struct { // `;` is an arm terminator, not a value-discard — match arms are // exempt from the block trailing-`;` rule). const body = try self.createNode(arm_start, .{ .block = .{ .stmts = try self.allocator.dupe(*Node, &.{expr}), .produces_value = true } }); - try arms.append(self.allocator, .{ .pattern = pattern, .body = body, .is_break = false, .capture = capture, .capture_span = capture_span }); + try arms.append(self.allocator, .{ .pattern = pattern, .body = body, .is_break = false, .capture = capture, .capture_span = capture_span, .capture_is_raw = capture_is_raw }); } else { const stmts_start = self.current.loc.start; var stmts = std.ArrayList(*Node).empty; @@ -3235,7 +3266,7 @@ pub const Parser = struct { // yields its last statement's value — which, for a braced-block // arm body, still respects that inner block's own flag. 
const body = try self.createNode(stmts_start, .{ .block = .{ .stmts = try stmts.toOwnedSlice(self.allocator), .produces_value = true } }); - try arms.append(self.allocator, .{ .pattern = pattern, .body = body, .is_break = false, .capture = capture, .capture_span = capture_span }); + try arms.append(self.allocator, .{ .pattern = pattern, .body = body, .is_break = false, .capture = capture, .capture_span = capture_span, .capture_is_raw = capture_is_raw }); } } // Optional else arm (default) @@ -3597,18 +3628,21 @@ pub const Parser = struct { // All targets must be plain identifiers var names = std.ArrayList([]const u8).empty; var name_spans = std.ArrayList(ast.Span).empty; + var name_is_raw = std.ArrayList(bool).empty; for (targets.items) |target| { if (target.data != .identifier) { return self.fail("destructuring targets must be identifiers"); } try names.append(self.allocator, target.data.identifier.name); try name_spans.append(self.allocator, target.span); + try name_is_raw.append(self.allocator, target.data.identifier.is_raw); } const value = try self.parseExpr(); try self.expectSemicolonAfter(value); return try self.createNode(start, .{ .destructure_decl = .{ .names = try names.toOwnedSlice(self.allocator), .name_spans = try name_spans.toOwnedSlice(self.allocator), + .name_is_raw = try name_is_raw.toOwnedSlice(self.allocator), .value = value, } }); } From c0e1a5db82f683d25861ef174a70060680eb7b2c Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 19:16:37 +0300 Subject: [PATCH 04/11] feat(lang): reserved-name check covers `::` const/fn/type decls + scope call rewrite to raw provenance [F0.6] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A bare reserved-type-name `::` declaration was silently accepted, and the attempt-2 lowerCall rewrite then made a bare `s2 :: (…) {…}` function callable — bypassing the backtick rule for handwritten sx. 
The reserved-name binding check covered `:=` / typed-local / param / captures but NOT the `::` declaration form. - ast: `ConstDecl`/`FnDecl` carry `is_raw` + `name_span` threaded from the parser (parseConstBinding / parseFnDecl, all call sites incl. struct/impl methods). - semantic_diagnostics: reject a bare reserved spelling at EVERY declaration-name site — const, function (incl. struct/impl methods), struct/enum/union/error-set, protocol, foreign-class, ufcs alias, namespaced/library/c-import name. Backtick (`is_raw`) and the compiler's `#builtin` definition (`string :: []u8 #builtin`) are the only exemptions; a value whose node is itself a named decl defers to that node's own check. - c_import: synthesized foreign fn_decls are `is_raw = true`, so a C function whose own name collides with a reserved spelling (`int s2(int);`) imports and bare-calls unedited. - lower: scope the `.type_expr`→`.identifier` call rewrite to a callee FnDecl of RAW provenance (`is_raw`) — only a backtick / `#import c` foreign fn can carry a reserved-name spelling, so a non-raw match never gets rewritten. - examples: 0153 (positive — backtick `::` const + fn, bare + tick call), 1140 (negative — bare `::` const + fn rejected). - docs: specs.md + readme.md state the backtick is required at every binding site including `::` const / function / type declarations; issue 0089 banner updated. 
--- examples/0153-types-backtick-const-fn-decl.sx | 23 +++++ ...diagnostics-reserved-name-const-fn-decl.sx | 19 ++++ .../0153-types-backtick-const-fn-decl.exit | 1 + .../0153-types-backtick-const-fn-decl.stderr | 1 + .../0153-types-backtick-const-fn-decl.stdout | 3 + ...agnostics-reserved-name-const-fn-decl.exit | 1 + ...nostics-reserved-name-const-fn-decl.stderr | 11 +++ ...nostics-reserved-name-const-fn-decl.stdout | 1 + issues/0089-backtick-raw-identifier.md | 24 ++++- readme.md | 21 ++-- specs.md | 22 ++++- src/ast.zig | 15 +++ src/c_import.zig | 5 + src/ir/lower.zig | 16 ++- src/ir/semantic_diagnostics.zig | 99 +++++++++++++------ src/parser.zig | 32 +++--- 16 files changed, 232 insertions(+), 62 deletions(-) create mode 100644 examples/0153-types-backtick-const-fn-decl.sx create mode 100644 examples/1140-diagnostics-reserved-name-const-fn-decl.sx create mode 100644 examples/expected/0153-types-backtick-const-fn-decl.exit create mode 100644 examples/expected/0153-types-backtick-const-fn-decl.stderr create mode 100644 examples/expected/0153-types-backtick-const-fn-decl.stdout create mode 100644 examples/expected/1140-diagnostics-reserved-name-const-fn-decl.exit create mode 100644 examples/expected/1140-diagnostics-reserved-name-const-fn-decl.stderr create mode 100644 examples/expected/1140-diagnostics-reserved-name-const-fn-decl.stdout diff --git a/examples/0153-types-backtick-const-fn-decl.sx b/examples/0153-types-backtick-const-fn-decl.sx new file mode 100644 index 0000000..4b1c5ba --- /dev/null +++ b/examples/0153-types-backtick-const-fn-decl.sx @@ -0,0 +1,23 @@ +// Backtick raw-identifier escape at the `::` declaration sites: a leading +// backtick makes a CONSTANT name and a FUNCTION name raw, so a reserved type +// spelling (`s2`, `u8`) can be declared and used. Complements examples/0151 +// (var / param / field / global). 
The backtick fn is callable both via the +// backtick (`` `u8(5) ``) and bare (`u8(5)`) — the bare reserved-name callee +// resolves to the raw fn because its declaration is raw (issue 0089). A *bare* +// `s2 :: …` / `u8 :: …` declaration is still the reserved-name error (see +// examples/1140). +// Regression (issue 0089). +#import "modules/std.sx"; + +// Constant whose name is a reserved type spelling. +`s2 :: 2.5; + +// Function whose name is a reserved type spelling. +`u8 :: (n: s64) -> s64 { return n + 7; } + +main :: () -> s32 { + print("const = {}\n", `s2); + print("fn tick = {}\n", `u8(5)); + print("fn bare = {}\n", u8(5)); + return 0; +} diff --git a/examples/1140-diagnostics-reserved-name-const-fn-decl.sx b/examples/1140-diagnostics-reserved-name-const-fn-decl.sx new file mode 100644 index 0000000..270a594 --- /dev/null +++ b/examples/1140-diagnostics-reserved-name-const-fn-decl.sx @@ -0,0 +1,19 @@ +// A reserved/builtin type-name spelling is rejected as the NAME of a `::` +// declaration too — both a constant (`s2 :: 5`) and a function +// (`u8 :: (…) {…}`). A function name and a const name are binding sites just +// like `s2 := …`; previously the `::` decl forms slipped past the +// reserved-name check, so a bare reserved-name function compiled silently and +// became callable — bypassing the backtick rule that handwritten sx must use. +// The backtick escape (`` `s2 :: … ``, examples/0153) is the only way to spell +// these names; `#import c` foreign decls remain exempt (examples/1220). +// +// Regression (issue 0089). Expected: one error per declaration, each caret on +// the declared name; exit 1. 
+#import "modules/std.sx"; + +s2 :: 5; +u8 :: (n: s64) -> s64 { return n + 7; } + +main :: () -> s32 { + return 0; +} diff --git a/examples/expected/0153-types-backtick-const-fn-decl.exit b/examples/expected/0153-types-backtick-const-fn-decl.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0153-types-backtick-const-fn-decl.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0153-types-backtick-const-fn-decl.stderr b/examples/expected/0153-types-backtick-const-fn-decl.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0153-types-backtick-const-fn-decl.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0153-types-backtick-const-fn-decl.stdout b/examples/expected/0153-types-backtick-const-fn-decl.stdout new file mode 100644 index 0000000..c0c4c51 --- /dev/null +++ b/examples/expected/0153-types-backtick-const-fn-decl.stdout @@ -0,0 +1,3 @@ +const = 2.500000 +fn tick = 12 +fn bare = 12 diff --git a/examples/expected/1140-diagnostics-reserved-name-const-fn-decl.exit b/examples/expected/1140-diagnostics-reserved-name-const-fn-decl.exit new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/examples/expected/1140-diagnostics-reserved-name-const-fn-decl.exit @@ -0,0 +1 @@ +1 diff --git a/examples/expected/1140-diagnostics-reserved-name-const-fn-decl.stderr b/examples/expected/1140-diagnostics-reserved-name-const-fn-decl.stderr new file mode 100644 index 0000000..a1595fa --- /dev/null +++ b/examples/expected/1140-diagnostics-reserved-name-const-fn-decl.stderr @@ -0,0 +1,11 @@ +error: 's2' is a reserved type name and cannot be used as an identifier + --> examples/1140-diagnostics-reserved-name-const-fn-decl.sx:14:1 + | +14 | s2 :: 5; + | ^^ + +error: 'u8' is a reserved type name and cannot be used as an identifier + --> examples/1140-diagnostics-reserved-name-const-fn-decl.sx:15:1 + | +15 | u8 :: (n: s64) -> s64 { return n + 7; } + | ^^ diff --git 
a/examples/expected/1140-diagnostics-reserved-name-const-fn-decl.stdout b/examples/expected/1140-diagnostics-reserved-name-const-fn-decl.stdout new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/1140-diagnostics-reserved-name-const-fn-decl.stdout @@ -0,0 +1 @@ + diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md index ff0e275..c5caafd 100644 --- a/issues/0089-backtick-raw-identifier.md +++ b/issues/0089-backtick-raw-identifier.md @@ -16,6 +16,17 @@ > ([src/ir/semantic_diagnostics.zig]). The backtick works in every identifier > position (local, global, param, field, function name, struct member, later > reference, and all the control-flow/capture/binding forms). +> +> The `::` DECLARATION forms are binding sites too and are equally covered +> (F0.6 attempt-3): a bare reserved-name **constant** (`s2 :: 5`), **function** +> (`s2 :: (…) {…}`, incl. struct/impl methods), or **type** declaration +> (`struct`/`enum`/`union`/`error`/alias/`protocol`/foreign-class/ufcs/namespace) +> is rejected, exactly like `s2 := …`. `ConstDecl`/`FnDecl` carry `is_raw` + +> `name_span` threaded from the parser (`parseConstBinding`/`parseFnDecl`), so the +> backtick form (`` `s2 :: … ``) is exempt; the compiler's own builtin definition +> (`string :: []u8 #builtin`) is the sole non-backtick exemption (a `#builtin` +> constant defines the reserved type). This closed the attempt-2 hole where a +> bare `s2 :: (…) {…}` compiled silently and the call rewrite made it callable. > 2. **`#import c` foreign-name exemption.** `c_import.zig` synthesizes foreign > `#foreign` decls with `Param.is_raw = true`, so generated C param names that > collide with reserved type names (`s1`, `s2`) import unedited. @@ -24,8 +35,11 @@ > one in type position (`x : `s2 = 1`) is a clean parse error ([src/parser.zig] > `parseTypeExpr` atom). 
A reserved-spelled FUNCTION (backtick-declared or > `#import c` foreign) is bare-callable: `lowerCall` rewrites a `.type_expr` callee -> to an identifier when a function of that name is in scope ([src/ir/lower.zig]), -> so `s2(4)` resolves to the function (`TypeName(val)` is not a cast). A later BARE +> to an identifier when a function **of RAW provenance** of that name is in scope +> ([src/ir/lower.zig]) — the rewrite is scoped to the callee `FnDecl`'s `is_raw` +> flag (F0.6 attempt-3), so it only ever fires for a backtick / `#import c` foreign +> fn (the decl check guarantees no bare reserved-name fn exists). Thus `s2(4)` +> resolves to the function (`TypeName(val)` is not a cast). A later BARE > reference in value position resolves to the binding; a bare `s2` in type position > is still the type. > @@ -38,7 +52,11 @@ > (foreign param + function-name exemption, bare-callable foreign fn), > `examples/1139-diagnostics-backtick-raw-not-a-type.sx` (negative — raw in type > position), `examples/1119`/`1121`/`1123` (negative — bare reserved binding still -> rejected across all forms). Backtick lexer unit tests in `src/lexer.zig`. +> rejected across all forms), +> `examples/0153-types-backtick-const-fn-decl.sx` (positive — backtick `::` const + +> function decl, bare + backtick call), and +> `examples/1140-diagnostics-reserved-name-const-fn-decl.sx` (negative — bare `::` +> const + function decl rejected). Backtick lexer unit tests in `src/lexer.zig`. > > The original report is preserved below. diff --git a/readme.md b/readme.md index 79b91d8..5473311 100644 --- a/readme.md +++ b/readme.md @@ -106,15 +106,18 @@ z : s32 = ---; // uninitialized ``` Builtin type names (`s2`, `u8`, `bool`, `string`, …) are reserved and can't be used -as bare value identifiers. A leading backtick at the **binding site** escapes one -into a raw identifier — its text drops the backtick and it's never read as a type — -so reserved spellings (and keywords) work as ordinary names.
The backtick is needed -only where the name is declared; a later bare reference in value position resolves -to the binding, while a bare `s2` in type position is still the type. It works in -every identifier position (local, global, parameter, field, function name, and the -control-flow / capture / binding forms — destructure, `if`/`while` binding, `for` -capture, match capture, `catch`/`onfail` tag), and a reserved-spelled function is -bare-callable: +as bare identifiers at **any** binding site — a value binding (`:=` / typed local / +parameter), a `::` constant or function declaration, or a `::` type declaration +(`struct` / `enum` / `union` / alias / `protocol` / …) — each is an error +(`s2 :: 5` and `s2 :: (n) { … }` are rejected just like `s2 := 5`). A leading +backtick at the binding site escapes one into a raw identifier — its text drops the +backtick and it's never read as a type — so reserved spellings (and keywords) work +as ordinary names. The backtick is needed only where the name is declared; a later +bare reference in value position resolves to the binding, while a bare `s2` in type +position is still the type. It works in every identifier position (local, global, +parameter, field, function name, constant, and the control-flow / capture / binding +forms — destructure, `if`/`while` binding, `for` capture, match capture, +`catch`/`onfail` tag), and a reserved-spelled function is bare-callable: ```sx `s2 := 2.5; // value identifier "s2", distinct from the s2 type diff --git a/specs.md b/specs.md index 864b0e2..fe69040 100644 --- a/specs.md +++ b/specs.md @@ -17,14 +17,25 @@ Line comments start with `//` and extend to end of line. A spelling that names a builtin type — the arbitrary-width integers `s1`..`s64` / `u1`..`u64`, plus `bool`, `string`, `void`, `f32`, `f64`, `usize`, `isize`, `Any` — -is reserved. 
A bare value binding (`:=` / typed local / parameter name) spelled as -one of these is rejected: such a spelling parses as a *type*, not a value, so the -address-of / autoref paths would mis-lower it. +is reserved. A bare reserved spelling is rejected at **every binding site** — +anywhere handwritten sx introduces a name: a value binding (`:=` / typed local / +parameter), a `::` **constant** or **function** declaration, and a `::` **type** +declaration (`struct` / `enum` / `union` / `error` / type alias / `protocol` / +foreign class / ufcs alias / namespaced import). A value-spelled-as-type parses as +a *type*, not a value, so its address-of / autoref paths would mis-lower; a +type/const/function name spelled as a builtin would shadow the builtin. The only +exemptions are the backtick escape (below) and `#import c` foreign decls. ```sx -s2 := 2.5; // ERROR: 's2' is a reserved type name and cannot be used as an identifier +s2 := 2.5; // ERROR: 's2' is a reserved type name and cannot be used as an identifier +s2 :: 5; // ERROR — a `::` constant name is a binding site too +s2 :: (n: s64) -> s64 { n } // ERROR — so is a function name +s2 :: struct { x: s64; } // ERROR — and a type-declaration name ``` +(The stdlib's own builtin definitions — e.g. `string :: []u8 #builtin;` — are the +sole exception: a `#builtin` constant defines the reserved type and is allowed.) 
+ #### Backtick raw-identifier escape A leading backtick makes the following identifier **raw**: its text excludes the @@ -52,7 +63,8 @@ capture and index, a match-arm capture, and a `catch` / `onfail` tag binding: ```sx `u8 := 100; // global -`s2 :: (`s1: s64) -> s64 { `s1 } // function name + parameter +`s2 :: 2.5; // constant declaration +`u8 :: (`s1: s64) -> s64 { `s1 } // function name + parameter P :: struct { `s2: f64; } // struct field `u8, rest := pair(); // destructure name if `s16 := maybe() { } // optional binding diff --git a/src/ast.zig b/src/ast.zig index 23b7213..d3b49de 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -131,6 +131,14 @@ pub const FnDecl = struct { type_params: []const StructTypeParam = &.{}, is_arrow: bool = false, call_conv: CallingConvention = .default, + /// Span of the function's name token, for the reserved-type-name decl + /// diagnostic (issue 0089). Synthesized decls (e.g. `#import c` foreign + /// functions) leave it zero. + name_span: Span = .{ .start = 0, .end = 0 }, + /// True when the function NAME was written as a backtick raw identifier + /// (`` `s2 :: … ``) or synthesized by a `#import c` foreign decl. A raw + /// name is exempt from the reserved-type-name binding check (issue 0089). + is_raw: bool = false, }; pub const Param = struct { @@ -307,6 +315,13 @@ pub const ConstDecl = struct { name: []const u8, type_annotation: ?*Node, value: *Node, + /// Span of the constant's name token, for the reserved-type-name decl + /// diagnostic (issue 0089). + name_span: Span = .{ .start = 0, .end = 0 }, + /// True when the constant NAME was written as a backtick raw identifier + /// (`` `s2 :: … ``). A raw name is exempt from the reserved-type-name + /// binding check (issue 0089). 
+ is_raw: bool = false, }; pub const VarDecl = struct { diff --git a/src/c_import.zig b/src/c_import.zig index 29e21a2..f0c09b3 100644 --- a/src/c_import.zig +++ b/src/c_import.zig @@ -156,6 +156,11 @@ pub fn processCImport( .params = try params.toOwnedSlice(allocator), .return_type = ret_node, .body = foreign_body, + // A foreign C function whose own NAME collides with a reserved + // type spelling (`int s2(int);`) is RAW — exempt from the + // reserved-type-name decl check so generated bindings import + // without hand-edits (issue 0089). + .is_raw = true, } }, }; diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 56c0b05..aa536b9 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -6630,17 +6630,25 @@ pub const Lowering = struct { // there is no ambiguity. Rewrite the callee to an identifier so the // normal call machinery resolves it, symmetric to the bare-value // reference that already resolves via scope/globals (issue 0089). + // + // Scoped to RAW provenance: only a backtick (`is_raw`) or `#import c` + // foreign fn declaration may legally carry a reserved-name spelling + // (the decl check rejects every bare reserved-name sx fn). Refusing the + // rewrite for a non-raw match keeps a genuine reserved type spelling a + // type — belt-and-suspenders should any future path ever reintroduce a + // non-raw reserved-name callee. 
if (c.callee.data == .type_expr) { const tname = c.callee.data.type_expr.name; - const is_fn = self.program_index.fn_ast_map.contains(tname) or - (if (self.scope) |scope| scope.lookupFn(tname) != null else false); - if (is_fn) { + const eff = if (self.scope) |scope| scope.lookupFn(tname) orelse tname else tname; + const fd: ?*const ast.FnDecl = self.program_index.fn_ast_map.get(eff) orelse + self.program_index.fn_ast_map.get(tname); + if (fd) |decl| if (decl.is_raw) { const id_node = self.alloc.create(Node) catch unreachable; id_node.* = .{ .span = c.callee.span, .data = .{ .identifier = .{ .name = tname, .is_raw = true } } }; const rewritten = self.alloc.create(ast.Call) catch unreachable; rewritten.* = .{ .callee = id_node, .args = c.args }; c = rewritten; - } + }; } // Expand default parameter values for bare identifier callees: // when the caller omits trailing positional args, fill them in diff --git a/src/ir/semantic_diagnostics.zig b/src/ir/semantic_diagnostics.zig index 33bee66..9918099 100644 --- a/src/ir/semantic_diagnostics.zig +++ b/src/ir/semantic_diagnostics.zig @@ -127,6 +127,11 @@ pub const UnknownTypeChecker = struct { self.checkBindingNames(dd.value); }, .fn_decl => |fd| { + // A function NAME is a binding site too: a bare reserved-name + // `s2 :: (…) {…}` (free fn or struct/impl method) is rejected, + // exactly like `s2 := …`. Backtick (`` `s2 :: … ``) and + // `#import c` foreign fns set `is_raw` and are exempt (0089). + if (!fd.is_raw) self.checkBindingName(fd.name, fd.name_span); self.checkParamNames(fd.params); self.checkBindingNames(fd.body); }, @@ -197,33 +202,57 @@ pub const UnknownTypeChecker = struct { // `#objc_class` bodied method is lowered (M1.2), so its reserved // param/local names mis-lower the same as any other. .impl_block => |ib| for (ib.methods) |m| self.checkBindingNames(m), - .protocol_decl => |pd| for (pd.methods) |m| { - if (m.default_body) |body| { - for (m.param_names, m.param_name_spans, 0..) 
|pn, sp, i| { - if (i < m.param_name_is_raw.len and m.param_name_is_raw[i]) continue; - self.checkBindingName(pn, sp); + .protocol_decl => |pd| { + self.checkDeclName(node, pd.name); + for (pd.methods) |m| { + if (m.default_body) |body| { + for (m.param_names, m.param_name_spans, 0..) |pn, sp, i| { + if (i < m.param_name_is_raw.len and m.param_name_is_raw[i]) continue; + self.checkBindingName(pn, sp); + } + self.checkBindingNames(body); } - self.checkBindingNames(body); } }, - .foreign_class_decl => |fcd| for (fcd.members) |member| switch (member) { - .method => |m| if (m.body) |body| { - for (m.param_names, m.param_name_spans, 0..) |pn, sp, i| { - if (i < m.param_name_is_raw.len and m.param_name_is_raw[i]) continue; - self.checkBindingName(pn, sp); - } - self.checkBindingNames(body); - }, - .field, .extends, .implements => {}, + .foreign_class_decl => |fcd| { + // The sx-side alias (left of `::`) is a user-chosen name, so a + // reserved spelling is rejected like any other type decl (0089). + self.checkDeclName(node, fcd.name); + for (fcd.members) |member| switch (member) { + .method => |m| if (m.body) |body| { + for (m.param_names, m.param_name_spans, 0..) |pn, sp, i| { + if (i < m.param_name_is_raw.len and m.param_name_is_raw[i]) continue; + self.checkBindingName(pn, sp); + } + self.checkBindingNames(body); + }, + .field, .extends, .implements => {}, + }; }, // ── Container / control-flow / expression nodes: recurse children // so a binding nested anywhere below is still reached. ── // A namespaced import (`mod :: #import "..."`) is wrapped here, its // module decls held inline; descend so an imported module's // reserved-name binding is rejected too (issue 0077). - .namespace_decl => |nd| for (nd.decls) |d| self.checkBindingNames(d), - .const_decl => |cd| self.checkBindingNames(cd.value), + .namespace_decl => |nd| { + self.checkDeclName(node, nd.name); + for (nd.decls) |d| self.checkBindingNames(d); + }, + .const_decl => |cd| { + // A const BINDS `cd.name`. 
Reject a bare reserved spelling + // unless it is backtick-raw (`cd.is_raw`) or the compiler's + // blessed builtin definition (`string :: []u8 #builtin`, value + // `.builtin_expr`). When the value node is itself a named decl + // (struct/enum/union/error/fn), that node carries & checks its + // own name on recursion — don't double-check it here (0089). + switch (cd.value.data) { + .builtin_expr, .struct_decl, .enum_decl, .union_decl, .error_set_decl, .fn_decl => {}, + else => if (!cd.is_raw) self.checkBindingName(cd.name, cd.name_span), + } + self.checkBindingNames(cd.value); + }, .struct_decl => |sd| { + self.checkDeclName(node, sd.name); for (sd.methods) |m| self.checkBindingNames(m); for (sd.constants) |c| self.checkBindingNames(c); for (sd.field_defaults) |fdef| if (fdef) |d| self.checkBindingNames(d); @@ -286,12 +315,21 @@ pub const UnknownTypeChecker = struct { .comptime_expr => |ce| self.checkBindingNames(ce.expr), .insert_expr => |ins| self.checkBindingNames(ins.expr), .spread_expr => |se| self.checkBindingNames(se.operand), + // ── Named type / alias / import declarations: a bare reserved + // spelling as the declared name is rejected (issue 0089). These + // have no nested binding sites, so only the name is checked. A + // flat `#import`/`#import c` (name == null) binds nothing. ── + .enum_decl => |ed| self.checkDeclName(node, ed.name), + .union_decl => |ud| self.checkDeclName(node, ud.name), + .error_set_decl => |esd| self.checkDeclName(node, esd.name), + .ufcs_alias => |ua| self.checkDeclName(node, ua.name), + .library_decl => |ld| self.checkDeclName(node, ld.name), + .import_decl => |imp| if (imp.name) |n| self.checkDeclName(node, n), + .c_import_decl => |cid| if (cid.name) |n| self.checkDeclName(node, n), // ── Leaves & pure type-expression nodes: no binding sites below. ── // Type-expression subtrees carry only type names (no value - // bindings); enum / union / error-set declarations carry only field - // types + comptime constants. 
Listing each tag explicitly (rather - // than an `else`) is what forces a future binding-bearing node to be - // reconsidered here. + // bindings). Listing each tag explicitly (rather than an `else`) is + // what forces a future binding-bearing node to be reconsidered here. .int_literal, .float_literal, .bool_literal, @@ -299,10 +337,6 @@ pub const UnknownTypeChecker = struct { .identifier, .enum_literal, .type_expr, - .enum_decl, - .union_decl, - .error_set_decl, - .import_decl, .array_type_expr, .slice_type_expr, .parameterized_type_expr, @@ -321,13 +355,10 @@ pub const UnknownTypeChecker = struct { .builtin_expr, .compiler_expr, .foreign_expr, - .library_decl, .framework_decl, .function_type_expr, .closure_type_expr, .tuple_type_expr, - .ufcs_alias, - .c_import_decl, => {}, } } @@ -762,6 +793,18 @@ pub const UnknownTypeChecker = struct { if (isReservedTypeName(name)) self.diagnostics.addFmt(.err, span, "'{s}' is a reserved type name and cannot be used as an identifier", .{name}); } + + /// Reserved-name check for a `::` declaration whose own name binds an + /// identifier but carries no dedicated `name_span` field — struct / enum / + /// union / error-set / protocol / foreign-class type decls, ufcs aliases, + /// and namespaced imports (issue 0089). Each such node begins at its name + /// token, so the name's length isolates the caret onto the name. A + /// backtick raw / `#import c` foreign name never reaches here (those forms + /// are exempt at their own decl path). + fn checkDeclName(self: UnknownTypeChecker, node: *const Node, name: []const u8) void { + const span = ast.Span{ .start = node.span.start, .end = node.span.start + @as(u32, @intCast(name.len)) }; + self.checkBindingName(name, span); + } }; /// A binding name collides with a reserved/builtin type name exactly when the diff --git a/src/parser.zig b/src/parser.zig index ef4d279..0ef7bc7 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -152,7 +152,7 @@ pub const Parser = struct { // IDENT :: ... 
if (self.current.tag == .colon_colon) { self.advance(); - return self.parseConstBinding(name, start); + return self.parseConstBinding(name, name_span, start, name_is_raw); } // IDENT : type : value; (typed constant) @@ -173,7 +173,7 @@ pub const Parser = struct { return self.fail("expected '::', ':=', or ':' after identifier"); } - fn parseConstBinding(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node { + fn parseConstBinding(self: *Parser, name: []const u8, name_span: ast.Span, start_pos: u32, name_is_raw: bool) anyerror!*Node { // After `::` // Could be: #run expr, enum { ... }, (params) -> type { body }, or expr; @@ -215,7 +215,7 @@ pub const Parser = struct { const inner = try self.parseExpr(); try self.expect(.semicolon); const ct = try self.createNode(run_start, .{ .comptime_expr = .{ .expr = inner } }); - return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = ct } }); + return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = ct, .name_span = name_span, .is_raw = name_is_raw } }); } // Built-in declaration: name :: #builtin; @@ -224,7 +224,7 @@ pub const Parser = struct { self.advance(); try self.expect(.semicolon); const bi = try self.createNode(bi_start, .{ .builtin_expr = {} }); - return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = bi } }); + return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = bi, .name_span = name_span, .is_raw = name_is_raw } }); } // Enum declaration @@ -280,14 +280,14 @@ pub const Parser = struct { // Look ahead: is this a function or an expression starting with `(`? // Heuristic: if after matching parens we see `{` or `->`, it's a function. 
if (self.isFunctionDef()) { - return self.parseFnDecl(name, start_pos); + return self.parseFnDecl(name, name_span, name_is_raw, start_pos); } } // Bare block shorthand: name :: { body } is equivalent to name :: () { body } if (self.current.tag == .l_brace) { const body = try self.parseBlock(); - return try self.createNode(start_pos, .{ .fn_decl = .{ .name = name, .params = &.{}, .return_type = null, .body = body } }); + return try self.createNode(start_pos, .{ .fn_decl = .{ .name = name, .params = &.{}, .return_type = null, .body = body, .name_span = name_span, .is_raw = name_is_raw } }); } // Otherwise it's a constant expression @@ -299,7 +299,7 @@ pub const Parser = struct { self.advance(); try self.expect(.semicolon); const bi = try self.createNode(bi_start, .{ .builtin_expr = {} }); - return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = value, .value = bi } }); + return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = value, .value = bi, .name_span = name_span, .is_raw = name_is_raw } }); } // name :: type_expr #foreign [lib] ["c_name"]; — foreign with type annotation @@ -325,11 +325,11 @@ pub const Parser = struct { .library_ref = lib_ref, .c_name = c_name, } }); - return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = value, .value = fi } }); + return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = value, .value = fi, .name_span = name_span, .is_raw = name_is_raw } }); } try self.expect(.semicolon); - return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = value } }); + return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = value, .name_span = name_span, .is_raw = name_is_raw } }); } fn parseCImportBlock(self: *Parser, start: u32, name: ?[]const u8) anyerror!*Node { @@ -1044,10 +1044,12 @@ pub const Parser = struct { 
if (self.current.tag == .identifier and self.peekNext() == .colon_colon) { const method_start = self.current.loc.start; const method_name = self.tokenSlice(self.current); + const method_name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end }; + const method_is_raw = self.current.is_raw; self.advance(); // skip name self.advance(); // skip :: if (self.current.tag == .l_paren and self.isFunctionDef()) { - try methods.append(self.allocator, try self.parseFnDecl(method_name, method_start)); + try methods.append(self.allocator, try self.parseFnDecl(method_name, method_name_span, method_is_raw, method_start)); } else { // Non-function constant: name :: value; const value = try self.parseExpr(); @@ -1672,11 +1674,13 @@ pub const Parser = struct { } const method_start = self.current.loc.start; const method_name = self.tokenSlice(self.current); + const method_name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end }; + const method_is_raw = self.current.is_raw; self.advance(); try self.expect(.colon_colon); if (self.current.tag == .l_paren and self.isFunctionDef()) { - try methods.append(self.allocator, try self.parseFnDecl(method_name, method_start)); + try methods.append(self.allocator, try self.parseFnDecl(method_name, method_name_span, method_is_raw, method_start)); } else { return self.fail("expected function declaration in impl block"); } @@ -1900,7 +1904,7 @@ pub const Parser = struct { return try type_params.toOwnedSlice(self.allocator); } - fn parseFnDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node { + fn parseFnDecl(self: *Parser, name: []const u8, name_span: ast.Span, name_is_raw: bool, start_pos: u32) anyerror!*Node { const params = try self.parseParams(); // Optional return type @@ -1975,6 +1979,8 @@ pub const Parser = struct { .type_params = type_params, .is_arrow = is_arrow, .call_conv = call_conv, + .name_span = name_span, + .is_raw = name_is_raw, } }); } @@ -2043,7 +2049,7 @@ pub 
const Parser = struct { if (self.current.tag == .colon_colon) { self.advance(); - return self.parseConstBinding(name, start); + return self.parseConstBinding(name, name_span, start, name_is_raw); } if (self.current.tag == .colon_equal) { self.advance(); From 023971cae5579bb9ee9e336a57994917644d76a8 Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 20:27:53 +0300 Subject: [PATCH 05/11] =?UTF-8?q?feat(lang):=20universal=20backtick=20raw?= =?UTF-8?q?=20identifier=20=E2=80=94=20valid=20in=20value,=20decl,=20AND?= =?UTF-8?q?=20type=20position=20[F0.6]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AGRA ruling (attempt 4): `` `name `` is THE LITERAL identifier `name`, usable in EVERY position — the backtick only means "treat this token as a plain identifier, never the reserved keyword/type", and is never part of the name's text. - Raw in TYPE position is now VALID (reverses attempt-2 "raw is not a type"): `parseTypeExpr` emits a raw `type_expr`; `TypeResolver.resolveNamed` gains a `skip_builtin` flag (threaded from `te.is_raw` via lower.zig + type_bridge) so a `` `s2 `` reference resolves to a `` `s2 ``-declared type (struct/enum/union/alias), else a normal "unknown type 's2'" error (reportIfUnknownType skips the builtin exemption when raw). Bare `s2` in type position stays the builtin int. - Every declaration-name site is is_raw-exemptible: `is_raw` added to TypeExpr + StructDecl/EnumDecl/UnionDecl/ErrorSetDecl/ProtocolDecl/ForeignClassDecl/UfcsAlias/ NamespaceDecl/ImportDecl/CImportDecl/LibraryDecl; parser threads name_is_raw to every decl parse fn; namespace imports carry it through imports.addNamespace. Typed-const path (`` `s2 : s64 : 5 ``) now threads name_span+is_raw (fixes the 1:1-caret bug). 
- Check<->exemption made structurally symmetric: checkBindingName/checkDeclName take is_raw as a REQUIRED argument and skip inside the check, so no call site can validate a name without honoring the exemption (the desync cause of prior rounds). - Bare reserved-name declarations of every kind still error (0076 preserved); `#import c` foreign names stay auto-raw + bare-callable. specs.md + readme.md updated to the universal model. issue 0089 RESOLVED banner rewritten. Examples: replace 1139 (raw-not-a-type) with 0154 (raw type reference); add 0155 (typed const + union tag) and 1141 (bare type-decl negatives). Gate: zig build + zig build test + run_examples (426 passed, 0 failed). --- .../0151-types-backtick-raw-identifier.sx | 14 ++- .../0154-types-backtick-raw-type-reference.sx | 42 +++++++ ...55-types-backtick-typed-const-union-tag.sx | 24 ++++ ...139-diagnostics-backtick-raw-not-a-type.sx | 12 -- ...141-diagnostics-reserved-name-type-decl.sx | 22 ++++ ...154-types-backtick-raw-type-reference.exit | 1 + ...-types-backtick-raw-type-reference.stderr} | 0 ...4-types-backtick-raw-type-reference.stdout | 5 + ...-types-backtick-typed-const-union-tag.exit | 1 + ...ypes-backtick-typed-const-union-tag.stderr | 1 + ...ypes-backtick-typed-const-union-tag.stdout | 3 + ...diagnostics-backtick-raw-not-a-type.stderr | 5 - ...-diagnostics-reserved-name-type-decl.exit} | 0 ...diagnostics-reserved-name-type-decl.stderr | 29 +++++ ...diagnostics-reserved-name-type-decl.stdout | 1 + issues/0089-backtick-raw-identifier.md | 106 +++++++++-------- readme.md | 37 +++--- specs.md | 51 +++++--- src/ast.zig | 42 +++++++ src/imports.zig | 8 +- src/ir/lower.zig | 4 +- src/ir/semantic_diagnostics.zig | 111 +++++++++--------- src/ir/type_bridge.zig | 11 +- src/ir/type_resolver.test.zig | 24 +++- src/ir/type_resolver.zig | 27 +++-- src/parser.zig | 72 +++++++----- 26 files changed, 441 insertions(+), 212 deletions(-) create mode 100644 examples/0154-types-backtick-raw-type-reference.sx create 
mode 100644 examples/0155-types-backtick-typed-const-union-tag.sx delete mode 100644 examples/1139-diagnostics-backtick-raw-not-a-type.sx create mode 100644 examples/1141-diagnostics-reserved-name-type-decl.sx create mode 100644 examples/expected/0154-types-backtick-raw-type-reference.exit rename examples/expected/{1139-diagnostics-backtick-raw-not-a-type.stdout => 0154-types-backtick-raw-type-reference.stderr} (100%) create mode 100644 examples/expected/0154-types-backtick-raw-type-reference.stdout create mode 100644 examples/expected/0155-types-backtick-typed-const-union-tag.exit create mode 100644 examples/expected/0155-types-backtick-typed-const-union-tag.stderr create mode 100644 examples/expected/0155-types-backtick-typed-const-union-tag.stdout delete mode 100644 examples/expected/1139-diagnostics-backtick-raw-not-a-type.stderr rename examples/expected/{1139-diagnostics-backtick-raw-not-a-type.exit => 1141-diagnostics-reserved-name-type-decl.exit} (100%) create mode 100644 examples/expected/1141-diagnostics-reserved-name-type-decl.stderr create mode 100644 examples/expected/1141-diagnostics-reserved-name-type-decl.stdout diff --git a/examples/0151-types-backtick-raw-identifier.sx b/examples/0151-types-backtick-raw-identifier.sx index 186bfbe..5338d22 100644 --- a/examples/0151-types-backtick-raw-identifier.sx +++ b/examples/0151-types-backtick-raw-identifier.sx @@ -1,10 +1,12 @@ // Backtick raw-identifier escape: a leading backtick makes the following -// identifier RAW — its text excludes the backtick and it is NEVER -// type-classified, so a reserved type-name spelling (`s2`, `u8`, …) can be -// used as a value identifier. Exercised in every position: global, local, -// param, struct field + member access, function name + call, and a later -// reference. A *bare* `s2` is still the reserved type name (see -// examples/1119), so the escape is the only way to spell these as values. 
+// identifier RAW — its text excludes the backtick and it is never the +// reserved/builtin keyword, so a reserved type-name spelling (`s2`, `u8`, …) +// can be used as an ordinary identifier. Exercised in every VALUE position: +// global, local, param, struct field + member access, function name + call, +// and a later reference. (A raw identifier in TYPE position references a +// backtick-declared type instead — see examples/0154.) A *bare* `s2` is still +// the reserved type name (see examples/1119), so the escape is the only way to +// spell these as values. // Regression (issue 0089). #import "modules/std.sx"; diff --git a/examples/0154-types-backtick-raw-type-reference.sx b/examples/0154-types-backtick-raw-type-reference.sx new file mode 100644 index 0000000..9935b45 --- /dev/null +++ b/examples/0154-types-backtick-raw-type-reference.sx @@ -0,0 +1,42 @@ +// Backtick raw identifier in TYPE position (the universal model, issue 0089): +// `` `name `` is the LITERAL identifier `name` used as a type reference, never +// the builtin/reserved spelling. A reserved type spelling (`s2`, `u8`, …) can +// therefore both DECLARE a type (struct / enum / union / error-set / alias) and +// be REFERENCED as that type via the backtick — while a BARE `s2` in type +// position remains the signed-int type (see `add` below) and a bare reserved- +// name declaration still errors (see examples/1141). The backtick is required +// to declare or reference these names; it is never part of the name's text. +// Regression (issue 0089 — attempt-4 universal raw identifier). +#import "modules/std.sx"; + +// Type-introducing decls whose NAME is a reserved spelling. +`s2 :: struct { x: s64; } +`s8 :: enum { A; B; } +`u16 :: union { i: s32; f: f32; } +`u32 :: error { Bad, Empty } +RawAlias :: `s2; // alias to a backtick-declared struct + +// A bare `s2` in type position is still the 2-bit signed int. 
+add :: (a: s2, b: s2) -> s2 { return a + b; } + +main :: () -> s32 { + // Reference the backtick struct as a type; field access works. + v : `s2 = ---; + v.x = 7; + + // Reference via a normal alias too. + a : RawAlias = ---; + a.x = 11; + + // Backtick enum / union type references. + e : `s8 = .A; + u : `u16 = ---; + u.i = 5; + + print("struct = {}\n", v.x); + print("alias = {}\n", a.x); + print("enum = {}\n", e == .A); + print("union = {}\n", u.i); + print("bare = {}\n", add(1, 0)); // bare s2 = the 2-bit int type + return 0; +} diff --git a/examples/0155-types-backtick-typed-const-union-tag.sx b/examples/0155-types-backtick-typed-const-union-tag.sx new file mode 100644 index 0000000..476e65e --- /dev/null +++ b/examples/0155-types-backtick-typed-const-union-tag.sx @@ -0,0 +1,24 @@ +// Backtick raw identifier at the two remaining binding positions (issue 0089, +// attempt-4): a TYPED constant (`` `s2 : s64 : 5 ``) and a union TAG / field +// (`` `s2: s32 ``). The typed-const form previously slipped past the decl check +// without a name span (caret at 1:1); a bare `s2 : s64 : 5` is still rejected +// with the caret ON the name (see examples/1141). A union tag spelled with a +// reserved name works and is accessible bare or backticked. +// Regression (issue 0089 — attempt-4 typed const + union tag). +#import "modules/std.sx"; + +// Typed constant whose name is a reserved type spelling. +`s2 : s64 : 5; + +// Union whose tags are reserved type spellings. 
+Mix :: union { `s1: s32; `u8: f32; } + +main :: () -> s32 { + print("typed const = {}\n", `s2); + + m : Mix = ---; + m.`s1 = 42; + print("union tick = {}\n", m.`s1); // backtick member access + print("union bare = {}\n", m.s1); // bare member access — same field + return 0; +} diff --git a/examples/1139-diagnostics-backtick-raw-not-a-type.sx b/examples/1139-diagnostics-backtick-raw-not-a-type.sx deleted file mode 100644 index bfdb943..0000000 --- a/examples/1139-diagnostics-backtick-raw-not-a-type.sx +++ /dev/null @@ -1,12 +0,0 @@ -// A backtick raw identifier is a VALUE-name escape; it is never a type. Using -// one in type position (`x : `s2 = 1`) is a clean parse error, not a silent -// type-classification — reserved type names are the lowercase `sN`/`uN`/`fNN` -// spellings, and a real type never needs a backtick. A *bare* `s2` in type -// position remains the reserved signed-int type. -// Regression (issue 0089 — attempt-2: raw identifier rejected in type position). -#import "modules/std.sx"; - -main :: () -> s32 { - x : `s2 = 1; - return 0; -} diff --git a/examples/1141-diagnostics-reserved-name-type-decl.sx b/examples/1141-diagnostics-reserved-name-type-decl.sx new file mode 100644 index 0000000..d7cc59c --- /dev/null +++ b/examples/1141-diagnostics-reserved-name-type-decl.sx @@ -0,0 +1,22 @@ +// A reserved/builtin type-name spelling is rejected as the NAME of EVERY +// type-introducing `::` declaration too — struct, enum, union, error-set, and +// a typed constant — not just `:=` / value-const / function names (those are +// examples/1140). Each is a declaration-name binding site: a bare reserved +// spelling there mis-classifies and is rejected, exactly like `s2 := …`. The +// backtick escape (`` `s2 :: struct{…} ``, examples/0154) is the only way to +// spell these names in handwritten sx; `#import c` foreign decls stay exempt +// (examples/1220). +// +// Regression (issue 0089 — attempt-4: 0076 holds across every decl kind). 
+// Expected: one error per declaration, each caret ON the declared name; exit 1. +#import "modules/std.sx"; + +s8 :: struct { v: s64; } +s16 :: enum { A; B; } +u16 :: union { a: s32; b: f32; } +u32 :: error { Bad, Empty } +s2 : s64 : 5; + +main :: () -> s32 { + return 0; +} diff --git a/examples/expected/0154-types-backtick-raw-type-reference.exit b/examples/expected/0154-types-backtick-raw-type-reference.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0154-types-backtick-raw-type-reference.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stdout b/examples/expected/0154-types-backtick-raw-type-reference.stderr similarity index 100% rename from examples/expected/1139-diagnostics-backtick-raw-not-a-type.stdout rename to examples/expected/0154-types-backtick-raw-type-reference.stderr diff --git a/examples/expected/0154-types-backtick-raw-type-reference.stdout b/examples/expected/0154-types-backtick-raw-type-reference.stdout new file mode 100644 index 0000000..7bd00e5 --- /dev/null +++ b/examples/expected/0154-types-backtick-raw-type-reference.stdout @@ -0,0 +1,5 @@ +struct = 7 +alias = 11 +enum = true +union = 5 +bare = 1 diff --git a/examples/expected/0155-types-backtick-typed-const-union-tag.exit b/examples/expected/0155-types-backtick-typed-const-union-tag.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0155-types-backtick-typed-const-union-tag.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0155-types-backtick-typed-const-union-tag.stderr b/examples/expected/0155-types-backtick-typed-const-union-tag.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0155-types-backtick-typed-const-union-tag.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0155-types-backtick-typed-const-union-tag.stdout b/examples/expected/0155-types-backtick-typed-const-union-tag.stdout new file mode 100644 index 
0000000..b7e6c01 --- /dev/null +++ b/examples/expected/0155-types-backtick-typed-const-union-tag.stdout @@ -0,0 +1,3 @@ +typed const = 5 +union tick = 42 +union bare = 42 diff --git a/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stderr b/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stderr deleted file mode 100644 index 8608dcb..0000000 --- a/examples/expected/1139-diagnostics-backtick-raw-not-a-type.stderr +++ /dev/null @@ -1,5 +0,0 @@ -error: `s2` is a raw identifier, not a type — the backtick escape names a value, never a type - --> examples/1139-diagnostics-backtick-raw-not-a-type.sx:10:10 - | -10 | x : `s2 = 1; - | ^^ diff --git a/examples/expected/1139-diagnostics-backtick-raw-not-a-type.exit b/examples/expected/1141-diagnostics-reserved-name-type-decl.exit similarity index 100% rename from examples/expected/1139-diagnostics-backtick-raw-not-a-type.exit rename to examples/expected/1141-diagnostics-reserved-name-type-decl.exit diff --git a/examples/expected/1141-diagnostics-reserved-name-type-decl.stderr b/examples/expected/1141-diagnostics-reserved-name-type-decl.stderr new file mode 100644 index 0000000..55360ea --- /dev/null +++ b/examples/expected/1141-diagnostics-reserved-name-type-decl.stderr @@ -0,0 +1,29 @@ +error: 's8' is a reserved type name and cannot be used as an identifier + --> examples/1141-diagnostics-reserved-name-type-decl.sx:14:1 + | +14 | s8 :: struct { v: s64; } + | ^^ + +error: 's16' is a reserved type name and cannot be used as an identifier + --> examples/1141-diagnostics-reserved-name-type-decl.sx:15:1 + | +15 | s16 :: enum { A; B; } + | ^^^ + +error: 'u16' is a reserved type name and cannot be used as an identifier + --> examples/1141-diagnostics-reserved-name-type-decl.sx:16:1 + | +16 | u16 :: union { a: s32; b: f32; } + | ^^^ + +error: 'u32' is a reserved type name and cannot be used as an identifier + --> examples/1141-diagnostics-reserved-name-type-decl.sx:17:1 + | +17 | u32 :: error { Bad, Empty } + | 
^^^ + +error: 's2' is a reserved type name and cannot be used as an identifier + --> examples/1141-diagnostics-reserved-name-type-decl.sx:18:1 + | +18 | s2 : s64 : 5; + | ^^ diff --git a/examples/expected/1141-diagnostics-reserved-name-type-decl.stdout b/examples/expected/1141-diagnostics-reserved-name-type-decl.stdout new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/1141-diagnostics-reserved-name-type-decl.stdout @@ -0,0 +1 @@ + diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md index c5caafd..8c0df3d 100644 --- a/issues/0089-backtick-raw-identifier.md +++ b/issues/0089-backtick-raw-identifier.md @@ -1,62 +1,68 @@ # 0089 — backtick raw-identifier escape + `#import c` foreign-name exemption from the reserved-type-name rule -> **✅ RESOLVED** (foundation step F0.6). Two mechanisms, per Agra's design ruling: +> **✅ RESOLVED** (foundation step F0.6). Two mechanisms, per Agra's design +> ruling; the final shape is the **universal raw identifier** (attempt 4): +> `` `name `` is THE LITERAL identifier `name`, usable in EVERY position — value, +> declaration, AND type — meaning only "treat this token as a plain identifier, +> never the reserved keyword/type." The backtick is never part of the name's text. > > 1. **Backtick raw identifier.** The lexer recognises a leading backtick > (`` `s2 ``) and emits an `.identifier` token whose span excludes the backtick, -> carrying a `Token.is_raw` flag ([src/lexer.zig], [src/token.zig]). A raw -> identifier is NEVER type-classified — the parser skips `Type.fromName` for it -> in expression position ([src/parser.zig] `parsePrimary`), so it is always a -> value identifier. 
The `is_raw` flag threads through `ast.Identifier` and EVERY -> binding/capture form ([src/ast.zig]): `VarDecl` / `Param` plus `IfExpr` / -> `WhileExpr` optional bindings, `ForExpr` capture + index, `MatchArm` capture, -> `CatchExpr` / `OnFailStmt` tag bindings, `DestructureDecl` per-name, and the -> protocol-default-body / foreign-class method param lists. `UnknownTypeChecker` -> skips the reserved-name check at each of those arms when raw -> ([src/ir/semantic_diagnostics.zig]). The backtick works in every identifier -> position (local, global, param, field, function name, struct member, later -> reference, and all the control-flow/capture/binding forms). +> carrying a `Token.is_raw` flag ([src/lexer.zig], [src/token.zig]). The flag +> threads through `ast.Identifier`, `ast.TypeExpr`, and EVERY binding / capture / +> declaration node ([src/ast.zig]): `VarDecl` / `ConstDecl` / `Param` / `FnDecl` +> plus `IfExpr` / `WhileExpr` optional bindings, `ForExpr` capture + index, +> `MatchArm` capture, `CatchExpr` / `OnFailStmt` tag bindings, `DestructureDecl` +> per-name, protocol-default / foreign-class method params, AND every +> type-introducing decl — `StructDecl` / `EnumDecl` / `UnionDecl` / +> `ErrorSetDecl` / `ProtocolDecl` / `ForeignClassDecl` / `UfcsAlias` / +> `NamespaceDecl` / `ImportDecl` / `CImportDecl` / `LibraryDecl`. > -> The `::` DECLARATION forms are binding sites too and are equally covered -> (F0.6 attempt-3): a bare reserved-name **constant** (`s2 :: 5`), **function** -> (`s2 :: (…) {…}`, incl. struct/impl methods), or **type** declaration -> (`struct`/`enum`/`union`/`error`/alias/`protocol`/foreign-class/ufcs/namespace) -> is rejected, exactly like `s2 := …`. 
`ConstDecl`/`FnDecl` carry `is_raw` + -> `name_span` threaded from the parser (`parseConstBinding`/`parseFnDecl`), so the -> backtick form (`` `s2 :: … ``) is exempt; the compiler's own builtin definition -> (`string :: []u8 #builtin`) is the sole non-backtick exemption (a `#builtin` -> constant defines the reserved type). This closed the attempt-2 hole where a -> bare `s2 :: (…) {…}` compiled silently and the call rewrite made it callable. +> - **Value position.** The parser skips `Type.fromName` for a raw identifier +> in expression position ([src/parser.zig] `parsePrimary`), so `` `s2 `` is a +> value identifier; a later bare reference resolves to the binding. +> - **Type position.** `parseTypeExpr` emits a raw `type_expr` (no qualified / +> `Closure` / parameterized continuation). Resolution skips the builtin +> classifier (`TypeResolver.resolveNamed`'s `skip_builtin`, threaded from +> `te.is_raw` in [src/ir/lower.zig] and [src/ir/type_bridge.zig]) and looks up +> a `` `s2 ``-declared type (struct / enum / union / alias), else a NORMAL +> "unknown type 's2'" error (`UnknownTypeChecker.reportIfUnknownType` skips the +> builtin-name exemption when raw). A bare `s2` in type position is still the +> builtin int. +> - **Declaration position.** A bare reserved-name declaration of EVERY kind +> still errors (issue 0076 preserved); the backtick form is exempt. The check +> and the exemption are made structurally symmetric: +> `checkBindingName` / `checkDeclName` ([src/ir/semantic_diagnostics.zig]) take +> `is_raw` as a REQUIRED argument and skip inside the check — no call site can +> validate a name without also honoring the exemption, which is what kept the +> two from desyncing across the earlier attempts. > 2. **`#import c` foreign-name exemption.** `c_import.zig` synthesizes foreign -> `#foreign` decls with `Param.is_raw = true`, so generated C param names that -> collide with reserved type names (`s1`, `s2`) import unedited. 
+> `#foreign` decls with `Param.is_raw = true` (and the synthesized `FnDecl` +> `is_raw = true`), so generated C names that collide with reserved type names +> (`s1`, `s2`) import unedited and a reserved-name foreign fn is bare-callable. > -> **Boundary rules.** A raw identifier is a value name and is NEVER a type: using -> one in type position (`x : `s2 = 1`) is a clean parse error ([src/parser.zig] -> `parseTypeExpr` atom). A reserved-spelled FUNCTION (backtick-declared or -> `#import c` foreign) is bare-callable: `lowerCall` rewrites a `.type_expr` callee -> to an identifier when a function **of RAW provenance** of that name is in scope -> ([src/ir/lower.zig]) — the rewrite is scoped to the callee `FnDecl`'s `is_raw` -> flag (F0.6 attempt-3), so it only ever fires for a backtick / `#import c` foreign -> fn (the decl check guarantees no bare reserved-name fn exists), so `s2(4)` -> resolves to the function (`TypeName(val)` is not a cast). A later BARE -> reference in value position resolves to the binding; a bare `s2` in type position -> is still the type. +> **Bare-callable foreign / backtick fn.** `lowerCall` rewrites a `.type_expr` +> callee to an identifier when a function **of RAW provenance** of that name is in +> scope ([src/ir/lower.zig]) — scoped to the callee `FnDecl`'s `is_raw` flag, so it +> only ever fires for a backtick / `#import c` foreign fn (the decl check guarantees +> no bare reserved-name fn exists). `s2(4)` resolves to the function (`TypeName(val)` +> is not a cast). > -> A *bare* reserved-name binding in sx still errors (issue 0076 preserved): the -> `is_raw`-gated skip only fires for backtick / foreign names. 
Regression tests: -> `examples/0151-types-backtick-raw-identifier.sx` (backtick, decl positions), -> `examples/0152-types-backtick-control-flow.sx` (every control-flow/capture form -> + bare ref/call/member access), `examples/1054-errors-backtick-reserved-binding.sx` -> (`catch`/`onfail` tag bindings), `examples/1220-ffi-c-import-reserved-name-params.{sx,h,c}` -> (foreign param + function-name exemption, bare-callable foreign fn), -> `examples/1139-diagnostics-backtick-raw-not-a-type.sx` (negative — raw in type -> position), `examples/1119`/`1121`/`1123` (negative — bare reserved binding still -> rejected across all forms), -> `examples/0153-types-backtick-const-fn-decl.sx` (positive — backtick `::` const + -> function decl, bare + backtick call), and -> `examples/1140-diagnostics-reserved-name-const-fn-decl.sx` (negative — bare `::` -> const + function decl rejected). Backtick lexer unit tests in `src/lexer.zig`. +> **Regression tests.** `examples/0151-types-backtick-raw-identifier.sx` (every +> VALUE position), `examples/0152-types-backtick-control-flow.sx` (every +> control-flow / capture form), `examples/0153-types-backtick-const-fn-decl.sx` +> (backtick `::` const + fn decl, bare + backtick call), +> `examples/0154-types-backtick-raw-type-reference.sx` (raw in TYPE position — +> struct / enum / union / alias decl + reference; bare `s2` still the int), +> `examples/0155-types-backtick-typed-const-union-tag.sx` (typed const + union tag), +> `examples/1054-errors-backtick-reserved-binding.sx` (`catch`/`onfail` tag +> bindings), `examples/1220-ffi-c-import-reserved-name-params.{sx,h,c}` (foreign +> param + fn-name exemption, bare-callable foreign fn); negatives +> `examples/1119`/`1121`/`1123` (bare reserved binding across forms), +> `examples/1140-diagnostics-reserved-name-const-fn-decl.sx` (bare const + fn decl), +> `examples/1141-diagnostics-reserved-name-type-decl.sx` (bare struct / enum / union +> / error / typed-const decl). 
Backtick lexer + `resolveNamed(skip_builtin)` unit +> tests in `src/lexer.zig` / `src/ir/type_resolver.test.zig`. > > The original report is preserved below. diff --git a/readme.md b/readme.md index 5473311..e4f6fc0 100644 --- a/readme.md +++ b/readme.md @@ -105,26 +105,31 @@ y : s32 = 0; // explicit type z : s32 = ---; // uninitialized ``` -Builtin type names (`s2`, `u8`, `bool`, `string`, …) are reserved and can't be used -as bare identifiers at **any** binding site — a value binding (`:=` / typed local / -parameter), a `::` constant or function declaration, or a `::` type declaration -(`struct` / `enum` / `union` / alias / `protocol` / …) — each is an error -(`s2 :: 5` and `s2 :: (n) { … }` are rejected just like `s2 := 5`). A leading -backtick at the binding site escapes one into a raw identifier — its text drops the -backtick and it's never read as a type — so reserved spellings (and keywords) work -as ordinary names. The backtick is needed only where the name is declared; a later -bare reference in value position resolves to the binding, while a bare `s2` in type -position is still the type. It works in every identifier position (local, global, -parameter, field, function name, constant, and the control-flow / capture / binding -forms — destructure, `if`/`while` binding, `for` capture, match capture, -`catch`/`onfail` tag), and a reserved-spelled function is bare-callable: +Builtin type names (`s2`, `u8`, `bool`, `string`, …) are reserved and a *bare* +spelling can't be used as an identifier at **any** binding site — a value binding +(`:=` / typed local / parameter), a `::` constant or function declaration, or a +`::` type declaration (`struct` / `enum` / `union` / alias / `protocol` / …) — each +is an error (`s2 :: 5` and `s2 :: (n) { … }` are rejected just like `s2 := 5`). 
A +leading backtick escapes one into a **raw identifier**: `` `name `` is the literal +identifier `name` (the backtick drops out of the text), usable in **every** +position — value, declaration, and type. It is the only way handwritten sx can +spell a reserved name. ```sx -`s2 := 2.5; // value identifier "s2", distinct from the s2 type -print("{}\n", `s2); // 2.5 (or bare `s2`) +`s2 := 2.5; // identifier "s2", distinct from the s2 type +print("{}\n", `s2); // 2.5 (or bare `s2` in value position) + +`s2 :: struct { x: s64; } // declare a type named with a reserved spelling +v : `s2 = ---; // and reference it as a type — resolves to the struct +x : s2 = 3; // bare `s2` in type position is still the int type ``` -A raw identifier is a value name, never a type — `x : `s2 = 1` is an error. +It works in every identifier position — local, global, parameter, struct field, +union tag, function name, type/alias/import name, constant, and the control-flow / +capture / binding forms (destructure, `if`/`while` binding, `for` capture, match +capture, `catch`/`onfail` tag) — and a reserved-spelled function is bare-callable +(`s2(10)`). A backtick name used as a type resolves to a `` `name ``-declared type, +else a normal `unknown type` error. Foreign declarations from `#import c { … }` are exempt automatically: C names that collide with reserved type names (e.g. `s1`, `s2`) import unedited, and a foreign diff --git a/specs.md b/specs.md index fe69040..56ca1d2 100644 --- a/specs.md +++ b/specs.md @@ -38,34 +38,49 @@ sole exception: a `#builtin` constant defines the reserved type and is allowed.) #### Backtick raw-identifier escape -A leading backtick makes the following identifier **raw**: its text excludes the -backtick and it is never type-classified, so a reserved-type-name spelling can be -used as an ordinary value identifier. The backtick is required at the **binding -site** — the declaration that introduces the name — to escape the reserved-name -rule. 
A later reference is resolved by position: in **value** position a bare `s2` -resolves to the binding; in **type** position a bare `s2` is still the signed-int -type. +A leading backtick makes the following token a **raw identifier**: `` `name `` is +the **literal identifier** `name` — "treat this token as a plain identifier, never +the reserved keyword/type." The backtick is not part of the name's text (the text +is `name`), and the escape is usable in **every position**: value, declaration, +**and type**. It is the only way handwritten sx can spell a reserved name. ```sx -`s2 := 2.5; // OK — value identifier "s2", distinct from the s2 type +`s2 := 2.5; // OK — identifier "s2", distinct from the s2 type print("{}\n", `s2); // 2.5 (backtick reference) -print("{}\n", s2); // 2.5 (bare reference, resolves to the binding) -x : s2 = 3; // bare `s2` in TYPE position is still the s2 type +print("{}\n", s2); // 2.5 (bare reference in value position → the binding) +x : s2 = 3; // bare `s2` in TYPE position is still the s2 int type ``` -A raw identifier is a value name and is **never a type**: using one in type -position (`x : `s2 = 1`) is a parse error. +**Type position.** A backtick in type position is the literal name used as a type +reference: it resolves to a `` `s2 ``-declared type (struct / enum / union / type +alias / …), and never the builtin. A bare `s2` in type position stays the builtin +int; a backtick name with no matching declaration is a normal `unknown type 's2'` +error. 
-The escape works in **every identifier position** — local, global, parameter, -struct field, function name, a later reference, and every control-flow / capture / -binding form: a destructure name, an `if` / `while` optional binding, a `for` -capture and index, a match-arm capture, and a `catch` / `onfail` tag binding: +```sx +`s2 :: struct { x: s64; } // declare a type whose name is a reserved spelling +v : `s2 = ---; // reference it as a type — resolves to the struct +v.x = 7; +x : s2 = 3; // bare `s2` is still the 2-bit signed int +``` + +**Declaration position.** A *bare* reserved-name declaration of every kind still +errors (a value binding, a `::` constant / function, and a `::` type / alias / +protocol / foreign-class / ufcs / namespaced-import name); the backtick form is +exempt. The escape works in **every identifier position** — local, global, +parameter, struct field, union tag, function name, type/alias/import name, a later +reference, and every control-flow / capture / binding form (destructure name, +`if` / `while` optional binding, `for` capture and index, match-arm capture, and a +`catch` / `onfail` tag binding): ```sx `u8 := 100; // global `s2 :: 2.5; // constant declaration +`s2 : s64 : 5; // typed constant declaration `u8 :: (`s1: s64) -> s64 { `s1 } // function name + parameter P :: struct { `s2: f64; } // struct field +M :: union { `s1: s32; } // union tag +`u16 :: enum { A; B; } // type-declaration name `u8, rest := pair(); // destructure name if `s16 := maybe() { } // optional binding for xs: (`bool, `u16) { } // for capture + index @@ -73,8 +88,8 @@ x catch `s2 { } // catch tag binding ``` A reserved-spelled **function** is bare-callable: `` `s2 :: (n: s64) -> s64 { … } `` -can be invoked as `s2(10)` (the callee spelling parses as a type but resolves to -the function when one of that name is in scope; `TypeName(val)` is not a cast). 
+can be invoked as `s2(10)` (the bare callee spelling parses as a type but resolves +to the function when one of that name is in scope; `TypeName(val)` is not a cast). A backtick may also escape a keyword spelling (`` `for ``, `` `struct ``), yielding an identifier with that text. diff --git a/src/ast.zig b/src/ast.zig index d3b49de..f3c3541 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -380,12 +380,19 @@ pub const EnumDecl = struct { is_flags: bool = false, variant_values: []const ?*Node = &.{}, // explicit value per variant (null = auto), empty = all auto backing_type: ?*Node = null, // optional backing type: enum u8 { ... } + /// True when the declared NAME was a backtick raw identifier + /// (`` `s2 :: enum { … } ``) — exempt from the reserved-type-name decl + /// check (issue 0089). A bare reserved-name decl still errors. + is_raw: bool = false, }; pub const UnionDecl = struct { name: []const u8, field_names: []const []const u8, field_types: []const *Node, + /// True when the declared NAME was a backtick raw identifier — exempt from + /// the reserved-type-name decl check (issue 0089). + is_raw: bool = false, }; /// `Foo :: error { TagA, TagB }` — a named error set. Tags are bare @@ -393,6 +400,9 @@ pub const UnionDecl = struct { pub const ErrorSetDecl = struct { name: []const u8, tag_names: []const []const u8, + /// True when the declared NAME was a backtick raw identifier — exempt from + /// the reserved-type-name decl check (issue 0089). + is_raw: bool = false, }; pub const StructTypeParam = struct { @@ -418,6 +428,10 @@ pub const StructDecl = struct { using_entries: []const UsingEntry = &.{}, methods: []const *Node = &.{}, // fn_decl nodes for struct methods constants: []const *Node = &.{}, // const_decl nodes for struct-level constants + /// True when the declared NAME was a backtick raw identifier + /// (`` `s2 :: struct { … } ``) — exempt from the reserved-type-name decl + /// check (issue 0089). A bare reserved-name decl still errors. 
+ is_raw: bool = false, }; pub const StructFieldInit = struct { @@ -444,6 +458,12 @@ pub const TypeExpr = struct { name: []const u8, is_generic: bool = false, protocol_constraints: []const []const u8 = &.{}, // e.g. ["Eq", "Hashable"] for $T/Eq/Hashable + /// True when written as a backtick raw identifier in type position + /// (`` `s2 ``). Such a reference is the LITERAL name `s2` used as a type — + /// resolution skips the builtin/reserved classifier and looks up a + /// `` `s2 ``-declared type (struct/enum/union/alias), else "unknown type" + /// (issue 0089). A bare `s2` keeps `is_raw = false` and is the int type. + is_raw: bool = false, }; /// `$[]` in type position. Resolves to the i-th @@ -530,6 +550,10 @@ pub const ReturnStmt = struct { pub const ImportDecl = struct { path: []const u8, name: ?[]const u8, + /// True when the namespace NAME was a backtick raw identifier + /// (`` `s2 :: #import "…" ``) — exempt from the reserved-type-name decl + /// check (issue 0089). A flat `#import` (name == null) binds nothing. + is_raw: bool = false, }; pub const ArrayTypeExpr = struct { @@ -638,6 +662,9 @@ pub const SpreadExpr = struct { pub const NamespaceDecl = struct { name: []const u8, decls: []const *Node, + /// True when the namespace NAME was a backtick raw identifier — exempt + /// from the reserved-type-name decl check (issue 0089). + is_raw: bool = false, }; pub const ForeignExpr = struct { @@ -648,6 +675,9 @@ pub const ForeignExpr = struct { pub const LibraryDecl = struct { lib_name: []const u8, name: []const u8, // sx-side constant name + /// True when the constant NAME was a backtick raw identifier — exempt from + /// the reserved-type-name decl check (issue 0089). 
+ is_raw: bool = false, }; pub const FrameworkDecl = struct { @@ -691,6 +721,9 @@ pub const TupleElement = struct { pub const UfcsAlias = struct { name: []const u8, target: []const u8, + /// True when the alias NAME was a backtick raw identifier — exempt from + /// the reserved-type-name decl check (issue 0089). + is_raw: bool = false, }; pub const CImportDecl = struct { @@ -700,6 +733,9 @@ pub const CImportDecl = struct { flags: []const []const u8, name: ?[]const u8 = null, bitcode_paths: []const []const u8 = &.{}, // populated during import resolution + /// True when the namespace NAME was a backtick raw identifier — exempt + /// from the reserved-type-name decl check (issue 0089). + is_raw: bool = false, }; pub const ProtocolMethodDecl = struct { @@ -720,6 +756,9 @@ pub const ProtocolDecl = struct { methods: []const ProtocolMethodDecl, is_inline: bool = false, // #inline — embedded fn ptrs instead of vtable pointer type_params: []const StructTypeParam = &.{}, // for `protocol(Target: Type) { ... }` + /// True when the declared NAME was a backtick raw identifier — exempt from + /// the reserved-type-name decl check (issue 0089). + is_raw: bool = false, }; pub const ForeignRuntime = enum { @@ -776,6 +815,9 @@ pub const ForeignClassDecl = struct { members: []const ForeignClassMember = &.{}, is_foreign: bool = false, // `#foreign #...` prefix — class is provided by the foreign runtime; we only reference it is_main: bool = false, // `#jni_main` / `#objc_main` — class is the launchable entry (Activity / UIApplicationDelegate / ...) + /// True when the sx-side alias NAME was a backtick raw identifier — exempt + /// from the reserved-type-name decl check (issue 0089). 
+ is_raw: bool = false, }; pub const JniEnvBlock = struct { diff --git a/src/imports.zig b/src/imports.zig index 5ce9de5..6afe18d 100644 --- a/src/imports.zig +++ b/src/imports.zig @@ -354,6 +354,7 @@ pub const ResolvedModule = struct { name: []const u8, other: ResolvedModule, span: ast.Span, + is_raw: bool, ) !void { const ns_node = try allocator.create(Node); ns_node.* = .{ @@ -361,6 +362,10 @@ pub const ResolvedModule = struct { .data = .{ .namespace_decl = .{ .name = name, .decls = other.decls, + // Carry the backtick raw escape from the `name :: #import …` + // form so a reserved-name namespace is exempt from the decl + // check, symmetric to every other decl site (issue 0089). + .is_raw = is_raw, } }, }; try self.scope.put(name, {}); @@ -487,6 +492,7 @@ pub fn resolveImports( .data = .{ .namespace_decl = .{ .name = ns_name, .decls = try ns_decls.toOwnedSlice(allocator), + .is_raw = ci.is_raw, } }, }; ns_node.source_file = file_path; @@ -569,7 +575,7 @@ pub fn resolveImports( }; if (imp.name) |ns_name| { - try mod.addNamespace(allocator, &decl_list, &own_decl_list, &seen_in_list, ns_name, imported_mod, decl.span); + try mod.addNamespace(allocator, &decl_list, &own_decl_list, &seen_in_list, ns_name, imported_mod, decl.span, imp.is_raw); } else { try mod.mergeFlat(allocator, &decl_list, &seen_in_list, &seen_nodes, imported_mod); } diff --git a/src/ir/lower.zig b/src/ir/lower.zig index aa536b9..79bc7d3 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -11892,8 +11892,8 @@ pub const Lowering = struct { // type_bridge, which now takes the alias map as an explicit argument // (the `TypeTable.aliases` borrow is gone, A2.3). 
switch (node.data) { - .type_expr => |te| return self.typeResolver().resolveName(te.name), - .identifier => |id| return self.typeResolver().resolveName(id.name), + .type_expr => |te| return self.typeResolver().resolveName(te.name, te.is_raw), + .identifier => |id| return self.typeResolver().resolveName(id.name, id.is_raw), // A non-spread tuple literal in a type position is a tuple-type // literal (`(s32, s32)`); validate its elements are types and reject // non-type elements loudly (issue 0067). diff --git a/src/ir/semantic_diagnostics.zig b/src/ir/semantic_diagnostics.zig index 9918099..9dacdb4 100644 --- a/src/ir/semantic_diagnostics.zig +++ b/src/ir/semantic_diagnostics.zig @@ -116,13 +116,16 @@ pub const UnknownTypeChecker = struct { if (node.source_file) |sf| self.diagnostics.current_source_file = sf; switch (node.data) { // ── Binding-introducing nodes: check the name(s), then recurse. ── + // Every site passes the node's own `is_raw` straight to the check — + // never an `if (!is_raw)` call-site guard — so the check and its + // exemption are one operation that cannot be threaded apart (0089). .var_decl => |vd| { - if (!vd.is_raw) self.checkBindingName(vd.name, vd.name_span); + self.checkBindingName(vd.name, vd.name_span, vd.is_raw); if (vd.value) |v| self.checkBindingNames(v); }, .destructure_decl => |dd| { for (dd.names, dd.name_spans, dd.name_is_raw) |n, sp, raw| { - if (!raw) self.checkBindingName(n, sp); + self.checkBindingName(n, sp, raw); } self.checkBindingNames(dd.value); }, @@ -131,7 +134,7 @@ pub const UnknownTypeChecker = struct { // `s2 :: (…) {…}` (free fn or struct/impl method) is rejected, // exactly like `s2 := …`. Backtick (`` `s2 :: … ``) and // `#import c` foreign fns set `is_raw` and are exempt (0089). 
- if (!fd.is_raw) self.checkBindingName(fd.name, fd.name_span); + self.checkBindingName(fd.name, fd.name_span, fd.is_raw); self.checkParamNames(fd.params); self.checkBindingNames(fd.body); }, @@ -140,29 +143,23 @@ pub const UnknownTypeChecker = struct { self.checkBindingNames(lm.body); }, .param => |p| { - if (!p.is_raw) self.checkBindingName(p.name, p.name_span); + self.checkBindingName(p.name, p.name_span, p.is_raw); if (p.default_expr) |de| self.checkBindingNames(de); }, .if_expr => |ie| { - if (ie.binding_name) |bn| { - if (!ie.binding_is_raw) self.checkBindingName(bn, ie.binding_span); - } + if (ie.binding_name) |bn| self.checkBindingName(bn, ie.binding_span, ie.binding_is_raw); self.checkBindingNames(ie.condition); self.checkBindingNames(ie.then_branch); if (ie.else_branch) |e| self.checkBindingNames(e); }, .while_expr => |we| { - if (we.binding_name) |bn| { - if (!we.binding_is_raw) self.checkBindingName(bn, we.binding_span); - } + if (we.binding_name) |bn| self.checkBindingName(bn, we.binding_span, we.binding_is_raw); self.checkBindingNames(we.condition); self.checkBindingNames(we.body); }, .for_expr => |fe| { - if (fe.capture_name.len != 0 and !fe.capture_is_raw) self.checkBindingName(fe.capture_name, fe.capture_span); - if (fe.index_name) |idx| { - if (!fe.index_is_raw) self.checkBindingName(idx, fe.index_span); - } + if (fe.capture_name.len != 0) self.checkBindingName(fe.capture_name, fe.capture_span, fe.capture_is_raw); + if (fe.index_name) |idx| self.checkBindingName(idx, fe.index_span, fe.index_is_raw); self.checkBindingNames(fe.iterable); if (fe.range_end) |re| self.checkBindingNames(re); self.checkBindingNames(fe.body); @@ -170,31 +167,23 @@ pub const UnknownTypeChecker = struct { .match_expr => |me| { self.checkBindingNames(me.subject); for (me.arms) |arm| { - if (arm.capture) |cap| { - if (!arm.capture_is_raw) self.checkBindingName(cap, arm.capture_span); - } + if (arm.capture) |cap| self.checkBindingName(cap, arm.capture_span, 
arm.capture_is_raw); if (arm.pattern) |p| self.checkBindingNames(p); self.checkBindingNames(arm.body); } }, .match_arm => |arm| { - if (arm.capture) |cap| { - if (!arm.capture_is_raw) self.checkBindingName(cap, arm.capture_span); - } + if (arm.capture) |cap| self.checkBindingName(cap, arm.capture_span, arm.capture_is_raw); if (arm.pattern) |p| self.checkBindingNames(p); self.checkBindingNames(arm.body); }, .catch_expr => |ce| { - if (ce.binding) |b| { - if (!ce.binding_is_raw) self.checkBindingName(b, ce.binding_span); - } + if (ce.binding) |b| self.checkBindingName(b, ce.binding_span, ce.binding_is_raw); self.checkBindingNames(ce.operand); self.checkBindingNames(ce.body); }, .onfail_stmt => |os| { - if (os.binding) |b| { - if (!os.binding_is_raw) self.checkBindingName(b, os.binding_span); - } + if (os.binding) |b| self.checkBindingName(b, os.binding_span, os.binding_is_raw); self.checkBindingNames(os.body); }, // impl / protocol-default / foreign-class method bodies: each @@ -203,12 +192,12 @@ pub const UnknownTypeChecker = struct { // param/local names mis-lower the same as any other. .impl_block => |ib| for (ib.methods) |m| self.checkBindingNames(m), .protocol_decl => |pd| { - self.checkDeclName(node, pd.name); + self.checkDeclName(node, pd.name, pd.is_raw); for (pd.methods) |m| { if (m.default_body) |body| { for (m.param_names, m.param_name_spans, 0..) |pn, sp, i| { - if (i < m.param_name_is_raw.len and m.param_name_is_raw[i]) continue; - self.checkBindingName(pn, sp); + const raw = i < m.param_name_is_raw.len and m.param_name_is_raw[i]; + self.checkBindingName(pn, sp, raw); } self.checkBindingNames(body); } @@ -217,12 +206,12 @@ pub const UnknownTypeChecker = struct { .foreign_class_decl => |fcd| { // The sx-side alias (left of `::`) is a user-chosen name, so a // reserved spelling is rejected like any other type decl (0089). 
- self.checkDeclName(node, fcd.name); + self.checkDeclName(node, fcd.name, fcd.is_raw); for (fcd.members) |member| switch (member) { .method => |m| if (m.body) |body| { for (m.param_names, m.param_name_spans, 0..) |pn, sp, i| { - if (i < m.param_name_is_raw.len and m.param_name_is_raw[i]) continue; - self.checkBindingName(pn, sp); + const raw = i < m.param_name_is_raw.len and m.param_name_is_raw[i]; + self.checkBindingName(pn, sp, raw); } self.checkBindingNames(body); }, @@ -235,7 +224,7 @@ pub const UnknownTypeChecker = struct { // module decls held inline; descend so an imported module's // reserved-name binding is rejected too (issue 0077). .namespace_decl => |nd| { - self.checkDeclName(node, nd.name); + self.checkDeclName(node, nd.name, nd.is_raw); for (nd.decls) |d| self.checkBindingNames(d); }, .const_decl => |cd| { @@ -247,12 +236,12 @@ pub const UnknownTypeChecker = struct { // own name on recursion — don't double-check it here (0089). switch (cd.value.data) { .builtin_expr, .struct_decl, .enum_decl, .union_decl, .error_set_decl, .fn_decl => {}, - else => if (!cd.is_raw) self.checkBindingName(cd.name, cd.name_span), + else => self.checkBindingName(cd.name, cd.name_span, cd.is_raw), } self.checkBindingNames(cd.value); }, .struct_decl => |sd| { - self.checkDeclName(node, sd.name); + self.checkDeclName(node, sd.name, sd.is_raw); for (sd.methods) |m| self.checkBindingNames(m); for (sd.constants) |c| self.checkBindingNames(c); for (sd.field_defaults) |fdef| if (fdef) |d| self.checkBindingNames(d); @@ -319,13 +308,13 @@ pub const UnknownTypeChecker = struct { // spelling as the declared name is rejected (issue 0089). These // have no nested binding sites, so only the name is checked. A // flat `#import`/`#import c` (name == null) binds nothing. 
── - .enum_decl => |ed| self.checkDeclName(node, ed.name), - .union_decl => |ud| self.checkDeclName(node, ud.name), - .error_set_decl => |esd| self.checkDeclName(node, esd.name), - .ufcs_alias => |ua| self.checkDeclName(node, ua.name), - .library_decl => |ld| self.checkDeclName(node, ld.name), - .import_decl => |imp| if (imp.name) |n| self.checkDeclName(node, n), - .c_import_decl => |cid| if (cid.name) |n| self.checkDeclName(node, n), + .enum_decl => |ed| self.checkDeclName(node, ed.name, ed.is_raw), + .union_decl => |ud| self.checkDeclName(node, ud.name, ud.is_raw), + .error_set_decl => |esd| self.checkDeclName(node, esd.name, esd.is_raw), + .ufcs_alias => |ua| self.checkDeclName(node, ua.name, ua.is_raw), + .library_decl => |ld| self.checkDeclName(node, ld.name, ld.is_raw), + .import_decl => |imp| if (imp.name) |n| self.checkDeclName(node, n, imp.is_raw), + .c_import_decl => |cid| if (cid.name) |n| self.checkDeclName(node, n, cid.is_raw), // ── Leaves & pure type-expression nodes: no binding sites below. ── // Type-expression subtrees carry only type names (no value // bindings). Listing each tag explicitly (rather than an `else`) is @@ -370,8 +359,9 @@ pub const UnknownTypeChecker = struct { fn checkParamNames(self: UnknownTypeChecker, params: []const ast.Param) void { for (params) |p| { // A backtick raw param (`` (`s2: T) ``) or a `#import c` foreign - // param is exempt from the reserved-type-name rule (issue 0089). - if (!p.is_raw) self.checkBindingName(p.name, p.name_span); + // param is exempt from the reserved-type-name rule (issue 0089) — + // the exemption is honored inside `checkBindingName` via `p.is_raw`. + self.checkBindingName(p.name, p.name_span, p.is_raw); if (p.default_expr) |de| self.checkBindingNames(de); } } @@ -708,8 +698,8 @@ pub const UnknownTypeChecker = struct { switch (node.data) { // A `$`-prefixed name (`-> $R`) introduces/references a generic type // param inline — always valid in a type position. 
- .type_expr => |te| if (!te.is_generic) self.reportIfUnknownType(te.name, node.span, declared, in_scope, type_vals), - .identifier => |id| self.reportIfUnknownType(id.name, node.span, declared, in_scope, type_vals), + .type_expr => |te| if (!te.is_generic) self.reportIfUnknownType(te.name, node.span, declared, in_scope, type_vals, te.is_raw), + .identifier => |id| self.reportIfUnknownType(id.name, node.span, declared, in_scope, type_vals, id.is_raw), .pointer_type_expr => |pt| self.checkTypeNodeForUnknown(pt.pointee_type, declared, in_scope, type_vals), .many_pointer_type_expr => |mp| self.checkTypeNodeForUnknown(mp.element_type, declared, in_scope, type_vals), .slice_type_expr => |st| self.checkTypeNodeForUnknown(st.element_type, declared, in_scope, type_vals), @@ -753,11 +743,17 @@ pub const UnknownTypeChecker = struct { declared: *std.StringHashMap(void), in_scope: []const ast.StructTypeParam, type_vals: []const []const u8, + is_raw: bool, ) void { // Only bare identifiers are validated. Inline-spelled compound types // (`[:0]u8`, `mod.Type`, …) carry non-identifier characters — trust them. if (!isIdentLike(name)) return; - if (isBuiltinTypeName(name)) return; + // A backtick raw reference (`` `s2 ``) is the LITERAL name used as a + // type — explicitly NOT the builtin/reserved spelling — so it must + // resolve to a `` `s2 ``-declared type, else a normal "unknown type" + // error. Skip the builtin-name exemption that would otherwise wave a + // bare `s2` through (issue 0089). + if (!is_raw and isBuiltinTypeName(name)) return; for (in_scope) |tp| if (std.mem.eql(u8, tp.name, name)) return; if (declared.contains(name)) return; // Registered as a real (non-stub) type — covers imported concrete @@ -789,7 +785,14 @@ pub const UnknownTypeChecker = struct { /// (LLVM verifier abort, or a silent mutation-losing copy). Rejecting the /// name here, before lowering, keeps the `.identifier`-only address-of paths /// correct without any lowering special-case. 
- fn checkBindingName(self: UnknownTypeChecker, name: []const u8, span: ?ast.Span) void { + /// `is_raw` is a REQUIRED argument, not a call-site guard: the exemption + /// lives INSIDE the check so no caller can validate a name without also + /// honoring the backtick / `#import c` foreign exemption. This is what keeps + /// the check and the exemption from desyncing — the recurring failure of the + /// earlier attempts, where each site threaded an `if (!is_raw)` guard + /// separately and one was forgotten (issue 0089). + fn checkBindingName(self: UnknownTypeChecker, name: []const u8, span: ?ast.Span, is_raw: bool) void { + if (is_raw) return; if (isReservedTypeName(name)) self.diagnostics.addFmt(.err, span, "'{s}' is a reserved type name and cannot be used as an identifier", .{name}); } @@ -798,12 +801,14 @@ pub const UnknownTypeChecker = struct { /// identifier but carries no dedicated `name_span` field — struct / enum / /// union / error-set / protocol / foreign-class type decls, ufcs aliases, /// and namespaced imports (issue 0089). Each such node begins at its name - /// token, so the name's length isolates the caret onto the name. A - /// backtick raw / `#import c` foreign name never reaches here (those forms - /// are exempt at their own decl path). - fn checkDeclName(self: UnknownTypeChecker, node: *const Node, name: []const u8) void { + /// token (`createNode(name_start, …)`), so the name's length isolates the + /// caret onto the name — a single source for the span, no separate stored + /// field to drift from `node.span`. `is_raw` is REQUIRED, exactly as in + /// `checkBindingName`: a backtick raw / `#import c` foreign name is exempt + /// by construction. 
+ fn checkDeclName(self: UnknownTypeChecker, node: *const Node, name: []const u8, is_raw: bool) void { const span = ast.Span{ .start = node.span.start, .end = node.span.start + @as(u32, @intCast(name.len)) }; - self.checkBindingName(name, span); + self.checkBindingName(name, span, is_raw); } }; diff --git a/src/ir/type_bridge.zig b/src/ir/type_bridge.zig index eef4c67..3d87458 100644 --- a/src/ir/type_bridge.zig +++ b/src/ir/type_bridge.zig @@ -107,8 +107,8 @@ pub fn resolveAstType(node: ?*const Node, table: *TypeTable, alias_map: AliasMap const n = node orelse return .unresolved; const si = StatelessInner{ .table = table, .alias_map = alias_map, .consts = consts }; return switch (n.data) { - .type_expr => |te| resolveTypeName(te.name, table, alias_map), - .identifier => |id| resolveTypeName(id.name, table, alias_map), + .type_expr => |te| resolveTypeName(te.name, table, alias_map, te.is_raw), + .identifier => |id| resolveTypeName(id.name, table, alias_map, id.is_raw), // Structural shapes (`*T`/`[*]T`/`[]T`/`?T`/`[N]T`, functions, plain // closures, plain tuples) are owned by the single canonical // `TypeResolver.resolveCompound` — no independent compound algorithm @@ -174,8 +174,9 @@ pub fn resolveAstType(node: ?*const Node, table: *TypeTable, alias_map: AliasMap /// Resolve a bare type name. The algorithm lives in `type_resolver.zig` /// (`TypeResolver.resolveNamed`, the single source); `type_bridge` forwards the /// caller-threaded `alias_map` (the single-source `ProgramIndex.type_alias_map`). -fn resolveTypeName(name: []const u8, table: *TypeTable, alias_map: AliasMap) TypeId { - return type_resolver.TypeResolver.resolveNamed(name, table, alias_map); +/// `skip_builtin` carries the backtick raw escape (issue 0089). +fn resolveTypeName(name: []const u8, table: *TypeTable, alias_map: AliasMap, skip_builtin: bool) TypeId { + return type_resolver.TypeResolver.resolveNamed(name, table, alias_map, skip_builtin); } /// Builtin primitive keyword → TypeId. 
The keyword table now lives in @@ -535,7 +536,7 @@ fn resolveInlineErrorSet(esd: *const ast.ErrorSetDecl, table: *TypeTable) TypeId /// resolves to the same empty inferred set, which is correct while no /// function raises (E1.3+). fn resolveErrorType(ete: *const ast.ErrorTypeExpr, table: *TypeTable, alias_map: AliasMap) TypeId { - if (ete.name) |name| return resolveTypeName(name, table, alias_map); + if (ete.name) |name| return resolveTypeName(name, table, alias_map, false); // `!` is not a legal type/identifier name, so this reserved StringId can // never collide with a user-declared set. const name_id = table.internString("!"); diff --git a/src/ir/type_resolver.test.zig b/src/ir/type_resolver.test.zig index 51b0208..70e895d 100644 --- a/src/ir/type_resolver.test.zig +++ b/src/ir/type_resolver.test.zig @@ -144,21 +144,33 @@ test "TypeResolver.resolveName resolves aliases via ProgramIndex (not the TypeTa try index.type_alias_map.put("NodeRef", ptr_s64); // alias → pointer const tr = TypeResolver{ .alloc = alloc, .types = &table, .diagnostics = null, .index = &index }; - try std.testing.expectEqual(@as(TypeId, .u32), tr.resolveName("ShaderHandle")); - try std.testing.expectEqual(ptr_s64, tr.resolveName("NodeRef")); + try std.testing.expectEqual(@as(TypeId, .u32), tr.resolveName("ShaderHandle", false)); + try std.testing.expectEqual(ptr_s64, tr.resolveName("NodeRef", false)); // Primitive is checked before alias. 
- try std.testing.expectEqual(@as(TypeId, .s64), tr.resolveName("s64")); + try std.testing.expectEqual(@as(TypeId, .s64), tr.resolveName("s64", false)); } test "TypeResolver.resolveNamed: width-int, string-prefix, unknown→stub" { const alloc = std.testing.allocator; var table = TypeTable.init(alloc); defer table.deinit(); - try std.testing.expectEqual(table.intern(.{ .signed = 7 }), TypeResolver.resolveNamed("s7", &table, null)); - try std.testing.expectEqual(table.ptrTo(.s64), TypeResolver.resolveNamed("*s64", &table, null)); + try std.testing.expectEqual(table.intern(.{ .signed = 7 }), TypeResolver.resolveNamed("s7", &table, null, false)); + try std.testing.expectEqual(table.ptrTo(.s64), TypeResolver.resolveNamed("*s64", &table, null, false)); // Unknown name, no alias map → empty-struct stub (preserved behavior; // never `.unresolved`, which is reserved for failed *generic* resolution). - try std.testing.expect(TypeResolver.resolveNamed("Unknown", &table, null) != .unresolved); + try std.testing.expect(TypeResolver.resolveNamed("Unknown", &table, null, false) != .unresolved); +} + +test "TypeResolver.resolveNamed: skip_builtin resolves a raw reserved-name type, not the builtin" { + const alloc = std.testing.allocator; + var table = TypeTable.init(alloc); + defer table.deinit(); + // A registered user type named "s2" (a reserved int spelling). + const name_id = table.internString("s2"); + const user_s2 = table.intern(.{ .@"struct" = .{ .name = name_id, .fields = &.{} } }); + // Bare lookup → the builtin 2-bit signed int; raw lookup → the user type. 
+ try std.testing.expectEqual(table.intern(.{ .signed = 2 }), TypeResolver.resolveNamed("s2", &table, null, false)); + try std.testing.expectEqual(user_s2, TypeResolver.resolveNamed("s2", &table, null, true)); } test "TypeResolver.parseWidthInt: every width 1..64, both signs; rejects out-of-range / non-int" { diff --git a/src/ir/type_resolver.zig b/src/ir/type_resolver.zig index 3b673a1..a4cf58c 100644 --- a/src/ir/type_resolver.zig +++ b/src/ir/type_resolver.zig @@ -244,11 +244,21 @@ pub const TypeResolver = struct { /// `type_bridge` via the alias map threaded through `resolveAstType`. The /// stub fall-through preserves long-standing behavior for as-yet- /// unregistered names. - pub fn resolveNamed(name: []const u8, table: *TypeTable, alias_map: ?*const std.StringHashMap(TypeId)) TypeId { + /// + /// `skip_builtin` is the backtick raw-identifier escape (`` `s2 `` in type + /// position, issue 0089): a raw reference is the LITERAL name used as a + /// type, so it bypasses the builtin/reserved classifier and resolves only + /// through registered-type → alias → stub. A bare `s2` keeps the default + /// (`false`) and resolves to the builtin int type. The string-prefix + /// recursion always passes `false`: the inner names (`*T`/`?T`) are bare, + /// never raw. + pub fn resolveNamed(name: []const u8, table: *TypeTable, alias_map: ?*const std.StringHashMap(TypeId), skip_builtin: bool) TypeId { // Builtin primitive keyword or arbitrary-width integer (`s1`-`s64`, // `u1`-`u64`) — the single builtin classifier, also reused by the // numeric-limit accessor intercept. - if (resolveBuiltinName(name, table)) |id| return id; + if (!skip_builtin) { + if (resolveBuiltinName(name, table)) |id| return id; + } // Sentinel-terminated slice: [:0]u8 → string. if (name.len >= 5 and name[0] == '[' and name[1] == ':') { if (std.mem.indexOfScalar(u8, name, ']')) |close| { @@ -259,15 +269,15 @@ pub const TypeResolver = struct { } // Many-pointer: [*]T. 
if (name.len >= 4 and name[0] == '[' and name[1] == '*' and name[2] == ']') { - return table.manyPtrTo(resolveNamed(name[3..], table, alias_map)); + return table.manyPtrTo(resolveNamed(name[3..], table, alias_map, false)); } // Pointer: *T. if (name.len >= 2 and name[0] == '*') { - return table.ptrTo(resolveNamed(name[1..], table, alias_map)); + return table.ptrTo(resolveNamed(name[1..], table, alias_map, false)); } // Optional: ?T. if (name.len >= 2 and name[0] == '?') { - return table.optionalOf(resolveNamed(name[1..], table, alias_map)); + return table.optionalOf(resolveNamed(name[1..], table, alias_map, false)); } // Named struct/enum/union — already-registered wins, then alias, then // a fresh empty-struct stub for an as-yet-unregistered name. @@ -280,8 +290,9 @@ pub const TypeResolver = struct { } /// Resolve a bare type name through the canonical alias source - /// (`ProgramIndex.type_alias_map`). - pub fn resolveName(self: TypeResolver, name: []const u8) TypeId { - return resolveNamed(name, self.types, &self.index.type_alias_map); + /// (`ProgramIndex.type_alias_map`). `skip_builtin` carries the backtick raw + /// escape (issue 0089) — see `resolveNamed`. + pub fn resolveName(self: TypeResolver, name: []const u8, skip_builtin: bool) TypeId { + return resolveNamed(name, self.types, &self.index.type_alias_map, skip_builtin); } }; diff --git a/src/parser.zig b/src/parser.zig index 0ef7bc7..8ec384e 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -88,7 +88,7 @@ pub const Parser = struct { // Check for #import c { ... 
} (C import block) if (self.current.tag == .identifier and std.mem.eql(u8, self.tokenSlice(self.current), "c") and self.peekNext() == .l_brace) { self.advance(); // consume 'c' - return self.parseCImportBlock(start, null); + return self.parseCImportBlock(start, null, false); } if (self.current.tag != .string_literal) { return self.fail("expected string path after '#import'"); @@ -183,7 +183,7 @@ pub const Parser = struct { // Check for name :: #import c { ... } if (self.current.tag == .identifier and std.mem.eql(u8, self.tokenSlice(self.current), "c") and self.peekNext() == .l_brace) { self.advance(); // consume 'c' - return self.parseCImportBlock(start_pos, name); + return self.parseCImportBlock(start_pos, name, name_is_raw); } if (self.current.tag != .string_literal) { return self.fail("expected string path after '#import'"); @@ -192,7 +192,7 @@ pub const Parser = struct { const path = raw[1 .. raw.len - 1]; self.advance(); try self.expect(.semicolon); - return try self.createNode(start_pos, .{ .import_decl = .{ .path = path, .name = name } }); + return try self.createNode(start_pos, .{ .import_decl = .{ .path = path, .name = name, .is_raw = name_is_raw } }); } // Named library: name :: #library "libname"; @@ -205,7 +205,7 @@ pub const Parser = struct { const lib_name = raw[1 .. 
raw.len - 1]; self.advance(); try self.expect(.semicolon); - return try self.createNode(start_pos, .{ .library_decl = .{ .lib_name = lib_name, .name = name } }); + return try self.createNode(start_pos, .{ .library_decl = .{ .lib_name = lib_name, .name = name, .is_raw = name_is_raw } }); } // Compile-time evaluation: name :: #run expr; @@ -229,22 +229,22 @@ pub const Parser = struct { // Enum declaration if (self.current.tag == .kw_enum) { - return self.parseEnumDecl(name, start_pos); + return self.parseEnumDecl(name, start_pos, name_is_raw); } // Error-set declaration: name :: error { TagA, TagB } if (self.current.tag == .kw_error) { - return self.parseErrorSetDecl(name, start_pos); + return self.parseErrorSetDecl(name, start_pos, name_is_raw); } // Struct declaration if (self.current.tag == .kw_struct) { - return self.parseStructDecl(name, start_pos); + return self.parseStructDecl(name, start_pos, name_is_raw); } // Protocol declaration if (self.current.tag == .kw_protocol) { - return self.parseProtocolDecl(name, start_pos); + return self.parseProtocolDecl(name, start_pos, name_is_raw); } // Foreign-type binding with optional prefix modifiers: @@ -255,12 +255,12 @@ pub const Parser = struct { // `#foreign` flips that to "reference an existing class on the foreign side." // `#jni_main` flags the class as the launchable entry (Android Activity). 
if (self.tryParseForeignClassPrefix()) |prefix| { - return self.parseForeignClassDecl(name, start_pos, prefix.runtime, prefix.is_foreign, prefix.is_main); + return self.parseForeignClassDecl(name, start_pos, prefix.runtime, prefix.is_foreign, prefix.is_main, name_is_raw); } // C-style union declaration if (self.current.tag == .kw_union) { - return self.parseUnionDecl(name, start_pos); + return self.parseUnionDecl(name, start_pos, name_is_raw); } // UFCS alias: name :: ufcs target; @@ -272,7 +272,7 @@ pub const Parser = struct { const target = self.tokenSlice(self.current); self.advance(); try self.expect(.semicolon); - return try self.createNode(start_pos, .{ .ufcs_alias = .{ .name = name, .target = target } }); + return try self.createNode(start_pos, .{ .ufcs_alias = .{ .name = name, .target = target, .is_raw = name_is_raw } }); } // Function declaration: (params) -> type { body } or () { body } @@ -332,7 +332,7 @@ pub const Parser = struct { return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = value, .name_span = name_span, .is_raw = name_is_raw } }); } - fn parseCImportBlock(self: *Parser, start: u32, name: ?[]const u8) anyerror!*Node { + fn parseCImportBlock(self: *Parser, start: u32, name: ?[]const u8, name_is_raw: bool) anyerror!*Node { try self.expect(.l_brace); var includes = std.ArrayList([]const u8).empty; var sources = std.ArrayList([]const u8).empty; @@ -381,6 +381,7 @@ pub const Parser = struct { .defines = try defines.toOwnedSlice(self.allocator), .flags = try flags.toOwnedSlice(self.allocator), .name = name, + .is_raw = name_is_raw, } }); } @@ -394,7 +395,7 @@ pub const Parser = struct { self.advance(); const value = try self.parseExpr(); try self.expectSemicolonAfter(value); - return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = type_node, .value = value } }); + return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = 
type_node, .value = value, .name_span = name_span, .is_raw = name_is_raw } }); } if (self.current.tag == .equal) { @@ -629,11 +630,16 @@ pub const Parser = struct { } if (self.current.tag.isTypeKeyword() or self.isIdentLike()) { - // A backtick raw identifier (`` `s2 ``) is a VALUE-name escape; it is - // never a type. Reject it in type position rather than silently - // type-classifying it (issue 0089). + // A backtick raw identifier (`` `s2 ``) in type position is the + // LITERAL name `s2` used as a type reference — never the builtin / + // reserved keyword. It is always a plain named-type reference (no + // qualified-path, `Closure`, or parameterized continuation), so emit + // a raw `type_expr` and return; resolution skips the builtin + // classifier and looks up a `` `s2 ``-declared type (issue 0089). if (self.current.is_raw) { - return self.failFmt("`{s}` is a raw identifier, not a type — the backtick escape names a value, never a type", .{self.tokenSlice(self.current)}); + const raw_name = self.tokenSlice(self.current); + self.advance(); + return try self.createNode(start, .{ .type_expr = .{ .name = raw_name, .is_raw = true } }); } var name = self.tokenSlice(self.current); self.advance(); @@ -787,20 +793,20 @@ pub const Parser = struct { } // Inline struct type in type position: struct { ... } if (self.current.tag == .kw_struct) { - return try self.parseStructDecl("__anon", start); + return try self.parseStructDecl("__anon", start, false); } // Inline C-style union in type position: union { ... } if (self.current.tag == .kw_union) { - return try self.parseUnionDecl("__anon", start); + return try self.parseUnionDecl("__anon", start, false); } // Inline enum type in type position: enum { ... 
} if (self.current.tag == .kw_enum) { - return try self.parseEnumDecl("__anon", start); + return try self.parseEnumDecl("__anon", start, false); } return self.fail("expected type name"); } - fn parseEnumDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node { + fn parseEnumDecl(self: *Parser, name: []const u8, start_pos: u32, name_is_raw: bool) anyerror!*Node { self.advance(); // skip 'enum' // Check for 'flags' modifier: enum flags { ... } @@ -874,10 +880,11 @@ pub const Parser = struct { .is_flags = is_flags, .variant_values = if (has_any_value) try variant_values.toOwnedSlice(self.allocator) else &.{}, .backing_type = backing_type, + .is_raw = name_is_raw, } }); } - fn parseErrorSetDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node { + fn parseErrorSetDecl(self: *Parser, name: []const u8, start_pos: u32, name_is_raw: bool) anyerror!*Node { self.advance(); // skip 'error' try self.expect(.l_brace); var tag_names = std.ArrayList([]const u8).empty; @@ -899,10 +906,11 @@ pub const Parser = struct { return try self.createNode(start_pos, .{ .error_set_decl = .{ .name = name, .tag_names = try tag_names.toOwnedSlice(self.allocator), + .is_raw = name_is_raw, } }); } - fn parseUnionDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node { + fn parseUnionDecl(self: *Parser, name: []const u8, start_pos: u32, name_is_raw: bool) anyerror!*Node { self.advance(); // skip 'union' try self.expect(.l_brace); var field_names = std.ArrayList([]const u8).empty; @@ -914,7 +922,7 @@ pub const Parser = struct { const anon_field = try std.fmt.allocPrint(self.allocator, "__anon_{d}", .{anon_idx}); anon_idx += 1; const anon_struct_name = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ name, anon_field }); - const struct_node = try self.parseStructDecl(anon_struct_name, self.current.loc.start); + const struct_node = try self.parseStructDecl(anon_struct_name, self.current.loc.start, false); try field_names.append(self.allocator, anon_field); 
try field_types.append(self.allocator, struct_node); if (self.current.tag == .semicolon) { @@ -942,10 +950,11 @@ pub const Parser = struct { .name = name, .field_names = try field_names.toOwnedSlice(self.allocator), .field_types = try field_types.toOwnedSlice(self.allocator), + .is_raw = name_is_raw, } }); } - fn parseStructDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node { + fn parseStructDecl(self: *Parser, name: []const u8, start_pos: u32, name_is_raw: bool) anyerror!*Node { self.advance(); // skip 'struct' // Optional `#compiler` attribute: all methods inside this struct are @@ -1133,10 +1142,11 @@ pub const Parser = struct { .using_entries = try using_entries.toOwnedSlice(self.allocator), .methods = try methods.toOwnedSlice(self.allocator), .constants = try constants.toOwnedSlice(self.allocator), + .is_raw = name_is_raw, } }); } - fn parseProtocolDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node { + fn parseProtocolDecl(self: *Parser, name: []const u8, start_pos: u32, name_is_raw: bool) anyerror!*Node { self.advance(); // skip 'protocol' // Optional type params: protocol(Target: Type, U: Type) { ... 
} @@ -1249,6 +1259,7 @@ pub const Parser = struct { .methods = try methods.toOwnedSlice(self.allocator), .is_inline = is_inline, .type_params = try type_params.toOwnedSlice(self.allocator), + .is_raw = name_is_raw, } }); } @@ -1335,7 +1346,7 @@ pub const Parser = struct { }; } - fn parseForeignClassDecl(self: *Parser, name: []const u8, start_pos: u32, runtime: ast.ForeignRuntime, is_foreign: bool, is_main: bool) anyerror!*Node { + fn parseForeignClassDecl(self: *Parser, name: []const u8, start_pos: u32, runtime: ast.ForeignRuntime, is_foreign: bool, is_main: bool, name_is_raw: bool) anyerror!*Node { self.advance(); // skip directive token try self.expect(.l_paren); @@ -1576,6 +1587,7 @@ pub const Parser = struct { .members = try members.toOwnedSlice(self.allocator), .is_foreign = is_foreign, .is_main = is_main, + .is_raw = name_is_raw, } }); } @@ -2820,15 +2832,15 @@ pub const Parser = struct { }, .kw_struct => { // Anonymous struct expression: struct { value: T; count: u32; } - return try self.parseStructDecl("__anon", start); + return try self.parseStructDecl("__anon", start, false); }, .kw_enum => { // Anonymous enum expression: enum { variant: T; other: u32; } - return try self.parseEnumDecl("__anon", start); + return try self.parseEnumDecl("__anon", start, false); }, .kw_union => { // Anonymous C-style union expression: union { f: f32; i: s32; } - return try self.parseUnionDecl("__anon", start); + return try self.parseUnionDecl("__anon", start, false); }, .kw_if => { return self.parseIfExpr(); From ef8f021c0173f311421a7f2352aba56180a922f3 Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 21:14:35 +0300 Subject: [PATCH 06/11] =?UTF-8?q?feat(lang):=20universal=20raw=20identifie?= =?UTF-8?q?r=20=E2=80=94=20parser=20exhaustiveness=20+=20raw=20type=20cont?= =?UTF-8?q?inuations=20+=20sema/LSP=20[F0.6]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the remaining three F0.6 findings so the universal backtick 
raw identifier holds in BOTH classifiers and at EVERY parser construction site. 1. Struct-body constants thread is_raw + name_span. The struct-body const forms (untyped `` `s2 :: 5 `` and typed `` `s2 : T : v ``) built the const_decl node without name_span/is_raw, so a backtick const was falsely rejected and a bare reserved-name const caretted at 1:1. They now capture both. Structural cure: `ast.ConstDecl`'s name_span + is_raw carry NO default, so the compiler rejects any construction site that omits them (mirrors checkBindingName's required `is_raw` arg). FnDecl keeps its defaults — every parser fn_decl routes through parseFnDecl whose `name_is_raw` is a required parameter (equivalent guarantee). 2. Raw identifier in TYPE position flows through the normal continuations. parseTypeExpr no longer returns a terminal type_expr for a raw atom; the raw flag rides the atom through the qualified-path / Closure / parameterized continuations, so `` `s2(s64) ``, `` *`s2 ``, `` ?`s2 `` all parse. ParameterizedTypeExpr carries is_raw; resolveParameterizedWithBindings skips the `Vector` intrinsic when raw. 3. sema/LSP (the second classifier) honors is_raw. Type.fromTypeExpr returns null for a raw type_expr; resolveTypeNode skips the builtin classifier when raw; resolveTypeNameStr takes a skip_builtin arg threaded from te/id.is_raw (compound inner names pass false). A backtick reserved-name annotation now resolves to the user type in the editor index, not the builtin. Tests: examples/0156 (struct-body const), 0157 (parameterized raw type + wrappers), 1142 (bare struct-body const errors, caret on name); src/sema.test.zig pins the LSP raw-type resolution (fail-before verified). Gate: 365 unit tests, 429 examples, 0 failed. 
--- examples/0156-types-backtick-struct-const.sx | 21 +++++ ...7-types-backtick-parameterized-raw-type.sx | 30 +++++++ ...-diagnostics-reserved-name-struct-const.sx | 20 +++++ .../0156-types-backtick-struct-const.exit | 1 + .../0156-types-backtick-struct-const.stderr | 1 + .../0156-types-backtick-struct-const.stdout | 2 + ...types-backtick-parameterized-raw-type.exit | 1 + ...pes-backtick-parameterized-raw-type.stderr | 1 + ...pes-backtick-parameterized-raw-type.stdout | 3 + ...iagnostics-reserved-name-struct-const.exit | 1 + ...gnostics-reserved-name-struct-const.stderr | 11 +++ ...gnostics-reserved-name-struct-const.stdout | 1 + issues/0089-backtick-raw-identifier.md | 38 ++++++-- readme.md | 11 +-- specs.md | 12 ++- src/ast.zig | 24 ++++-- src/ir/lower.zig | 6 +- src/parser.zig | 28 ++++-- src/root.zig | 1 + src/sema.test.zig | 86 +++++++++++++++++++ src/sema.zig | 48 +++++++---- src/types.zig | 6 ++ 22 files changed, 300 insertions(+), 53 deletions(-) create mode 100644 examples/0156-types-backtick-struct-const.sx create mode 100644 examples/0157-types-backtick-parameterized-raw-type.sx create mode 100644 examples/1142-diagnostics-reserved-name-struct-const.sx create mode 100644 examples/expected/0156-types-backtick-struct-const.exit create mode 100644 examples/expected/0156-types-backtick-struct-const.stderr create mode 100644 examples/expected/0156-types-backtick-struct-const.stdout create mode 100644 examples/expected/0157-types-backtick-parameterized-raw-type.exit create mode 100644 examples/expected/0157-types-backtick-parameterized-raw-type.stderr create mode 100644 examples/expected/0157-types-backtick-parameterized-raw-type.stdout create mode 100644 examples/expected/1142-diagnostics-reserved-name-struct-const.exit create mode 100644 examples/expected/1142-diagnostics-reserved-name-struct-const.stderr create mode 100644 examples/expected/1142-diagnostics-reserved-name-struct-const.stdout create mode 100644 src/sema.test.zig diff --git 
a/examples/0156-types-backtick-struct-const.sx b/examples/0156-types-backtick-struct-const.sx new file mode 100644 index 0000000..53466da --- /dev/null +++ b/examples/0156-types-backtick-struct-const.sx @@ -0,0 +1,21 @@ +// Backtick raw-identifier escape at a STRUCT-BODY constant — both the untyped +// `` `name :: value `` and the typed `` `name : T : value `` forms. A struct +// member constant is a binding site like any top-level const (examples/0153), +// so a reserved type spelling (`s2`, `u8`) needs the backtick to be used as the +// constant's name; the value is read back via `Holder.`name`. A *bare* +// reserved-name struct const still errors with the caret on the name (see +// examples/1142). The backtick is never part of the name's text. +// Regression (issue 0089 — attempt-5: struct-body const decls thread is_raw + +// the precise name_span, previously dropped to a false reject / 1:1 caret). +#import "modules/std.sx"; + +Holder :: struct { + `s2 :: 5; // untyped raw struct-body const + `u8 : s64 : 9; // typed raw struct-body const +} + +main :: () -> s32 { + print("untyped = {}\n", Holder.`s2); + print("typed = {}\n", Holder.`u8); + return 0; +} diff --git a/examples/0157-types-backtick-parameterized-raw-type.sx b/examples/0157-types-backtick-parameterized-raw-type.sx new file mode 100644 index 0000000..420dce1 --- /dev/null +++ b/examples/0157-types-backtick-parameterized-raw-type.sx @@ -0,0 +1,30 @@ +// Backtick raw identifier in PARAMETERIZED type position. A raw type reference +// (`` `s2 ``) flows through the SAME type-expression continuations as a bare +// name, so a reserved-spelled GENERIC template can be instantiated +// (`` `s2(s64) ``) and the result composes under pointer/field wrappers +// (`` *`s2(s64) ``, a struct field typed `` `s2(s64) ``). A bare `s2` in type +// position is still the 2-bit signed int. Complements examples/0154 (nullary +// raw type references). 
+// Regression (issue 0089 — attempt-5: the raw type atom no longer parses as a +// terminal `type_expr`; it reaches the parameterized + wrapper continuations). +#import "modules/std.sx"; + +`s2 :: struct($T: Type) { + x: $T; +} + +Wrapper :: struct { + inner: `s2(s64); // raw parameterized type as a struct field +} + +main :: () -> s32 { + v : `s2(s64); + v.x = 7; + p : *`s2(s64) = @v; // pointer to a raw parameterized type + w : Wrapper = ---; + w.inner.x = 12; + print("val = {}\n", v.x); + print("ptr = {}\n", p.x); + print("fld = {}\n", w.inner.x); + return 0; +} diff --git a/examples/1142-diagnostics-reserved-name-struct-const.sx b/examples/1142-diagnostics-reserved-name-struct-const.sx new file mode 100644 index 0000000..b07c455 --- /dev/null +++ b/examples/1142-diagnostics-reserved-name-struct-const.sx @@ -0,0 +1,20 @@ +// A bare reserved/builtin type-name spelling is rejected as the NAME of a +// STRUCT-BODY constant too — both the untyped (`s2 :: 5`) and the typed +// (`u8 : s64 : 9`) forms — exactly like a top-level const (examples/1140) or a +// type decl (examples/1141). A struct member constant is a binding site, so a +// bare reserved spelling mis-classifies and is rejected; the caret lands ON the +// constant's name (not at 1:1). The backtick escape (examples/0156) is the only +// way to spell these names in handwritten sx. +// +// Regression (issue 0089 — attempt-5: 0076 holds for struct-body consts, with +// the caret on the name). Expected: one error per const, caret on the name; exit 1. 
+#import "modules/std.sx"; + +Holder :: struct { + s2 :: 5; + u8 : s64 : 9; +} + +main :: () -> s32 { + return 0; +} diff --git a/examples/expected/0156-types-backtick-struct-const.exit b/examples/expected/0156-types-backtick-struct-const.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0156-types-backtick-struct-const.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0156-types-backtick-struct-const.stderr b/examples/expected/0156-types-backtick-struct-const.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0156-types-backtick-struct-const.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0156-types-backtick-struct-const.stdout b/examples/expected/0156-types-backtick-struct-const.stdout new file mode 100644 index 0000000..bdeeab2 --- /dev/null +++ b/examples/expected/0156-types-backtick-struct-const.stdout @@ -0,0 +1,2 @@ +untyped = 5 +typed = 9 diff --git a/examples/expected/0157-types-backtick-parameterized-raw-type.exit b/examples/expected/0157-types-backtick-parameterized-raw-type.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0157-types-backtick-parameterized-raw-type.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0157-types-backtick-parameterized-raw-type.stderr b/examples/expected/0157-types-backtick-parameterized-raw-type.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0157-types-backtick-parameterized-raw-type.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0157-types-backtick-parameterized-raw-type.stdout b/examples/expected/0157-types-backtick-parameterized-raw-type.stdout new file mode 100644 index 0000000..ac9fd97 --- /dev/null +++ b/examples/expected/0157-types-backtick-parameterized-raw-type.stdout @@ -0,0 +1,3 @@ +val = 7 +ptr = 7 +fld = 12 diff --git a/examples/expected/1142-diagnostics-reserved-name-struct-const.exit 
b/examples/expected/1142-diagnostics-reserved-name-struct-const.exit new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/examples/expected/1142-diagnostics-reserved-name-struct-const.exit @@ -0,0 +1 @@ +1 diff --git a/examples/expected/1142-diagnostics-reserved-name-struct-const.stderr b/examples/expected/1142-diagnostics-reserved-name-struct-const.stderr new file mode 100644 index 0000000..81ea977 --- /dev/null +++ b/examples/expected/1142-diagnostics-reserved-name-struct-const.stderr @@ -0,0 +1,11 @@ +error: 's2' is a reserved type name and cannot be used as an identifier + --> examples/1142-diagnostics-reserved-name-struct-const.sx:14:5 + | +14 | s2 :: 5; + | ^^ + +error: 'u8' is a reserved type name and cannot be used as an identifier + --> examples/1142-diagnostics-reserved-name-struct-const.sx:15:5 + | +15 | u8 : s64 : 9; + | ^^ diff --git a/examples/expected/1142-diagnostics-reserved-name-struct-const.stdout b/examples/expected/1142-diagnostics-reserved-name-struct-const.stdout new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/1142-diagnostics-reserved-name-struct-const.stdout @@ -0,0 +1 @@ + diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md index 8c0df3d..49c1fd1 100644 --- a/issues/0089-backtick-raw-identifier.md +++ b/issues/0089-backtick-raw-identifier.md @@ -21,21 +21,35 @@ > - **Value position.** The parser skips `Type.fromName` for a raw identifier > in expression position ([src/parser.zig] `parsePrimary`), so `` `s2 `` is a > value identifier; a later bare reference resolves to the binding. -> - **Type position.** `parseTypeExpr` emits a raw `type_expr` (no qualified / -> `Closure` / parameterized continuation). 
Resolution skips the builtin -> classifier (`TypeResolver.resolveNamed`'s `skip_builtin`, threaded from -> `te.is_raw` in [src/ir/lower.zig] and [src/ir/type_bridge.zig]) and looks up -> a `` `s2 ``-declared type (struct / enum / union / alias), else a NORMAL +> - **Type position.** `parseTypeExpr` sets the raw flag on the type ATOM and +> lets it flow through the SAME continuations as a bare name (attempt 5), so a +> raw reference parameterizes a reserved-spelled template (`` `s2(s64) ``) and +> composes under the pointer / optional / slice wrappers; `ParameterizedTypeExpr` +> carries `is_raw` and `resolveParameterizedWithBindings` skips the `Vector` +> intrinsic when raw. Resolution skips the builtin classifier +> (`TypeResolver.resolveNamed`'s `skip_builtin`, threaded from `te.is_raw` in +> [src/ir/lower.zig] and [src/ir/type_bridge.zig]) and looks up a +> `` `s2 ``-declared type (struct / enum / union / alias), else a NORMAL > "unknown type 's2'" error (`UnknownTypeChecker.reportIfUnknownType` skips the > builtin-name exemption when raw). A bare `s2` in type position is still the -> builtin int. +> builtin int. The SECOND (editor/LSP) classifier in [src/sema.zig] +> (`Type.fromTypeExpr` / `resolveTypeNode` / `resolveTypeNameStr`) honors +> `is_raw` too, so a backtick reserved-name annotation resolves to the user type +> in hover/completion, not the builtin (no two-resolver divergence). > - **Declaration position.** A bare reserved-name declaration of EVERY kind > still errors (issue 0076 preserved); the backtick form is exempt. The check > and the exemption are made structurally symmetric: > `checkBindingName` / `checkDeclName` ([src/ir/semantic_diagnostics.zig]) take > `is_raw` as a REQUIRED argument and skip inside the check — no call site can > validate a name without also honoring the exemption, which is what kept the -> two from desyncing across the earlier attempts. +> two from desyncing across the earlier attempts. 
On the PARSER side the +> symmetry is enforced structurally for the bug-prone node: `ConstDecl`'s +> `name_span` + `is_raw` carry NO default (attempt 5), so the compiler rejects +> any construction site — including the two struct-body const forms (untyped +> `` `s2 :: 5 `` and typed `` `s2 : T : v ``) that previously dropped both — +> that omits them. `FnDecl` is built at every parser site through `parseFnDecl`, +> whose `name_is_raw` is a REQUIRED parameter (the equivalent guarantee); the +> type decls likewise route through parse-functions taking `name_is_raw`. > 2. **`#import c` foreign-name exemption.** `c_import.zig` synthesizes foreign > `#foreign` decls with `Param.is_raw = true` (and the synthesized `FnDecl` > `is_raw = true`), so generated C names that collide with reserved type names @@ -55,14 +69,20 @@ > `examples/0154-types-backtick-raw-type-reference.sx` (raw in TYPE position — > struct / enum / union / alias decl + reference; bare `s2` still the int), > `examples/0155-types-backtick-typed-const-union-tag.sx` (typed const + union tag), +> `examples/0156-types-backtick-struct-const.sx` (struct-body const, untyped + typed), +> `examples/0157-types-backtick-parameterized-raw-type.sx` (raw parameterized type + +> pointer/field wrappers), > `examples/1054-errors-backtick-reserved-binding.sx` (`catch`/`onfail` tag > bindings), `examples/1220-ffi-c-import-reserved-name-params.{sx,h,c}` (foreign > param + fn-name exemption, bare-callable foreign fn); negatives > `examples/1119`/`1121`/`1123` (bare reserved binding across forms), > `examples/1140-diagnostics-reserved-name-const-fn-decl.sx` (bare const + fn decl), > `examples/1141-diagnostics-reserved-name-type-decl.sx` (bare struct / enum / union -> / error / typed-const decl). Backtick lexer + `resolveNamed(skip_builtin)` unit -> tests in `src/lexer.zig` / `src/ir/type_resolver.test.zig`. 
+> / error / typed-const decl), +> `examples/1142-diagnostics-reserved-name-struct-const.sx` (bare struct-body const, +> caret on the name). Backtick lexer + `resolveNamed(skip_builtin)` unit tests in +> `src/lexer.zig` / `src/ir/type_resolver.test.zig`; the editor/LSP raw-type +> resolution (the second classifier) is pinned in `src/sema.test.zig`. > > The original report is preserved below. diff --git a/readme.md b/readme.md index e4f6fc0..4ad2119 100644 --- a/readme.md +++ b/readme.md @@ -125,11 +125,12 @@ x : s2 = 3; // bare `s2` in type position is still the int type ``` It works in every identifier position — local, global, parameter, struct field, -union tag, function name, type/alias/import name, constant, and the control-flow / -capture / binding forms (destructure, `if`/`while` binding, `for` capture, match -capture, `catch`/`onfail` tag) — and a reserved-spelled function is bare-callable -(`s2(10)`). A backtick name used as a type resolves to a `` `name ``-declared type, -else a normal `unknown type` error. +union tag, function name, type/alias/import name, a top-level or struct-body +constant, and the control-flow / capture / binding forms (destructure, `if`/`while` +binding, `for` capture, match capture, `catch`/`onfail` tag) — and a reserved-spelled +function is bare-callable (`s2(10)`). A backtick name used as a type resolves to a +`` `name ``-declared type — including a parameterized template (`` `s2(s64) ``) and +under pointer/optional wrappers — else a normal `unknown type` error. Foreign declarations from `#import c { … }` are exempt automatically: C names that collide with reserved type names (e.g. `s1`, `s2`) import unedited, and a foreign diff --git a/specs.md b/specs.md index 56ca1d2..2fe7450 100644 --- a/specs.md +++ b/specs.md @@ -55,13 +55,16 @@ x : s2 = 3; // bare `s2` in TYPE position is still the s2 int type reference: it resolves to a `` `s2 ``-declared type (struct / enum / union / type alias / …), and never the builtin. 
A bare `s2` in type position stays the builtin int; a backtick name with no matching declaration is a normal `unknown type 's2'` -error. +error. A raw type reference flows through the **same continuations** as a bare type +name, so it parameterizes a reserved-spelled generic template (`` `s2(s64) ``) and +composes under the pointer / optional / slice wrappers (`` *`s2 ``, `` ?`s2 ``). ```sx -`s2 :: struct { x: s64; } // declare a type whose name is a reserved spelling -v : `s2 = ---; // reference it as a type — resolves to the struct +`s2 :: struct($T: Type) { x: $T; } // generic template with a reserved-spelled name +v : `s2(s64) = ---; // parameterized raw type reference v.x = 7; -x : s2 = 3; // bare `s2` is still the 2-bit signed int +p : *`s2(s64) = @v; // wrappers compose over a raw type +x : s2 = 3; // bare `s2` is still the 2-bit signed int ``` **Declaration position.** A *bare* reserved-name declaration of every kind still @@ -79,6 +82,7 @@ reference, and every control-flow / capture / binding form (destructure name, `s2 : s64 : 5; // typed constant declaration `u8 :: (`s1: s64) -> s64 { `s1 } // function name + parameter P :: struct { `s2: f64; } // struct field +H :: struct { `s2 :: 5; } // struct-body constant (untyped + `: T :` typed) M :: union { `s1: s32; } // union tag `u16 :: enum { A; B; } // type-declaration name `u8, rest := pair(); // destructure name diff --git a/src/ast.zig b/src/ast.zig index f3c3541..cd2948c 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -133,11 +133,14 @@ pub const FnDecl = struct { call_conv: CallingConvention = .default, /// Span of the function's name token, for the reserved-type-name decl /// diagnostic (issue 0089). Synthesized decls (e.g. `#import c` foreign - /// functions) leave it zero. + /// functions, lowering-time objc/protocol method synthesis) leave it zero. 
name_span: Span = .{ .start = 0, .end = 0 }, /// True when the function NAME was written as a backtick raw identifier /// (`` `s2 :: … ``) or synthesized by a `#import c` foreign decl. A raw /// name is exempt from the reserved-type-name binding check (issue 0089). + /// Every PARSER fn_decl is built through `parseFnDecl`, whose `name_is_raw` + /// is a REQUIRED parameter, so a parser site cannot drop it; the default + /// here serves only post-check synthesized decls (which are never raw). is_raw: bool = false, }; @@ -316,12 +319,15 @@ pub const ConstDecl = struct { type_annotation: ?*Node, value: *Node, /// Span of the constant's name token, for the reserved-type-name decl - /// diagnostic (issue 0089). - name_span: Span = .{ .start = 0, .end = 0 }, + /// diagnostic (issue 0089). NO default: every construction site must set + /// it explicitly, so a struct-body const can't silently fall back to a + /// 1:1 caret (the finding-1 bug). + name_span: Span, /// True when the constant NAME was written as a backtick raw identifier - /// (`` `s2 :: … ``). A raw name is exempt from the reserved-type-name - /// binding check (issue 0089). - is_raw: bool = false, + /// (`` `s2 :: … ``). NO default: required at every site so the reserved- + /// name exemption can't be dropped — mirrors `checkBindingName`'s required + /// `is_raw` argument so the parser and the check can't desync (issue 0089). + is_raw: bool, }; pub const VarDecl = struct { @@ -573,6 +579,12 @@ pub const ArrayLiteral = struct { pub const ParameterizedTypeExpr = struct { name: []const u8, // e.g. "Vector", or later generic struct names args: []const *Node, // e.g. [int_literal(3), type_expr("f32")] + /// True when the base name was a backtick raw identifier in type position + /// (`` `s2(s64) ``). Such a reference is the LITERAL name `s2` used as a + /// parameterized type — resolution skips the builtin parameterized + /// classifier (e.g. 
the `Vector` intrinsic) and instantiates a + /// `` `s2 ``-declared generic template (issue 0089). + is_raw: bool = false, }; pub const IndexExpr = struct { diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 79bc7d3..2b6a8db 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -12057,8 +12057,10 @@ pub const Lowering = struct { const base_name = if (std.mem.lastIndexOfScalar(u8, pt.name, '.')) |dot| pt.name[dot + 1 ..] else pt.name; const table = &self.module.types; - // Vector(N, T) — built-in parameterized type - if (std.mem.eql(u8, base_name, "Vector")) { + // Vector(N, T) — built-in parameterized type. A backtick raw base + // (`` `Vector(…) ``) is the LITERAL user type named `Vector`, so it + // skips this intrinsic and resolves through the template map (0089). + if (!pt.is_raw and std.mem.eql(u8, base_name, "Vector")) { if (pt.args.len == 2) { const length = self.resolveVectorLane(pt.args[0]) orelse return .unresolved; const elem = self.resolveTypeWithBindings(pt.args[1]); diff --git a/src/parser.zig b/src/parser.zig index 8ec384e..2450f49 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -632,15 +632,13 @@ pub const Parser = struct { if (self.current.tag.isTypeKeyword() or self.isIdentLike()) { // A backtick raw identifier (`` `s2 ``) in type position is the // LITERAL name `s2` used as a type reference — never the builtin / - // reserved keyword. It is always a plain named-type reference (no - // qualified-path, `Closure`, or parameterized continuation), so emit - // a raw `type_expr` and return; resolution skips the builtin + // reserved keyword. The raw flag rides the type ATOM through the + // SAME qualified-path / `Closure` / parameterized continuations as a + // bare name (so `` `s2(s64) ``, `` `s2.Inner ``, `` *`s2 `` all + // parse); it is threaded onto the final `type_expr` / + // `parameterized_type_expr` so resolution skips the builtin // classifier and looks up a `` `s2 ``-declared type (issue 0089). 
- if (self.current.is_raw) { - const raw_name = self.tokenSlice(self.current); - self.advance(); - return try self.createNode(start, .{ .type_expr = .{ .name = raw_name, .is_raw = true } }); - } + const atom_is_raw = self.current.is_raw; var name = self.tokenSlice(self.current); self.advance(); @@ -781,6 +779,7 @@ pub const Parser = struct { return try self.createNode(start, .{ .parameterized_type_expr = .{ .name = name, .args = try args.toOwnedSlice(self.allocator), + .is_raw = atom_is_raw, } }); } @@ -789,7 +788,7 @@ pub const Parser = struct { for (self.struct_type_params) |tp| { if (std.mem.eql(u8, tp, name)) { is_struct_generic = true; break; } } - return try self.createNode(start, .{ .type_expr = .{ .name = name, .is_generic = is_struct_generic } }); + return try self.createNode(start, .{ .type_expr = .{ .name = name, .is_generic = is_struct_generic, .is_raw = atom_is_raw } }); } // Inline struct type in type position: struct { ... } if (self.current.tag == .kw_struct) { @@ -1067,6 +1066,8 @@ pub const Parser = struct { .name = method_name, .type_annotation = null, .value = value, + .name_span = method_name_span, + .is_raw = method_is_raw, } })); } continue; @@ -1080,6 +1081,13 @@ pub const Parser = struct { return self.fail("expected field name in struct"); } const field_start = self.current.loc.start; + // Captured for the single-name typed-const path (`name :Type: value`) + // below: a struct-body const binds a name like any other decl, so + // its name_span + raw flag must travel to the `const_decl` node + // (finding 1 — they were being dropped to a 1:1 caret / false + // reserved-name reject). 
+ const field_name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end }; + const field_is_raw = self.current.is_raw; try group_names.append(self.allocator, self.tokenSlice(self.current)); self.advance(); @@ -1104,6 +1112,8 @@ pub const Parser = struct { .name = group_names.items[0], .type_annotation = field_type, .value = value, + .name_span = field_name_span, + .is_raw = field_is_raw, } })); continue; } diff --git a/src/root.zig b/src/root.zig index 7b9bf13..230c390 100644 --- a/src/root.zig +++ b/src/root.zig @@ -11,6 +11,7 @@ pub const errors = @import("errors.zig"); pub const errors_tests = @import("errors.test.zig"); pub const trace_runtime_tests = @import("runtime_trace.test.zig"); pub const sema = @import("sema.zig"); +pub const sema_tests = @import("sema.test.zig"); pub const imports = @import("imports.zig"); pub const core = @import("core.zig"); pub const c_import = @import("c_import.zig"); diff --git a/src/sema.test.zig b/src/sema.test.zig new file mode 100644 index 0000000..09dd22f --- /dev/null +++ b/src/sema.test.zig @@ -0,0 +1,86 @@ +// Tests for sema.zig — the editor/LSP type classifier (the SECOND resolver, +// distinct from the codegen-side `ir/type_resolver.zig`). These pin behavior +// the example suite can't reach: the example runner exercises the codegen +// path (`sx run`), never sema's hover/completion/index resolution. + +const std = @import("std"); +const ast = @import("ast.zig"); +const Node = ast.Node; +const Parser = @import("parser.zig").Parser; +const sema = @import("sema.zig"); +const types = @import("types.zig"); +const Type = types.Type; + +// issue 0089 — the backtick raw escape must hold in BOTH classifiers. A raw +// reserved-name type reference (`` `s2 ``) resolves to the user-declared type, +// while a BARE `s2` stays the builtin int. 
Before the fix sema's +// `resolveTypeNode` ran `Type.fromName` first and ignored `is_raw`, so the +// editor index would show the builtin for backtick code (the issue-0083 +// two-resolver divergence applied to raw types). +test "sema: backtick raw type reference resolves to the user type; bare stays builtin" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const src = + \\`s2 :: struct { x: s64; } + \\ + ; + var parser = Parser.init(alloc, src); + const root = try parser.parse(); + + var analyzer = sema.Analyzer.init(alloc); + _ = try analyzer.analyze(root); + + // The reserved-spelled user type registered under its plain name. + try std.testing.expect(analyzer.struct_types.contains("s2")); + + // RAW reference (`` `s2 ``) → the user struct, NOT the 2-bit signed int. + var raw_node = Node{ .span = .{ .start = 0, .end = 0 }, .data = .{ .type_expr = .{ .name = "s2", .is_raw = true } } }; + const raw_ty = analyzer.resolveTypeNode(&raw_node); + try std.testing.expect(raw_ty == .struct_type); + try std.testing.expectEqualStrings("s2", raw_ty.struct_type); + + // BARE `s2` → the builtin 2-bit signed int. + var bare_node = Node{ .span = .{ .start = 0, .end = 0 }, .data = .{ .type_expr = .{ .name = "s2", .is_raw = false } } }; + const bare_ty = analyzer.resolveTypeNode(&bare_node); + try std.testing.expect(bare_ty == .signed); + try std.testing.expectEqual(@as(u8, 2), bare_ty.signed); +} + +// The same divergence guard for the string-keyed entry (`resolveTypeNameStr`, +// reached via `fieldType` when registering struct field types): a raw field +// annotation (`` `u8 ``) resolves to the user struct, a bare one (`u8`) to the +// builtin. Driven through the real analyze pipeline (no private access). 
+test "sema: a raw struct-field annotation resolves to the user type; bare stays builtin" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const src = + \\`u8 :: struct { y: s64; } + \\Holder :: struct { a: `u8; b: u8; } + \\ + ; + var parser = Parser.init(alloc, src); + const root = try parser.parse(); + + var analyzer = sema.Analyzer.init(alloc); + _ = try analyzer.analyze(root); + + const holder = analyzer.struct_types.get("Holder").?; + var a_ty: ?Type = null; + var b_ty: ?Type = null; + for (holder.field_names, holder.field_types) |fname, fty| { + if (std.mem.eql(u8, fname, "a")) a_ty = fty; + if (std.mem.eql(u8, fname, "b")) b_ty = fty; + } + + // field `a : `u8` → the user struct named "u8". + try std.testing.expect(a_ty.? == .struct_type); + try std.testing.expectEqualStrings("u8", a_ty.?.struct_type); + + // field `b : u8` → the builtin unsigned 8-bit int. + try std.testing.expect(b_ty.? == .unsigned); + try std.testing.expectEqual(@as(u8, 8), b_ty.?.unsigned); +} diff --git a/src/sema.zig b/src/sema.zig index ef24a7a..ffa8964 100644 --- a/src/sema.zig +++ b/src/sema.zig @@ -411,10 +411,15 @@ pub const Analyzer = struct { if (tn.data == .parameterized_type_expr) { return .void_type; } - // type_expr or identifier — check aliases, enums, structs + // type_expr or identifier — check aliases, enums, structs. A raw + // reference (`` `s2 ``) skips the builtin classifier and resolves + // through user-defined types only (issue 0089). 
if (tn.data == .type_expr or tn.data == .identifier) { const name = if (tn.data == .type_expr) tn.data.type_expr.name else tn.data.identifier.name; - if (Type.fromName(name)) |t| return t; + const is_raw = if (tn.data == .type_expr) tn.data.type_expr.is_raw else tn.data.identifier.is_raw; + if (!is_raw) { + if (Type.fromName(name)) |t| return t; + } if (self.type_aliases.get(name)) |target| { if (Type.fromName(target)) |t| return t; if (self.struct_types.contains(target)) return .{ .struct_type = target }; @@ -430,9 +435,16 @@ pub const Analyzer = struct { /// Resolve a bare type-name string against the registry (aliases, enums, /// structs), falling back to primitive spellings. Unlike `Type.fromName`, /// this knows user-defined types; returns `unresolved` when it can't place - /// the name. - fn resolveTypeNameStr(self: *Analyzer, name: []const u8) Type { - if (Type.fromName(name)) |t| return t; + /// the name. `skip_builtin` is the backtick raw escape (issue 0089) — a raw + /// reference (`` `s2 ``) bypasses the builtin/reserved classifier and + /// resolves only through user-defined types, mirroring the codegen-side + /// `TypeResolver.resolveNamed`. Inner names of compound shapes + /// (pointer/slice element/pointee) are always bare, so their callers pass + /// `false`. + fn resolveTypeNameStr(self: *Analyzer, name: []const u8, skip_builtin: bool) Type { + if (!skip_builtin) { + if (Type.fromName(name)) |t| return t; + } if (self.type_aliases.get(name)) |target| { if (Type.fromName(target)) |t| return t; if (self.struct_types.contains(target)) return .{ .struct_type = target }; @@ -460,8 +472,8 @@ pub const Analyzer = struct { /// registry; the element name is resolved lazily at index/field time. 
fn fieldType(self: *Analyzer, node: *Node) Type { return switch (node.data) { - .type_expr => |te| self.resolveTypeNameStr(te.name), - .identifier => |id| self.resolveTypeNameStr(id.name), + .type_expr => |te| self.resolveTypeNameStr(te.name, te.is_raw), + .identifier => |id| self.resolveTypeNameStr(id.name, id.is_raw), .many_pointer_type_expr => |mp| .{ .many_pointer_type = .{ .element_name = self.typeExprName(mp.element_type) } }, .pointer_type_expr => |p| .{ .pointer_type = .{ .pointee_name = self.typeExprName(p.pointee_type) } }, .slice_type_expr => |s| .{ .slice_type = .{ .element_name = self.typeExprName(s.element_type) } }, @@ -476,15 +488,15 @@ pub const Analyzer = struct { /// pointee first (so `*List(Move)` still iterates `Move`). fn elementTypeOf(self: *Analyzer, ty: Type) ?Type { return switch (ty) { - .array_type => |i| self.resolveTypeNameStr(i.element_name), - .slice_type => |i| self.resolveTypeNameStr(i.element_name), - .many_pointer_type => |i| self.resolveTypeNameStr(i.element_name), - .pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name)), + .array_type => |i| self.resolveTypeNameStr(i.element_name, false), + .slice_type => |i| self.resolveTypeNameStr(i.element_name, false), + .many_pointer_type => |i| self.resolveTypeNameStr(i.element_name, false), + .pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name, false)), .struct_type => |name| blk: { const info = self.struct_types.get(name) orelse break :blk null; for (info.field_names, info.field_types) |fname, fty| { if (std.mem.eql(u8, fname, "items") and fty == .many_pointer_type) { - break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name); + break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name, false); } } break :blk null; @@ -642,7 +654,7 @@ pub const Analyzer = struct { var obj_ty = self.inferExprType(fa.object); // `p.field` where `p` is `*T` resolves on the pointee `T`. 
if (obj_ty.isPointer()) { - obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name); + obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name, false); } // `.len` / `.ptr` on the built-in containers (string, slice, array). if (std.mem.eql(u8, fa.field, "len")) { @@ -670,9 +682,9 @@ pub const Analyzer = struct { .index_expr => |ie| { const obj_ty = self.inferExprType(ie.object); if (obj_ty == .string_type) return Type.u(8); - if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name); - if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name); - if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name); + if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name, false); + if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name, false); + if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name, false); return Type.unresolved; }, .slice_expr => |se| { @@ -1054,7 +1066,7 @@ pub const Analyzer = struct { .field_access => |fa| { try self.analyzeNode(fa.object); var owner_ty = self.inferExprType(fa.object); - if (owner_ty.isPointer()) owner_ty = self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name); + if (owner_ty.isPointer()) owner_ty = self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name, false); self.recordMemberRef(fa.field, owner_ty.toName() orelse "", false); }, .enum_literal => |el| { @@ -1083,7 +1095,7 @@ pub const Analyzer = struct { .match_expr => |me| { try self.analyzeNode(me.subject); var subj_ty = self.inferExprType(me.subject); - if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name); + if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name, false); const subj_owner = subj_ty.toName() orelse ""; for (me.arms) |arm| { if (arm.pattern) |pat| { diff --git 
a/src/types.zig b/src/types.zig index 3c2d1f7..168760a 100644 --- a/src/types.zig +++ b/src/types.zig @@ -198,6 +198,12 @@ pub const Type = union(enum) { pub fn fromTypeExpr(node: *Node) ?Type { if (node.data != .type_expr) return null; + // A backtick raw type reference (`` `s2 ``) is the LITERAL name used as + // a type — it must skip this builtin/reserved classifier and resolve + // through user-defined types only (issue 0089), mirroring the codegen- + // side `resolveNamed`'s `skip_builtin`. Returning null lets the sema + // callers fall through to their struct/enum/alias registry lookup. + if (node.data.type_expr.is_raw) return null; return fromName(node.data.type_expr.name); } From 724a919fc1f2d4f3214da186733ac67d930ebce5 Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 21:46:31 +0300 Subject: [PATCH 07/11] =?UTF-8?q?feat(lang):=20raw=20provenance=20through?= =?UTF-8?q?=20ALL=20sema=20compound=20type=20metadata=20=E2=80=94=20finish?= =?UTF-8?q?=20universal=20raw=20identifier=20in=20the=20LSP=20classifier?= =?UTF-8?q?=20[F0.6]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The codegen-side resolver was already raw-aware for the universal model; the sema/LSP editor index (the second classifier) only honored the DIRECT raw type. A COMPOUND raw type (`*`s2`, `?`s2`, `[N]`s2`, `[]`s2`, `[*]`s2`) stores its inner type-name as a bare string on the Type info struct, and every resolution site re-read it with skip_builtin=false — so the index reclassified a user type named `s2` as the builtin int, diverging from codegen (issue-0083 class, LSP surface only; codegen unchanged). Structural cure: every compound info struct (Pointer/Optional/Slice/ ManyPointer/Array) carries a REQUIRED is_raw bit (no default — a future construction site cannot drop it). 
is_raw is set at every construction site (resolveTypeNode arms, fieldType arms, variadic slice, .ptr/slice_expr derivation, for-loop by-ref, substType) and passed as skip_builtin at every resolution site (elementTypeOf, field-access pointer unwrap, index, deref, optional unwrap/null-coalesce, if/while optional binding, match subject). Optional-unwrap + deref sites converted from Type.fromName/pointerPointeeType (builtin-only, divergent) to resolveTypeNameStr(name, is_raw); the now-dead pointerPointeeType removed. Tests: src/sema.test.zig gains pointer/optional/array raw-vs-bare regressions (raw → user type, bare → builtin control) — each FAILS on pre-fix sema, PASSES after — plus a parameterized-raw coverage test. --- issues/0089-backtick-raw-identifier.md | 15 ++- src/sema.test.zig | 129 +++++++++++++++++++++++++ src/sema.zig | 102 ++++++++++++------- src/types.zig | 25 +++-- 4 files changed, 223 insertions(+), 48 deletions(-) diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md index 49c1fd1..77efe16 100644 --- a/issues/0089-backtick-raw-identifier.md +++ b/issues/0089-backtick-raw-identifier.md @@ -35,7 +35,15 @@ > builtin int. The SECOND (editor/LSP) classifier in [src/sema.zig] > (`Type.fromTypeExpr` / `resolveTypeNode` / `resolveTypeNameStr`) honors > `is_raw` too, so a backtick reserved-name annotation resolves to the user type -> in hover/completion, not the builtin (no two-resolver divergence). +> in hover/completion, not the builtin (no two-resolver divergence). 
The raw bit +> is carried STRUCTURALLY through every COMPOUND shape's inner-name metadata — +> `PointerTypeInfo` / `OptionalTypeInfo` / `SliceTypeInfo` / `ManyPointerTypeInfo` +> / `ArrayTypeInfo` each store a REQUIRED `is_raw` ([src/types.zig], no default, +> so a future construction site cannot drop it) that every `resolveTypeNameStr` +> call passes as its `skip_builtin` — so `` *`s2 ``, `` ?`s2 ``, `` [N]`s2 ``, +> `` []`s2 ``, `` [*]`s2 `` field-access / unwrap / index / deref in the editor +> index all reach the user type instead of reclassifying the inner `s2` to the +> builtin (the divergence the DIRECT-only attempt left for compound forms). > - **Declaration position.** A bare reserved-name declaration of EVERY kind > still errors (issue 0076 preserved); the backtick form is exempt. The check > and the exemption are made structurally symmetric: @@ -82,7 +90,10 @@ > `examples/1142-diagnostics-reserved-name-struct-const.sx` (bare struct-body const, > caret on the name). Backtick lexer + `resolveNamed(skip_builtin)` unit tests in > `src/lexer.zig` / `src/ir/type_resolver.test.zig`; the editor/LSP raw-type -> resolution (the second classifier) is pinned in `src/sema.test.zig`. +> resolution (the second classifier) is pinned in `src/sema.test.zig` — the direct +> case plus raw provenance through every compound shape (`` *`s2 `` field access, +> `` ?`s2 `` unwrap, `` [N]`s2 `` index, parameterized `` `s2(s64) ``), each with a +> bare-spelling control that stays the builtin (fail-before verified). > > The original report is preserved below. diff --git a/src/sema.test.zig b/src/sema.test.zig index 09dd22f..150bac5 100644 --- a/src/sema.test.zig +++ b/src/sema.test.zig @@ -84,3 +84,132 @@ test "sema: a raw struct-field annotation resolves to the user type; bare stays try std.testing.expect(b_ty.? 
== .unsigned); try std.testing.expectEqual(@as(u8, 8), b_ty.?.unsigned); } + +// ── issue 0089: raw provenance through sema's COMPOUND type metadata ──────── +// +// The direct-case fix (above) only covered a bare `` `s2 `` reference. A +// COMPOUND raw type (`*`s2`, `?`s2`, `[N]`s2`, …) stores its inner name as a +// bare string on the Type's info struct; the resolver re-reads that name via +// `resolveTypeNameStr`. Before threading `is_raw` ALONGSIDE the stored name, +// the resolver passed `skip_builtin = false`, so the LSP index reclassified a +// user type named `s2` as the builtin int — diverging from codegen. These +// pin every compound form: the raw inner resolves to the user type (FAILS on +// pre-fix sema), the bare inner stays the builtin (control, preserved). + +fn symType(res: sema.SemaResult, name: []const u8) ?Type { + for (res.symbols) |sym| { + if (std.mem.eql(u8, sym.name, name)) return sym.ty; + } + return null; +} + +test "sema: field access through a raw `*`s2` pointer resolves the user field; bare `*s2` stays builtin" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const src = + \\`s2 :: struct { x: s64; } + \\f :: (p: *`s2) { y := p.x; } + \\g :: (q: *s2) { w := q.*; } + \\ + ; + var parser = Parser.init(alloc, src); + const root = try parser.parse(); + var analyzer = sema.Analyzer.init(alloc); + const res = try analyzer.analyze(root); + + // RAW: `p: *`s2` → field `x` on the user struct → s64. (Pre-fix: the + // pointee `s2` reclassified to the 2-bit int, `.x` not found → unresolved.) + const y = symType(res, "y") orelse return error.MissingSymbol; + try std.testing.expect(y == .signed); + try std.testing.expectEqual(@as(u8, 64), y.signed); + + // CONTROL: `q: *s2` (bare) → deref yields the builtin 2-bit signed int. 
+ const w = symType(res, "w") orelse return error.MissingSymbol; + try std.testing.expect(w == .signed); + try std.testing.expectEqual(@as(u8, 2), w.signed); +} + +test "sema: unwrapping a raw `?`s2` optional resolves the user field; bare `?s2` stays builtin" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const src = + \\`s2 :: struct { x: s64; } + \\f :: (o: ?`s2) { if val := o { y := val.x; } } + \\g :: (b: ?s2) { if v := b { w := v; } } + \\ + ; + var parser = Parser.init(alloc, src); + const root = try parser.parse(); + var analyzer = sema.Analyzer.init(alloc); + const res = try analyzer.analyze(root); + + // RAW: `o: ?`s2` → `if val := o` unwraps to the user struct → `val.x` is s64. + // (Pre-fix: the optional child `s2` reclassified to the 2-bit int.) + const y = symType(res, "y") orelse return error.MissingSymbol; + try std.testing.expect(y == .signed); + try std.testing.expectEqual(@as(u8, 64), y.signed); + + // CONTROL: `b: ?s2` (bare) unwraps to the builtin 2-bit signed int. + const w = symType(res, "w") orelse return error.MissingSymbol; + try std.testing.expect(w == .signed); + try std.testing.expectEqual(@as(u8, 2), w.signed); +} + +test "sema: indexing a raw `[N]`s2` array resolves the user element; bare `[N]s2` stays builtin" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const src = + \\`s2 :: struct { x: s64; } + \\f :: (a: [4]`s2, b: [4]s2) { y := a[0]; w := b[0]; } + \\ + ; + var parser = Parser.init(alloc, src); + const root = try parser.parse(); + var analyzer = sema.Analyzer.init(alloc); + const res = try analyzer.analyze(root); + + // RAW: `a: [4]`s2` → element is the user struct. (Pre-fix: reclassified to + // the 2-bit int.) 
+ const y = symType(res, "y") orelse return error.MissingSymbol; + try std.testing.expect(y == .struct_type); + try std.testing.expectEqualStrings("s2", y.struct_type); + + // CONTROL: `b: [4]s2` (bare) → element is the builtin 2-bit signed int. + const w = symType(res, "w") orelse return error.MissingSymbol; + try std.testing.expect(w == .signed); + try std.testing.expectEqual(@as(u8, 2), w.signed); +} + +// Parameterized raw type (`` `s2(s64) ``). Unlike the shapes above this never +// had the divergence — instantiation resolves the base name straight against +// `struct_types` (no builtin classifier in the path), so it passes before AND +// after. Included as coverage that the universal model holds for the +// parameterized form too: a `` `s2 ``-declared generic instantiates and its +// field resolves. +test "sema: a raw parameterized type `` `s2(s64) `` instantiates the user generic" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const src = + \\`s2 :: struct ($T: Type) { items: [*]T = null; n: s64 = 0; } + \\f :: (v: `s2(s64)) { y := v.n; } + \\ + ; + var parser = Parser.init(alloc, src); + const root = try parser.parse(); + var analyzer = sema.Analyzer.init(alloc); + const res = try analyzer.analyze(root); + + // `v: `s2(s64)` instantiates the `` `s2 ``-declared generic; its concrete + // field `n` resolves to s64 (the raw base name was not misread as a builtin). 
+ const y = symType(res, "y") orelse return error.MissingSymbol; + try std.testing.expect(y == .signed); + try std.testing.expectEqual(@as(u8, 64), y.signed); +} diff --git a/src/sema.zig b/src/sema.zig index ffa8964..9e2f7be 100644 --- a/src/sema.zig +++ b/src/sema.zig @@ -193,7 +193,12 @@ pub const Analyzer = struct { .slice_type_expr => |st| if (st.element_type.data == .type_expr) st.element_type.data.type_expr.name else "", else => "", }; - try param_types.append(self.allocator, .{ .slice_type = .{ .element_name = elem_name } }); + const elem_raw = switch (param.type_expr.data) { + .type_expr => |te| te.is_raw, + .slice_type_expr => |st| typeExprIsRaw(st.element_type), + else => false, + }; + try param_types.append(self.allocator, .{ .slice_type = .{ .element_name = elem_name, .is_raw = elem_raw } }); } else { try param_types.append(self.allocator, pt); } @@ -362,35 +367,35 @@ pub const Analyzer = struct { const length: u32 = @intCast(ate.length.data.int_literal.value); const elem_type = self.resolveTypeNode(ate.element_type); const elem_name = elem_type.displayName(self.allocator) catch return .void_type; - return .{ .array_type = .{ .element_name = elem_name, .length = length } }; + return .{ .array_type = .{ .element_name = elem_name, .length = length, .is_raw = typeExprIsRaw(ate.element_type) } }; } // Slice type: []T if (tn.data == .slice_type_expr) { const ste = tn.data.slice_type_expr; const elem_type = self.resolveTypeNode(ste.element_type); const elem_name = elem_type.displayName(self.allocator) catch return .void_type; - return .{ .slice_type = .{ .element_name = elem_name } }; + return .{ .slice_type = .{ .element_name = elem_name, .is_raw = typeExprIsRaw(ste.element_type) } }; } // Optional type: ?T if (tn.data == .optional_type_expr) { const ote = tn.data.optional_type_expr; const inner_type = self.resolveTypeNode(ote.inner_type); const inner_name = inner_type.displayName(self.allocator) catch return .void_type; - return .{ .optional_type = .{ 
.child_name = inner_name } }; + return .{ .optional_type = .{ .child_name = inner_name, .is_raw = typeExprIsRaw(ote.inner_type) } }; } // Pointer type: *T if (tn.data == .pointer_type_expr) { const pte = tn.data.pointer_type_expr; const pointee_type = self.resolveTypeNode(pte.pointee_type); const pointee_name = pointee_type.displayName(self.allocator) catch return .void_type; - return .{ .pointer_type = .{ .pointee_name = pointee_name } }; + return .{ .pointer_type = .{ .pointee_name = pointee_name, .is_raw = typeExprIsRaw(pte.pointee_type) } }; } // Many-pointer type: [*]T if (tn.data == .many_pointer_type_expr) { const mpte = tn.data.many_pointer_type_expr; const elem_type = self.resolveTypeNode(mpte.element_type); const elem_name = elem_type.displayName(self.allocator) catch return .void_type; - return .{ .many_pointer_type = .{ .element_name = elem_name } }; + return .{ .many_pointer_type = .{ .element_name = elem_name, .is_raw = typeExprIsRaw(mpte.element_type) } }; } // Function pointer type: (ParamTypes) -> ReturnType if (tn.data == .function_type_expr) { @@ -466,6 +471,31 @@ pub const Analyzer = struct { }; } + /// The backtick raw bit of an inner type-name node (`` `s2 ``). A compound + /// shape (`*T`, `?T`, `[]T`, …) stores its inner name as a bare string, so + /// this bit must travel ALONGSIDE that name (issue 0089) — otherwise the + /// resolver re-reads `s2` as the builtin int. Non-leaf nodes are never raw. + fn typeExprIsRaw(node: *Node) bool { + return switch (node.data) { + .type_expr => |te| te.is_raw, + .identifier => |id| id.is_raw, + else => false, + }; + } + + /// When a compound shape stores the NAME of an ALREADY-resolved inner type + /// (no syntactic node to read `is_raw` from — e.g. a for-loop element), a + /// user nominal type must be re-resolved with `skip_builtin` so a struct/ + /// enum/union named `s2` is not reclassified as the builtin. Builtins keep + /// `false`. 
Harmless for non-colliding names (the registry lookup is the + /// same either way). + fn innerNameIsRaw(inner: Type) bool { + return switch (inner) { + .struct_type, .enum_type, .union_type => true, + else => false, + }; + } + /// Resolve a struct field's declared type, preserving the raw element/ /// pointee name of pointer/slice shapes so generic params (`T`) survive /// into `instantiateGeneric`'s substitution. Bare names resolve through the @@ -474,9 +504,9 @@ pub const Analyzer = struct { return switch (node.data) { .type_expr => |te| self.resolveTypeNameStr(te.name, te.is_raw), .identifier => |id| self.resolveTypeNameStr(id.name, id.is_raw), - .many_pointer_type_expr => |mp| .{ .many_pointer_type = .{ .element_name = self.typeExprName(mp.element_type) } }, - .pointer_type_expr => |p| .{ .pointer_type = .{ .pointee_name = self.typeExprName(p.pointee_type) } }, - .slice_type_expr => |s| .{ .slice_type = .{ .element_name = self.typeExprName(s.element_type) } }, + .many_pointer_type_expr => |mp| .{ .many_pointer_type = .{ .element_name = self.typeExprName(mp.element_type), .is_raw = typeExprIsRaw(mp.element_type) } }, + .pointer_type_expr => |p| .{ .pointer_type = .{ .pointee_name = self.typeExprName(p.pointee_type), .is_raw = typeExprIsRaw(p.pointee_type) } }, + .slice_type_expr => |s| .{ .slice_type = .{ .element_name = self.typeExprName(s.element_type), .is_raw = typeExprIsRaw(s.element_type) } }, .parameterized_type_expr => |pte| self.instantiateGeneric(pte.name, pte.args) orelse self.resolveTypeNode(node), else => self.resolveTypeNode(node), }; @@ -488,15 +518,15 @@ pub const Analyzer = struct { /// pointee first (so `*List(Move)` still iterates `Move`). 
fn elementTypeOf(self: *Analyzer, ty: Type) ?Type { return switch (ty) { - .array_type => |i| self.resolveTypeNameStr(i.element_name, false), - .slice_type => |i| self.resolveTypeNameStr(i.element_name, false), - .many_pointer_type => |i| self.resolveTypeNameStr(i.element_name, false), - .pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name, false)), + .array_type => |i| self.resolveTypeNameStr(i.element_name, i.is_raw), + .slice_type => |i| self.resolveTypeNameStr(i.element_name, i.is_raw), + .many_pointer_type => |i| self.resolveTypeNameStr(i.element_name, i.is_raw), + .pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name, i.is_raw)), .struct_type => |name| blk: { const info = self.struct_types.get(name) orelse break :blk null; for (info.field_names, info.field_types) |fname, fty| { if (std.mem.eql(u8, fname, "items") and fty == .many_pointer_type) { - break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name, false); + break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name, fty.many_pointer_type.is_raw); } } break :blk null; @@ -527,10 +557,10 @@ pub const Analyzer = struct { /// name-carrying shapes need rewriting; the rest pass through. 
fn substType(ty: Type, params: []const []const u8, args: []const []const u8) Type { return switch (ty) { - .many_pointer_type => |i| .{ .many_pointer_type = .{ .element_name = substName(i.element_name, params, args) } }, - .slice_type => |i| .{ .slice_type = .{ .element_name = substName(i.element_name, params, args) } }, - .array_type => |i| .{ .array_type = .{ .length = i.length, .element_name = substName(i.element_name, params, args) } }, - .pointer_type => |i| .{ .pointer_type = .{ .pointee_name = substName(i.pointee_name, params, args) } }, + .many_pointer_type => |i| .{ .many_pointer_type = .{ .element_name = substName(i.element_name, params, args), .is_raw = i.is_raw } }, + .slice_type => |i| .{ .slice_type = .{ .element_name = substName(i.element_name, params, args), .is_raw = i.is_raw } }, + .array_type => |i| .{ .array_type = .{ .length = i.length, .element_name = substName(i.element_name, params, args), .is_raw = i.is_raw } }, + .pointer_type => |i| .{ .pointer_type = .{ .pointee_name = substName(i.pointee_name, params, args), .is_raw = i.is_raw } }, .struct_type => |n| .{ .struct_type = substName(n, params, args) }, else => ty, }; @@ -654,16 +684,16 @@ pub const Analyzer = struct { var obj_ty = self.inferExprType(fa.object); // `p.field` where `p` is `*T` resolves on the pointee `T`. if (obj_ty.isPointer()) { - obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name, false); + obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name, obj_ty.pointer_type.is_raw); } // `.len` / `.ptr` on the built-in containers (string, slice, array). 
if (std.mem.eql(u8, fa.field, "len")) { if (obj_ty == .string_type or obj_ty.isSlice() or obj_ty.isArray()) return Type.s(64); } if (std.mem.eql(u8, fa.field, "ptr")) { - if (obj_ty == .string_type) return .{ .many_pointer_type = .{ .element_name = "u8" } }; - if (obj_ty.isSlice()) return .{ .many_pointer_type = .{ .element_name = obj_ty.slice_type.element_name } }; - if (obj_ty.isArray()) return .{ .many_pointer_type = .{ .element_name = obj_ty.array_type.element_name } }; + if (obj_ty == .string_type) return .{ .many_pointer_type = .{ .element_name = "u8", .is_raw = false } }; + if (obj_ty.isSlice()) return .{ .many_pointer_type = .{ .element_name = obj_ty.slice_type.element_name, .is_raw = obj_ty.slice_type.is_raw } }; + if (obj_ty.isArray()) return .{ .many_pointer_type = .{ .element_name = obj_ty.array_type.element_name, .is_raw = obj_ty.array_type.is_raw } }; } if (obj_ty.isStruct()) { if (self.struct_types.get(obj_ty.struct_type)) |info| { @@ -675,23 +705,23 @@ pub const Analyzer = struct { } } if (obj_ty.isArray()) { - return Type.fromName(obj_ty.array_type.element_name) orelse Type.unresolved; + return self.resolveTypeNameStr(obj_ty.array_type.element_name, obj_ty.array_type.is_raw); } return Type.unresolved; }, .index_expr => |ie| { const obj_ty = self.inferExprType(ie.object); if (obj_ty == .string_type) return Type.u(8); - if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name, false); - if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name, false); - if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name, false); + if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name, obj_ty.array_type.is_raw); + if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name, obj_ty.many_pointer_type.is_raw); + if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name, 
obj_ty.slice_type.is_raw); return Type.unresolved; }, .slice_expr => |se| { const obj_ty = self.inferExprType(se.object); if (obj_ty == .string_type) return .string_type; - if (obj_ty.isArray()) return .{ .slice_type = .{ .element_name = obj_ty.array_type.element_name } }; - if (obj_ty.isManyPointer()) return .{ .slice_type = .{ .element_name = obj_ty.many_pointer_type.element_name } }; + if (obj_ty.isArray()) return .{ .slice_type = .{ .element_name = obj_ty.array_type.element_name, .is_raw = obj_ty.array_type.is_raw } }; + if (obj_ty.isManyPointer()) return .{ .slice_type = .{ .element_name = obj_ty.many_pointer_type.element_name, .is_raw = obj_ty.many_pointer_type.is_raw } }; if (obj_ty.isSlice()) return obj_ty; return .void_type; }, @@ -721,17 +751,17 @@ pub const Analyzer = struct { }, .force_unwrap => |fu| { const opt_ty = self.inferExprType(fu.operand); - if (opt_ty.isOptional()) return Type.fromName(opt_ty.optional_type.child_name) orelse .void_type; + if (opt_ty.isOptional()) return self.resolveTypeNameStr(opt_ty.optional_type.child_name, opt_ty.optional_type.is_raw); return .void_type; }, .null_coalesce => |nc| { const opt_ty = self.inferExprType(nc.lhs); - if (opt_ty.isOptional()) return Type.fromName(opt_ty.optional_type.child_name) orelse .void_type; + if (opt_ty.isOptional()) return self.resolveTypeNameStr(opt_ty.optional_type.child_name, opt_ty.optional_type.is_raw); return self.inferExprType(nc.rhs); }, .deref_expr => |de| { const ptr_ty = self.inferExprType(de.operand); - if (ptr_ty.isPointer()) return ptr_ty.pointerPointeeType() orelse .void_type; + if (ptr_ty.isPointer()) return self.resolveTypeNameStr(ptr_ty.pointer_type.pointee_name, ptr_ty.pointer_type.is_raw); return .void_type; }, .null_literal => .void_type, @@ -1066,7 +1096,7 @@ pub const Analyzer = struct { .field_access => |fa| { try self.analyzeNode(fa.object); var owner_ty = self.inferExprType(fa.object); - if (owner_ty.isPointer()) owner_ty = 
self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name, false); + if (owner_ty.isPointer()) owner_ty = self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name, owner_ty.pointer_type.is_raw); self.recordMemberRef(fa.field, owner_ty.toName() orelse "", false); }, .enum_literal => |el| { @@ -1078,7 +1108,7 @@ pub const Analyzer = struct { // `if val := expr { ... }` — val is the unwrapped optional const cond_ty = self.inferExprType(ie.condition); const inner_ty: ?Type = if (cond_ty.isOptional()) - Type.fromName(cond_ty.optional_type.child_name) + self.resolveTypeNameStr(cond_ty.optional_type.child_name, cond_ty.optional_type.is_raw) else null; try self.pushScope(); @@ -1095,7 +1125,7 @@ pub const Analyzer = struct { .match_expr => |me| { try self.analyzeNode(me.subject); var subj_ty = self.inferExprType(me.subject); - if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name, false); + if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name, subj_ty.pointer_type.is_raw); const subj_owner = subj_ty.toName() orelse ""; for (me.arms) |arm| { if (arm.pattern) |pat| { @@ -1114,7 +1144,7 @@ pub const Analyzer = struct { if (we.binding_name) |bname| { const cond_ty = self.inferExprType(we.condition); const inner_ty: ?Type = if (cond_ty.isOptional()) - Type.fromName(cond_ty.optional_type.child_name) + self.resolveTypeNameStr(cond_ty.optional_type.child_name, cond_ty.optional_type.is_raw) else null; try self.pushScope(); @@ -1134,7 +1164,7 @@ pub const Analyzer = struct { cap_ty = .{ .signed = 64 }; } else if (self.elementTypeOf(self.inferExprType(fe.iterable))) |elem| { cap_ty = if (fe.capture_by_ref) - (if (elem.toName()) |en| Type{ .pointer_type = .{ .pointee_name = en } } else elem) + (if (elem.toName()) |en| Type{ .pointer_type = .{ .pointee_name = en, .is_raw = innerNameIsRaw(elem) } } else elem) else elem; } diff --git a/src/types.zig b/src/types.zig index 168760a..30adae6 100644 --- 
a/src/types.zig +++ b/src/types.zig @@ -42,16 +42,26 @@ pub const Type = union(enum) { /// `ir.TypeId.unresolved`. unresolved, + /// `is_raw` records whether the inner type-name came from a backtick raw + /// reference (`` `s2 ``) or an already-resolved user type. It is the + /// `skip_builtin` the resolver MUST pass when re-resolving the stored inner + /// name (issue 0089) — without it `resolveTypeNameStr` would reclassify a + /// user type named `s2` as the builtin int, diverging from codegen. The + /// field is REQUIRED (no default) so a future construction site cannot + /// silently drop the bit, the way the LSP index did for compound shapes. pub const SliceTypeInfo = struct { element_name: []const u8, + is_raw: bool, }; pub const PointerTypeInfo = struct { pointee_name: []const u8, + is_raw: bool, }; pub const ManyPointerTypeInfo = struct { element_name: []const u8, + is_raw: bool, }; pub const FunctionTypeInfo = struct { @@ -67,6 +77,7 @@ pub const Type = union(enum) { pub const ArrayTypeInfo = struct { element_name: []const u8, length: u32, + is_raw: bool, }; pub const VectorTypeInfo = struct { @@ -76,6 +87,7 @@ pub const Type = union(enum) { pub const OptionalTypeInfo = struct { child_name: []const u8, + is_raw: bool, }; pub const MetaTypeInfo = struct { @@ -125,7 +137,7 @@ pub const Type = union(enum) { if (std.mem.eql(u8, name, "f64")) return .f64; return null; }, - '?' => if (name.len >= 2) .{ .optional_type = .{ .child_name = name[1..] } } else null, + '?' => if (name.len >= 2) .{ .optional_type = .{ .child_name = name[1..], .is_raw = false } } else null, 'A' => if (std.mem.eql(u8, name, "Any")) .any_type else null, 'v' => if (std.mem.eql(u8, name, "void")) .void_type else null, '[' => { @@ -141,11 +153,11 @@ pub const Type = union(enum) { } // Many-pointer: [*]T if (name.len >= 4 and name[1] == '*' and name[2] == ']') { - return .{ .many_pointer_type = .{ .element_name = name[3..] 
} }; + return .{ .many_pointer_type = .{ .element_name = name[3..], .is_raw = false } }; } return null; }, - '*' => if (name.len >= 2) .{ .pointer_type = .{ .pointee_name = name[1..] } } else null, + '*' => if (name.len >= 2) .{ .pointer_type = .{ .pointee_name = name[1..], .is_raw = false } } else null, 'V' => { // Vector(N,T) if (name.len >= 10 and std.mem.startsWith(u8, name, "Vector(") and name[name.len - 1] == ')') { @@ -235,13 +247,6 @@ pub const Type = union(enum) { }; } - pub fn pointerPointeeType(self: Type) ?Type { - return switch (self) { - .pointer_type => |info| fromName(info.pointee_name), - else => null, - }; - } - pub fn isManyPointer(self: Type) bool { return switch (self) { .many_pointer_type => true, From d14e29be02d5b9afc22db5f15573eaad2dde253c Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 22:17:53 +0300 Subject: [PATCH 08/11] =?UTF-8?q?docs(lang):=20precise=20reserved-name=20r?= =?UTF-8?q?ule=20=E2=80=94=20member-name=20positions=20are=20EXEMPT=20[F0.?= =?UTF-8?q?6]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AGRA RULING (issue 0089, attempt 7): bare reserved-name MEMBER positions are intentionally exempt from the reserved-type-name rule, and the implementation already does the right thing — this is a docs + one-example change, no code. The exempt member positions are struct FIELD names, union TAG names, and protocol method-SIGNATURE names: they sit in a member slot, are reached via obj.name (or dispatched by string), and are never type-classified, so they never mis-lower. The backtick is optional there. The exemption stops at member DEFINITIONS: an impl method is a real function (reached through the impl_block -> fn_decl arm), so a reserved-spelled impl method still needs the backtick, exactly like a free function (cf. examples/1122) — and every bare reserved-name value binding / declaration name still errors (0076 preserved). 
- specs.md / readme.md: replace the "every binding site" / "any binding site" overclaim with the precise rule — required positions (value bindings + declaration names + impl method definitions) vs the exempt member-name positions (field / tag / protocol signature; backtick optional). - examples/0158-types-reserved-name-member-exempt.sx: pins the exempt behavior — bare reserved-name struct fields + union tag read & written bare AND via backtick, and a protocol with a bare reserved-name method dispatched through the protocol (impl definition takes the backtick). - issues/0089: document the member-name exemption in the RESOLVED banner + add 0158 to the regression list. Gate: zig build, zig build test, bash tests/run_examples.sh — all green (430 passed, 0 failed, 0 timed out). --- .../0158-types-reserved-name-member-exempt.sx | 55 +++++++++++++++++++ ...158-types-reserved-name-member-exempt.exit | 1 + ...8-types-reserved-name-member-exempt.stderr | 1 + ...8-types-reserved-name-member-exempt.stdout | 5 ++ issues/0089-backtick-raw-identifier.md | 17 ++++++ readme.md | 21 ++++--- specs.md | 34 +++++++++--- 7 files changed, 118 insertions(+), 16 deletions(-) create mode 100644 examples/0158-types-reserved-name-member-exempt.sx create mode 100644 examples/expected/0158-types-reserved-name-member-exempt.exit create mode 100644 examples/expected/0158-types-reserved-name-member-exempt.stderr create mode 100644 examples/expected/0158-types-reserved-name-member-exempt.stdout diff --git a/examples/0158-types-reserved-name-member-exempt.sx b/examples/0158-types-reserved-name-member-exempt.sx new file mode 100644 index 0000000..63d66f5 --- /dev/null +++ b/examples/0158-types-reserved-name-member-exempt.sx @@ -0,0 +1,55 @@ +// Reserved-name MEMBER positions are EXEMPT from the reserved-type-name rule: +// a bare reserved spelling (`s2`, `u8`, `s1`, …) is legal as a struct FIELD +// name, a union TAG name, and a protocol METHOD-SIGNATURE name. 
These are +// unambiguous — the name sits in a member slot and is reached via `obj.name` +// (or dispatched by string), so it is never type-classified and never +// mislowers. The backtick form is optional there and resolves to the same +// member. Backtick access (`obj.`s2`) and bare access (`obj.s2`) both work. +// +// The exemption covers member SIGNATURES only: an `impl` method DEFINITION is a +// real function, so its name is a declaration site (like a free function) and a +// reserved spelling still needs the backtick (`` `s2 :: (self) ``) — bare would +// be type-classified and mislower (the issue-0076 protection). A bare reserved +// VALUE binding / declaration name still errors (see examples/1119, 1141, 1142). +// Regression (issue 0089 — attempt-7: pins the Agra-ruled member-name exemption). +#import "modules/std.sx"; + +// Struct fields spelled with reserved type names — bare is legal. +Holder :: struct { + s2: s64; + u8: s64; +} + +// Union tags spelled with reserved type names — bare is legal. +Tag :: union { + s1: s32; + u16: f64; +} + +// Protocol method SIGNATURE spelled with a reserved type name — bare is legal.
+Speaker :: protocol { + s2 :: () -> s64; +} + +Dog :: struct { n: s64; } +impl Speaker for Dog { + `s2 :: (self: *Dog) -> s64 { self.n } // impl DEFINITION → backtick required +} + +main :: () -> s32 { + h := Holder.{ s2 = 10, u8 = 20 }; + print("fields bare = {} {}\n", h.s2, h.u8); // bare member access + print("fields tick = {} {}\n", h.`s2, h.`u8); // backtick member access + h.s2 = 11; + h.`u8 = 21; // backtick write + print("fields set = {} {}\n", h.s2, h.u8); + + t : Tag = ---; + t.s1 = 5; + print("union = {} {}\n", t.s1, t.`s1); // bare + backtick — same tag + + items : List(Speaker) = .{}; + items.append(Dog.{ n = 7 }); + print("dispatch = {}\n", items.items[0].s2()); // bare reserved-name method call + return 0; +} diff --git a/examples/expected/0158-types-reserved-name-member-exempt.exit b/examples/expected/0158-types-reserved-name-member-exempt.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0158-types-reserved-name-member-exempt.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0158-types-reserved-name-member-exempt.stderr b/examples/expected/0158-types-reserved-name-member-exempt.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0158-types-reserved-name-member-exempt.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0158-types-reserved-name-member-exempt.stdout b/examples/expected/0158-types-reserved-name-member-exempt.stdout new file mode 100644 index 0000000..1f5052e --- /dev/null +++ b/examples/expected/0158-types-reserved-name-member-exempt.stdout @@ -0,0 +1,5 @@ +fields bare = 10 20 +fields tick = 10 20 +fields set = 11 21 +union = 5 5 +dispatch = 7 diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md index 77efe16..de3c3ca 100644 --- a/issues/0089-backtick-raw-identifier.md +++ b/issues/0089-backtick-raw-identifier.md @@ -58,6 +58,20 @@ > that omits them. 
`FnDecl` is built at every parser site through `parseFnDecl`, > whose `name_is_raw` is a REQUIRED parameter (the equivalent guarantee); the > type decls likewise route through parse-functions taking `name_is_raw`. +> - **Member-name positions are exempt** (Agra ruling, attempt 7). A struct +> **field** name, a union **tag** name, and a protocol **method-signature** +> name accept a bare reserved spelling: these sit in a member slot and are +> reached via `obj.name` / dispatched by string, so they are never +> type-classified and never mis-lower — the binding-name walk's `struct_decl` +> / `union_decl` / `enum_decl` / `protocol_decl` arms +> ([src/ir/semantic_diagnostics.zig]) check only the *type* name (and method +> *params*), not field / tag / variant / method-signature names. The backtick +> is optional there (`obj.s2` and `` obj.`s2 `` resolve to the same member). +> The exemption stops at member *definitions*: an `impl` method is a real +> function reached through the `impl_block` → `fn_decl` arm, so a +> reserved-spelled impl method needs the backtick (`` `s2 :: (self) ``), no +> more exempt than a free function (cf. `examples/1122`). Pinned by +> `examples/0158-types-reserved-name-member-exempt.sx`. > 2. 
**`#import c` foreign-name exemption.** `c_import.zig` synthesizes foreign > `#foreign` decls with `Param.is_raw = true` (and the synthesized `FnDecl` > `is_raw = true`), so generated C names that collide with reserved type names @@ -80,6 +94,9 @@ > `examples/0156-types-backtick-struct-const.sx` (struct-body const, untyped + typed), > `examples/0157-types-backtick-parameterized-raw-type.sx` (raw parameterized type + > pointer/field wrappers), +> `examples/0158-types-reserved-name-member-exempt.sx` (bare reserved-name struct +> fields / union tag / protocol method signature — read & written bare and via +> backtick; impl method definition takes the backtick), > `examples/1054-errors-backtick-reserved-binding.sx` (`catch`/`onfail` tag > bindings), `examples/1220-ffi-c-import-reserved-name-params.{sx,h,c}` (foreign > param + fn-name exemption, bare-callable foreign fn); negatives diff --git a/readme.md b/readme.md index 4ad2119..712142d 100644 --- a/readme.md +++ b/readme.md @@ -106,14 +106,19 @@ z : s32 = ---; // uninitialized ``` Builtin type names (`s2`, `u8`, `bool`, `string`, …) are reserved and a *bare* -spelling can't be used as an identifier at **any** binding site — a value binding -(`:=` / typed local / parameter), a `::` constant or function declaration, or a -`::` type declaration (`struct` / `enum` / `union` / alias / `protocol` / …) — each -is an error (`s2 :: 5` and `s2 :: (n) { … }` are rejected just like `s2 := 5`). A -leading backtick escapes one into a **raw identifier**: `` `name `` is the literal -identifier `name` (the backtick drops out of the text), usable in **every** -position — value, declaration, and type. It is the only way handwritten sx can -spell a reserved name. 
+spelling can't be used as an identifier at a **value-binding or declaration-name** +site — a value binding (`:=` / typed local / parameter), a `::` constant or +function declaration, an `impl` method definition, or a `::` type declaration +(`struct` / `enum` / `union` / alias / `protocol` / …) — each is an error +(`s2 :: 5` and `s2 :: (n) { … }` are rejected just like `s2 := 5`). **Member-name +positions are exempt**: a struct *field*, a union *tag*, and a protocol +*method-signature* may be a bare reserved spelling (`struct { s2: s64 }`, +`union { u8: … }`, `protocol { s2 :: (self) }`) — they are reached via `obj.name`, +so they never mis-lower. A leading backtick escapes one into a **raw identifier**: +`` `name `` is the literal identifier `name` (the backtick drops out of the text), +usable in **every** position — value, declaration, and type, and optional in the +exempt member positions. It is the only way handwritten sx can spell a reserved +name in a binding or declaration site. ```sx `s2 := 2.5; // identifier "s2", distinct from the s2 type diff --git a/specs.md b/specs.md index 2fe7450..583943e 100644 --- a/specs.md +++ b/specs.md @@ -17,14 +17,26 @@ Line comments start with `//` and extend to end of line. A spelling that names a builtin type — the arbitrary-width integers `s1`..`s64` / `u1`..`u64`, plus `bool`, `string`, `void`, `f32`, `f64`, `usize`, `isize`, `Any` — -is reserved. A bare reserved spelling is rejected at **every binding site** — -anywhere handwritten sx introduces a name: a value binding (`:=` / typed local / -parameter), a `::` **constant** or **function** declaration, and a `::` **type** -declaration (`struct` / `enum` / `union` / `error` / type alias / `protocol` / -foreign class / ufcs alias / namespaced import). A value-spelled-as-type parses as -a *type*, not a value, so its address-of / autoref paths would mis-lower; a -type/const/function name spelled as a builtin would shadow the builtin. 
The only -exemptions are the backtick escape (below) and `#import c` foreign decls. +is reserved. A bare reserved spelling is rejected at **value-binding and +declaration-name sites**: a value binding (`:=` / typed local / parameter), a +`::` **constant** or **function** declaration, an `impl` method **definition**, +and a `::` **type** declaration (`struct` / `enum` / `union` / `error` / type +alias / `protocol` / foreign class / ufcs alias / namespaced import). A +value-spelled-as-type parses as a *type*, not a value, so its address-of / +autoref paths would mis-lower; a type / const / function / method name spelled as +a builtin would shadow the builtin. The exemptions are the backtick escape +(below), `#import c` foreign decls, and **member-name positions** (next) — it is +**not** rejected at every place a name appears. + +**Member-name positions are exempt.** A struct **field** name, a union **tag** +name, and a protocol **method-signature** name may be a bare reserved spelling. +These sit in a member slot (`name: T` / `name :: (…)`) and are reached only via +`obj.name` (or dispatched by string), so they are never type-classified and never +mis-lower. The backtick form is optional there and names the same member — `obj.s2` +and `` obj.`s2 `` both resolve. The exemption covers member *signatures* only: an +`impl` method **definition** is a real function (a declaration site, not a member +slot), so a reserved-spelled impl method still needs the backtick +(`` `s2 :: (self) ``), exactly like a free function. See `examples/0158`. ```sx s2 := 2.5; // ERROR: 's2' is a reserved type name and cannot be used as an identifier @@ -91,6 +103,12 @@ for xs: (`bool, `u16) { } // for capture + index x catch `s2 { } // catch tag binding ``` +In the **member-name positions** among these — struct field, union tag, and +protocol method signature — the backtick is *optional*: the bare reserved +spelling is already legal there (see "Member-name positions are exempt" above). 
+Everywhere else (value bindings and declaration names, including an `impl` method +definition) the backtick is *required* to spell a reserved name. + A reserved-spelled **function** is bare-callable: `` `s2 :: (n: s64) -> s64 { … } `` can be invoked as `s2(10)` (the bare callee spelling parses as a type but resolves to the function when one of that name is in scope; `TypeName(val)` is not a cast). From 685d3d122b61281b44ecc487c1051b5be7e6dc9d Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 22:42:09 +0300 Subject: [PATCH 09/11] docs(lang): keyword-spelled f32/f64 still need a backtick in member-name positions [F0.6] The member-name exemption applies only to identifier-classified reserved spellings (s1..s64, u1..u64, bool, string, void, usize, isize, Any). f32/f64 are lexer keywords (token.zig kw_f32/kw_f64) and member-name slots require an identifier token, so a bare f32/f64 field/tag/method name is rejected at parse; the backtick is required there too. specs.md + readme.md corrected. --- readme.md | 6 +++++- specs.md | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 712142d..80ea1da 100644 --- a/readme.md +++ b/readme.md @@ -114,7 +114,11 @@ function declaration, an `impl` method definition, or a `::` type declaration positions are exempt**: a struct *field*, a union *tag*, and a protocol *method-signature* may be a bare reserved spelling (`struct { s2: s64 }`, `union { u8: … }`, `protocol { s2 :: (self) }`) — they are reached via `obj.name`, -so they never mis-lower. A leading backtick escapes one into a **raw identifier**: +so they never mis-lower. The bare exemption covers only the identifier-classified +reserved names (`s1`..`s64`, `u1`..`u64`, `bool`, `string`, `void`, `usize`, +`isize`, `Any`); `f32` and `f64` are lexer keywords, so even in a member slot they +need the backtick (`` struct { `f32: s64 } ``). 
A leading backtick escapes one into +a **raw identifier**: `` `name `` is the literal identifier `name` (the backtick drops out of the text), usable in **every** position — value, declaration, and type, and optional in the exempt member positions. It is the only way handwritten sx can spell a reserved diff --git a/specs.md b/specs.md index 583943e..3683a57 100644 --- a/specs.md +++ b/specs.md @@ -38,6 +38,15 @@ and `` obj.`s2 `` both resolve. The exemption covers member *signatures* only: a slot), so a reserved-spelled impl method still needs the backtick (`` `s2 :: (self) ``), exactly like a free function. See `examples/0158`. +The bare member-name exemption applies only to the **identifier-classified** +reserved spellings — `s1`..`s64`, `u1`..`u64`, `bool`, `string`, `void`, `usize`, +`isize`, `Any` — which all lex as ordinary identifiers. The two +**keyword-classified** reserved spellings, `f32` and `f64`, are lexer keywords, and +member-name slots require an identifier token; a bare `f32` / `f64` is therefore +rejected at parse (`expected field name in struct`) even in a member position. Use +the backtick there too — `` struct { `f32: s64; } `` / `` union { `f64: … } `` / +`` protocol { `f32 :: (self); } `` work as field / tag / method names. 
+ ```sx s2 := 2.5; // ERROR: 's2' is a reserved type name and cannot be used as an identifier s2 :: 5; // ERROR — a `::` constant name is a binding site too From 166b42c308926ac144205ddf8abd788c94ef8b30 Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 22:52:29 +0300 Subject: [PATCH 10/11] docs(lang): sync issues/0089 member-name exemption with the f32/f64 keyword caveat [F0.6] --- issues/0089-backtick-raw-identifier.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md index de3c3ca..b0fbd39 100644 --- a/issues/0089-backtick-raw-identifier.md +++ b/issues/0089-backtick-raw-identifier.md @@ -67,6 +67,15 @@ > ([src/ir/semantic_diagnostics.zig]) check only the *type* name (and method > *params*), not field / tag / variant / method-signature names. The backtick > is optional there (`obj.s2` and `` obj.`s2 `` resolve to the same member). +> This bare member-name exemption covers only the **identifier-classified** +> reserved spellings — `s1`..`s64`, `u1`..`u64`, `bool`, `string`, `void`, +> `usize`, `isize`, `Any` — which all lex as ordinary identifiers. The two +> **keyword-classified** spellings, `f32` and `f64`, are lexer keywords +> ([src/token.zig]), and a member-name slot requires an identifier token +> ([src/parser.zig]); a bare `f32` / `f64` is therefore rejected at parse +> (`expected field name in struct`) even in a member position, and still needs +> the backtick there too — `` struct { `f32: s64; } `` / `` union { `f64: … } `` +> / `` protocol { `f32 :: (self); } `` work as field / tag / method names. 
> The exemption stops at member *definitions*: an `impl` method is a real > function reached through the `impl_block` → `fn_decl` arm, so a > reserved-spelled impl method needs the backtick (`` `s2 :: (self) ``), no From b9a29c39c5debc9eab2d5518faf2f26a8516ff41 Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 23:02:09 +0300 Subject: [PATCH 11/11] =?UTF-8?q?docs(lang):=20fix=20invalid=20protocol=20?= =?UTF-8?q?method-signature=20snippets=20=E2=80=94=20(self)=20->=20()=20->?= =?UTF-8?q?=20s64=20[F0.6]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A protocol method signature omits the receiver; a bare `self` has no type, so `protocol { … :: (self) … }` fails at parse with 'expected :'. Correct the three member-exemption doc snippets (readme.md, specs.md, issues/0089) to the valid signature form, matching examples/0158's `Speaker :: protocol { s2 :: () -> s64; }`. --- issues/0089-backtick-raw-identifier.md | 2 +- readme.md | 2 +- specs.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md index b0fbd39..bb7d1a0 100644 --- a/issues/0089-backtick-raw-identifier.md +++ b/issues/0089-backtick-raw-identifier.md @@ -75,7 +75,7 @@ > ([src/parser.zig]); a bare `f32` / `f64` is therefore rejected at parse > (`expected field name in struct`) even in a member position, and still needs > the backtick there too — `` struct { `f32: s64; } `` / `` union { `f64: … } `` -> / `` protocol { `f32 :: (self); } `` work as field / tag / method names. +> / `` protocol { `f32 :: () -> s64; } `` work as field / tag / method names. 
> The exemption stops at member *definitions*: an `impl` method is a real > function reached through the `impl_block` → `fn_decl` arm, so a > reserved-spelled impl method needs the backtick (`` `s2 :: (self) ``), no diff --git a/readme.md b/readme.md index 80ea1da..0c71b8e 100644 --- a/readme.md +++ b/readme.md @@ -113,7 +113,7 @@ function declaration, an `impl` method definition, or a `::` type declaration (`s2 :: 5` and `s2 :: (n) { … }` are rejected just like `s2 := 5`). **Member-name positions are exempt**: a struct *field*, a union *tag*, and a protocol *method-signature* may be a bare reserved spelling (`struct { s2: s64 }`, -`union { u8: … }`, `protocol { s2 :: (self) }`) — they are reached via `obj.name`, +`union { u8: … }`, `protocol { s2 :: () -> s64 }`) — they are reached via `obj.name`, so they never mis-lower. The bare exemption covers only the identifier-classified reserved names (`s1`..`s64`, `u1`..`u64`, `bool`, `string`, `void`, `usize`, `isize`, `Any`); `f32` and `f64` are lexer keywords, so even in a member slot they diff --git a/specs.md b/specs.md index 3683a57..b30853e 100644 --- a/specs.md +++ b/specs.md @@ -45,7 +45,7 @@ reserved spellings — `s1`..`s64`, `u1`..`u64`, `bool`, `string`, `void`, `usiz member-name slots require an identifier token; a bare `f32` / `f64` is therefore rejected at parse (`expected field name in struct`) even in a member position. Use the backtick there too — `` struct { `f32: s64; } `` / `` union { `f64: … } `` / -`` protocol { `f32 :: (self); } `` work as field / tag / method names. +`` protocol { `f32 :: () -> s64; } `` work as field / tag / method names. ```sx s2 := 2.5; // ERROR: 's2' is a reserved type name and cannot be used as an identifier