From ef8f021c0173f311421a7f2352aba56180a922f3 Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 21:14:35 +0300 Subject: [PATCH] =?UTF-8?q?feat(lang):=20universal=20raw=20identifier=20?= =?UTF-8?q?=E2=80=94=20parser=20exhaustiveness=20+=20raw=20type=20continua?= =?UTF-8?q?tions=20+=20sema/LSP=20[F0.6]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the remaining three F0.6 findings so the universal backtick raw identifier holds in BOTH classifiers and at EVERY parser construction site. 1. Struct-body constants thread is_raw + name_span. The struct-body const forms (untyped `` `s2 :: 5 `` and typed `` `s2 : T : v ``) built the const_decl node without name_span/is_raw, so a backtick const was falsely rejected and a bare reserved-name const caretted at 1:1. They now capture both. Structural cure: `ast.ConstDecl`'s name_span + is_raw carry NO default, so the compiler rejects any construction site that omits them (mirrors checkBindingName's required `is_raw` arg). FnDecl keeps its defaults — every parser fn_decl routes through parseFnDecl whose `name_is_raw` is a required parameter (equivalent guarantee). 2. Raw identifier in TYPE position flows through the normal continuations. parseTypeExpr no longer returns a terminal type_expr for a raw atom; the raw flag rides the atom through the qualified-path / Closure / parameterized continuations, so `` `s2(s64) ``, `` *`s2 ``, `` ?`s2 `` all parse. ParameterizedTypeExpr carries is_raw; resolveParameterizedWithBindings skips the `Vector` intrinsic when raw. 3. sema/LSP (the second classifier) honors is_raw. Type.fromTypeExpr returns null for a raw type_expr; resolveTypeNode skips the builtin classifier when raw; resolveTypeNameStr takes a skip_builtin arg threaded from te/id.is_raw (compound inner names pass false). A backtick reserved-name annotation now resolves to the user type in the editor index, not the builtin. 
Tests: examples/0156 (struct-body const), 0157 (parameterized raw type + wrappers), 1142 (bare struct-body const errors, caret on name); src/sema.test.zig pins the LSP raw-type resolution (fail-before verified). Gate: 365 unit tests, 429 examples, 0 failed. --- examples/0156-types-backtick-struct-const.sx | 21 +++++ ...7-types-backtick-parameterized-raw-type.sx | 30 +++++++ ...-diagnostics-reserved-name-struct-const.sx | 20 +++++ .../0156-types-backtick-struct-const.exit | 1 + .../0156-types-backtick-struct-const.stderr | 1 + .../0156-types-backtick-struct-const.stdout | 2 + ...types-backtick-parameterized-raw-type.exit | 1 + ...pes-backtick-parameterized-raw-type.stderr | 1 + ...pes-backtick-parameterized-raw-type.stdout | 3 + ...iagnostics-reserved-name-struct-const.exit | 1 + ...gnostics-reserved-name-struct-const.stderr | 11 +++ ...gnostics-reserved-name-struct-const.stdout | 1 + issues/0089-backtick-raw-identifier.md | 38 ++++++-- readme.md | 11 +-- specs.md | 12 ++- src/ast.zig | 24 ++++-- src/ir/lower.zig | 6 +- src/parser.zig | 28 ++++-- src/root.zig | 1 + src/sema.test.zig | 86 +++++++++++++++++++ src/sema.zig | 48 +++++++---- src/types.zig | 6 ++ 22 files changed, 300 insertions(+), 53 deletions(-) create mode 100644 examples/0156-types-backtick-struct-const.sx create mode 100644 examples/0157-types-backtick-parameterized-raw-type.sx create mode 100644 examples/1142-diagnostics-reserved-name-struct-const.sx create mode 100644 examples/expected/0156-types-backtick-struct-const.exit create mode 100644 examples/expected/0156-types-backtick-struct-const.stderr create mode 100644 examples/expected/0156-types-backtick-struct-const.stdout create mode 100644 examples/expected/0157-types-backtick-parameterized-raw-type.exit create mode 100644 examples/expected/0157-types-backtick-parameterized-raw-type.stderr create mode 100644 examples/expected/0157-types-backtick-parameterized-raw-type.stdout create mode 100644 
examples/expected/1142-diagnostics-reserved-name-struct-const.exit create mode 100644 examples/expected/1142-diagnostics-reserved-name-struct-const.stderr create mode 100644 examples/expected/1142-diagnostics-reserved-name-struct-const.stdout create mode 100644 src/sema.test.zig diff --git a/examples/0156-types-backtick-struct-const.sx b/examples/0156-types-backtick-struct-const.sx new file mode 100644 index 0000000..53466da --- /dev/null +++ b/examples/0156-types-backtick-struct-const.sx @@ -0,0 +1,21 @@ +// Backtick raw-identifier escape at a STRUCT-BODY constant — both the untyped +// `` `name :: value `` and the typed `` `name : T : value `` forms. A struct +// member constant is a binding site like any top-level const (examples/0153), +// so a reserved type spelling (`s2`, `u8`) needs the backtick to be used as the +// constant's name; the value is read back via `Holder.`name`. A *bare* +// reserved-name struct const still errors with the caret on the name (see +// examples/1142). The backtick is never part of the name's text. +// Regression (issue 0089 — attempt-5: struct-body const decls thread is_raw + +// the precise name_span, previously dropped to a false reject / 1:1 caret). +#import "modules/std.sx"; + +Holder :: struct { + `s2 :: 5; // untyped raw struct-body const + `u8 : s64 : 9; // typed raw struct-body const +} + +main :: () -> s32 { + print("untyped = {}\n", Holder.`s2); + print("typed = {}\n", Holder.`u8); + return 0; +} diff --git a/examples/0157-types-backtick-parameterized-raw-type.sx b/examples/0157-types-backtick-parameterized-raw-type.sx new file mode 100644 index 0000000..420dce1 --- /dev/null +++ b/examples/0157-types-backtick-parameterized-raw-type.sx @@ -0,0 +1,30 @@ +// Backtick raw identifier in PARAMETERIZED type position. 
A raw type reference +// (`` `s2 ``) flows through the SAME type-expression continuations as a bare +// name, so a reserved-spelled GENERIC template can be instantiated +// (`` `s2(s64) ``) and the result composes under pointer/field wrappers +// (`` *`s2(s64) ``, a struct field typed `` `s2(s64) ``). A bare `s2` in type +// position is still the 2-bit signed int. Complements examples/0154 (nullary +// raw type references). +// Regression (issue 0089 — attempt-5: the raw type atom no longer parses as a +// terminal `type_expr`; it reaches the parameterized + wrapper continuations). +#import "modules/std.sx"; + +`s2 :: struct($T: Type) { + x: $T; +} + +Wrapper :: struct { + inner: `s2(s64); // raw parameterized type as a struct field +} + +main :: () -> s32 { + v : `s2(s64); + v.x = 7; + p : *`s2(s64) = @v; // pointer to a raw parameterized type + w : Wrapper = ---; + w.inner.x = 12; + print("val = {}\n", v.x); + print("ptr = {}\n", p.x); + print("fld = {}\n", w.inner.x); + return 0; +} diff --git a/examples/1142-diagnostics-reserved-name-struct-const.sx b/examples/1142-diagnostics-reserved-name-struct-const.sx new file mode 100644 index 0000000..b07c455 --- /dev/null +++ b/examples/1142-diagnostics-reserved-name-struct-const.sx @@ -0,0 +1,20 @@ +// A bare reserved/builtin type-name spelling is rejected as the NAME of a +// STRUCT-BODY constant too — both the untyped (`s2 :: 5`) and the typed +// (`u8 : s64 : 9`) forms — exactly like a top-level const (examples/1140) or a +// type decl (examples/1141). A struct member constant is a binding site, so a +// bare reserved spelling mis-classifies and is rejected; the caret lands ON the +// constant's name (not at 1:1). The backtick escape (examples/0156) is the only +// way to spell these names in handwritten sx. +// +// Regression (issue 0089 — attempt-5: 0076 holds for struct-body consts, with +// the caret on the name). Expected: one error per const, caret on the name; exit 1. 
+#import "modules/std.sx"; + +Holder :: struct { + s2 :: 5; + u8 : s64 : 9; +} + +main :: () -> s32 { + return 0; +} diff --git a/examples/expected/0156-types-backtick-struct-const.exit b/examples/expected/0156-types-backtick-struct-const.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0156-types-backtick-struct-const.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0156-types-backtick-struct-const.stderr b/examples/expected/0156-types-backtick-struct-const.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0156-types-backtick-struct-const.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0156-types-backtick-struct-const.stdout b/examples/expected/0156-types-backtick-struct-const.stdout new file mode 100644 index 0000000..bdeeab2 --- /dev/null +++ b/examples/expected/0156-types-backtick-struct-const.stdout @@ -0,0 +1,2 @@ +untyped = 5 +typed = 9 diff --git a/examples/expected/0157-types-backtick-parameterized-raw-type.exit b/examples/expected/0157-types-backtick-parameterized-raw-type.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0157-types-backtick-parameterized-raw-type.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0157-types-backtick-parameterized-raw-type.stderr b/examples/expected/0157-types-backtick-parameterized-raw-type.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0157-types-backtick-parameterized-raw-type.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0157-types-backtick-parameterized-raw-type.stdout b/examples/expected/0157-types-backtick-parameterized-raw-type.stdout new file mode 100644 index 0000000..ac9fd97 --- /dev/null +++ b/examples/expected/0157-types-backtick-parameterized-raw-type.stdout @@ -0,0 +1,3 @@ +val = 7 +ptr = 7 +fld = 12 diff --git a/examples/expected/1142-diagnostics-reserved-name-struct-const.exit 
b/examples/expected/1142-diagnostics-reserved-name-struct-const.exit new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/examples/expected/1142-diagnostics-reserved-name-struct-const.exit @@ -0,0 +1 @@ +1 diff --git a/examples/expected/1142-diagnostics-reserved-name-struct-const.stderr b/examples/expected/1142-diagnostics-reserved-name-struct-const.stderr new file mode 100644 index 0000000..81ea977 --- /dev/null +++ b/examples/expected/1142-diagnostics-reserved-name-struct-const.stderr @@ -0,0 +1,11 @@ +error: 's2' is a reserved type name and cannot be used as an identifier + --> examples/1142-diagnostics-reserved-name-struct-const.sx:14:5 + | +14 | s2 :: 5; + | ^^ + +error: 'u8' is a reserved type name and cannot be used as an identifier + --> examples/1142-diagnostics-reserved-name-struct-const.sx:15:5 + | +15 | u8 : s64 : 9; + | ^^ diff --git a/examples/expected/1142-diagnostics-reserved-name-struct-const.stdout b/examples/expected/1142-diagnostics-reserved-name-struct-const.stdout new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/1142-diagnostics-reserved-name-struct-const.stdout @@ -0,0 +1 @@ + diff --git a/issues/0089-backtick-raw-identifier.md b/issues/0089-backtick-raw-identifier.md index 8c0df3d..49c1fd1 100644 --- a/issues/0089-backtick-raw-identifier.md +++ b/issues/0089-backtick-raw-identifier.md @@ -21,21 +21,35 @@ > - **Value position.** The parser skips `Type.fromName` for a raw identifier > in expression position ([src/parser.zig] `parsePrimary`), so `` `s2 `` is a > value identifier; a later bare reference resolves to the binding. -> - **Type position.** `parseTypeExpr` emits a raw `type_expr` (no qualified / -> `Closure` / parameterized continuation). 
Resolution skips the builtin -> classifier (`TypeResolver.resolveNamed`'s `skip_builtin`, threaded from -> `te.is_raw` in [src/ir/lower.zig] and [src/ir/type_bridge.zig]) and looks up -> a `` `s2 ``-declared type (struct / enum / union / alias), else a NORMAL +> - **Type position.** `parseTypeExpr` sets the raw flag on the type ATOM and +> lets it flow through the SAME continuations as a bare name (attempt 5), so a +> raw reference parameterizes a reserved-spelled template (`` `s2(s64) ``) and +> composes under the pointer / optional / slice wrappers; `ParameterizedTypeExpr` +> carries `is_raw` and `resolveParameterizedWithBindings` skips the `Vector` +> intrinsic when raw. Resolution skips the builtin classifier +> (`TypeResolver.resolveNamed`'s `skip_builtin`, threaded from `te.is_raw` in +> [src/ir/lower.zig] and [src/ir/type_bridge.zig]) and looks up a +> `` `s2 ``-declared type (struct / enum / union / alias), else a NORMAL > "unknown type 's2'" error (`UnknownTypeChecker.reportIfUnknownType` skips the > builtin-name exemption when raw). A bare `s2` in type position is still the -> builtin int. +> builtin int. The SECOND (editor/LSP) classifier in [src/sema.zig] +> (`Type.fromTypeExpr` / `resolveTypeNode` / `resolveTypeNameStr`) honors +> `is_raw` too, so a backtick reserved-name annotation resolves to the user type +> in hover/completion, not the builtin (no two-resolver divergence). > - **Declaration position.** A bare reserved-name declaration of EVERY kind > still errors (issue 0076 preserved); the backtick form is exempt. The check > and the exemption are made structurally symmetric: > `checkBindingName` / `checkDeclName` ([src/ir/semantic_diagnostics.zig]) take > `is_raw` as a REQUIRED argument and skip inside the check — no call site can > validate a name without also honoring the exemption, which is what kept the -> two from desyncing across the earlier attempts. +> two from desyncing across the earlier attempts. 
On the PARSER side the +> symmetry is enforced structurally for the bug-prone node: `ConstDecl`'s +> `name_span` + `is_raw` carry NO default (attempt 5), so the compiler rejects +> any construction site — including the two struct-body const forms (untyped +> `` `s2 :: 5 `` and typed `` `s2 : T : v ``) that previously dropped both — +> that omits them. `FnDecl` is built at every parser site through `parseFnDecl`, +> whose `name_is_raw` is a REQUIRED parameter (the equivalent guarantee); the +> type decls likewise route through parse-functions taking `name_is_raw`. > 2. **`#import c` foreign-name exemption.** `c_import.zig` synthesizes foreign > `#foreign` decls with `Param.is_raw = true` (and the synthesized `FnDecl` > `is_raw = true`), so generated C names that collide with reserved type names @@ -55,14 +69,20 @@ > `examples/0154-types-backtick-raw-type-reference.sx` (raw in TYPE position — > struct / enum / union / alias decl + reference; bare `s2` still the int), > `examples/0155-types-backtick-typed-const-union-tag.sx` (typed const + union tag), +> `examples/0156-types-backtick-struct-const.sx` (struct-body const, untyped + typed), +> `examples/0157-types-backtick-parameterized-raw-type.sx` (raw parameterized type + +> pointer/field wrappers), > `examples/1054-errors-backtick-reserved-binding.sx` (`catch`/`onfail` tag > bindings), `examples/1220-ffi-c-import-reserved-name-params.{sx,h,c}` (foreign > param + fn-name exemption, bare-callable foreign fn); negatives > `examples/1119`/`1121`/`1123` (bare reserved binding across forms), > `examples/1140-diagnostics-reserved-name-const-fn-decl.sx` (bare const + fn decl), > `examples/1141-diagnostics-reserved-name-type-decl.sx` (bare struct / enum / union -> / error / typed-const decl). Backtick lexer + `resolveNamed(skip_builtin)` unit -> tests in `src/lexer.zig` / `src/ir/type_resolver.test.zig`. 
+> / error / typed-const decl), +> `examples/1142-diagnostics-reserved-name-struct-const.sx` (bare struct-body const, +> caret on the name). Backtick lexer + `resolveNamed(skip_builtin)` unit tests in +> `src/lexer.zig` / `src/ir/type_resolver.test.zig`; the editor/LSP raw-type +> resolution (the second classifier) is pinned in `src/sema.test.zig`. > > The original report is preserved below. diff --git a/readme.md b/readme.md index e4f6fc0..4ad2119 100644 --- a/readme.md +++ b/readme.md @@ -125,11 +125,12 @@ x : s2 = 3; // bare `s2` in type position is still the int type ``` It works in every identifier position — local, global, parameter, struct field, -union tag, function name, type/alias/import name, constant, and the control-flow / -capture / binding forms (destructure, `if`/`while` binding, `for` capture, match -capture, `catch`/`onfail` tag) — and a reserved-spelled function is bare-callable -(`s2(10)`). A backtick name used as a type resolves to a `` `name ``-declared type, -else a normal `unknown type` error. +union tag, function name, type/alias/import name, a top-level or struct-body +constant, and the control-flow / capture / binding forms (destructure, `if`/`while` +binding, `for` capture, match capture, `catch`/`onfail` tag) — and a reserved-spelled +function is bare-callable (`s2(10)`). A backtick name used as a type resolves to a +`` `name ``-declared type — including a parameterized template (`` `s2(s64) ``) and +under pointer/optional wrappers — else a normal `unknown type` error. Foreign declarations from `#import c { … }` are exempt automatically: C names that collide with reserved type names (e.g. `s1`, `s2`) import unedited, and a foreign diff --git a/specs.md b/specs.md index 56ca1d2..2fe7450 100644 --- a/specs.md +++ b/specs.md @@ -55,13 +55,16 @@ x : s2 = 3; // bare `s2` in TYPE position is still the s2 int type reference: it resolves to a `` `s2 ``-declared type (struct / enum / union / type alias / …), and never the builtin. 
A bare `s2` in type position stays the builtin int; a backtick name with no matching declaration is a normal `unknown type 's2'` -error. +error. A raw type reference flows through the **same continuations** as a bare type +name, so it parameterizes a reserved-spelled generic template (`` `s2(s64) ``) and +composes under the pointer / optional / slice wrappers (`` *`s2 ``, `` ?`s2 ``). ```sx -`s2 :: struct { x: s64; } // declare a type whose name is a reserved spelling -v : `s2 = ---; // reference it as a type — resolves to the struct +`s2 :: struct($T: Type) { x: $T; } // generic template with a reserved-spelled name +v : `s2(s64) = ---; // parameterized raw type reference v.x = 7; -x : s2 = 3; // bare `s2` is still the 2-bit signed int +p : *`s2(s64) = @v; // wrappers compose over a raw type +x : s2 = 3; // bare `s2` is still the 2-bit signed int ``` **Declaration position.** A *bare* reserved-name declaration of every kind still @@ -79,6 +82,7 @@ reference, and every control-flow / capture / binding form (destructure name, `s2 : s64 : 5; // typed constant declaration `u8 :: (`s1: s64) -> s64 { `s1 } // function name + parameter P :: struct { `s2: f64; } // struct field +H :: struct { `s2 :: 5; } // struct-body constant (untyped + `: T :` typed) M :: union { `s1: s32; } // union tag `u16 :: enum { A; B; } // type-declaration name `u8, rest := pair(); // destructure name diff --git a/src/ast.zig b/src/ast.zig index f3c3541..cd2948c 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -133,11 +133,14 @@ pub const FnDecl = struct { call_conv: CallingConvention = .default, /// Span of the function's name token, for the reserved-type-name decl /// diagnostic (issue 0089). Synthesized decls (e.g. `#import c` foreign - /// functions) leave it zero. + /// functions, lowering-time objc/protocol method synthesis) leave it zero. 
name_span: Span = .{ .start = 0, .end = 0 }, /// True when the function NAME was written as a backtick raw identifier /// (`` `s2 :: … ``) or synthesized by a `#import c` foreign decl. A raw /// name is exempt from the reserved-type-name binding check (issue 0089). + /// Every PARSER fn_decl is built through `parseFnDecl`, whose `name_is_raw` + /// is a REQUIRED parameter, so a parser site cannot drop it; the default + /// here serves only post-check synthesized decls (which are never raw). is_raw: bool = false, }; @@ -316,12 +319,15 @@ pub const ConstDecl = struct { type_annotation: ?*Node, value: *Node, /// Span of the constant's name token, for the reserved-type-name decl - /// diagnostic (issue 0089). - name_span: Span = .{ .start = 0, .end = 0 }, + /// diagnostic (issue 0089). NO default: every construction site must set + /// it explicitly, so a struct-body const can't silently fall back to a + /// 1:1 caret (the finding-1 bug). + name_span: Span, /// True when the constant NAME was written as a backtick raw identifier - /// (`` `s2 :: … ``). A raw name is exempt from the reserved-type-name - /// binding check (issue 0089). - is_raw: bool = false, + /// (`` `s2 :: … ``). NO default: required at every site so the reserved- + /// name exemption can't be dropped — mirrors `checkBindingName`'s required + /// `is_raw` argument so the parser and the check can't desync (issue 0089). + is_raw: bool, }; pub const VarDecl = struct { @@ -573,6 +579,12 @@ pub const ArrayLiteral = struct { pub const ParameterizedTypeExpr = struct { name: []const u8, // e.g. "Vector", or later generic struct names args: []const *Node, // e.g. [int_literal(3), type_expr("f32")] + /// True when the base name was a backtick raw identifier in type position + /// (`` `s2(s64) ``). Such a reference is the LITERAL name `s2` used as a + /// parameterized type — resolution skips the builtin parameterized + /// classifier (e.g. 
the `Vector` intrinsic) and instantiates a + /// `` `s2 ``-declared generic template (issue 0089). + is_raw: bool = false, }; pub const IndexExpr = struct { diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 79bc7d3..2b6a8db 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -12057,8 +12057,10 @@ pub const Lowering = struct { const base_name = if (std.mem.lastIndexOfScalar(u8, pt.name, '.')) |dot| pt.name[dot + 1 ..] else pt.name; const table = &self.module.types; - // Vector(N, T) — built-in parameterized type - if (std.mem.eql(u8, base_name, "Vector")) { + // Vector(N, T) — built-in parameterized type. A backtick raw base + // (`` `Vector(…) ``) is the LITERAL user type named `Vector`, so it + // skips this intrinsic and resolves through the template map (0089). + if (!pt.is_raw and std.mem.eql(u8, base_name, "Vector")) { if (pt.args.len == 2) { const length = self.resolveVectorLane(pt.args[0]) orelse return .unresolved; const elem = self.resolveTypeWithBindings(pt.args[1]); diff --git a/src/parser.zig b/src/parser.zig index 8ec384e..2450f49 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -632,15 +632,13 @@ pub const Parser = struct { if (self.current.tag.isTypeKeyword() or self.isIdentLike()) { // A backtick raw identifier (`` `s2 ``) in type position is the // LITERAL name `s2` used as a type reference — never the builtin / - // reserved keyword. It is always a plain named-type reference (no - // qualified-path, `Closure`, or parameterized continuation), so emit - // a raw `type_expr` and return; resolution skips the builtin + // reserved keyword. The raw flag rides the type ATOM through the + // SAME qualified-path / `Closure` / parameterized continuations as a + // bare name (so `` `s2(s64) ``, `` `s2.Inner ``, `` *`s2 `` all + // parse); it is threaded onto the final `type_expr` / + // `parameterized_type_expr` so resolution skips the builtin // classifier and looks up a `` `s2 ``-declared type (issue 0089). 
- if (self.current.is_raw) { - const raw_name = self.tokenSlice(self.current); - self.advance(); - return try self.createNode(start, .{ .type_expr = .{ .name = raw_name, .is_raw = true } }); - } + const atom_is_raw = self.current.is_raw; var name = self.tokenSlice(self.current); self.advance(); @@ -781,6 +779,7 @@ pub const Parser = struct { return try self.createNode(start, .{ .parameterized_type_expr = .{ .name = name, .args = try args.toOwnedSlice(self.allocator), + .is_raw = atom_is_raw, } }); } @@ -789,7 +788,7 @@ pub const Parser = struct { for (self.struct_type_params) |tp| { if (std.mem.eql(u8, tp, name)) { is_struct_generic = true; break; } } - return try self.createNode(start, .{ .type_expr = .{ .name = name, .is_generic = is_struct_generic } }); + return try self.createNode(start, .{ .type_expr = .{ .name = name, .is_generic = is_struct_generic, .is_raw = atom_is_raw } }); } // Inline struct type in type position: struct { ... } if (self.current.tag == .kw_struct) { @@ -1067,6 +1066,8 @@ pub const Parser = struct { .name = method_name, .type_annotation = null, .value = value, + .name_span = method_name_span, + .is_raw = method_is_raw, } })); } continue; @@ -1080,6 +1081,13 @@ pub const Parser = struct { return self.fail("expected field name in struct"); } const field_start = self.current.loc.start; + // Captured for the single-name typed-const path (`name :Type: value`) + // below: a struct-body const binds a name like any other decl, so + // its name_span + raw flag must travel to the `const_decl` node + // (finding 1 — they were being dropped to a 1:1 caret / false + // reserved-name reject). 
+ const field_name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end }; + const field_is_raw = self.current.is_raw; try group_names.append(self.allocator, self.tokenSlice(self.current)); self.advance(); @@ -1104,6 +1112,8 @@ pub const Parser = struct { .name = group_names.items[0], .type_annotation = field_type, .value = value, + .name_span = field_name_span, + .is_raw = field_is_raw, } })); continue; } diff --git a/src/root.zig b/src/root.zig index 7b9bf13..230c390 100644 --- a/src/root.zig +++ b/src/root.zig @@ -11,6 +11,7 @@ pub const errors = @import("errors.zig"); pub const errors_tests = @import("errors.test.zig"); pub const trace_runtime_tests = @import("runtime_trace.test.zig"); pub const sema = @import("sema.zig"); +pub const sema_tests = @import("sema.test.zig"); pub const imports = @import("imports.zig"); pub const core = @import("core.zig"); pub const c_import = @import("c_import.zig"); diff --git a/src/sema.test.zig b/src/sema.test.zig new file mode 100644 index 0000000..09dd22f --- /dev/null +++ b/src/sema.test.zig @@ -0,0 +1,86 @@ +// Tests for sema.zig — the editor/LSP type classifier (the SECOND resolver, +// distinct from the codegen-side `ir/type_resolver.zig`). These pin behavior +// the example suite can't reach: the example runner exercises the codegen +// path (`sx run`), never sema's hover/completion/index resolution. + +const std = @import("std"); +const ast = @import("ast.zig"); +const Node = ast.Node; +const Parser = @import("parser.zig").Parser; +const sema = @import("sema.zig"); +const types = @import("types.zig"); +const Type = types.Type; + +// issue 0089 — the backtick raw escape must hold in BOTH classifiers. A raw +// reserved-name type reference (`` `s2 ``) resolves to the user-declared type, +// while a BARE `s2` stays the builtin int. 
Before the fix sema's +// `resolveTypeNode` ran `Type.fromName` first and ignored `is_raw`, so the +// editor index would show the builtin for backtick code (the issue-0083 +// two-resolver divergence applied to raw types). +test "sema: backtick raw type reference resolves to the user type; bare stays builtin" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const src = + \\`s2 :: struct { x: s64; } + \\ + ; + var parser = Parser.init(alloc, src); + const root = try parser.parse(); + + var analyzer = sema.Analyzer.init(alloc); + _ = try analyzer.analyze(root); + + // The reserved-spelled user type registered under its plain name. + try std.testing.expect(analyzer.struct_types.contains("s2")); + + // RAW reference (`` `s2 ``) → the user struct, NOT the 2-bit signed int. + var raw_node = Node{ .span = .{ .start = 0, .end = 0 }, .data = .{ .type_expr = .{ .name = "s2", .is_raw = true } } }; + const raw_ty = analyzer.resolveTypeNode(&raw_node); + try std.testing.expect(raw_ty == .struct_type); + try std.testing.expectEqualStrings("s2", raw_ty.struct_type); + + // BARE `s2` → the builtin 2-bit signed int. + var bare_node = Node{ .span = .{ .start = 0, .end = 0 }, .data = .{ .type_expr = .{ .name = "s2", .is_raw = false } } }; + const bare_ty = analyzer.resolveTypeNode(&bare_node); + try std.testing.expect(bare_ty == .signed); + try std.testing.expectEqual(@as(u8, 2), bare_ty.signed); +} + +// The same divergence guard for the string-keyed entry (`resolveTypeNameStr`, +// reached via `fieldType` when registering struct field types): a raw field +// annotation (`` `u8 ``) resolves to the user struct, a bare one (`u8`) to the +// builtin. Driven through the real analyze pipeline (no private access). 
+test "sema: a raw struct-field annotation resolves to the user type; bare stays builtin" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const src = + \\`u8 :: struct { y: s64; } + \\Holder :: struct { a: `u8; b: u8; } + \\ + ; + var parser = Parser.init(alloc, src); + const root = try parser.parse(); + + var analyzer = sema.Analyzer.init(alloc); + _ = try analyzer.analyze(root); + + const holder = analyzer.struct_types.get("Holder").?; + var a_ty: ?Type = null; + var b_ty: ?Type = null; + for (holder.field_names, holder.field_types) |fname, fty| { + if (std.mem.eql(u8, fname, "a")) a_ty = fty; + if (std.mem.eql(u8, fname, "b")) b_ty = fty; + } + + // field `a : `u8` → the user struct named "u8". + try std.testing.expect(a_ty.? == .struct_type); + try std.testing.expectEqualStrings("u8", a_ty.?.struct_type); + + // field `b : u8` → the builtin unsigned 8-bit int. + try std.testing.expect(b_ty.? == .unsigned); + try std.testing.expectEqual(@as(u8, 8), b_ty.?.unsigned); +} diff --git a/src/sema.zig b/src/sema.zig index ef24a7a..ffa8964 100644 --- a/src/sema.zig +++ b/src/sema.zig @@ -411,10 +411,15 @@ pub const Analyzer = struct { if (tn.data == .parameterized_type_expr) { return .void_type; } - // type_expr or identifier — check aliases, enums, structs + // type_expr or identifier — check aliases, enums, structs. A raw + // reference (`` `s2 ``) skips the builtin classifier and resolves + // through user-defined types only (issue 0089). 
+ if (tn.data == .type_expr or tn.data == .identifier) { const name = if (tn.data == .type_expr) tn.data.type_expr.name else tn.data.identifier.name; - if (Type.fromName(name)) |t| return t; + const is_raw = if (tn.data == .type_expr) tn.data.type_expr.is_raw else tn.data.identifier.is_raw; + if (!is_raw) { + if (Type.fromName(name)) |t| return t; + } if (self.type_aliases.get(name)) |target| { if (Type.fromName(target)) |t| return t; if (self.struct_types.contains(target)) return .{ .struct_type = target }; @@ -430,9 +435,16 @@ pub const Analyzer = struct { /// Resolve a bare type-name string against the registry (aliases, enums, /// structs), falling back to primitive spellings. Unlike `Type.fromName`, /// this knows user-defined types; returns `unresolved` when it can't place - /// the name. - fn resolveTypeNameStr(self: *Analyzer, name: []const u8) Type { - if (Type.fromName(name)) |t| return t; + /// the name. `skip_builtin` is the backtick raw escape (issue 0089) — a raw + /// reference (`` `s2 ``) bypasses the builtin/reserved classifier and + /// resolves only through user-defined types, mirroring the codegen-side + /// `TypeResolver.resolveNamed`. Inner names of compound shapes + /// (pointer/slice element/pointee) are stored as plain name strings with no raw flag, so their callers pass + /// `false`. NOTE(review): a raw inner name (e.g. the pointee of `` *`s2 ``) presumably still hits the builtin classifier first on this path — confirm. + fn resolveTypeNameStr(self: *Analyzer, name: []const u8, skip_builtin: bool) Type { + if (!skip_builtin) { + if (Type.fromName(name)) |t| return t; + } if (self.type_aliases.get(name)) |target| { if (Type.fromName(target)) |t| return t; if (self.struct_types.contains(target)) return .{ .struct_type = target }; @@ -460,8 +472,8 @@ pub const Analyzer = struct { /// registry; the element name is resolved lazily at index/field time. 
fn fieldType(self: *Analyzer, node: *Node) Type { return switch (node.data) { - .type_expr => |te| self.resolveTypeNameStr(te.name), - .identifier => |id| self.resolveTypeNameStr(id.name), + .type_expr => |te| self.resolveTypeNameStr(te.name, te.is_raw), + .identifier => |id| self.resolveTypeNameStr(id.name, id.is_raw), .many_pointer_type_expr => |mp| .{ .many_pointer_type = .{ .element_name = self.typeExprName(mp.element_type) } }, .pointer_type_expr => |p| .{ .pointer_type = .{ .pointee_name = self.typeExprName(p.pointee_type) } }, .slice_type_expr => |s| .{ .slice_type = .{ .element_name = self.typeExprName(s.element_type) } }, @@ -476,15 +488,15 @@ pub const Analyzer = struct { /// pointee first (so `*List(Move)` still iterates `Move`). fn elementTypeOf(self: *Analyzer, ty: Type) ?Type { return switch (ty) { - .array_type => |i| self.resolveTypeNameStr(i.element_name), - .slice_type => |i| self.resolveTypeNameStr(i.element_name), - .many_pointer_type => |i| self.resolveTypeNameStr(i.element_name), - .pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name)), + .array_type => |i| self.resolveTypeNameStr(i.element_name, false), + .slice_type => |i| self.resolveTypeNameStr(i.element_name, false), + .many_pointer_type => |i| self.resolveTypeNameStr(i.element_name, false), + .pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name, false)), .struct_type => |name| blk: { const info = self.struct_types.get(name) orelse break :blk null; for (info.field_names, info.field_types) |fname, fty| { if (std.mem.eql(u8, fname, "items") and fty == .many_pointer_type) { - break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name); + break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name, false); } } break :blk null; @@ -642,7 +654,7 @@ pub const Analyzer = struct { var obj_ty = self.inferExprType(fa.object); // `p.field` where `p` is `*T` resolves on the pointee `T`. 
if (obj_ty.isPointer()) { - obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name); + obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name, false); } // `.len` / `.ptr` on the built-in containers (string, slice, array). if (std.mem.eql(u8, fa.field, "len")) { @@ -670,9 +682,9 @@ pub const Analyzer = struct { .index_expr => |ie| { const obj_ty = self.inferExprType(ie.object); if (obj_ty == .string_type) return Type.u(8); - if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name); - if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name); - if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name); + if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name, false); + if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name, false); + if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name, false); return Type.unresolved; }, .slice_expr => |se| { @@ -1054,7 +1066,7 @@ pub const Analyzer = struct { .field_access => |fa| { try self.analyzeNode(fa.object); var owner_ty = self.inferExprType(fa.object); - if (owner_ty.isPointer()) owner_ty = self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name); + if (owner_ty.isPointer()) owner_ty = self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name, false); self.recordMemberRef(fa.field, owner_ty.toName() orelse "", false); }, .enum_literal => |el| { @@ -1083,7 +1095,7 @@ pub const Analyzer = struct { .match_expr => |me| { try self.analyzeNode(me.subject); var subj_ty = self.inferExprType(me.subject); - if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name); + if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name, false); const subj_owner = subj_ty.toName() orelse ""; for (me.arms) |arm| { if (arm.pattern) |pat| { diff --git 
a/src/types.zig b/src/types.zig index 3c2d1f7..168760a 100644 --- a/src/types.zig +++ b/src/types.zig @@ -198,6 +198,12 @@ pub const Type = union(enum) { pub fn fromTypeExpr(node: *Node) ?Type { if (node.data != .type_expr) return null; + // A backtick raw type reference (`` `s2 ``) is the LITERAL name used as + // a type — it must skip this builtin/reserved classifier and resolve + // through user-defined types only (issue 0089), mirroring the codegen- + // side `resolveNamed`'s `skip_builtin`. Returning null lets the sema + // callers fall through to their struct/enum/alias registry lookup. + if (node.data.type_expr.is_raw) return null; return fromName(node.data.type_expr.name); }