feat(lang): universal raw identifier — parser exhaustiveness + raw type continuations + sema/LSP [F0.6]

Closes the remaining three F0.6 findings so the universal backtick raw
identifier holds in BOTH classifiers and at EVERY parser construction site.

1. Struct-body constants thread is_raw + name_span. The struct-body const
   forms (untyped `` `s2 :: 5 `` and typed `` `s2 : T : v ``) built the
   const_decl node without name_span/is_raw, so a backtick const was falsely
   rejected and a bare reserved-name const caretted at 1:1. They now capture
   both. Structural cure: `ast.ConstDecl`'s name_span + is_raw carry NO
   default, so the compiler rejects any construction site that omits them
   (mirrors checkBindingName's required `is_raw` arg). FnDecl keeps its
   defaults — every parser fn_decl routes through parseFnDecl whose
   `name_is_raw` is a required parameter (equivalent guarantee).

2. Raw identifier in TYPE position flows through the normal continuations.
   parseTypeExpr no longer returns a terminal type_expr for a raw atom; the
   raw flag rides the atom through the qualified-path / Closure / parameterized
   continuations, so `` `s2(s64) ``, `` *`s2 ``, `` ?`s2 `` all parse.
   ParameterizedTypeExpr carries is_raw; resolveParameterizedWithBindings
   skips the `Vector` intrinsic when raw.

3. sema/LSP (the second classifier) honors is_raw. Type.fromTypeExpr returns
   null for a raw type_expr; resolveTypeNode skips the builtin classifier when
   raw; resolveTypeNameStr takes a skip_builtin arg threaded from te/id.is_raw
   (compound inner names pass false). A backtick reserved-name annotation now
   resolves to the user type in the editor index, not the builtin.

Tests: examples/0156 (struct-body const), 0157 (parameterized raw type +
wrappers), 1142 (bare struct-body const errors, caret on name); src/sema.test.zig
pins the LSP raw-type resolution (fail-before verified). Gate: 365 unit tests,
429 examples, 0 failed.
This commit is contained in:
agra
2026-06-04 21:14:35 +03:00
parent 023971cae5
commit ef8f021c01
22 changed files with 300 additions and 53 deletions

View File

@@ -133,11 +133,14 @@ pub const FnDecl = struct {
call_conv: CallingConvention = .default,
/// Span of the function's name token, for the reserved-type-name decl
/// diagnostic (issue 0089). Synthesized decls (e.g. `#import c` foreign
/// functions) leave it zero.
/// functions, lowering-time objc/protocol method synthesis) leave it zero.
name_span: Span = .{ .start = 0, .end = 0 },
/// True when the function NAME was written as a backtick raw identifier
/// (`` `s2 :: … ``) or synthesized by a `#import c` foreign decl. A raw
/// name is exempt from the reserved-type-name binding check (issue 0089).
/// Every PARSER fn_decl is built through `parseFnDecl`, whose `name_is_raw`
/// is a REQUIRED parameter, so a parser site cannot drop it; the default
/// here serves only post-check synthesized decls (which are never raw).
is_raw: bool = false,
};
@@ -316,12 +319,15 @@ pub const ConstDecl = struct {
type_annotation: ?*Node,
value: *Node,
/// Span of the constant's name token, for the reserved-type-name decl
/// diagnostic (issue 0089).
name_span: Span = .{ .start = 0, .end = 0 },
/// diagnostic (issue 0089). NO default: every construction site must set
/// it explicitly, so a struct-body const can't silently fall back to a
/// 1:1 caret (the finding-1 bug).
name_span: Span,
/// True when the constant NAME was written as a backtick raw identifier
/// (`` `s2 :: … ``). A raw name is exempt from the reserved-type-name
/// binding check (issue 0089).
is_raw: bool = false,
/// (`` `s2 :: … ``). NO default: required at every site so the reserved-
/// name exemption can't be dropped — mirrors `checkBindingName`'s required
/// `is_raw` argument so the parser and the check can't desync (issue 0089).
is_raw: bool,
};
pub const VarDecl = struct {
@@ -573,6 +579,12 @@ pub const ArrayLiteral = struct {
/// AST payload for a parameterized type reference: a base name applied to a
/// list of argument nodes (the `Vector` intrinsic is the built-in example).
pub const ParameterizedTypeExpr = struct {
/// Base name being parameterized, e.g. "Vector", or later generic struct
/// names.
name: []const u8,
/// Argument nodes, e.g. [int_literal(3), type_expr("f32")].
args: []const *Node,
/// True when the base name was a backtick raw identifier in type position
/// (`` `s2(s64) ``). Such a reference is the LITERAL name `s2` used as a
/// parameterized type — resolution skips the builtin parameterized
/// classifier (e.g. the `Vector` intrinsic) and instantiates a
/// `` `s2 ``-declared generic template (issue 0089). Defaults to false, so
/// construction sites that omit it are treated as bare (non-raw) names.
is_raw: bool = false,
};
pub const IndexExpr = struct {

View File

@@ -12057,8 +12057,10 @@ pub const Lowering = struct {
const base_name = if (std.mem.lastIndexOfScalar(u8, pt.name, '.')) |dot| pt.name[dot + 1 ..] else pt.name;
const table = &self.module.types;
// Vector(N, T) — built-in parameterized type
if (std.mem.eql(u8, base_name, "Vector")) {
// Vector(N, T) — built-in parameterized type. A backtick raw base
// (`` `Vector(…) ``) is the LITERAL user type named `Vector`, so it
// skips this intrinsic and resolves through the template map (0089).
if (!pt.is_raw and std.mem.eql(u8, base_name, "Vector")) {
if (pt.args.len == 2) {
const length = self.resolveVectorLane(pt.args[0]) orelse return .unresolved;
const elem = self.resolveTypeWithBindings(pt.args[1]);

View File

@@ -632,15 +632,13 @@ pub const Parser = struct {
if (self.current.tag.isTypeKeyword() or self.isIdentLike()) {
// A backtick raw identifier (`` `s2 ``) in type position is the
// LITERAL name `s2` used as a type reference — never the builtin /
// reserved keyword. It is always a plain named-type reference (no
// qualified-path, `Closure`, or parameterized continuation), so emit
// a raw `type_expr` and return; resolution skips the builtin
// reserved keyword. The raw flag rides the type ATOM through the
// SAME qualified-path / `Closure` / parameterized continuations as a
// bare name (so `` `s2(s64) ``, `` `s2.Inner ``, `` *`s2 `` all
// parse); it is threaded onto the final `type_expr` /
// `parameterized_type_expr` so resolution skips the builtin
// classifier and looks up a `` `s2 ``-declared type (issue 0089).
if (self.current.is_raw) {
const raw_name = self.tokenSlice(self.current);
self.advance();
return try self.createNode(start, .{ .type_expr = .{ .name = raw_name, .is_raw = true } });
}
const atom_is_raw = self.current.is_raw;
var name = self.tokenSlice(self.current);
self.advance();
@@ -781,6 +779,7 @@ pub const Parser = struct {
return try self.createNode(start, .{ .parameterized_type_expr = .{
.name = name,
.args = try args.toOwnedSlice(self.allocator),
.is_raw = atom_is_raw,
} });
}
@@ -789,7 +788,7 @@ pub const Parser = struct {
for (self.struct_type_params) |tp| {
if (std.mem.eql(u8, tp, name)) { is_struct_generic = true; break; }
}
return try self.createNode(start, .{ .type_expr = .{ .name = name, .is_generic = is_struct_generic } });
return try self.createNode(start, .{ .type_expr = .{ .name = name, .is_generic = is_struct_generic, .is_raw = atom_is_raw } });
}
// Inline struct type in type position: struct { ... }
if (self.current.tag == .kw_struct) {
@@ -1067,6 +1066,8 @@ pub const Parser = struct {
.name = method_name,
.type_annotation = null,
.value = value,
.name_span = method_name_span,
.is_raw = method_is_raw,
} }));
}
continue;
@@ -1080,6 +1081,13 @@ pub const Parser = struct {
return self.fail("expected field name in struct");
}
const field_start = self.current.loc.start;
// Captured for the single-name typed-const path (`name :Type: value`)
// below: a struct-body const binds a name like any other decl, so
// its name_span + raw flag must travel to the `const_decl` node
// (finding 1 — they were being dropped to a 1:1 caret / false
// reserved-name reject).
const field_name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end };
const field_is_raw = self.current.is_raw;
try group_names.append(self.allocator, self.tokenSlice(self.current));
self.advance();
@@ -1104,6 +1112,8 @@ pub const Parser = struct {
.name = group_names.items[0],
.type_annotation = field_type,
.value = value,
.name_span = field_name_span,
.is_raw = field_is_raw,
} }));
continue;
}

View File

@@ -11,6 +11,7 @@ pub const errors = @import("errors.zig");
pub const errors_tests = @import("errors.test.zig");
pub const trace_runtime_tests = @import("runtime_trace.test.zig");
pub const sema = @import("sema.zig");
pub const sema_tests = @import("sema.test.zig");
pub const imports = @import("imports.zig");
pub const core = @import("core.zig");
pub const c_import = @import("c_import.zig");

86
src/sema.test.zig Normal file
View File

@@ -0,0 +1,86 @@
// Tests for sema.zig — the editor/LSP type classifier (the SECOND resolver,
// distinct from the codegen-side `ir/type_resolver.zig`). These pin behavior
// the example suite can't reach: the example runner exercises the codegen
// path (`sx run`), never sema's hover/completion/index resolution.
const std = @import("std");
const ast = @import("ast.zig");
const Node = ast.Node;
const Parser = @import("parser.zig").Parser;
const sema = @import("sema.zig");
const types = @import("types.zig");
const Type = types.Type;
// issue 0089 — the backtick raw escape must hold in BOTH classifiers. A raw
// reserved-name type reference (`` `s2 ``) resolves to the user-declared type,
// while a BARE `s2` stays the builtin int. Before the fix sema's
// `resolveTypeNode` ran `Type.fromName` first and ignored `is_raw`, so the
// editor index would show the builtin for backtick code (the issue-0083
// two-resolver divergence applied to raw types).
test "sema: backtick raw type reference resolves to the user type; bare stays builtin" {
    const testing = std.testing;

    // One arena backs the parser and the analyzer; it is released in bulk
    // when the test returns.
    var scratch = std.heap.ArenaAllocator.init(testing.allocator);
    defer scratch.deinit();
    const gpa = scratch.allocator();

    // A user struct whose name is spelled like a builtin integer type.
    const program =
        \\`s2 :: struct { x: s64; }
        \\
    ;

    var p = Parser.init(gpa, program);
    const tree = try p.parse();

    var an = sema.Analyzer.init(gpa);
    _ = try an.analyze(tree);

    // The declaration must be registered under its plain, backtick-free name.
    try testing.expect(an.struct_types.contains("s2"));

    // Raw reference (`` `s2 ``): resolves to the user struct, not the 2-bit
    // signed int.
    var raw_ref = Node{
        .span = .{ .start = 0, .end = 0 },
        .data = .{ .type_expr = .{ .name = "s2", .is_raw = true } },
    };
    const from_raw = an.resolveTypeNode(&raw_ref);
    try testing.expect(from_raw == .struct_type);
    try testing.expectEqualStrings("s2", from_raw.struct_type);

    // Bare reference (`s2`): still the builtin 2-bit signed int.
    var bare_ref = Node{
        .span = .{ .start = 0, .end = 0 },
        .data = .{ .type_expr = .{ .name = "s2", .is_raw = false } },
    };
    const from_bare = an.resolveTypeNode(&bare_ref);
    try testing.expect(from_bare == .signed);
    try testing.expectEqual(@as(u8, 2), from_bare.signed);
}
// The same divergence guard for the string-keyed entry (`resolveTypeNameStr`,
// reached via `fieldType` when registering struct field types): a raw field
// annotation (`` `u8 ``) resolves to the user struct, a bare one (`u8`) to the
// builtin. Driven through the real analyze pipeline (no private access).
test "sema: a raw struct-field annotation resolves to the user type; bare stays builtin" {
    // The parser and analyzer allocate from one arena, freed when the test
    // returns.
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // `Holder` has one field annotated with the raw name and one with the
    // bare builtin spelling, so both classifications are checked side by side.
    const src =
        \\`u8 :: struct { y: s64; }
        \\Holder :: struct { a: `u8; b: u8; }
        \\
    ;
    var parser = Parser.init(alloc, src);
    const root = try parser.parse();
    var analyzer = sema.Analyzer.init(alloc);
    _ = try analyzer.analyze(root);

    // Fail the test (rather than panic on a null unwrap) when the struct was
    // never registered.
    const holder = analyzer.struct_types.get("Holder") orelse return error.HolderNotRegistered;

    var a_ty: ?Type = null;
    var b_ty: ?Type = null;
    for (holder.field_names, holder.field_types) |fname, fty| {
        if (std.mem.eql(u8, fname, "a")) a_ty = fty;
        if (std.mem.eql(u8, fname, "b")) b_ty = fty;
    }

    // A missing field is likewise reported as an ordinary test failure.
    const a = a_ty orelse return error.FieldANotFound;
    const b = b_ty orelse return error.FieldBNotFound;

    // field `` a: `u8 `` → the user struct named "u8".
    try std.testing.expect(a == .struct_type);
    try std.testing.expectEqualStrings("u8", a.struct_type);

    // field `b: u8` → the builtin unsigned 8-bit int.
    try std.testing.expect(b == .unsigned);
    try std.testing.expectEqual(@as(u8, 8), b.unsigned);
}

View File

@@ -411,10 +411,15 @@ pub const Analyzer = struct {
if (tn.data == .parameterized_type_expr) {
return .void_type;
}
// type_expr or identifier — check aliases, enums, structs
// type_expr or identifier — check aliases, enums, structs. A raw
// reference (`` `s2 ``) skips the builtin classifier and resolves
// through user-defined types only (issue 0089).
if (tn.data == .type_expr or tn.data == .identifier) {
const name = if (tn.data == .type_expr) tn.data.type_expr.name else tn.data.identifier.name;
if (Type.fromName(name)) |t| return t;
const is_raw = if (tn.data == .type_expr) tn.data.type_expr.is_raw else tn.data.identifier.is_raw;
if (!is_raw) {
if (Type.fromName(name)) |t| return t;
}
if (self.type_aliases.get(name)) |target| {
if (Type.fromName(target)) |t| return t;
if (self.struct_types.contains(target)) return .{ .struct_type = target };
@@ -430,9 +435,16 @@ pub const Analyzer = struct {
/// Resolve a bare type-name string against the registry (aliases, enums,
/// structs), falling back to primitive spellings. Unlike `Type.fromName`,
/// this knows user-defined types; returns `unresolved` when it can't place
/// the name.
fn resolveTypeNameStr(self: *Analyzer, name: []const u8) Type {
if (Type.fromName(name)) |t| return t;
/// the name. `skip_builtin` is the backtick raw escape (issue 0089) — a raw
/// reference (`` `s2 ``) bypasses the builtin/reserved classifier and
/// resolves only through user-defined types, mirroring the codegen-side
/// `TypeResolver.resolveNamed`. Inner names of compound shapes
/// (pointer/slice element/pointee) are always bare, so their callers pass
/// `false`.
fn resolveTypeNameStr(self: *Analyzer, name: []const u8, skip_builtin: bool) Type {
if (!skip_builtin) {
if (Type.fromName(name)) |t| return t;
}
if (self.type_aliases.get(name)) |target| {
if (Type.fromName(target)) |t| return t;
if (self.struct_types.contains(target)) return .{ .struct_type = target };
@@ -460,8 +472,8 @@ pub const Analyzer = struct {
/// registry; the element name is resolved lazily at index/field time.
fn fieldType(self: *Analyzer, node: *Node) Type {
return switch (node.data) {
.type_expr => |te| self.resolveTypeNameStr(te.name),
.identifier => |id| self.resolveTypeNameStr(id.name),
.type_expr => |te| self.resolveTypeNameStr(te.name, te.is_raw),
.identifier => |id| self.resolveTypeNameStr(id.name, id.is_raw),
.many_pointer_type_expr => |mp| .{ .many_pointer_type = .{ .element_name = self.typeExprName(mp.element_type) } },
.pointer_type_expr => |p| .{ .pointer_type = .{ .pointee_name = self.typeExprName(p.pointee_type) } },
.slice_type_expr => |s| .{ .slice_type = .{ .element_name = self.typeExprName(s.element_type) } },
@@ -476,15 +488,15 @@ pub const Analyzer = struct {
/// pointee first (so `*List(Move)` still iterates `Move`).
fn elementTypeOf(self: *Analyzer, ty: Type) ?Type {
return switch (ty) {
.array_type => |i| self.resolveTypeNameStr(i.element_name),
.slice_type => |i| self.resolveTypeNameStr(i.element_name),
.many_pointer_type => |i| self.resolveTypeNameStr(i.element_name),
.pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name)),
.array_type => |i| self.resolveTypeNameStr(i.element_name, false),
.slice_type => |i| self.resolveTypeNameStr(i.element_name, false),
.many_pointer_type => |i| self.resolveTypeNameStr(i.element_name, false),
.pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name, false)),
.struct_type => |name| blk: {
const info = self.struct_types.get(name) orelse break :blk null;
for (info.field_names, info.field_types) |fname, fty| {
if (std.mem.eql(u8, fname, "items") and fty == .many_pointer_type) {
break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name);
break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name, false);
}
}
break :blk null;
@@ -642,7 +654,7 @@ pub const Analyzer = struct {
var obj_ty = self.inferExprType(fa.object);
// `p.field` where `p` is `*T` resolves on the pointee `T`.
if (obj_ty.isPointer()) {
obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name);
obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name, false);
}
// `.len` / `.ptr` on the built-in containers (string, slice, array).
if (std.mem.eql(u8, fa.field, "len")) {
@@ -670,9 +682,9 @@ pub const Analyzer = struct {
.index_expr => |ie| {
const obj_ty = self.inferExprType(ie.object);
if (obj_ty == .string_type) return Type.u(8);
if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name);
if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name);
if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name);
if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name, false);
if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name, false);
if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name, false);
return Type.unresolved;
},
.slice_expr => |se| {
@@ -1054,7 +1066,7 @@ pub const Analyzer = struct {
.field_access => |fa| {
try self.analyzeNode(fa.object);
var owner_ty = self.inferExprType(fa.object);
if (owner_ty.isPointer()) owner_ty = self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name);
if (owner_ty.isPointer()) owner_ty = self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name, false);
self.recordMemberRef(fa.field, owner_ty.toName() orelse "", false);
},
.enum_literal => |el| {
@@ -1083,7 +1095,7 @@ pub const Analyzer = struct {
.match_expr => |me| {
try self.analyzeNode(me.subject);
var subj_ty = self.inferExprType(me.subject);
if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name);
if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name, false);
const subj_owner = subj_ty.toName() orelse "";
for (me.arms) |arm| {
if (arm.pattern) |pat| {

View File

@@ -198,6 +198,12 @@ pub const Type = union(enum) {
/// Classify a `type_expr` node through the builtin name table (`fromName`).
///
/// Returns null when `node` is not a `type_expr`, and also when the
/// `type_expr` is a backtick raw reference; otherwise returns whatever
/// `fromName` yields for the node's name.
pub fn fromTypeExpr(node: *Node) ?Type {
    return switch (node.data) {
        // A raw reference (`` `s2 ``) is the LITERAL name used as a type, so
        // it must bypass the builtin/reserved classifier (issue 0089),
        // mirroring the codegen-side `resolveNamed`'s `skip_builtin`. The
        // null result lets sema callers fall through to their
        // struct/enum/alias registry lookup.
        .type_expr => |te| if (te.is_raw) null else fromName(te.name),
        else => null,
    };
}