feat(lang): universal raw identifier — parser exhaustiveness + raw type continuations + sema/LSP [F0.6]
Closes the remaining three F0.6 findings so the universal backtick raw identifier holds in BOTH classifiers and at EVERY parser construction site.

1. Struct-body constants thread is_raw + name_span. The struct-body const forms (untyped `` `s2 :: 5 `` and typed `` `s2 : T : v ``) built the const_decl node without name_span/is_raw, so a backtick const was falsely rejected and a bare reserved-name const caretted at 1:1. They now capture both. Structural cure: `ast.ConstDecl`'s name_span + is_raw carry NO default, so the compiler rejects any construction site that omits them (mirrors checkBindingName's required `is_raw` arg). FnDecl keeps its defaults — every parser fn_decl routes through parseFnDecl whose `name_is_raw` is a required parameter (equivalent guarantee).

2. Raw identifier in TYPE position flows through the normal continuations. parseTypeExpr no longer returns a terminal type_expr for a raw atom; the raw flag rides the atom through the qualified-path / Closure / parameterized continuations, so `` `s2(s64) ``, `` *`s2 ``, `` ?`s2 `` all parse. ParameterizedTypeExpr carries is_raw; resolveParameterizedWithBindings skips the `Vector` intrinsic when raw.

3. sema/LSP (the second classifier) honors is_raw. Type.fromTypeExpr returns null for a raw type_expr; resolveTypeNode skips the builtin classifier when raw; resolveTypeNameStr takes a skip_builtin arg threaded from te/id.is_raw (compound inner names pass false). A backtick reserved-name annotation now resolves to the user type in the editor index, not the builtin.

Tests: examples/0156 (struct-body const), 0157 (parameterized raw type + wrappers), 1142 (bare struct-body const errors, caret on name); src/sema.test.zig pins the LSP raw-type resolution (fail-before verified).

Gate: 365 unit tests, 429 examples, 0 failed.
This commit is contained in:
24
src/ast.zig
24
src/ast.zig
@@ -133,11 +133,14 @@ pub const FnDecl = struct {
|
||||
call_conv: CallingConvention = .default,
|
||||
/// Span of the function's name token, for the reserved-type-name decl
|
||||
/// diagnostic (issue 0089). Synthesized decls (e.g. `#import c` foreign
|
||||
/// functions) leave it zero.
|
||||
/// functions, lowering-time objc/protocol method synthesis) leave it zero.
|
||||
name_span: Span = .{ .start = 0, .end = 0 },
|
||||
/// True when the function NAME was written as a backtick raw identifier
|
||||
/// (`` `s2 :: … ``) or synthesized by a `#import c` foreign decl. A raw
|
||||
/// name is exempt from the reserved-type-name binding check (issue 0089).
|
||||
/// Every PARSER fn_decl is built through `parseFnDecl`, whose `name_is_raw`
|
||||
/// is a REQUIRED parameter, so a parser site cannot drop it; the default
|
||||
/// here serves only post-check synthesized decls (which are never raw).
|
||||
is_raw: bool = false,
|
||||
};
|
||||
|
||||
@@ -316,12 +319,15 @@ pub const ConstDecl = struct {
|
||||
type_annotation: ?*Node,
|
||||
value: *Node,
|
||||
/// Span of the constant's name token, for the reserved-type-name decl
|
||||
/// diagnostic (issue 0089).
|
||||
name_span: Span = .{ .start = 0, .end = 0 },
|
||||
/// diagnostic (issue 0089). NO default: every construction site must set
|
||||
/// it explicitly, so a struct-body const can't silently fall back to a
|
||||
/// 1:1 caret (the finding-1 bug).
|
||||
name_span: Span,
|
||||
/// True when the constant NAME was written as a backtick raw identifier
|
||||
/// (`` `s2 :: … ``). A raw name is exempt from the reserved-type-name
|
||||
/// binding check (issue 0089).
|
||||
is_raw: bool = false,
|
||||
/// (`` `s2 :: … ``). NO default: required at every site so the reserved-
|
||||
/// name exemption can't be dropped — mirrors `checkBindingName`'s required
|
||||
/// `is_raw` argument so the parser and the check can't desync (issue 0089).
|
||||
is_raw: bool,
|
||||
};
|
||||
|
||||
pub const VarDecl = struct {
|
||||
@@ -573,6 +579,12 @@ pub const ArrayLiteral = struct {
|
||||
/// AST payload for a parameterized type reference: a base `name` applied to a
/// list of argument nodes (e.g. `Vector` with an integer-literal lane count
/// and an element type), plus the raw-identifier flag of the base name.
pub const ParameterizedTypeExpr = struct {
|
||||
name: []const u8, // e.g. "Vector", or later generic struct names
|
||||
args: []const *Node, // e.g. [int_literal(3), type_expr("f32")]
|
||||
/// True when the base name was a backtick raw identifier in type position
|
||||
/// (`` `s2(s64) ``). Such a reference is the LITERAL name `s2` used as a
|
||||
/// parameterized type — resolution skips the builtin parameterized
|
||||
/// classifier (e.g. the `Vector` intrinsic) and instantiates a
|
||||
/// `` `s2 ``-declared generic template (issue 0089).
|
||||
is_raw: bool = false,
|
||||
};
|
||||
|
||||
pub const IndexExpr = struct {
|
||||
|
||||
@@ -12057,8 +12057,10 @@ pub const Lowering = struct {
|
||||
const base_name = if (std.mem.lastIndexOfScalar(u8, pt.name, '.')) |dot| pt.name[dot + 1 ..] else pt.name;
|
||||
const table = &self.module.types;
|
||||
|
||||
// Vector(N, T) — built-in parameterized type
|
||||
if (std.mem.eql(u8, base_name, "Vector")) {
|
||||
// Vector(N, T) — built-in parameterized type. A backtick raw base
|
||||
// (`` `Vector(…) ``) is the LITERAL user type named `Vector`, so it
|
||||
// skips this intrinsic and resolves through the template map (0089).
|
||||
if (!pt.is_raw and std.mem.eql(u8, base_name, "Vector")) {
|
||||
if (pt.args.len == 2) {
|
||||
const length = self.resolveVectorLane(pt.args[0]) orelse return .unresolved;
|
||||
const elem = self.resolveTypeWithBindings(pt.args[1]);
|
||||
|
||||
@@ -632,15 +632,13 @@ pub const Parser = struct {
|
||||
if (self.current.tag.isTypeKeyword() or self.isIdentLike()) {
|
||||
// A backtick raw identifier (`` `s2 ``) in type position is the
|
||||
// LITERAL name `s2` used as a type reference — never the builtin /
|
||||
// reserved keyword. It is always a plain named-type reference (no
|
||||
// qualified-path, `Closure`, or parameterized continuation), so emit
|
||||
// a raw `type_expr` and return; resolution skips the builtin
|
||||
// reserved keyword. The raw flag rides the type ATOM through the
|
||||
// SAME qualified-path / `Closure` / parameterized continuations as a
|
||||
// bare name (so `` `s2(s64) ``, `` `s2.Inner ``, `` *`s2 `` all
|
||||
// parse); it is threaded onto the final `type_expr` /
|
||||
// `parameterized_type_expr` so resolution skips the builtin
|
||||
// classifier and looks up a `` `s2 ``-declared type (issue 0089).
|
||||
if (self.current.is_raw) {
|
||||
const raw_name = self.tokenSlice(self.current);
|
||||
self.advance();
|
||||
return try self.createNode(start, .{ .type_expr = .{ .name = raw_name, .is_raw = true } });
|
||||
}
|
||||
const atom_is_raw = self.current.is_raw;
|
||||
var name = self.tokenSlice(self.current);
|
||||
self.advance();
|
||||
|
||||
@@ -781,6 +779,7 @@ pub const Parser = struct {
|
||||
return try self.createNode(start, .{ .parameterized_type_expr = .{
|
||||
.name = name,
|
||||
.args = try args.toOwnedSlice(self.allocator),
|
||||
.is_raw = atom_is_raw,
|
||||
} });
|
||||
}
|
||||
|
||||
@@ -789,7 +788,7 @@ pub const Parser = struct {
|
||||
for (self.struct_type_params) |tp| {
|
||||
if (std.mem.eql(u8, tp, name)) { is_struct_generic = true; break; }
|
||||
}
|
||||
return try self.createNode(start, .{ .type_expr = .{ .name = name, .is_generic = is_struct_generic } });
|
||||
return try self.createNode(start, .{ .type_expr = .{ .name = name, .is_generic = is_struct_generic, .is_raw = atom_is_raw } });
|
||||
}
|
||||
// Inline struct type in type position: struct { ... }
|
||||
if (self.current.tag == .kw_struct) {
|
||||
@@ -1067,6 +1066,8 @@ pub const Parser = struct {
|
||||
.name = method_name,
|
||||
.type_annotation = null,
|
||||
.value = value,
|
||||
.name_span = method_name_span,
|
||||
.is_raw = method_is_raw,
|
||||
} }));
|
||||
}
|
||||
continue;
|
||||
@@ -1080,6 +1081,13 @@ pub const Parser = struct {
|
||||
return self.fail("expected field name in struct");
|
||||
}
|
||||
const field_start = self.current.loc.start;
|
||||
// Captured for the single-name typed-const path (`name :Type: value`)
|
||||
// below: a struct-body const binds a name like any other decl, so
|
||||
// its name_span + raw flag must travel to the `const_decl` node
|
||||
// (finding 1 — they were being dropped to a 1:1 caret / false
|
||||
// reserved-name reject).
|
||||
const field_name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end };
|
||||
const field_is_raw = self.current.is_raw;
|
||||
try group_names.append(self.allocator, self.tokenSlice(self.current));
|
||||
self.advance();
|
||||
|
||||
@@ -1104,6 +1112,8 @@ pub const Parser = struct {
|
||||
.name = group_names.items[0],
|
||||
.type_annotation = field_type,
|
||||
.value = value,
|
||||
.name_span = field_name_span,
|
||||
.is_raw = field_is_raw,
|
||||
} }));
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ pub const errors = @import("errors.zig");
|
||||
pub const errors_tests = @import("errors.test.zig");
|
||||
pub const trace_runtime_tests = @import("runtime_trace.test.zig");
|
||||
pub const sema = @import("sema.zig");
|
||||
pub const sema_tests = @import("sema.test.zig");
|
||||
pub const imports = @import("imports.zig");
|
||||
pub const core = @import("core.zig");
|
||||
pub const c_import = @import("c_import.zig");
|
||||
|
||||
86
src/sema.test.zig
Normal file
86
src/sema.test.zig
Normal file
@@ -0,0 +1,86 @@
|
||||
// Tests for sema.zig — the editor/LSP type classifier (the SECOND resolver,
|
||||
// distinct from the codegen-side `ir/type_resolver.zig`). These pin behavior
|
||||
// the example suite can't reach: the example runner exercises the codegen
|
||||
// path (`sx run`), never sema's hover/completion/index resolution.
|
||||
|
||||
const std = @import("std");
|
||||
const ast = @import("ast.zig");
|
||||
const Node = ast.Node;
|
||||
const Parser = @import("parser.zig").Parser;
|
||||
const sema = @import("sema.zig");
|
||||
const types = @import("types.zig");
|
||||
const Type = types.Type;
|
||||
|
||||
// issue 0089 — the backtick raw escape must hold in BOTH classifiers. A raw
|
||||
// reserved-name type reference (`` `s2 ``) resolves to the user-declared type,
|
||||
// while a BARE `s2` stays the builtin int. Before the fix sema's
|
||||
// `resolveTypeNode` ran `Type.fromName` first and ignored `is_raw`, so the
|
||||
// editor index would show the builtin for backtick code (the issue-0083
|
||||
// two-resolver divergence applied to raw types).
|
||||
test "sema: backtick raw type reference resolves to the user type; bare stays builtin" {
    const testing = std.testing;

    // Everything the parser and analyzer allocate lives in one arena that is
    // released wholesale when the test ends.
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const gpa = arena_state.allocator();

    // Declare a user struct whose name collides with the builtin `s2` integer
    // spelling; the backtick makes the declaration legal.
    const program =
        \\`s2 :: struct { x: s64; }
        \\
    ;
    var source_parser = Parser.init(gpa, program);
    const tree = try source_parser.parse();

    var checker = sema.Analyzer.init(gpa);
    _ = try checker.analyze(tree);

    // The struct is registered under the plain (un-backticked) name.
    try testing.expect(checker.struct_types.contains("s2"));

    // A RAW `type_expr` named "s2" must resolve to the user struct.
    var raw_ref = Node{
        .span = .{ .start = 0, .end = 0 },
        .data = .{ .type_expr = .{ .name = "s2", .is_raw = true } },
    };
    switch (checker.resolveTypeNode(&raw_ref)) {
        .struct_type => |struct_name| try testing.expectEqualStrings("s2", struct_name),
        else => return error.TestUnexpectedResult,
    }

    // A BARE `type_expr` named "s2" must stay the builtin 2-bit signed int.
    var bare_ref = Node{
        .span = .{ .start = 0, .end = 0 },
        .data = .{ .type_expr = .{ .name = "s2", .is_raw = false } },
    };
    switch (checker.resolveTypeNode(&bare_ref)) {
        .signed => |bit_width| try testing.expectEqual(@as(u8, 2), bit_width),
        else => return error.TestUnexpectedResult,
    }
}
|
||||
|
||||
// The same divergence guard for the string-keyed entry (`resolveTypeNameStr`,
|
||||
// reached via `fieldType` when registering struct field types): a raw field
|
||||
// annotation (`` `u8 ``) resolves to the user struct, a bare one (`u8`) to the
|
||||
// builtin. Driven through the real analyze pipeline (no private access).
|
||||
test "sema: a raw struct-field annotation resolves to the user type; bare stays builtin" {
    // Arena-backed allocations: parser and analyzer output is freed in one
    // shot when the test returns.
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // `u8` is declared as a user struct via the backtick escape; `Holder`
    // then references it once raw (field `a`) and once bare (field `b`).
    const src =
        \\`u8 :: struct { y: s64; }
        \\Holder :: struct { a: `u8; b: u8; }
        \\
    ;
    var parser = Parser.init(alloc, src);
    const root = try parser.parse();

    var analyzer = sema.Analyzer.init(alloc);
    _ = try analyzer.analyze(root);

    // A missing registration is reported as an ordinary test failure instead
    // of a force-unwrap panic that would abort the whole test run.
    const holder = analyzer.struct_types.get("Holder") orelse return error.TestUnexpectedResult;

    var a_ty: ?Type = null;
    var b_ty: ?Type = null;
    for (holder.field_names, holder.field_types) |fname, fty| {
        if (std.mem.eql(u8, fname, "a")) a_ty = fty;
        if (std.mem.eql(u8, fname, "b")) b_ty = fty;
    }

    // field `a : `u8` → the user struct named "u8".
    const a_resolved = a_ty orelse return error.TestUnexpectedResult;
    try std.testing.expect(a_resolved == .struct_type);
    try std.testing.expectEqualStrings("u8", a_resolved.struct_type);

    // field `b : u8` → the builtin unsigned 8-bit int.
    const b_resolved = b_ty orelse return error.TestUnexpectedResult;
    try std.testing.expect(b_resolved == .unsigned);
    try std.testing.expectEqual(@as(u8, 8), b_resolved.unsigned);
}
|
||||
48
src/sema.zig
48
src/sema.zig
@@ -411,10 +411,15 @@ pub const Analyzer = struct {
|
||||
if (tn.data == .parameterized_type_expr) {
|
||||
return .void_type;
|
||||
}
|
||||
// type_expr or identifier — check aliases, enums, structs
|
||||
// type_expr or identifier — check aliases, enums, structs. A raw
|
||||
// reference (`` `s2 ``) skips the builtin classifier and resolves
|
||||
// through user-defined types only (issue 0089).
|
||||
if (tn.data == .type_expr or tn.data == .identifier) {
|
||||
const name = if (tn.data == .type_expr) tn.data.type_expr.name else tn.data.identifier.name;
|
||||
if (Type.fromName(name)) |t| return t;
|
||||
const is_raw = if (tn.data == .type_expr) tn.data.type_expr.is_raw else tn.data.identifier.is_raw;
|
||||
if (!is_raw) {
|
||||
if (Type.fromName(name)) |t| return t;
|
||||
}
|
||||
if (self.type_aliases.get(name)) |target| {
|
||||
if (Type.fromName(target)) |t| return t;
|
||||
if (self.struct_types.contains(target)) return .{ .struct_type = target };
|
||||
@@ -430,9 +435,16 @@ pub const Analyzer = struct {
|
||||
/// Resolve a bare type-name string against the registry (aliases, enums,
|
||||
/// structs), falling back to primitive spellings. Unlike `Type.fromName`,
|
||||
/// this knows user-defined types; returns `unresolved` when it can't place
|
||||
/// the name.
|
||||
fn resolveTypeNameStr(self: *Analyzer, name: []const u8) Type {
|
||||
if (Type.fromName(name)) |t| return t;
|
||||
/// the name. `skip_builtin` is the backtick raw escape (issue 0089) — a raw
|
||||
/// reference (`` `s2 ``) bypasses the builtin/reserved classifier and
|
||||
/// resolves only through user-defined types, mirroring the codegen-side
|
||||
/// `TypeResolver.resolveNamed`. Inner names of compound shapes
|
||||
/// (pointer/slice element/pointee) are always bare, so their callers pass
|
||||
/// `false`.
|
||||
fn resolveTypeNameStr(self: *Analyzer, name: []const u8, skip_builtin: bool) Type {
|
||||
if (!skip_builtin) {
|
||||
if (Type.fromName(name)) |t| return t;
|
||||
}
|
||||
if (self.type_aliases.get(name)) |target| {
|
||||
if (Type.fromName(target)) |t| return t;
|
||||
if (self.struct_types.contains(target)) return .{ .struct_type = target };
|
||||
@@ -460,8 +472,8 @@ pub const Analyzer = struct {
|
||||
/// registry; the element name is resolved lazily at index/field time.
|
||||
fn fieldType(self: *Analyzer, node: *Node) Type {
|
||||
return switch (node.data) {
|
||||
.type_expr => |te| self.resolveTypeNameStr(te.name),
|
||||
.identifier => |id| self.resolveTypeNameStr(id.name),
|
||||
.type_expr => |te| self.resolveTypeNameStr(te.name, te.is_raw),
|
||||
.identifier => |id| self.resolveTypeNameStr(id.name, id.is_raw),
|
||||
.many_pointer_type_expr => |mp| .{ .many_pointer_type = .{ .element_name = self.typeExprName(mp.element_type) } },
|
||||
.pointer_type_expr => |p| .{ .pointer_type = .{ .pointee_name = self.typeExprName(p.pointee_type) } },
|
||||
.slice_type_expr => |s| .{ .slice_type = .{ .element_name = self.typeExprName(s.element_type) } },
|
||||
@@ -476,15 +488,15 @@ pub const Analyzer = struct {
|
||||
/// pointee first (so `*List(Move)` still iterates `Move`).
|
||||
fn elementTypeOf(self: *Analyzer, ty: Type) ?Type {
|
||||
return switch (ty) {
|
||||
.array_type => |i| self.resolveTypeNameStr(i.element_name),
|
||||
.slice_type => |i| self.resolveTypeNameStr(i.element_name),
|
||||
.many_pointer_type => |i| self.resolveTypeNameStr(i.element_name),
|
||||
.pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name)),
|
||||
.array_type => |i| self.resolveTypeNameStr(i.element_name, false),
|
||||
.slice_type => |i| self.resolveTypeNameStr(i.element_name, false),
|
||||
.many_pointer_type => |i| self.resolveTypeNameStr(i.element_name, false),
|
||||
.pointer_type => |i| self.elementTypeOf(self.resolveTypeNameStr(i.pointee_name, false)),
|
||||
.struct_type => |name| blk: {
|
||||
const info = self.struct_types.get(name) orelse break :blk null;
|
||||
for (info.field_names, info.field_types) |fname, fty| {
|
||||
if (std.mem.eql(u8, fname, "items") and fty == .many_pointer_type) {
|
||||
break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name);
|
||||
break :blk self.resolveTypeNameStr(fty.many_pointer_type.element_name, false);
|
||||
}
|
||||
}
|
||||
break :blk null;
|
||||
@@ -642,7 +654,7 @@ pub const Analyzer = struct {
|
||||
var obj_ty = self.inferExprType(fa.object);
|
||||
// `p.field` where `p` is `*T` resolves on the pointee `T`.
|
||||
if (obj_ty.isPointer()) {
|
||||
obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name);
|
||||
obj_ty = self.resolveTypeNameStr(obj_ty.pointer_type.pointee_name, false);
|
||||
}
|
||||
// `.len` / `.ptr` on the built-in containers (string, slice, array).
|
||||
if (std.mem.eql(u8, fa.field, "len")) {
|
||||
@@ -670,9 +682,9 @@ pub const Analyzer = struct {
|
||||
.index_expr => |ie| {
|
||||
const obj_ty = self.inferExprType(ie.object);
|
||||
if (obj_ty == .string_type) return Type.u(8);
|
||||
if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name);
|
||||
if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name);
|
||||
if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name);
|
||||
if (obj_ty.isArray()) return self.resolveTypeNameStr(obj_ty.array_type.element_name, false);
|
||||
if (obj_ty.isManyPointer()) return self.resolveTypeNameStr(obj_ty.many_pointer_type.element_name, false);
|
||||
if (obj_ty.isSlice()) return self.resolveTypeNameStr(obj_ty.slice_type.element_name, false);
|
||||
return Type.unresolved;
|
||||
},
|
||||
.slice_expr => |se| {
|
||||
@@ -1054,7 +1066,7 @@ pub const Analyzer = struct {
|
||||
.field_access => |fa| {
|
||||
try self.analyzeNode(fa.object);
|
||||
var owner_ty = self.inferExprType(fa.object);
|
||||
if (owner_ty.isPointer()) owner_ty = self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name);
|
||||
if (owner_ty.isPointer()) owner_ty = self.resolveTypeNameStr(owner_ty.pointer_type.pointee_name, false);
|
||||
self.recordMemberRef(fa.field, owner_ty.toName() orelse "", false);
|
||||
},
|
||||
.enum_literal => |el| {
|
||||
@@ -1083,7 +1095,7 @@ pub const Analyzer = struct {
|
||||
.match_expr => |me| {
|
||||
try self.analyzeNode(me.subject);
|
||||
var subj_ty = self.inferExprType(me.subject);
|
||||
if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name);
|
||||
if (subj_ty.isPointer()) subj_ty = self.resolveTypeNameStr(subj_ty.pointer_type.pointee_name, false);
|
||||
const subj_owner = subj_ty.toName() orelse "";
|
||||
for (me.arms) |arm| {
|
||||
if (arm.pattern) |pat| {
|
||||
|
||||
@@ -198,6 +198,12 @@ pub const Type = union(enum) {
|
||||
|
||||
/// Classify a `type_expr` node against the builtin type names via `fromName`.
///
/// Returns null when `node` is not a `type_expr`, and also when it is a
/// backtick raw reference (`` `s2 ``): a raw name is the LITERAL name used as
/// a type, so it must bypass this builtin/reserved classifier and resolve
/// through user-defined types only (issue 0089), mirroring the codegen-side
/// `resolveNamed`'s `skip_builtin`. The null result lets the sema callers
/// fall through to their struct/enum/alias registry lookup.
pub fn fromTypeExpr(node: *Node) ?Type {
    return switch (node.data) {
        .type_expr => |te| if (te.is_raw) null else fromName(te.name),
        else => null,
    };
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user