Files
sx/src/ir/semantic_diagnostics.zig
agra 8ff24472c9 refactor(ir): extract unknown-type diagnostic pass into semantic_diagnostics (A2.4)
Moves the issue-0064 unknown-type pass (checkUnknownTypeNames + 11 helpers:
collectDeclaredTypeNames, harvestScopeDecls, checkStructFieldTypes,
checkFnSignatureTypes, checkScope, walkBodyTypes, checkCastTarget,
checkTypeNodeForUnknown, reportIfUnknownType, isBuiltinTypeName, isIdentLike)
out of Lowering into a new src/ir/semantic_diagnostics.zig (UnknownTypeChecker).

The checker holds borrowed references (alloc, *DiagnosticList, *TypeTable,
*ProgramIndex, main_file) — not *Lowering — and queries the canonical facts:
declared top-level names from ProgramIndex, primitives from
TypeResolver.resolvePrimitive, registered concrete types from the TypeTable.
The AST decl/scope walk stays (it collects LOCAL type decls, which ProgramIndex
doesn't track — a per-pass scope need, not a parallel authoritative list).

Lowering.lowerRoot builds the checker only when diagnostics are active and runs
it; the 12 functions are deleted from lower.zig. Barrel-wired in ir.zig.
Example snapshots (issue-0064 regressions 1111-1115) are the guard, matching the
checkErrorFlow precedent (no .test.zig).

Phase A2 complete. Gate: zig build, zig build test, run_examples 351/0.
2026-06-02 16:12:28 +03:00

439 lines
23 KiB
Zig

const std = @import("std");
const ast = @import("../ast.zig");
const errors = @import("../errors.zig");
const types = @import("types.zig");
const program_index_mod = @import("program_index.zig");
const type_resolver = @import("type_resolver.zig");
const Node = ast.Node;
const TypeTable = types.TypeTable;
const ProgramIndex = program_index_mod.ProgramIndex;
const TypeResolver = type_resolver.TypeResolver;
/// Unknown-type diagnostic pass (issue 0064), extracted from `Lowering`
/// (architecture phase A2.4). Rejects an identifier used in a type position
/// that names no declared type, primitive, or in-scope generic type parameter.
/// Without it, `TypeResolver.resolveNamed`'s empty-struct-stub fallback silently
/// fabricates a 0-field struct named after the unknown identifier — so a value
/// param mistakenly used as a type (`(T: Type, …) -> T`, missing the `$`) or a
/// typo'd type name compiles and runs, rendering as `T{}`. Main-file decls only;
/// imported / library modules are trusted, matching `checkErrorFlow`.
///
/// Queries the canonical facts rather than maintaining a parallel authoritative
/// list: declared top-level names come from `ProgramIndex` (foreign classes,
/// generic templates, protocols, aliases) plus the AST decl/scope walk (for
/// LOCAL type decls, which `ProgramIndex` doesn't track); primitives come from
/// `TypeResolver.resolvePrimitive`; registered concrete types from the
/// `TypeTable`. Constructed by value with borrowed references; built only when
/// diagnostics are active.
pub const UnknownTypeChecker = struct {
/// Allocator used for the pass's scratch containers: the declared-name set
/// built in `run` and the in-scope generic / value-`Type` param lists built
/// in `checkFnSignatureTypes`.
alloc: std.mem.Allocator,
/// Sink that receives the "unknown type" and "value parameter, not a type"
/// errors via `addFmt`.
diagnostics: *errors.DiagnosticList,
/// Queried by name in `reportIfUnknownType` to recognise types that are
/// registered but absent from the declared-name set.
types: *TypeTable,
/// Supplies the foreign-class, struct-template, protocol and type-alias
/// name maps that are merged into the declared-name set.
index: *ProgramIndex,
/// When non-null, `run` skips every decl whose `source_file` is known and
/// differs from this path. When null, all decls are checked.
main_file: ?[]const u8,
/// Entry point of the pass. First gathers, from every decl in `decls`, the
/// names that may legitimately appear in a type position; then checks the
/// function signatures/bodies and struct field types of the top-level decls.
/// A decl is skipped only when both `main_file` and the decl's `source_file`
/// are known and differ. Findings are appended to `self.diagnostics`.
pub fn run(self: UnknownTypeChecker, decls: []const *const Node) void {
    var known_names = std.StringHashMap(void).init(self.alloc);
    defer known_names.deinit();
    self.collectDeclaredTypeNames(decls, &known_names);

    for (decls) |decl| {
        // True only when both paths are present and name different files.
        const from_other_file = blk: {
            const main_path = self.main_file orelse break :blk false;
            const decl_path = decl.source_file orelse break :blk false;
            break :blk !std.mem.eql(u8, decl_path, main_path);
        };
        if (from_other_file) continue;

        switch (decl.data) {
            .fn_decl => self.checkFnSignatureTypes(&decl.data.fn_decl, &known_names),
            .struct_decl => |top_struct| self.checkStructFieldTypes(&top_struct, &known_names),
            // `name :: fn …` / `name :: struct …` wrap the real decl in a const.
            .const_decl => |named| {
                const value = named.value;
                switch (value.data) {
                    .fn_decl => self.checkFnSignatureTypes(&value.data.fn_decl, &known_names),
                    .struct_decl => |inner_struct| self.checkStructFieldTypes(&inner_struct, &known_names),
                    else => {},
                }
            },
            else => {},
        }
    }
}
/// Collect every top-level name that can legitimately appear in a type
/// position into `out`:
///   * const-decl names (covers `T :: struct/enum/union/error/alias` and
///     value consts),
///   * bare top-level struct / enum / union decl names,
///   * names declared anywhere inside top-level function bodies (via
///     `harvestScopeDecls`),
///   * the keys of the foreign-class, struct-template, protocol and
///     type-alias maps held by `ProgramIndex`.
/// Built across ALL files (no `main_file` filter) so a main-file reference to
/// an imported type isn't flagged.
///
/// Insertion is best-effort: a failed `put` (allocation failure) is ignored,
/// which can only leave a name out of the set.
fn collectDeclaredTypeNames(self: UnknownTypeChecker, decls: []const *const Node, out: *std.StringHashMap(void)) void {
    for (decls) |decl| {
        switch (decl.data) {
            .const_decl => |cd| {
                out.put(cd.name, {}) catch {};
                if (cd.value.data == .fn_decl) self.harvestScopeDecls(cd.value.data.fn_decl.body, out);
            },
            .struct_decl => |sd| out.put(sd.name, {}) catch {},
            // Bare enum / union decls are recorded the same way
            // `harvestScopeDecls` records them for nested scopes, so the
            // top-level and body-local name sets agree. Over-collection only
            // relaxes the unknown-type check.
            .enum_decl => |ed| out.put(ed.name, {}) catch {},
            .union_decl => |ud| out.put(ud.name, {}) catch {},
            .fn_decl => |fd| self.harvestScopeDecls(fd.body, out),
            else => {},
        }
    }

    // Names known to the program index.
    var it_fc = self.index.foreign_class_map.keyIterator();
    while (it_fc.next()) |k| out.put(k.*, {}) catch {};
    var it_tmpl = self.index.struct_template_map.keyIterator();
    while (it_tmpl.next()) |k| out.put(k.*, {}) catch {};
    var it_pd = self.index.protocol_decl_map.keyIterator();
    while (it_pd.next()) |k| out.put(k.*, {}) catch {};
    var it_pa = self.index.protocol_ast_map.keyIterator();
    while (it_pa.next()) |k| out.put(k.*, {}) catch {};
    var it_al = self.index.type_alias_map.keyIterator();
    while (it_al.next()) |k| out.put(k.*, {}) catch {};
}
/// Recursively scan `node` and add to `out` the name of every const, struct,
/// enum and union declaration found, descending through statements,
/// expressions and nested lambda / function bodies. All names land in one
/// flat set regardless of the scope they were declared in; that
/// over-collection can only relax the unknown-type check, never tighten it.
/// Node kinds not listed below are leaves for this scan.
fn harvestScopeDecls(self: UnknownTypeChecker, node: *const Node, out: *std.StringHashMap(void)) void {
    switch (node.data) {
        // --- declarations: record the name ---
        .struct_decl => |decl| out.put(decl.name, {}) catch {},
        .enum_decl => |decl| out.put(decl.name, {}) catch {},
        .union_decl => |decl| out.put(decl.name, {}) catch {},
        .const_decl => |decl| {
            out.put(decl.name, {}) catch {};
            self.harvestScopeDecls(decl.value, out);
        },
        .var_decl => |decl| {
            if (decl.value) |init_value| self.harvestScopeDecls(init_value, out);
        },
        .destructure_decl => |decl| self.harvestScopeDecls(decl.value, out),

        // --- nested callables ---
        .fn_decl => |func| self.harvestScopeDecls(func.body, out),
        .lambda => |closure| self.harvestScopeDecls(closure.body, out),

        // --- statements and control flow ---
        .block => |blk| {
            for (blk.stmts) |stmt| self.harvestScopeDecls(stmt, out);
        },
        .if_expr => |branch| {
            self.harvestScopeDecls(branch.condition, out);
            self.harvestScopeDecls(branch.then_branch, out);
            if (branch.else_branch) |otherwise| self.harvestScopeDecls(otherwise, out);
        },
        .while_expr => |loop| {
            self.harvestScopeDecls(loop.condition, out);
            self.harvestScopeDecls(loop.body, out);
        },
        .for_expr => |loop| {
            self.harvestScopeDecls(loop.iterable, out);
            if (loop.range_end) |upper| self.harvestScopeDecls(upper, out);
            self.harvestScopeDecls(loop.body, out);
        },
        .match_expr => |match| {
            self.harvestScopeDecls(match.subject, out);
            for (match.arms) |arm| self.harvestScopeDecls(arm.body, out);
        },
        .push_stmt => |push| {
            self.harvestScopeDecls(push.context_expr, out);
            self.harvestScopeDecls(push.body, out);
        },
        .defer_stmt => |deferred| self.harvestScopeDecls(deferred.expr, out),
        .onfail_stmt => |handler| self.harvestScopeDecls(handler.body, out),
        .return_stmt => |ret| {
            if (ret.value) |result| self.harvestScopeDecls(result, out);
        },
        .raise_stmt => |raised| self.harvestScopeDecls(raised.tag, out),
        .assignment => |assign| {
            self.harvestScopeDecls(assign.value, out);
            self.harvestScopeDecls(assign.target, out);
        },
        .multi_assign => |assign| {
            for (assign.values) |value| self.harvestScopeDecls(value, out);
        },

        // --- expressions ---
        .call => |invocation| {
            self.harvestScopeDecls(invocation.callee, out);
            for (invocation.args) |arg| self.harvestScopeDecls(arg, out);
        },
        .binary_op => |bin| {
            self.harvestScopeDecls(bin.lhs, out);
            self.harvestScopeDecls(bin.rhs, out);
        },
        .null_coalesce => |coalesce| {
            self.harvestScopeDecls(coalesce.lhs, out);
            self.harvestScopeDecls(coalesce.rhs, out);
        },
        .catch_expr => |caught| {
            self.harvestScopeDecls(caught.operand, out);
            self.harvestScopeDecls(caught.body, out);
        },
        .index_expr => |indexed| {
            self.harvestScopeDecls(indexed.object, out);
            self.harvestScopeDecls(indexed.index, out);
        },
        .struct_literal => |literal| {
            for (literal.field_inits) |field_init| self.harvestScopeDecls(field_init.value, out);
            if (literal.init_block) |init_block| self.harvestScopeDecls(init_block, out);
        },
        .array_literal => |literal| {
            for (literal.elements) |element| self.harvestScopeDecls(element, out);
        },
        .field_access => |access| self.harvestScopeDecls(access.object, out),
        .unary_op => |unary| self.harvestScopeDecls(unary.operand, out),
        .force_unwrap => |unwrap| self.harvestScopeDecls(unwrap.operand, out),
        .deref_expr => |deref| self.harvestScopeDecls(deref.operand, out),
        .try_expr => |attempt| self.harvestScopeDecls(attempt.operand, out),
        .spread_expr => |spread| self.harvestScopeDecls(spread.operand, out),
        .comptime_expr => |ct| self.harvestScopeDecls(ct.expr, out),

        else => {},
    }
}
/// Check every field type annotation of a non-generic struct against the
/// declared-name set, with no generic params or value-`Type` params in scope.
/// Generic structs are skipped entirely: their fields may reference the
/// struct's own type params (`$T`), which are resolved at instantiation.
fn checkStructFieldTypes(self: UnknownTypeChecker, sd: *const ast.StructDecl, declared: *std.StringHashMap(void)) void {
    const is_generic = sd.type_params.len != 0;
    if (is_generic) return;

    for (sd.field_types) |field_type| {
        self.checkTypeNodeForUnknown(field_type, declared, &.{}, &.{});
    }
}
/// Check a top-level function: creates the two (initially empty) scope stacks
/// — generic type params and value-`Type` param names — hands the function's
/// signature and body to `checkScope`, and frees the stacks afterwards.
fn checkFnSignatureTypes(self: UnknownTypeChecker, fd: *const ast.FnDecl, declared: *std.StringHashMap(void)) void {
    var generic_params: std.ArrayList(ast.StructTypeParam) = .empty;
    defer generic_params.deinit(self.alloc);

    var type_value_params: std.ArrayList([]const u8) = .empty;
    defer type_value_params.deinit(self.alloc);

    self.checkScope(
        fd.type_params,
        fd.params,
        fd.return_type,
        fd.body,
        declared,
        &generic_params,
        &type_value_params,
    );
}
/// Check a single function / closure scope.
///
/// The scope's generic params (`$T`) are pushed onto `in_scope` and the names
/// of its value params annotated `Type` / `type` onto `type_vals`, on top of
/// whatever the enclosing scopes already pushed (so a nested closure still
/// sees the outer function's `$T`). Then every param annotation and the
/// return annotation are checked, and finally the body is walked. Both stacks
/// are truncated back to their entry length when the function returns.
fn checkScope(
    self: UnknownTypeChecker,
    type_params: []const ast.StructTypeParam,
    params: []const ast.Param,
    return_type: ?*Node,
    body: *const Node,
    declared: *std.StringHashMap(void),
    in_scope: *std.ArrayList(ast.StructTypeParam),
    type_vals: *std.ArrayList([]const u8),
) void {
    const outer_generics_len = in_scope.items.len;
    const outer_type_vals_len = type_vals.items.len;
    defer in_scope.shrinkRetainingCapacity(outer_generics_len);
    defer type_vals.shrinkRetainingCapacity(outer_type_vals_len);

    for (type_params) |generic| {
        in_scope.append(self.alloc, generic) catch {};
    }

    // First pass over the params: remember every value param declared
    // `: Type` (no `$`). Using one in a type position is the issue-0064
    // misuse and earns the tailored hint. This runs over ALL params before
    // any annotation is checked, so an earlier param that refers to a later
    // `Type` param still gets that hint.
    for (params) |param| {
        const annotation_name = switch (param.type_expr.data) {
            .type_expr => |te| te.name,
            else => continue,
        };
        const names_type_keyword = std.mem.eql(u8, annotation_name, "Type") or
            std.mem.eql(u8, annotation_name, "type");
        if (names_type_keyword) {
            type_vals.append(self.alloc, param.name) catch {};
        }
    }

    // Second pass: validate the annotations themselves, then the return type.
    for (params) |param| {
        self.checkTypeNodeForUnknown(param.type_expr, declared, in_scope.items, type_vals.items);
    }
    if (return_type) |ret| {
        self.checkTypeNodeForUnknown(ret, declared, in_scope.items, type_vals.items);
    }

    self.walkBodyTypes(body, declared, in_scope, type_vals);
}
/// Walk a scope body checking type annotations on local var / const
/// declarations (and body-local struct fields), descending control flow and
/// expressions. Nested closure / function literals re-enter via `checkScope`
/// with their own params added to `in_scope`.
///
/// `declared` is the set of names accepted in a type position; `in_scope` and
/// `type_vals` are the generic-param and value-`Type`-param stacks of the
/// enclosing scopes. Diagnostics are emitted in traversal order, so the order
/// of the recursive calls below is significant for the reported output.
/// Node kinds that fall into the `else` arm are not descended.
fn walkBodyTypes(
self: UnknownTypeChecker,
node: *const Node,
declared: *std.StringHashMap(void),
in_scope: *std.ArrayList(ast.StructTypeParam),
type_vals: *std.ArrayList([]const u8),
) void {
switch (node.data) {
.block => |b| for (b.stmts) |s| self.walkBodyTypes(s, declared, in_scope, type_vals),
.if_expr => |ie| {
self.walkBodyTypes(ie.condition, declared, in_scope, type_vals);
self.walkBodyTypes(ie.then_branch, declared, in_scope, type_vals);
if (ie.else_branch) |e| self.walkBodyTypes(e, declared, in_scope, type_vals);
},
.while_expr => |we| {
self.walkBodyTypes(we.condition, declared, in_scope, type_vals);
self.walkBodyTypes(we.body, declared, in_scope, type_vals);
},
.for_expr => |fe| {
self.walkBodyTypes(fe.iterable, declared, in_scope, type_vals);
if (fe.range_end) |re| self.walkBodyTypes(re, declared, in_scope, type_vals);
self.walkBodyTypes(fe.body, declared, in_scope, type_vals);
},
// Only the subject and the arm bodies are walked; arm patterns are not.
.match_expr => |me| {
self.walkBodyTypes(me.subject, declared, in_scope, type_vals);
for (me.arms) |arm| self.walkBodyTypes(arm.body, declared, in_scope, type_vals);
},
.push_stmt => |ps| {
self.walkBodyTypes(ps.context_expr, declared, in_scope, type_vals);
self.walkBodyTypes(ps.body, declared, in_scope, type_vals);
},
.defer_stmt => |ds| self.walkBodyTypes(ds.expr, declared, in_scope, type_vals),
.onfail_stmt => |os| self.walkBodyTypes(os.body, declared, in_scope, type_vals),
.return_stmt => |r| if (r.value) |v| self.walkBodyTypes(v, declared, in_scope, type_vals),
.raise_stmt => |rs| self.walkBodyTypes(rs.tag, declared, in_scope, type_vals),
.assignment => |a| {
self.walkBodyTypes(a.value, declared, in_scope, type_vals);
self.walkBodyTypes(a.target, declared, in_scope, type_vals);
},
// Only the assigned values are walked here.
.multi_assign => |ma| for (ma.values) |v| self.walkBodyTypes(v, declared, in_scope, type_vals),
.destructure_decl => |dd| self.walkBodyTypes(dd.value, declared, in_scope, type_vals),
// Local declarations: check the optional annotation first, then descend
// into the initializer.
.var_decl => |vd| {
if (vd.type_annotation) |ta| self.checkTypeNodeForUnknown(ta, declared, in_scope.items, type_vals.items);
if (vd.value) |v| self.walkBodyTypes(v, declared, in_scope, type_vals);
},
.const_decl => |cd| {
if (cd.type_annotation) |ta| self.checkTypeNodeForUnknown(ta, declared, in_scope.items, type_vals.items);
self.walkBodyTypes(cd.value, declared, in_scope, type_vals);
},
// Body-local non-generic struct: its field types are checked with the
// enclosing scope's generic / value-`Type` params visible. Generic
// structs are skipped.
.struct_decl => |sd| if (sd.type_params.len == 0) {
for (sd.field_types) |ft| self.checkTypeNodeForUnknown(ft, declared, in_scope.items, type_vals.items);
},
.call => |c| {
// `cast(T) x` parses to a `cast` call whose first arg is the
// target type spelled as an expression. An unknown *literal*
// target already errors via value resolution; only flag the
// otherwise-silent value-`Type`-param case here.
if (c.callee.data == .identifier and std.mem.eql(u8, c.callee.data.identifier.name, "cast") and c.args.len == 2) {
self.checkCastTarget(c.args[0], in_scope.items, type_vals.items);
}
self.walkBodyTypes(c.callee, declared, in_scope, type_vals);
for (c.args) |a| self.walkBodyTypes(a, declared, in_scope, type_vals);
},
.binary_op => |b| {
self.walkBodyTypes(b.lhs, declared, in_scope, type_vals);
self.walkBodyTypes(b.rhs, declared, in_scope, type_vals);
},
.unary_op => |u| self.walkBodyTypes(u.operand, declared, in_scope, type_vals),
.field_access => |fa| self.walkBodyTypes(fa.object, declared, in_scope, type_vals),
.index_expr => |ix| {
self.walkBodyTypes(ix.object, declared, in_scope, type_vals);
self.walkBodyTypes(ix.index, declared, in_scope, type_vals);
},
.struct_literal => |sl| {
for (sl.field_inits) |fi| self.walkBodyTypes(fi.value, declared, in_scope, type_vals);
if (sl.init_block) |ib| self.walkBodyTypes(ib, declared, in_scope, type_vals);
},
.array_literal => |al| for (al.elements) |e| self.walkBodyTypes(e, declared, in_scope, type_vals),
.force_unwrap => |fu| self.walkBodyTypes(fu.operand, declared, in_scope, type_vals),
.null_coalesce => |nc| {
self.walkBodyTypes(nc.lhs, declared, in_scope, type_vals);
self.walkBodyTypes(nc.rhs, declared, in_scope, type_vals);
},
.deref_expr => |de| self.walkBodyTypes(de.operand, declared, in_scope, type_vals),
.try_expr => |te| self.walkBodyTypes(te.operand, declared, in_scope, type_vals),
.catch_expr => |ce| {
self.walkBodyTypes(ce.operand, declared, in_scope, type_vals);
self.walkBodyTypes(ce.body, declared, in_scope, type_vals);
},
.comptime_expr => |ce| self.walkBodyTypes(ce.expr, declared, in_scope, type_vals),
.spread_expr => |se| self.walkBodyTypes(se.operand, declared, in_scope, type_vals),
// Nested callables open a new scope: `checkScope` pushes their own
// generic / value-`Type` params on top of the current stacks and pops
// them again before returning.
.lambda => |lm| self.checkScope(lm.type_params, lm.params, lm.return_type, lm.body, declared, in_scope, type_vals),
.fn_decl => |fd| self.checkScope(fd.type_params, fd.params, fd.return_type, fd.body, declared, in_scope, type_vals),
else => {},
}
}
/// Validate the target of a `cast(T)` call. Only identifier / type-expr
/// targets are inspected. A target that matches an in-scope generic param is
/// fine; one that matches a value-`Type` parameter (the otherwise-silent
/// issue-0064 case in cast position) gets the tailored `$T` hint. Any other
/// name is left alone here.
fn checkCastTarget(self: UnknownTypeChecker, arg: *const Node, in_scope: []const ast.StructTypeParam, type_vals: []const []const u8) void {
    const target = switch (arg.data) {
        .identifier => |ident| ident.name,
        .type_expr => |ty| ty.name,
        else => return,
    };

    // A real generic param shadows any value param of the same name.
    for (in_scope) |generic| {
        if (std.mem.eql(u8, generic.name, target)) return;
    }

    const names_value_param = for (type_vals) |candidate| {
        if (std.mem.eql(u8, candidate, target)) break true;
    } else false;
    if (!names_value_param) return;

    self.diagnostics.addFmt(.err, arg.span, "'{s}' is a value parameter, not a type; introduce a generic type parameter with `${s}: Type`", .{ target, target });
}
/// Descend a type-annotation node down to its leaf names and hand each leaf
/// to `reportIfUnknownType`. Wrapper types (pointer, many-pointer, slice,
/// optional, array) recurse into their single inner type; tuple, function and
/// closure types recurse into each component. Node kinds not listed are
/// ignored.
fn checkTypeNodeForUnknown(
    self: UnknownTypeChecker,
    node: *const Node,
    declared: *std.StringHashMap(void),
    in_scope: []const ast.StructTypeParam,
    type_vals: []const []const u8,
) void {
    switch (node.data) {
        // --- leaves ---
        .identifier => |ident| self.reportIfUnknownType(ident.name, node.span, declared, in_scope, type_vals),
        .type_expr => |named| {
            // A `$`-prefixed name (`-> $R`) introduces/references a generic
            // type param inline — always valid in a type position.
            if (named.is_generic) return;
            self.reportIfUnknownType(named.name, node.span, declared, in_scope, type_vals);
        },

        // --- single-child wrappers ---
        .optional_type_expr => |opt| self.checkTypeNodeForUnknown(opt.inner_type, declared, in_scope, type_vals),
        .pointer_type_expr => |ptr| self.checkTypeNodeForUnknown(ptr.pointee_type, declared, in_scope, type_vals),
        .many_pointer_type_expr => |many| self.checkTypeNodeForUnknown(many.element_type, declared, in_scope, type_vals),
        .slice_type_expr => |slice| self.checkTypeNodeForUnknown(slice.element_type, declared, in_scope, type_vals),
        .array_type_expr => |array| self.checkTypeNodeForUnknown(array.element_type, declared, in_scope, type_vals),

        // --- multi-child composites ---
        .tuple_type_expr => |tuple| {
            for (tuple.field_types) |member| self.checkTypeNodeForUnknown(member, declared, in_scope, type_vals);
        },
        .function_type_expr => |func| {
            for (func.param_types) |param_type| self.checkTypeNodeForUnknown(param_type, declared, in_scope, type_vals);
            if (func.return_type) |ret| self.checkTypeNodeForUnknown(ret, declared, in_scope, type_vals);
        },
        .closure_type_expr => |closure| {
            // Variadic type-pack closures (`Closure(..$args) -> R`) resolve
            // their projections specially — don't walk them here.
            if (closure.pack_name == null) {
                for (closure.param_types) |param_type| self.checkTypeNodeForUnknown(param_type, declared, in_scope, type_vals);
                if (closure.return_type) |ret| self.checkTypeNodeForUnknown(ret, declared, in_scope, type_vals);
            }
        },
        // Builtin constructors (Vector) and generic templates resolve the
        // base name specially; just check the type args.
        .parameterized_type_expr => |param_type| {
            for (param_type.args) |type_arg| self.checkTypeNodeForUnknown(type_arg, declared, in_scope, type_vals);
        },

        else => {},
    }
}
/// Decide whether `name`, used in a type position at `span`, is known, and
/// emit an error if it is not. A name is accepted, in this order, when it:
///   1. is not a bare identifier (compound spellings are trusted),
///   2. is a builtin type name,
///   3. matches an in-scope generic type param,
///   4. is in the `declared` set,
///   5. is registered in the type table as anything other than a 0-field
///      struct.
/// Otherwise the tailored "value parameter" hint is reported when the name
/// matches a value-`Type` param, and a plain "unknown type" error if not.
fn reportIfUnknownType(
    self: UnknownTypeChecker,
    name: []const u8,
    span: ?ast.Span,
    declared: *std.StringHashMap(void),
    in_scope: []const ast.StructTypeParam,
    type_vals: []const []const u8,
) void {
    // Only bare identifiers are validated. Inline-spelled compound types
    // (`[:0]u8`, `mod.Type`, …) carry non-identifier characters — trust them.
    if (!isIdentLike(name) or isBuiltinTypeName(name)) return;

    for (in_scope) |generic| {
        if (std.mem.eql(u8, generic.name, name)) return;
    }
    if (declared.contains(name)) return;

    // Registered as a real (non-stub) type — covers imported concrete
    // structs / enums / unions absent from the main-file decl list. A
    // fabricated empty-struct stub (the very thing we're catching) is the
    // sole 0-field-struct case, so it doesn't suppress the diagnostic.
    const interned = self.types.internString(name);
    if (self.types.findByName(interned)) |type_id| {
        const info = self.types.get(type_id);
        const is_real_type = info != .@"struct" or info.@"struct".fields.len != 0;
        if (is_real_type) return;
    }

    const names_value_param = for (type_vals) |candidate| {
        if (std.mem.eql(u8, candidate, name)) break true;
    } else false;

    if (names_value_param) {
        self.diagnostics.addFmt(.err, span, "'{s}' is a value parameter, not a type; introduce a generic type parameter with `${s}: Type`", .{ name, name });
        return;
    }
    self.diagnostics.addFmt(.err, span, "unknown type '{s}'", .{name});
}
};
/// Report whether `name` is a builtin type name. True when any of these hold:
///   * `TypeResolver.resolvePrimitive` recognises it,
///   * it is `u`, `s` or `f` followed by one or more decimal digits
///     (arbitrary-width integers / floats: u1, s7, u128, f16, f80, …),
///   * it is one of the fixed extra names listed below.
fn isBuiltinTypeName(name: []const u8) bool {
    if (TypeResolver.resolvePrimitive(name) != null) return true;

    if (name.len >= 2) {
        switch (name[0]) {
            'u', 's', 'f' => {
                const digits_only = for (name[1..]) |ch| {
                    if (!std.ascii.isDigit(ch)) break false;
                } else true;
                if (digits_only) return true;
            },
            else => {},
        }
    }

    const extra_names = [_][]const u8{ "Type", "type", "int", "float", "Self", "self", "any", "noreturn", "usize", "isize", "comptime_int", "comptime_float" };
    for (extra_names) |candidate| {
        if (std.mem.eql(u8, name, candidate)) return true;
    }
    return false;
}
/// Report whether `name` is spelled like a bare identifier: non-empty, first
/// byte an ASCII letter or `_`, every following byte an ASCII letter, digit
/// or `_`.
fn isIdentLike(name: []const u8) bool {
    if (name.len == 0) return false;

    const head = name[0];
    if (head != '_' and !std.ascii.isAlphabetic(head)) return false;

    // The head was validated above, so only the tail needs scanning.
    for (name[1..]) |ch| {
        if (ch != '_' and !std.ascii.isAlphanumeric(ch)) return false;
    }
    return true;
}