refactor(ir): extract GenericResolver (generics.zig) for substitution + mono keys (A4.1 step 2)
Generic substitution and monomorphization-key construction now live in one module, src/ir/generics.zig, behind a *Lowering facade (GenericResolver), mirroring CallResolver / ExprTyper. Moved verbatim: - mangleTypeName + mangleParamList (the mono-key fragment builder), - mangleGenericName (generic mono key), appendComptimeValueMangle (comptime-value fragment), - buildTypeBindings (call-site type-param inference), inferGenericReturnType (generic return resolution). inferGenericReturnType now uses a scoped TypeBindingScope (enter/exit with defer) instead of a manual type_bindings save/restore — the PLAN-ARCH A4.1 "scoped substitution env" shape; a generics.test.zig assertion confirms the prior bindings are restored (the issue-0048/0050 leak class, for this field). Lowering keeps a thin pub mangleTypeName wrapper delegating to genericResolver().mangleTypeName, because ~30 cross-cutting callers (impl-map keys, conversion keys, shape keys) reach it well beyond generics. mangleParamList (sole caller was mangleTypeName) moved fully. The other 4 originals are deleted (no fallback); their 6 call sites now go through self.genericResolver() (calls.zig via self.l.genericResolver()). matchTypeParam / extractTypeParam / isTypeParamDecl widened to pub (the moved substitution logic calls them); genericResolver() accessor added. The 2 mangleTypeName / inferGenericReturnType unit tests moved from lower.test.zig to generics.test.zig (driving GenericResolver directly) and wired into the barrel. monomorphizeFunction / monomorphizePackFn intentionally stay in lower.zig (they save/restore three fields across nested mono and call emission helpers) — a heavier scoped-env adoption deferred to an optional sub-step 3. zig build, zig build test, and tests/run_examples.sh (357/0) all green — no .ir snapshot churn, confirming the move preserved mono-key/substitution output.
This commit is contained in:
331
src/ir/generics.zig
Normal file
331
src/ir/generics.zig
Normal file
@@ -0,0 +1,331 @@
|
||||
const std = @import("std");
|
||||
const ast = @import("../ast.zig");
|
||||
const types = @import("types.zig");
|
||||
const type_bridge = @import("type_bridge.zig");
|
||||
const lower = @import("lower.zig");
|
||||
|
||||
const Node = ast.Node;
|
||||
const TypeId = types.TypeId;
|
||||
const Lowering = lower.Lowering;
|
||||
|
||||
/// Generic substitution + monomorphization-key construction (architecture
|
||||
/// phase A4.1), extracted from `Lowering`. Owns:
|
||||
/// - the type-name mangler (`mangleTypeName` / `mangleParamList`) — the leaf
|
||||
/// fragment every mono key is built from,
|
||||
/// - the generic mono key (`mangleGenericName`) and the comptime-value mono
|
||||
/// fragment (`appendComptimeValueMangle`),
|
||||
/// - type-parameter substitution: `buildTypeBindings` (call-site inference)
|
||||
/// and `inferGenericReturnType` (generic return resolution).
|
||||
///
|
||||
/// A `*Lowering` facade (Principle 5, like `CallResolver` / `ExprTyper`):
|
||||
/// substitution reads live type-binding / scope state and the type resolver
|
||||
/// helpers, so it borrows `*Lowering` rather than re-threading every field.
|
||||
/// `Lowering` keeps a thin `mangleTypeName` wrapper (it has ~30 cross-cutting
|
||||
/// callers — impl-map keys, conversion keys, shape keys — well beyond
|
||||
/// generics); the rest call through `Lowering.genericResolver()`.
|
||||
pub const GenericResolver = struct {
|
||||
l: *Lowering,
|
||||
|
||||
// ── Mono-key construction ───────────────────────────────────────────
|
||||
|
||||
/// Produce the mono-key fragment for `ty`.
///
/// - Builtin ids map to a fixed spelling ("s64", "bool", "Any", …).
/// - Struct / union / tagged-union / enum types use their interned name.
/// - Compound shapes recurse on their component types and are spelled
///   "ptr_T", "mptr_T", "SL_T", "AR_n_T", "opt_T", "vec_n_T", "tu_A_B";
///   closures and functions go through `mangleParamList` with the "cl" /
///   "fn" prefix.
/// - Arbitrary-width integers are spelled "s<bits>" / "u<bits>".
/// - Any other type-info kind falls back to its tag name.
///
/// Composite fragments are allocated with `self.l.alloc`. When that
/// allocation fails, a fixed placeholder ("pointer", "slice", "tuple", …)
/// is returned instead of an error.
pub fn mangleTypeName(self: GenericResolver, ty: TypeId) []const u8 {
    // Builtins have a fixed spelling and need no type-table lookup.
    const builtin_names = [_]struct { id: TypeId, name: []const u8 }{
        .{ .id = .s8, .name = "s8" },
        .{ .id = .s16, .name = "s16" },
        .{ .id = .s32, .name = "s32" },
        .{ .id = .s64, .name = "s64" },
        .{ .id = .u8, .name = "u8" },
        .{ .id = .u16, .name = "u16" },
        .{ .id = .u32, .name = "u32" },
        .{ .id = .u64, .name = "u64" },
        .{ .id = .f32, .name = "f32" },
        .{ .id = .f64, .name = "f64" },
        .{ .id = .bool, .name = "bool" },
        .{ .id = .void, .name = "void" },
        .{ .id = .string, .name = "string" },
        .{ .id = .any, .name = "Any" },
        .{ .id = .usize, .name = "usize" },
        .{ .id = .isize, .name = "isize" },
    };
    for (builtin_names) |entry| {
        if (ty == entry.id) return entry.name;
    }

    const alloc = self.l.alloc;
    const info = self.l.module.types.get(ty);
    switch (info) {
        // Named aggregates: the interned declaration name is the fragment.
        .@"struct" => |s| return self.l.module.types.getString(s.name),
        .@"union" => |u| return self.l.module.types.getString(u.name),
        .tagged_union => |u| return self.l.module.types.getString(u.name),
        .@"enum" => |e| return self.l.module.types.getString(e.name),

        // Single-child wrappers: "<tag>_<child fragment>".
        .pointer => |p| {
            const pointee = self.mangleTypeName(p.pointee);
            return std.fmt.allocPrint(alloc, "ptr_{s}", .{pointee}) catch "pointer";
        },
        .many_pointer => |p| {
            const elem = self.mangleTypeName(p.element);
            return std.fmt.allocPrint(alloc, "mptr_{s}", .{elem}) catch "many_pointer";
        },
        .slice => |s| {
            const elem = self.mangleTypeName(s.element);
            return std.fmt.allocPrint(alloc, "SL_{s}", .{elem}) catch "slice";
        },
        .optional => |o| {
            const child = self.mangleTypeName(o.child);
            return std.fmt.allocPrint(alloc, "opt_{s}", .{child}) catch "optional";
        },

        // Length-carrying shapes: "<tag>_<length>_<element fragment>".
        .array => |a| {
            const elem = self.mangleTypeName(a.element);
            return std.fmt.allocPrint(alloc, "AR_{d}_{s}", .{ a.length, elem }) catch "array";
        },
        .vector => |v| {
            const elem = self.mangleTypeName(v.element);
            return std.fmt.allocPrint(alloc, "vec_{d}_{s}", .{ v.length, elem }) catch "vector";
        },

        // Arbitrary-width integers.
        .signed => |bits| return std.fmt.allocPrint(alloc, "s{d}", .{bits}) catch "signed",
        .unsigned => |bits| return std.fmt.allocPrint(alloc, "u{d}", .{bits}) catch "unsigned",

        // Callable shapes share the parameter-list mangler.
        .closure => |c| return self.mangleParamList("cl", c.params, c.ret),
        .function => |f| return self.mangleParamList("fn", f.params, f.ret),

        // Tuples: "tu" followed by "_<field fragment>" for each field.
        .tuple => |t| {
            var out = std.ArrayList(u8).empty;
            out.appendSlice(alloc, "tu") catch return "tuple";
            for (t.fields) |field_ty| {
                out.append(alloc, '_') catch return "tuple";
                out.appendSlice(alloc, self.mangleTypeName(field_ty)) catch return "tuple";
            }
            return out.items;
        },

        else => return @tagName(info),
    }
}
|
||||
|
||||
/// Build the fragment for a callable shape:
/// `prefix`, then "_<fragment>" for each entry of `params`, then "__" and
/// the fragment of `ret`.
///
/// The text is accumulated with `self.l.alloc`. If any append fails, the
/// bare `prefix` is returned instead.
fn mangleParamList(self: GenericResolver, prefix: []const u8, params: []const TypeId, ret: TypeId) []const u8 {
    const alloc = self.l.alloc;
    var out = std.ArrayList(u8).empty;

    // A single labelled block keeps the "fall back to prefix" decision in one place.
    const complete = build: {
        out.appendSlice(alloc, prefix) catch break :build false;
        for (params) |param_ty| {
            out.append(alloc, '_') catch break :build false;
            out.appendSlice(alloc, self.mangleTypeName(param_ty)) catch break :build false;
        }
        out.appendSlice(alloc, "__") catch break :build false;
        out.appendSlice(alloc, self.mangleTypeName(ret)) catch break :build false;
        break :build true;
    };

    return if (complete) out.items else prefix;
}
|
||||
|
||||
/// Mangle a generic call site into its mono key: `base_name` followed by
/// "__<fragment>" for every entry of `fd.type_params`, in declaration order
/// (e.g. "base__Type1__Type2").
///
/// Each fragment is `mangleTypeName` of the type bound to that parameter in
/// `bindings`; a parameter with no binding is mangled as `.unresolved`.
///
/// The key is built in a growable buffer, so it is never truncated. A
/// fixed-size buffer would cut long keys off, letting two distinct
/// instantiations that share a long prefix collapse onto the same key.
///
/// Returns a slice allocated with `self.l.alloc`. If allocation fails,
/// `base_name` itself is returned and is not a fresh allocation.
pub fn mangleGenericName(
    self: GenericResolver,
    base_name: []const u8,
    fd: *const ast.FnDecl,
    bindings: *const std.StringHashMap(TypeId),
) []const u8 {
    const alloc = self.l.alloc;
    var key = std.ArrayList(u8).empty;

    const complete = build: {
        key.appendSlice(alloc, base_name) catch break :build false;
        for (fd.type_params) |tp| {
            key.appendSlice(alloc, "__") catch break :build false;
            const bound: TypeId = bindings.get(tp.name) orelse .unresolved;
            key.appendSlice(alloc, self.mangleTypeName(bound)) catch break :build false;
        }
        break :build true;
    };

    if (!complete) {
        key.deinit(alloc);
        return base_name;
    }

    // Hand back an exact-length allocation.
    return key.toOwnedSlice(alloc) catch {
        key.deinit(alloc);
        return base_name;
    };
}
|
||||
|
||||
/// Append the mono-key fragment for a comptime parameter VALUE to `buf`, so
/// that call sites passing different comptime values get different monos.
///
/// - int literal: decimal digits, verbatim.
/// - bool literal: "true" / "false".
/// - float literal: decimal text with '.' replaced by '_' and '-' by 'n'.
/// - string literal: 's' followed by the hex Wyhash (seed 0) of the raw
///   literal text, keeping the fragment short for arbitrary content.
/// - any other node kind: a single '?'.
///
/// Appends use `self.l.alloc`. Formatting or allocation failures are
/// best-effort: the function returns without appending that fragment.
pub fn appendComptimeValueMangle(self: GenericResolver, buf: *std.ArrayList(u8), node: *const Node) void {
    const alloc = self.l.alloc;
    switch (node.data) {
        .int_literal => |lit| {
            var tmp: [32]u8 = undefined;
            const written = std.fmt.bufPrint(&tmp, "{d}", .{lit.value}) catch return;
            buf.appendSlice(alloc, written) catch return;
        },
        .bool_literal => |lit| {
            buf.appendSlice(alloc, if (lit.value) "true" else "false") catch return;
        },
        .float_literal => |lit| {
            // `{d}` prints the full decimal expansion, which for large or
            // tiny magnitudes is far longer than any small stack buffer.
            // Format on the heap so such values still contribute a distinct
            // fragment instead of silently contributing nothing.
            const text = std.fmt.allocPrint(alloc, "{d}", .{lit.value}) catch return;
            defer alloc.free(text);
            for (text) |*ch| {
                ch.* = switch (ch.*) {
                    '.' => '_',
                    '-' => 'n',
                    else => ch.*,
                };
            }
            buf.appendSlice(alloc, text) catch return;
        },
        .string_literal => |lit| {
            // Hash the string to a fixed-length tag: short and stable for
            // arbitrary content.
            var hasher = std.hash.Wyhash.init(0);
            hasher.update(lit.raw);
            var tmp: [32]u8 = undefined;
            const written = std.fmt.bufPrint(&tmp, "s{x}", .{hasher.final()}) catch return;
            buf.appendSlice(alloc, written) catch return;
        },
        else => buf.append(alloc, '?') catch return,
    }
}
|
||||
|
||||
// ── Type-parameter substitution ─────────────────────────────────────
|
||||
|
||||
/// Build the `$T → concrete TypeId` bindings for a generic call site.
///
/// For each entry of `fd.type_params`, in order:
///
/// Strategy 1 (explicit): only considered when `args_ast.len ==
/// fd.params.len`. The parameter whose name equals the type parameter's name
/// is its `$T: Type` declaration; if the argument at that position is
/// type-shaped, it is resolved with `resolveTypeArg` and bound.
///
/// Strategy 2 (inferred): otherwise, every value parameter whose type
/// expression mentions the type parameter (`a: $T`, `items: []$T`) is matched
/// against the inferred type of its argument. The first extracted type wins,
/// except that `f64` replaces any non-`f64` and `f32` replaces any non-float.
///
/// Type parameters that neither strategy resolves are left unbound. Failed
/// map insertions are ignored (best-effort). The returned map is created
/// with `self.l.alloc`.
pub fn buildTypeBindings(
    self: GenericResolver,
    fd: *const ast.FnDecl,
    args_ast: []const *const Node,
) std.StringHashMap(TypeId) {
    var bindings = std.StringHashMap(TypeId).init(self.l.alloc);
    const types_passed_explicitly = args_ast.len == fd.params.len;

    for (fd.type_params) |tp| {
        // Strategy 1: the param named like the type param IS the `$T: Type`
        // declaration; the arg at that position is a type expression.
        var found = false;
        if (types_passed_explicitly) {
            for (fd.params, 0..) |param, pi| {
                if (!std.mem.eql(u8, param.name, tp.name)) continue;
                if (pi < args_ast.len and type_bridge.isTypeShapedAstNode(args_ast[pi], &self.l.module.types)) {
                    const ty = self.l.resolveTypeArg(args_ast[pi]);
                    bindings.put(tp.name, ty) catch {};
                    found = true;
                }
                break; // only the first same-named param is considered
            }
        }
        if (found) continue;

        // Strategy 2: infer from value params that USE the type param.
        var inferred_ty: ?TypeId = null;
        // Index into `args_ast` of the argument paired with the current param.
        // Type-param declarations consume an argument slot only when the
        // caller passed types explicitly.
        var next_arg: usize = 0;
        for (fd.params) |param| {
            if (Lowering.isTypeParamDecl(&param, fd.type_params)) {
                if (types_passed_explicitly) next_arg += 1;
                continue;
            }
            const arg_idx = next_arg;
            next_arg += 1;

            if (!self.l.matchTypeParam(param.type_expr, tp.name)) continue;
            if (arg_idx >= args_ast.len) continue;

            const arg_ty = self.l.inferExprType(args_ast[arg_idx]);
            const ety = self.l.extractTypeParam(param.type_expr, arg_ty, tp.name) orelse continue;

            if (inferred_ty) |prev| {
                // Float widening only: f64 beats everything else, f32 beats
                // any non-float; otherwise the earlier match is kept.
                if (ety == .f64 and prev != .f64) {
                    inferred_ty = ety;
                } else if (ety == .f32 and prev != .f64 and prev != .f32) {
                    inferred_ty = ety;
                }
            } else {
                inferred_ty = ety;
            }
        }

        if (inferred_ty) |ty| {
            bindings.put(tp.name, ty) catch {};
        }
    }

    return bindings;
}
|
||||
|
||||
/// Resolve the return type of a call to the generic function `fd`, using the
/// arguments of call `c` to bind its type parameters.
///
/// Returns `.void` when `fd` declares no return type. Otherwise a temporary
/// binding set is built per type parameter:
///
/// 1. If the parameter named like the type parameter has a type-shaped
///    argument at its position, that argument is resolved and bound.
/// 2. Else every parameter whose type expression is exactly the type
///    parameter's name contributes its argument's inferred type. The first
///    one wins, except that `f64` replaces any non-`f64` and `f32` replaces
///    any non-float.
///
/// The binding set is installed as `Lowering.type_bindings` for the duration
/// of the return-type resolution through a `TypeBindingScope`, which restores
/// the previous bindings on exit.
pub fn inferGenericReturnType(self: GenericResolver, fd: *const ast.FnDecl, c: *const ast.Call) TypeId {
    const declared_ret = fd.return_type orelse return .void;

    // Collect ALL bindings from the call args before touching the return type.
    var call_bindings = std.StringHashMap(TypeId).init(self.l.alloc);
    defer call_bindings.deinit();

    for (fd.type_params) |tp| {
        // Strategy 1: direct type-param decl ($T: Type) — param.name == tp.name.
        // Yields a type only when the caller actually supplied a type
        // expression at that position; otherwise value-based inference runs.
        const explicit_ty: ?TypeId = explicit: {
            for (fd.params, 0..) |param, idx| {
                if (!std.mem.eql(u8, param.name, tp.name)) continue;
                if (idx >= c.args.len) break :explicit null;
                if (!type_bridge.isTypeShapedAstNode(c.args[idx], &self.l.module.types)) break :explicit null;
                break :explicit self.l.resolveTypeArg(c.args[idx]);
            }
            break :explicit null;
        };
        if (explicit_ty) |ty| {
            call_bindings.put(tp.name, ty) catch {};
            continue;
        }

        // Strategy 2: inferred from usage (a: $T, b: T) — visit every
        // matching param and keep the widest float, else the first match.
        var widest: ?TypeId = null;
        for (fd.params, 0..) |param, idx| {
            if (param.type_expr.data != .type_expr) continue;
            if (!std.mem.eql(u8, param.type_expr.data.type_expr.name, tp.name)) continue;
            if (idx >= c.args.len) continue;

            const arg_ty = self.l.inferExprType(c.args[idx]);
            const prev = widest orelse {
                widest = arg_ty;
                continue;
            };
            const promote_to_f64 = arg_ty == .f64 and prev != .f64;
            const promote_to_f32 = arg_ty == .f32 and prev != .f64 and prev != .f32;
            if (promote_to_f64 or promote_to_f32) widest = arg_ty;
        }
        if (widest) |ty| {
            call_bindings.put(tp.name, ty) catch {};
        }
    }

    // Resolve with whatever bindings were built. Even an empty set is valid
    // input: a fixed literal return type (e.g. `walk(..$args) -> string`)
    // must still go through `resolveTypeWithBindings` rather than fall back
    // to the historical `.s64` default, which misclassified pack-fn calls
    // with literal return types and routed their values through the wrong
    // Any tag.
    //
    // Declared after `call_bindings`, so its deferred exit runs first: the
    // previous bindings are restored before the temporary map is freed.
    var scope = TypeBindingScope.enter(self.l, call_bindings);
    defer scope.exit();
    return self.l.resolveTypeWithBindings(declared_ret);
}
|
||||
};
|
||||
|
||||
/// Temporarily replaces `Lowering.type_bindings` with another binding set.
///
/// `enter` remembers whatever was installed before and installs the new set;
/// `exit` puts the remembered value back. Intended use is
/// `var scope = TypeBindingScope.enter(l, bindings); defer scope.exit();`,
/// which replaces a hand-written save/restore around a substitution
/// (PLAN-ARCH A4.1 "scoped substitution envs").
const TypeBindingScope = struct {
    /// The lowering whose `type_bindings` field is being overridden.
    l: *Lowering,
    /// Value of `l.type_bindings` captured at `enter` time.
    saved: ?std.StringHashMap(TypeId),

    /// Capture the current bindings of `l`, then install `bindings`.
    fn enter(l: *Lowering, bindings: std.StringHashMap(TypeId)) TypeBindingScope {
        const scope: TypeBindingScope = .{ .l = l, .saved = l.type_bindings };
        l.type_bindings = bindings;
        return scope;
    }

    /// Reinstall the bindings captured by `enter`.
    fn exit(self: *TypeBindingScope) void {
        self.l.type_bindings = self.saved;
    }
};
|
||||
Reference in New Issue
Block a user