fix: validate protocol impl method signatures vs the protocol declaration (issue 0178)

The issue-0176 conformance gate was name-only, so an impl P for T with a
mismatched return/param type (or arity) built a wrong-ABI thunk that
silently miscompiled (exit 0, wrong value). firstUnimplementedMethod now
validates arity (after self), each param type, and the return type
against the protocol declaration, substituting protocol Self->concrete
via resolveProtoTypeSubSelf (recurses through pointer/many-pointer/
optional/slice/array so []Self<->[]T match; conservative .unresolved for
Self-in-generic-arg). Comparison is by structural formatTypeName
(alias/module/spelling independent); typesClearlyDiffer skips when either
side has an unresolved leaf at any depth, biasing against false-positives.

Regressions: diagnostics/1201 (negative), protocols/0420 (positive,
[]Self param). Verified by 3+3 adversarial reviews (a mid-fix []Self
false-positive was found and closed); suite 792/0.
This commit is contained in:
agra
2026-06-23 08:48:31 +03:00
parent 8b613af96b
commit c41f51aed3
10 changed files with 293 additions and 2 deletions

View File

@@ -448,7 +448,21 @@ const NonConformance = struct {
/// monomorphization"), leaving `resolveFuncByName` null → the thunk's
/// `else => unreachable` arm fires at the first dispatch.
signature_mismatch,
/// A body exists with the right arity (after self), but a PARAMETER or
/// the RETURN type differs from the protocol method's declared type
/// (e.g. impl returns `bool` where the protocol method returns `i64`).
/// The thunk would call the impl with the wrong ABI and silently
/// miscompile (issue 0178). `detail` names the mismatching position.
type_mismatch,
/// A body exists but its arity (after self) differs from the protocol
/// method's parameter count. `detail` carries the "expected N, got M"
/// phrasing.
arity_mismatch,
},
/// Populated for `.type_mismatch` / `.arity_mismatch`: a human-readable
/// description of WHICH part mismatched (e.g. "return type: protocol
/// declares 'i64', impl declares 'bool'"). Empty for the other kinds.
detail: []const u8 = "",
};
/// First protocol method of `proto_name` for which `concrete_type_name` does
@@ -472,8 +486,12 @@ const NonConformance = struct {
/// type params are bound by the instance, not introduced by the method, and
/// `monomorphizeFunction` always registers it. Conformance is IMPL-DRIVEN, so a
/// type satisfying the method only via a free / `ufcs` function does NOT conform.
fn firstUnimplementedMethod(self: *Lowering, proto_name: []const u8, concrete_type_name: []const u8) ?NonConformance {
fn firstUnimplementedMethod(self: *Lowering, proto_name: []const u8, concrete_type_name: []const u8, concrete_ty: TypeId) ?NonConformance {
const pd = self.program_index.protocol_decl_map.get(proto_name) orelse return null;
// AST of the protocol (carries each method's raw param/return type nodes +
// the protocol's defining module). Absent only for synthesized protocols —
// then we fall back to NAME/arity checks without per-type validation.
const pd_ast = self.program_index.protocol_ast_map.get(proto_name);
for (pd.methods) |m| {
const qualified = std.fmt.allocPrint(self.alloc, "{s}.{s}", .{ concrete_type_name, m.name }) catch
return .{ .method = m.name, .kind = .missing };
@@ -482,6 +500,14 @@ fn firstUnimplementedMethod(self: *Lowering, proto_name: []const u8, concrete_ty
// thunk's `lazyLowerFunction(qualified)` would actually register it.
// A method with its own type params bails there → unreachable thunk.
if (fd.type_params.len > 0) return .{ .method = m.name, .kind = .signature_mismatch };
// Validate the impl method's SIGNATURE against the protocol method
// (issue 0178): a right-NAME but wrong-TYPE impl otherwise builds a
// wrong-ABI thunk and silently miscompiles.
if (pd_ast) |pda| {
if (methodAst(pda, m.name)) |mast| {
if (signatureMismatch(self, mast, m, fd, concrete_ty, pda.source_file)) |nc| return nc;
}
}
continue;
}
if (self.genericInstanceMethod(concrete_type_name, m.name) != null) continue;
@@ -490,6 +516,185 @@ fn firstUnimplementedMethod(self: *Lowering, proto_name: []const u8, concrete_ty
return null;
}
/// The protocol method declaration node named `name`, or null.
fn methodAst(pd: *const ast.ProtocolDecl, name: []const u8) ?ast.ProtocolMethodDecl {
for (pd.methods) |m| {
if (std.mem.eql(u8, m.name, name)) return m;
}
return null;
}
/// True when `node` (a type-expr AST) names `Self` ANYWHERE — at the leaf or
/// nested inside any compound (`*Self`, `[]Self`, `[2]*Self`, `?[]Self`,
/// `Box(Self)`, …). Used to decide whether a node needs structural
/// `Self`-substitution or can be resolved wholesale.
fn containsSelf(node: *const Node) bool {
return switch (node.data) {
.type_expr => |te| std.mem.eql(u8, te.name, "Self"),
.pointer_type_expr => |pt| containsSelf(pt.pointee_type),
.many_pointer_type_expr => |mp| containsSelf(mp.element_type),
.optional_type_expr => |opt| containsSelf(opt.inner_type),
.slice_type_expr => |st| containsSelf(st.element_type),
.array_type_expr => |at| containsSelf(at.element_type),
.parameterized_type_expr => |pt| blk: {
for (pt.args) |arg| {
if (containsSelf(arg)) break :blk true;
}
break :blk false;
},
else => false,
};
}
/// Resolve a protocol-method type node to a TypeId, substituting any `Self`
/// reference (at the leaf, or nested under ANY compound — pointer / many-pointer
/// / optional / slice / array, or a generic type-arg) with `concrete_ty` — the
/// type the protocol is being erased FOR. `proto_src` is the protocol's defining
/// module, so a type bare-visible only there still resolves (mirrors how
/// `registerProtocolDecl` builds `param_types`).
///
/// Recurses structurally so a nested `Self` (e.g. `[]Self`, `[2]*Self`,
/// `?[]Self`) is replaced at EVERY level before the type is rebuilt — without
/// this, `[]Self` would resolve to a real `.slice` named `[]Self` ≠ `[]T` and
/// the conformance gate would FALSELY reject a correct `[]T` impl (the gap from
/// the 0178 adversarial review).
fn resolveProtoTypeSubSelf(self: *Lowering, node: *const Node, concrete_ty: TypeId, proto_src: ?[]const u8) TypeId {
switch (node.data) {
.type_expr => |te| {
if (std.mem.eql(u8, te.name, "Self")) return concrete_ty;
return self.resolveTypeInSource(proto_src, node);
},
.pointer_type_expr => |pt| return self.module.types.ptrTo(resolveProtoTypeSubSelf(self, pt.pointee_type, concrete_ty, proto_src)),
.many_pointer_type_expr => |mp| return self.module.types.manyPtrTo(resolveProtoTypeSubSelf(self, mp.element_type, concrete_ty, proto_src)),
.optional_type_expr => |opt| return self.module.types.optionalOf(resolveProtoTypeSubSelf(self, opt.inner_type, concrete_ty, proto_src)),
.slice_type_expr => |st| return self.module.types.sliceOf(resolveProtoTypeSubSelf(self, st.element_type, concrete_ty, proto_src)),
.array_type_expr => |at| {
const elem = resolveProtoTypeSubSelf(self, at.element_type, concrete_ty, proto_src);
// Fold the dimension WITHOUT emitting a diagnostic on failure (unlike
// `resolveArrayLen`, which is a hard error): a non-foldable dim here
// just means we can't build the array type for the gate, so yield the
// `.unresolved` sentinel — `typesClearlyDiffer` then treats it as
// not-clearly-different (conservative, never a false positive).
const dim = program_index_mod.foldDimU32(at.length, self, 0);
if (dim != .ok) return .unresolved;
return self.module.types.arrayOf(elem, dim.ok);
},
else => {
// Generic type-arg nodes (`Box(Self)`) and any other compound: if it
// mentions `Self` we can't rebuild the instance from substituted
// TypeIds without re-running the template machinery, so resolve it
// ONLY when it does NOT contain `Self` (the common case — a fully
// concrete `Box(i64)` protocol type). When it DOES contain `Self`,
// yield `.unresolved` so the gate stays conservative (a correct
// `Box(T)` impl is NOT falsely flagged). A genuine generic-arg
// mismatch through `Self` is then simply not caught here — acceptable:
// the gate's job is to never produce a FALSE positive; the 0178
// miscompiles it guards are leaf / pointer / slice / array shaped.
if (containsSelf(node)) return .unresolved;
return self.resolveTypeInSource(proto_src, node);
},
}
}
/// Compare the impl method's signature (params after the `self` receiver, plus
/// the return type) against the protocol method's declaration. Returns a
/// `NonConformance` on a CLEAR mismatch, else null.
///
/// Self-substitution: the protocol writes `*Self` / `Self`; the REQUIRED impl
/// form is `*T` / `T`. `resolveProtoTypeSubSelf` substitutes `Self → concrete`
/// before resolving, so a correct impl matches and is NOT flagged.
///
/// Comparison is by STRUCTURAL NAME (`formatTypeName`), which is independent of
/// the resolving module's visibility context — so the same type resolved in the
/// protocol's module (protocol side) vs the erasure site (impl side) compares
/// equal. We only flag when BOTH sides resolve to concrete, fully-known types
/// AND their structural names differ — conservative against false positives on
/// any shape we can't confidently resolve.
fn signatureMismatch(self: *Lowering, mast: ast.ProtocolMethodDecl, m: ProtocolMethodInfo, fd: *const ast.FnDecl, concrete_ty: TypeId, proto_src: ?[]const u8) ?NonConformance {
// The concrete VALUE type (strip a single pointer): `*Self` substitutes to
// `*T`, so `Self` itself must substitute to the value `T`.
const value_ty: TypeId = blk: {
if (!concrete_ty.isBuiltin()) {
const info = self.module.types.get(concrete_ty);
if (info == .pointer) break :blk info.pointer.pointee;
}
break :blk concrete_ty;
};
// Impl params after the `self` receiver (params[0]).
if (fd.params.len == 0) return null; // no receiver at all — leave other gates to handle.
const impl_extra = fd.params[1..];
// Arity (after self) must equal the protocol method's param count.
if (impl_extra.len != m.param_types.len) {
const detail = std.fmt.allocPrint(self.alloc, "expects {d} parameter{s} (after self), but the impl declares {d}", .{
m.param_types.len, if (m.param_types.len == 1) "" else "s", impl_extra.len,
}) catch "";
return .{ .method = m.name, .kind = .arity_mismatch, .detail = detail };
}
// Per-parameter type check. Protocol types resolve in the protocol's own
// module (`proto_src`) with `Self → value_ty`; impl types resolve in the
// current (erasure-site) context. Comparison is structural-name based, so
// the same type resolved either way compares equal.
for (mast.params, impl_extra) |proto_pnode, impl_param| {
const proto_pty = resolveProtoTypeSubSelf(self, proto_pnode, value_ty, proto_src);
const impl_pty = self.resolveType(impl_param.type_expr);
if (typesClearlyDiffer(self, proto_pty, impl_pty)) {
const detail = std.fmt.allocPrint(self.alloc, "parameter '{s}': protocol declares '{s}', impl declares '{s}'", .{
impl_param.name, self.formatTypeName(proto_pty), self.formatTypeName(impl_pty),
}) catch "";
return .{ .method = m.name, .kind = .type_mismatch, .detail = detail };
}
}
// Return type check.
const proto_ret: TypeId = if (mast.return_type) |rt| resolveProtoTypeSubSelf(self, rt, value_ty, proto_src) else .void;
const impl_ret: TypeId = if (fd.return_type) |rt| self.resolveType(rt) else .void;
if (typesClearlyDiffer(self, proto_ret, impl_ret)) {
const detail = std.fmt.allocPrint(self.alloc, "return type: protocol declares '{s}', impl declares '{s}'", .{
self.formatTypeName(proto_ret), self.formatTypeName(impl_ret),
}) catch "";
return .{ .method = m.name, .kind = .type_mismatch, .detail = detail };
}
return null;
}
/// True when `ty` is `.unresolved` OR wraps an `.unresolved` leaf at ANY depth
/// (`[]unresolved`, `?*unresolved`, `[2]unresolved`, …). The outer TypeId of a
/// compound built over an unresolved element is itself a real `.slice` /
/// `.array` / `.pointer`, so the bare `== .unresolved` check at the top level
/// would MISS it — this recursion is the belt-and-suspenders that keeps a
/// `Self`-derived nesting we couldn't fully substitute from ever producing a
/// false positive in the conformance gate.
fn typeContainsUnresolved(self: *Lowering, ty: TypeId) bool {
if (ty == .unresolved) return true;
if (ty.isBuiltin()) return false;
return switch (self.module.types.get(ty)) {
.pointer => |p| typeContainsUnresolved(self, p.pointee),
.many_pointer => |p| typeContainsUnresolved(self, p.element),
.slice => |s| typeContainsUnresolved(self, s.element),
.array => |a| typeContainsUnresolved(self, a.element),
.optional => |o| typeContainsUnresolved(self, o.child),
else => false,
};
}
/// True when both `a` and `b` resolve to concrete, fully-known types whose
/// STRUCTURAL names differ. Conservative: if EITHER side contains an unresolved
/// leaf at any depth (a type — or a nesting — we couldn't resolve / fully
/// substitute), returns false, so we never flag a shape outside our resolver's
/// reach. A REAL mismatch between two fully-resolved compounds (e.g. `[]i64` vs
/// `[]i32`) still differs in structural name and IS caught.
fn typesClearlyDiffer(self: *Lowering, a: TypeId, b: TypeId) bool {
if (typeContainsUnresolved(self, a) or typeContainsUnresolved(self, b)) return false;
if (a == b) return false;
const an = self.formatTypeName(a);
const bn = self.formatTypeName(b);
return !std.mem.eql(u8, an, bn);
}
/// Build a protocol value from a concrete pointer.
/// For inline protocols: struct_init { ctx, thunk1, thunk2, ... }
/// For vtable protocols: struct_init { ctx, vtable_ptr } where vtable is stack-allocated
@@ -504,13 +709,15 @@ pub fn buildProtocolValue(self: *Lowering, concrete_ptr: Ref, proto_name: []cons
// happily synthesize a vtable whose thunks fall through to `unreachable`
// (no resolvable concrete method) — a SILENT SIGABRT at the first dispatch
// with no diagnostic (issue 0176). Surface it as a hard error instead.
if (firstUnimplementedMethod(self, proto_name, concrete_type_name)) |nc| {
if (firstUnimplementedMethod(self, proto_name, concrete_type_name, concrete_ty)) |nc| {
if (self.diagnostics) |d| {
const cs = self.builder.current_span;
const span = ast.Span{ .start = cs.start, .end = cs.end };
switch (nc.kind) {
.missing => d.addFmt(.err, span, "'{s}' does not implement protocol '{s}': no `impl {s} for {s}` provides method '{s}' (protocol erasure is impl-driven — a plain or `ufcs` free function with a matching receiver does not satisfy a protocol)", .{ concrete_type_name, proto_name, proto_name, concrete_type_name, nc.method }),
.signature_mismatch => d.addFmt(.err, span, "'{s}' does not implement protocol '{s}': method '{s}' has a mismatched signature — a protocol-method impl must not introduce its own type parameters (e.g. `$T: Type`); it must match the protocol's signature exactly", .{ concrete_type_name, proto_name, nc.method }),
.type_mismatch => d.addFmt(.err, span, "'{s}' does not implement protocol '{s}': method '{s}' has a mismatched signature — {s} (a protocol-method impl must match the protocol's declared types exactly, with `Self` written as `{s}`)", .{ concrete_type_name, proto_name, nc.method, nc.detail, concrete_type_name }),
.arity_mismatch => d.addFmt(.err, span, "'{s}' does not implement protocol '{s}': method '{s}' {s}", .{ concrete_type_name, proto_name, nc.method, nc.detail }),
}
} else {
// Gap 2 — no diagnostics channel (e.g. a comptime sub-lowering that