feat: multiple return values — bare-paren signatures, named returns, must-set, defaults

A function may return multiple values via a bare-paren return signature:
`-> (A, B)` / `-> (x: A, y: B)` / `-> (A, B, !)` (error always the last slot),
and `-> ()` is `void`. This is DISTINCT from a `Tuple(…)` value — return-position
only (a dedicated `ReturnTypeExpr` AST node resolving to a reused `.tuple`
TypeId); a parameter / field / variable annotation `x: (A, B)` is rejected. A
single-value `-> (T, !)` stays a plain failable (= `-> T !`).

Returns use the bare comma form `return a, b` / `return x = a, y = b` (no `.( … )`
literal). Consume by destructuring (`a, b := f()`) or single-bind + field access
(`c := f(); c.sum`); a failable bound value holds only the value slots (the error
stays on the `!` channel).

Named return slots are in-scope assignable locals; with no explicit `return` the
implicit return is synthesized from them. Path-sensitive definite-assignment
enforces the must-set rule, and a slot may carry a default that exempts it.
Validation rejects arity mismatches, out-of-slot-order named elements, a
slot/parameter name collision, a comma list from a single-value function, and a
multi-return signature used as a value type.

Examples 0202-0213; readme + specs updated. issues/0197 files a pre-existing
annotated-assignment type-check gap (`x: i32 = "hi"` segfaults) surfaced by the
adversarial review.
This commit is contained in:
agra
2026-06-27 12:31:23 +03:00
parent c94f878e7e
commit 76689a1ea6
65 changed files with 1236 additions and 48 deletions

View File

@@ -163,6 +163,16 @@ pub fn lowerValueBody(self: *Lowering, body: *const Node, ret_ty: TypeId) void {
return;
}
}
// A NAMED multi-return function (`-> (x: A, y: B)`) with no explicit
// `return`: synthesize the implicit return from the named slot LOCALS (which
// the body assigned). The must-set rule is checked here — an unset, undefaulted
// slot is a loud error, not a silent fill. This takes precedence over the
// "produces no value" diagnostic below (the body legitimately produces its
// result by assigning the slots, not via a trailing expression).
if (self.named_return_names) |names| {
self.synthesizeNamedReturn(body, ret_ty, names);
return;
}
// A PURE-failable function (`-> !` / `-> !Named`, whose entire return IS
// the error channel) carries no success value — a void body is a normal
// success exit, not a missing value. `ensureTerminator` emits the
@@ -196,6 +206,150 @@ pub fn lowerValueBody(self: *Lowering, body: *const Node, ret_ty: TypeId) void {
self.ensureTerminator(ret_ty);
}
/// Must-set analysis for a named return slot: does every path through `node`
/// that can fall out of its end write the bare identifier `name`?
///
/// The walk is PATH-SENSITIVE and conservative. Per node kind:
///   - assignment: satisfied when the target is the bare identifier `name`.
///   - multi-assignment: satisfied when any target is the bare identifier `name`.
///   - `return` / `raise`: satisfied vacuously — that path leaves the function,
///     so it never reaches the implicit return that reads the slot.
///   - block: satisfied as soon as one of its statements is satisfied
///     (statements run in sequence).
///   - `if`: requires an `else`, and both branches satisfied.
///   - `push { … }`: decided by its body.
///   - `match`: every arm satisfied AND an `else` arm (an arm with no pattern)
///     present.
///   - every other node kind: not satisfied. The function does not recurse into
///     anything else, so constructs that may run zero times (or later) never
///     count as an assignment.
fn definitelyAssigns(node: *const Node, name: []const u8) bool {
    switch (node.data) {
        .assignment => |asg| {
            if (asg.target.data != .identifier) return false;
            return std.mem.eql(u8, asg.target.data.identifier.name, name);
        },
        .multi_assign => |multi| {
            for (multi.targets) |target| {
                if (target.data != .identifier) continue;
                if (std.mem.eql(u8, target.data.identifier.name, name)) return true;
            }
            return false;
        },
        // Function-level divergence: the implicit return is unreachable here.
        .return_stmt, .raise_stmt => return true,
        .block => |stmts_block| {
            for (stmts_block.stmts) |stmt| {
                if (definitelyAssigns(stmt, name)) return true;
            }
            return false;
        },
        .if_expr => |branch| {
            // No `else` means the fall-through path skips the assignment.
            const otherwise = branch.else_branch orelse return false;
            if (!definitelyAssigns(branch.then_branch, name)) return false;
            return definitelyAssigns(otherwise, name);
        },
        .push_stmt => |pushed| return definitelyAssigns(pushed.body, name),
        .match_expr => |matched| {
            var saw_else = false;
            for (matched.arms) |arm| {
                if (arm.pattern == null) saw_else = true;
                if (!definitelyAssigns(arm.body, name)) return false;
            }
            // Without a catch-all arm some value may match nothing.
            return saw_else;
        },
        else => return false,
    }
}
/// Bind the value slots of a NAMED multi-return signature (`-> (x: A, y: B)`)
/// as assignable locals in `scope`, so the body's `x = …` writes to them.
///
/// Each named value slot gets its own alloca, seeded at bind time:
///   - with its declared default when one exists (lowered with the slot type
///     as the target type, then coerced to the slot type), or
///   - with `buildDefaultValue` of the slot type otherwise, so a slot that a
///     path misses still holds a deterministic value rather than garbage.
/// Whether a slot is actually assigned is enforced separately, by the
/// path-sensitive `definitelyAssigns` check run from `synthesizeNamedReturn`.
///
/// Diagnostics emitted here:
///   - a slot whose name equals a parameter name (reported once per slot);
///   - a default whose type has no coercion to the slot's declared type.
///
/// On the binding path this sets `self.named_return_names` and
/// `self.named_return_defaults`; the caller restores them. It returns without
/// touching any state when the function has no return type, the return type is
/// not a bare-paren return signature, the signature is positional (no names —
/// use an explicit `return`), or `ret_ty` is builtin / not a tuple.
pub fn bindNamedReturnSlots(self: *Lowering, fd: *const ast.FnDecl, ret_ty: TypeId, scope: *Scope) void {
    const rt = fd.return_type orelse return;
    if (rt.data != .return_type_expr) return;
    const names = rt.data.return_type_expr.field_names orelse return; // positional → no locals
    const defaults = rt.data.return_type_expr.field_defaults;
    if (ret_ty.isBuiltin()) return;
    const ti = self.module.types.get(ret_ty);
    if (ti != .tuple) return;
    const fields = ti.tuple.fields;
    // With an error channel the last field is the error slot, not a value slot.
    const value_count = if (self.errorChannelOf(ret_ty) != null) fields.len - 1 else fields.len;
    const bound = @min(value_count, names.len);
    for (names[0..bound], 0..) |nm, i| {
        // Unnamed entries and the `!` marker get no local.
        if (nm.len == 0 or std.mem.eql(u8, nm, "!")) continue;
        // A named-return slot that shadows a PARAMETER of the same name would
        // silently hide the parameter behind a fresh local — reject the
        // collision. One report per slot is enough, so stop at the first match.
        for (fd.params) |p| {
            if (std.mem.eql(u8, p.name, nm)) {
                if (self.diagnostics) |d| {
                    d.addFmt(.err, rt.span, "named return '{s}' collides with a parameter of the same name — rename one", .{nm});
                }
                break;
            }
        }
        const fty = fields[i];
        const slot = self.builder.alloca(fty);
        // Seed the slot. A slot with a DEFAULT gets it (type-checked, lowered,
        // coerced). Otherwise it is default-initialized for ANY type, so a slot
        // the body never writes still reads as a deterministic value.
        const dflt: ?*const Node = if (defaults) |ds| (if (i < ds.len) ds[i] else null) else null;
        if (dflt) |dn| {
            // Lower the default with the slot type as the expected type, then
            // put the previous target type back.
            const saved_target = self.target_type;
            self.target_type = fty;
            const dval = self.lowerExpr(dn);
            self.target_type = saved_target;
            const dval_ty = self.builder.getRefType(dval);
            // Reject a default whose type has NO coercion to the slot type (e.g.
            // `sum: i32 = "hi"`) — a `.none` plan would pass the value through
            // unchanged and bit-mangle / segfault. (The same hole exists for any
            // annotated assignment `x: i32 = "hi"` — a broader pre-existing gap.)
            if (dval_ty != .unresolved and self.coercionResolver().classify(dval_ty, fty) == .none and dval_ty != fty) {
                if (self.diagnostics) |d| {
                    d.addFmt(.err, dn.span, "named return '{s}' has a default of type '{s}' that does not match its declared type '{s}'", .{ nm, self.formatTypeName(dval_ty), self.formatTypeName(fty) });
                }
                // Keep the slot well-formed after the error: store a value of
                // the slot's own type instead of the mismatched default.
                self.builder.store(slot, self.buildDefaultValue(fty));
            } else {
                self.builder.store(slot, self.coerceToType(dval, dval_ty, fty));
            }
        } else {
            self.builder.store(slot, self.buildDefaultValue(fty));
        }
        scope.put(nm, .{ .ref = slot, .ty = fty, .is_alloca = true });
    }
    self.named_return_names = names;
    self.named_return_defaults = defaults;
}
/// Emit the implicit return of a NAMED multi-return body.
///
/// For each named value slot this first enforces the must-set rule — a slot
/// that has no default and is not definitely assigned by `body` (see
/// `definitelyAssigns`) is reported as an error — and then builds the
/// equivalent of `return n0 = n0, n1 = n1` over the slot locals and hands it to
/// `lowerReturn`, so the synthesized return goes through the same path as an
/// explicit one.
///
/// - `body`: the function body; also supplies the span for diagnostics and for
///   the synthesized nodes.
/// - `ret_ty`: the function's return type. When it is builtin or not a tuple,
///   nothing is synthesized and `ensureTerminator` finishes the body instead.
/// - `names`: the slot names, indexed like the tuple's fields.
///
/// A must-set error does not stop the synthesis: the return is still lowered.
/// If allocating a synthesized node fails, the function gives up and returns
/// without emitting the return.
pub fn synthesizeNamedReturn(self: *Lowering, body: *const Node, ret_ty: TypeId, names: []const []const u8) void {
    // Builtin ids are filtered out before every other `types.get` lookup in the
    // multi-return helpers; apply the same guard here and fall back to the
    // plain terminator, exactly as for any other non-tuple return type.
    if (ret_ty.isBuiltin()) {
        self.ensureTerminator(ret_ty);
        return;
    }
    const ti = self.module.types.get(ret_ty);
    if (ti != .tuple) {
        self.ensureTerminator(ret_ty);
        return;
    }
    const fields = ti.tuple.fields;
    // With an error channel the last field is the error slot, not a value slot.
    const value_count = if (self.errorChannelOf(ret_ty) != null) fields.len - 1 else fields.len;
    var elems = std.ArrayList(ast.TupleElement).empty;
    defer elems.deinit(self.alloc);
    var i: usize = 0;
    while (i < value_count and i < names.len) : (i += 1) {
        const nm = names[i];
        // Unnamed entries and the `!` marker have no slot local to read.
        if (nm.len == 0 or std.mem.eql(u8, nm, "!")) continue;
        // Must-set: a slot not DEFINITELY assigned (on every non-diverging path)
        // and with no default is an error. A defaulted slot is exempt — its
        // default seeds the local in `bindNamedReturnSlots`.
        const has_default = if (self.named_return_defaults) |ds| (i < ds.len and ds[i] != null) else false;
        if (!has_default and !definitelyAssigns(body, nm)) {
            if (self.diagnostics) |d| {
                d.addFmt(.err, body.span, "named return '{s}' may be unset (not assigned on every path) and has no default — assign it on every path, give it a default, or end with an explicit `return`", .{nm});
            }
        }
        // Element `nm = nm`: the value is an identifier node naming the slot.
        const id_node = self.alloc.create(Node) catch return;
        id_node.* = .{ .span = body.span, .data = .{ .identifier = .{ .name = nm } } };
        elems.append(self.alloc, .{ .name = nm, .value = id_node }) catch return;
    }
    const tl = self.alloc.create(Node) catch return;
    tl.* = .{ .span = body.span, .data = .{ .tuple_literal = .{ .elements = elems.toOwnedSlice(self.alloc) catch return } } };
    const rs = ast.ReturnStmt{ .value = tl };
    self.lowerReturn(&rs);
}
/// Try to lower a node as an expression, returning its value.
/// Statement nodes are lowered as statements (returning null).
pub fn tryLowerAsExpr(self: *Lowering, node: *const Node) ?Ref {
@@ -295,6 +449,7 @@ pub fn lowerVarDecl(self: *Lowering, vd: *const ast.VarDecl) void {
}
if (vd.type_annotation) |ta| {
// Explicit type annotation — resolve type first, then lower value
_ = self.rejectMultiReturnValueType(ta, "variable");
const ty = self.resolveType(ta);
const slot = self.builder.alloca(ty);
if (vd.value) |val| {
@@ -535,12 +690,80 @@ pub fn lowerConstDecl(self: *Lowering, cd: *const ast.ConstDecl) void {
}
}
/// Check the value of an explicit `return` against the function's return type
/// and report mismatches. Diagnostics only — the value is never rewritten, and
/// nothing is checked when `self.diagnostics` is null.
///
/// Function does NOT return a tuple:
///   - a comma list of more than one element is rejected (the extra values
///     would otherwise be dropped), unless it contains a spread.
///
/// Function returns a tuple with at least two VALUE slots (a trailing error
/// slot is not counted):
///   - comma list: its length must equal the value-slot count or the full
///     field count (error slot included); named elements must carry the name
///     of the slot at their position — names must be in slot order. A list
///     containing a spread (`..xs`) is not checked at all.
///   - single expression: accepted only when its inferred type is itself a
///     tuple (e.g. forwarding another multi-return's result); an unresolved
///     type is let through, any other type is rejected.
///
/// A tuple return type with fewer than two value slots is not validated here.
pub fn validateMultiReturn(self: *Lowering, value_node: *const Node, ret_ty: TypeId) void {
    const diags = self.diagnostics orelse return;
    const is_list = value_node.data == .tuple_literal;
    const returns_tuple = !ret_ty.isBuiltin() and self.module.types.get(ret_ty) == .tuple;

    if (!returns_tuple) {
        // Single-value function: only a multi-element list is a problem.
        if (!is_list) return;
        const given = value_node.data.tuple_literal.elements;
        if (given.len <= 1) return;
        for (given) |elem| {
            if (elem.value.data == .spread_expr) return; // can't count a spread
        }
        diags.addFmt(.err, value_node.span, "this function returns a single value, but a list of {d} was given", .{given.len});
        return;
    }

    const info = self.module.types.get(ret_ty);
    const slots = info.tuple.fields;
    const value_count = if (self.errorChannelOf(ret_ty) == null) slots.len else slots.len - 1;
    // One value (with or without an error slot) is not a multi-return.
    if (value_count < 2) return;

    if (!is_list) {
        // A lone expression must already be the whole multi-value tuple; a
        // scalar here is the `return 5` for `-> (i64, i64)` case.
        const vty = self.inferExprType(value_node);
        if (vty == .unresolved) return;
        const forwards_tuple = !vty.isBuiltin() and self.module.types.get(vty) == .tuple;
        if (!forwards_tuple) {
            diags.addFmt(.err, value_node.span, "this function returns {d} values — return them as `return a, b`, not a single value", .{value_count});
        }
        return;
    }

    const given = value_node.data.tuple_literal.elements;
    // A spread can expand to any number of values, so arity is unknowable.
    for (given) |elem| {
        if (elem.value.data == .spread_expr) return;
    }

    // Accept the value-only list and the full tuple including the error slot.
    const arity_ok = given.len == value_count or given.len == slots.len;
    if (!arity_ok) {
        const verb: []const u8 = if (given.len == 1) "is" else "are";
        diags.addFmt(.err, value_node.span, "this function returns {d} values, but {d} {s} given", .{ value_count, given.len, verb });
        return;
    }

    // Named elements are matched against the slot names by position.
    const slot_names = info.tuple.names orelse return;
    for (given, 0..) |elem, pos| {
        if (pos >= slot_names.len) continue;
        const given_name = elem.name orelse continue;
        const slot_name = self.module.types.getString(slot_names[pos]);
        if (slot_name.len == 0) continue; // unnamed slot: nothing to compare
        if (!std.mem.eql(u8, given_name, slot_name)) {
            diags.addFmt(.err, value_node.span, "named return element '{s}' does not match the slot '{s}' at position {d} — name the elements in slot order", .{ given_name, slot_name, pos });
        }
    }
}
pub fn lowerReturn(self: *Lowering, rs: *const ast.ReturnStmt) void {
if (rs.value) |val| {
if (val.data == .identifier and self.isPackName(val.data.identifier.name)) {
_ = self.diagPackAsValue(val.data.identifier.name, val.span, .return_value);
return;
}
// Validate a multi-value return against the function's slots: arity, a
// bare value where multiple are required, and named-element/slot
// agreement. Catches silent garbage (`return 5` for `-> (i64, i64)`) and
// silently-wrong named returns (`return b = …, a = …` ignoring names).
if (self.builder.func) |fid| {
self.validateMultiReturn(val, self.module.functions.items[@intFromEnum(fid)].ret);
}
}
// Set target_type to function return type so null_literal etc. get the right type.
// When inlining a comptime body, the *inlined* fn's declared return type wins