feat(lang): block value requires no trailing ; (Rust-style)

A block's value is now its last statement ONLY when that statement is a
trailing expression with no `;`. A trailing `;` discards the value,
leaving the block void. This makes value-vs-statement explicit and lets
the compiler reject a block that was supposed to produce a value but discards it.

Compiler:
- Parser records `Block.produces_value` (last stmt is a no-`;` trailing
  expression) + `Block.discarded_semi` (the `;` that discarded a value),
  via `expectSemicolonAfter`. A trailing expression before `}` may now
  omit its `;` (previously a parse error). Match-arm and else-arm bodies
  are built value-producing regardless of the arm `;` (arms are exempt —
  the `;` is an arm terminator).
- Lowering: `lowerBlockValue` / the block-expr path / `inferExprType`
  respect `produces_value`. A value-position block that discards its value
  is a hard error (`lowerValueBody` for function bodies; the value-context
  `.block` path for if/else branches, `catch` bodies, value bindings,
  match arms). Pure-failable `-> !` bodies (value rides the error channel)
  and a value-if whose branches are void are handled without false errors.
- `defer`/`onfail` cleanup bodies lower as statements (void), so a
  trailing `;` there is fine.

Migration (behavior-preserving — output unchanged):
- stdlib + ~210 examples: dropped the trailing `;` on value-position last
  expressions. `format` now ends with an explicit `#insert "return
  result;"` (it relied on `#insert`-as-block-value, which `;` discards).
- Two `main :: () -> s32` examples that relied on the old silent
  default-return got an explicit trailing `0`.
- Rejection snapshots 0412 / 1013 regenerated (their quoted source lines
  lost a `;`); the diagnostics themselves are unchanged.

Docs/tests: specs.md "Block values" section; examples 0040 (rules) + 0041
(rejection); 3 parser unit tests. Filed issue 0066 (a pre-existing
match-arm negated-literal phi-width quirk that this change surfaced but did not cause).

Gates: zig build, zig build test, run_examples.sh -> 343 passed,
cross_compile.sh -> 7 passed (also refreshed its stale example names).
This commit is contained in:
agra
2026-06-02 09:23:50 +03:00
parent 634cf9bc7f
commit bdd0e96d78
265 changed files with 1070 additions and 761 deletions

View File

@@ -152,6 +152,17 @@ pub const Param = struct {
/// Payload of a `.block` AST node: the block's statements plus two flags
/// describing whether the block yields a value and, if not, which `;`
/// discarded it.
pub const Block = struct {
/// Statements of the block, in the order they were parsed.
stmts: []const *Node,
/// True when the block's last statement is its value — i.e. a trailing
/// expression with NO `;`. A trailing `;` (or a non-expression last
/// statement) discards the value and leaves the block void. Match-arm and
/// else-arm bodies are built with this forced true (the arm `;` is an arm
/// terminator, not a value-discard).
/// Defaults to false, so a block constructed without setting it is void.
produces_value: bool = false,
/// When `produces_value` is false *because* the last statement was an
/// expression terminated by `;` (as opposed to a decl/return/empty block),
/// the span of that discarding `;`. Lets a value-position diagnostic point
/// precisely at the semicolon to drop. Null otherwise.
discarded_semi: ?Span = null,
};
pub const IntLiteral = struct {

View File

@@ -808,7 +808,7 @@ test "E1.4c noreturn typing: divergence shapes + if-else unification + block pro
const five = mk.node(alloc, .{ .int_literal = .{ .value = 5 } });
defer alloc.destroy(five);
const blk_stmts: []const *Node = &.{ five, ret };
const blk = mk.node(alloc, .{ .block = .{ .stmts = blk_stmts } });
const blk = mk.node(alloc, .{ .block = .{ .stmts = blk_stmts, .produces_value = true } });
defer alloc.destroy(blk);
try std.testing.expectEqual(TypeId.noreturn, lowering.inferExprType(blk));

View File

@@ -1676,21 +1676,7 @@ pub const Lowering = struct {
const saved_target = self.target_type;
self.target_type = if (ret_ty != .void and ret_ty != .noreturn) ret_ty else null;
if (ret_ty != .void and ret_ty != .noreturn) {
const body_val = self.lowerBlockValue(fd.body);
if (!self.currentBlockHasTerminator()) {
if (body_val) |val| {
// Check if the body value is void (e.g., last stmt is a void call)
const val_ty = self.builder.getRefType(val);
if (val_ty == .void) {
self.ensureTerminator(ret_ty);
} else {
const coerced = self.coerceToType(val, val_ty, ret_ty);
self.builder.ret(coerced, ret_ty);
}
} else {
self.ensureTerminator(ret_ty);
}
}
self.lowerValueBody(fd.body, ret_ty);
} else {
// void / noreturn: no value to return — lower as statements and
// let `ensureTerminator` close the block (ret void / unreachable).
@@ -1834,21 +1820,7 @@ pub const Lowering = struct {
const saved_target = self.target_type;
self.target_type = if (ret_ty != .void and ret_ty != .noreturn) ret_ty else null;
if (ret_ty != .void and ret_ty != .noreturn) {
const body_val = self.lowerBlockValue(fd.body);
if (!self.currentBlockHasTerminator()) {
if (body_val) |val| {
// Check if body value is void (e.g., last stmt is a void call)
const val_ty = self.builder.getRefType(val);
if (val_ty == .void) {
self.ensureTerminator(ret_ty);
} else {
const coerced = self.coerceToType(val, val_ty, ret_ty);
self.builder.ret(coerced, ret_ty);
}
} else {
self.ensureTerminator(ret_ty);
}
}
self.lowerValueBody(fd.body, ret_ty);
} else {
// void / noreturn: no value to return — lower as statements and
// let `ensureTerminator` close the block (ret void / unreachable).
@@ -1929,6 +1901,18 @@ pub const Lowering = struct {
self.scope = saved_scope;
block_scope.deinit();
}
// A block whose last statement is `;`-terminated (or not an
// expression) discards its value: lower every statement as a
// statement and yield nothing.
if (!blk.produces_value) {
self.force_block_value = false;
for (blk.stmts) |stmt| {
if (self.block_terminated) return null;
self.lowerStmt(stmt);
if (self.currentBlockHasTerminator()) return null;
}
return null;
}
// Lower all statements except the last normally
self.force_block_value = false; // don't force for non-last statements
for (blk.stmts[0 .. blk.stmts.len - 1]) |stmt| {
@@ -1940,7 +1924,7 @@ pub const Lowering = struct {
if (self.currentBlockHasTerminator()) return null;
}
if (self.block_terminated) return null;
// Last statement: if it's an expression, return its value
// Last statement (no trailing `;`): its value is the block's.
self.force_block_value = true;
const last = blk.stmts[blk.stmts.len - 1];
return self.tryLowerAsExpr(last);
@@ -1952,6 +1936,48 @@ pub const Lowering = struct {
}
}
/// Lower the body of a function whose return type carries a value, then emit
/// the implicit return for it.
///
/// Outcomes, in the order they are checked:
/// - the body already ended in a terminator: nothing more is emitted;
/// - the body yielded a non-void value: it is coerced to `ret_ty` and returned;
/// - `ret_ty` is its own error channel (pure-failable `-> !` / `-> !Named`):
///   a void body is a normal success exit, closed by `ensureTerminator`;
/// - otherwise the body produced no value: a hard error is reported (when a
///   diagnostics sink is present) and `ensureTerminator` still closes the block.
fn lowerValueBody(self: *Lowering, body: *const Node, ret_ty: TypeId) void {
    const maybe_value = self.lowerBlockValue(body);
    if (self.currentBlockHasTerminator()) return;

    if (maybe_value) |value| {
        const value_ty = self.builder.getRefType(value);
        if (value_ty != .void) {
            self.builder.ret(self.coerceToType(value, value_ty, ret_ty), ret_ty);
            return;
        }
    }

    // Pure-failable function: the whole return type IS the error channel, so
    // there is no success value to miss.
    const is_pure_failable = if (self.errorChannelOf(ret_ty)) |chan| chan == ret_ty else false;
    if (is_pure_failable) {
        self.ensureTerminator(ret_ty);
        return;
    }

    report: {
        const diags = self.diagnostics orelse break :report;

        // Best case: we know exactly which `;` threw the value away.
        if (body.data == .block) {
            if (body.data.block.discarded_semi) |semi| {
                diags.addFmt(.err, semi, "function returns '{s}' but the last expression's value is discarded by this `;` — drop the `;` to return it (or use an explicit `return`)", .{self.formatTypeName(ret_ty)});
                break :report;
            }
        }

        // Otherwise point at the last statement of the block, falling back to
        // the body node itself when there is none (or it is not a block).
        var span = body.span;
        if (body.data == .block) {
            const stmts = body.data.block.stmts;
            if (stmts.len > 0) span = stmts[stmts.len - 1].span;
        }
        diags.addFmt(.err, span, "function returns '{s}' but its body produces no value — end it with a trailing expression (no `;`) or an explicit `return`", .{self.formatTypeName(ret_ty)});
    }

    self.ensureTerminator(ret_ty);
}
/// Try to lower a node as an expression, returning its value.
/// Statement nodes are lowered as statements (returning null).
fn tryLowerAsExpr(self: *Lowering, node: *const Node) ?Ref {
@@ -3050,8 +3076,19 @@ pub const Lowering = struct {
self.scope = saved_scope;
block_scope.deinit();
}
if (self.force_block_value and blk.stmts.len > 0) {
// Extract last expression value (for if-else branch blocks)
// This block sits in value position (lowerExpr is reached only
// for value contexts — statement blocks go through lowerBlock).
// If its last expression's value is discarded by a `;`, the
// surrounding expression has no value to use: report it.
if (!blk.produces_value and blk.discarded_semi != null) {
if (self.diagnostics) |diags| {
diags.addFmt(.err, blk.discarded_semi.?, "this block is used as a value but its last expression's value is discarded by this `;` — drop the `;`", .{});
}
}
// A block in expression position yields its last statement's
// value only when it produces one (no trailing `;`); otherwise
// it runs as statements and evaluates to void.
if (blk.produces_value and blk.stmts.len > 0) {
for (blk.stmts[0 .. blk.stmts.len - 1]) |stmt| {
self.lowerStmt(stmt);
}
@@ -3577,12 +3614,12 @@ pub const Lowering = struct {
break :blk self.builder.emit(.{ .optional_has_value = .{ .operand = opt_val } }, .bool);
} else opt_val;
const has_else = ie.else_branch != null;
// If-else produces a value when inline OR when then-branch has a non-void type
const is_value = (ie.is_inline or self.force_block_value) and has_else;
// If-else produces a value when inline OR when in value position (force_block_value)
var is_value = (ie.is_inline or self.force_block_value) and has_else;
// Infer result type from then branch for value if-exprs
// If then_branch is null/void, try else_branch (e.g., `if cond then null else val`)
const result_type: TypeId = if (is_value) blk: {
var result_type: TypeId = if (is_value) blk: {
var t = self.inferExprType(ie.then_branch);
if ((t == .void or t == .unresolved) and ie.else_branch != null) {
t = self.inferExprType(ie.else_branch.?);
@@ -3595,6 +3632,14 @@ pub const Lowering = struct {
break :blk t;
} else .void;
// A value-position if/else whose branches yield no value (both are
// `;`-terminated / void blocks) is really a statement-if — lowering it
// as a value would build a `phi void`. Demote it.
if (is_value and result_type == .void) {
is_value = false;
result_type = .void;
}
const then_bb = self.freshBlock("if.then");
const else_bb: ?BlockId = if (has_else) self.freshBlock("if.else") else null;
const merge_params: []const TypeId = if (is_value) &.{result_type} else &.{};
@@ -8813,11 +8858,18 @@ pub const Lowering = struct {
var i = stack.len;
while (i > saved_len) {
i -= 1;
if (!stack[i].is_onfail) _ = self.lowerExpr(stack[i].body);
if (!stack[i].is_onfail) self.lowerCleanupBody(stack[i].body);
}
self.defer_stack.shrinkRetainingCapacity(saved_len);
}
/// Lower a `defer`/`onfail` cleanup body purely for its side effects.
/// A `.block` body goes through `lowerBlock` (statement lowering) rather than
/// `lowerExpr`, so a last expression ending in `;` is accepted; any other
/// node is lowered as an expression and its result is dropped.
fn lowerCleanupBody(self: *Lowering, body: *const Node) void {
    switch (body.data) {
        .block => self.lowerBlock(body),
        else => _ = self.lowerExpr(body),
    }
}
/// Emit cleanups from `base`..current in reverse order on an ERROR exit
/// (raise / try-propagation): BOTH `defer` and `onfail` entries run,
/// interleaved in reverse declaration order. `err_tag` is the in-flight
@@ -8838,14 +8890,14 @@ pub const Lowering = struct {
const saved = self.scope;
self.scope = &ofscope;
ofscope.put(name, .{ .ref = err_tag, .ty = tag_ty, .is_alloca = false });
_ = self.lowerExpr(entry.body);
self.lowerCleanupBody(entry.body);
self.scope = saved;
ofscope.deinit();
} else {
_ = self.lowerExpr(entry.body);
self.lowerCleanupBody(entry.body);
}
} else {
_ = self.lowerExpr(entry.body);
self.lowerCleanupBody(entry.body);
}
}
}
@@ -14579,8 +14631,9 @@ pub const Lowering = struct {
// success type (ERR E1.4c / E1.5).
.return_stmt, .raise_stmt, .break_expr, .continue_expr => .noreturn,
.block => |blk| {
// Block type is the type of the last expression / statement.
if (blk.stmts.len > 0) {
// A block's type is its last expression's type only when it
// produces a value (no trailing `;`); otherwise it is void.
if (blk.produces_value and blk.stmts.len > 0) {
return self.inferExprType(blk.stmts[blk.stmts.len - 1]);
}
return .void;

View File

@@ -37,6 +37,18 @@ pub const Parser = struct {
/// already exiting, so there is nothing to propagate to. E1.7 extends this
/// to the full {try, return, break, continue} set.
in_defer_body: bool = false,
/// Set by `expectSemicolonAfter` for the statement just parsed: true when the
/// statement is a trailing value (an expression / block-form with NO `;`),
/// false when a `;` terminated it (value discarded). `parseBlock` reads it
/// after the last statement to set `Block.produces_value`. Reset at the top
/// of `parseStmt` so non-expression statements (decls, return, …) leave it
/// false.
last_stmt_produces_value: bool = false,
/// Span of the `;` that discarded the just-parsed statement's value, when
/// that statement was an expression terminated by `;` (so the value could
/// have been kept by dropping it). Null when the statement kept its value or
/// wasn't a value expression. Read by `parseBlock` into `Block.discarded_semi`.
last_stmt_semi_loc: ?ast.Span = null,
pub fn init(allocator: std.mem.Allocator, source: [:0]const u8) Parser {
var lexer = Lexer.init(source);
@@ -1401,7 +1413,7 @@ pub const Parser = struct {
try self.expect(.semicolon);
const stmts = try self.allocator.alloc(*Node, 1);
stmts[0] = expr_node;
const block_node = try self.createNode(expr_node.span.start, .{ .block = .{ .stmts = stmts } });
const block_node = try self.createNode(expr_node.span.start, .{ .block = .{ .stmts = stmts, .produces_value = true } });
try members.append(self.allocator, .{ .method = .{
.name = member_name,
.params = &.{},
@@ -1500,7 +1512,7 @@ pub const Parser = struct {
try self.expect(.semicolon);
const stmts = try self.allocator.alloc(*Node, 1);
stmts[0] = expr;
body_node = try self.createNode(expr.span.start, .{ .block = .{ .stmts = stmts } });
body_node = try self.createNode(expr.span.start, .{ .block = .{ .stmts = stmts, .produces_value = true } });
} else {
try self.expect(.semicolon);
}
@@ -1911,7 +1923,7 @@ pub const Parser = struct {
const stmts = try self.allocator.alloc(*Node, 1);
stmts[0] = expr;
const block_start = expr.span.start;
const block = try self.createNode(block_start, .{ .block = .{ .stmts = stmts } });
const block = try self.createNode(block_start, .{ .block = .{ .stmts = stmts, .produces_value = true } });
break :blk block;
} else try self.parseBlock();
@@ -1932,33 +1944,54 @@ pub const Parser = struct {
const start = self.current.loc.start;
try self.expect(.l_brace);
var stmts = std.ArrayList(*Node).empty;
var produces_value = false;
var discarded_semi: ?ast.Span = null;
while (self.current.tag != .r_brace and self.current.tag != .eof) {
const stmt = try self.parseStmt();
try stmts.append(self.allocator, stmt);
// The block's value-ness is its LAST statement's value-ness.
produces_value = self.last_stmt_produces_value;
// A discarding `;` is only meaningful when the block has no value.
discarded_semi = if (produces_value) null else self.last_stmt_semi_loc;
}
try self.expect(.r_brace);
return try self.createNode(start, .{ .block = .{ .stmts = try stmts.toOwnedSlice(self.allocator) } });
return try self.createNode(start, .{ .block = .{ .stmts = try stmts.toOwnedSlice(self.allocator), .produces_value = produces_value, .discarded_semi = discarded_semi } });
}
/// Block-form if/match/while/bare blocks don't require trailing semicolon.
/// Consume the terminator after an expression/block-form statement and
/// record whether the statement is a trailing VALUE (no `;`) or a discarded
/// statement (`;`). A trailing `;` always discards; otherwise the statement
/// is the (potential) block value — allowed when it is block-form (where
/// `;` is optional) or when it is the last thing before `}`.
fn expectSemicolonAfter(self: *Parser, expr: *Node) anyerror!void {
const needs_semi = switch (expr.data) {
.if_expr => |ie| ie.is_inline,
.match_expr => false,
.while_expr => false,
.for_expr => false,
.block => false,
.jni_env_block => false,
else => true,
const block_form = switch (expr.data) {
.if_expr => |ie| !ie.is_inline,
.match_expr, .while_expr, .for_expr, .block, .jni_env_block => true,
else => false,
};
if (needs_semi) {
try self.expect(.semicolon);
} else if (self.current.tag == .semicolon) {
self.advance(); // consume optional ;
if (self.current.tag == .semicolon) {
self.last_stmt_semi_loc = .{ .start = self.current.loc.start, .end = self.current.loc.end };
self.advance(); // explicit terminator → value discarded
self.last_stmt_produces_value = false;
} else if (block_form or self.current.tag == .r_brace) {
// Block-form statements never require `;`; a plain expression may
// omit it only as the trailing value before `}`. Either way this
// statement is the block's value (and discards nothing — clear any
// stale semi location from a nested statement).
self.last_stmt_produces_value = true;
self.last_stmt_semi_loc = null;
} else {
try self.expect(.semicolon); // emits "expected ;"
}
}
pub fn parseStmt(self: *Parser) anyerror!*Node {
// Default: a statement discards its value unless `expectSemicolonAfter`
// marks it a trailing value (no `;`). Non-expression statements (decls,
// return/raise, break/continue, defer/onfail) never set it, so they
// correctly leave the enclosing block value-less.
self.last_stmt_produces_value = false;
self.last_stmt_semi_loc = null;
// Check if this is a declaration (IDENT followed by ::, :=, or : type)
if (self.isIdentLike()) {
const saved_lexer = self.lexer;
@@ -3138,7 +3171,10 @@ pub const Parser = struct {
self.advance();
const expr = try self.parseExpr();
try self.expect(.semicolon);
const body = try self.createNode(arm_start, .{ .block = .{ .stmts = try self.allocator.dupe(*Node, &.{expr}) } });
// Arm bodies are value-producing regardless of the arm `;` (the
// `;` is an arm terminator, not a value-discard — match arms are
// exempt from the block trailing-`;` rule).
const body = try self.createNode(arm_start, .{ .block = .{ .stmts = try self.allocator.dupe(*Node, &.{expr}), .produces_value = true } });
try arms.append(self.allocator, .{ .pattern = pattern, .body = body, .is_break = false, .capture = capture });
} else {
const stmts_start = self.current.loc.start;
@@ -3146,7 +3182,10 @@ pub const Parser = struct {
while (self.current.tag != .kw_case and self.current.tag != .kw_else and self.current.tag != .r_brace and self.current.tag != .eof) {
try stmts.append(self.allocator, try self.parseStmt());
}
const body = try self.createNode(stmts_start, .{ .block = .{ .stmts = try stmts.toOwnedSlice(self.allocator) } });
// Arm exempt from the trailing-`;` rule (see above); the wrapper
// yields its last statement's value — which, for a braced-block
// arm body, still respects that inner block's own flag.
const body = try self.createNode(stmts_start, .{ .block = .{ .stmts = try stmts.toOwnedSlice(self.allocator), .produces_value = true } });
try arms.append(self.allocator, .{ .pattern = pattern, .body = body, .is_break = false, .capture = capture });
}
}
@@ -3159,7 +3198,7 @@ pub const Parser = struct {
while (self.current.tag != .r_brace and self.current.tag != .eof) {
try stmts.append(self.allocator, try self.parseStmt());
}
const body = try self.createNode(else_start, .{ .block = .{ .stmts = try stmts.toOwnedSlice(self.allocator) } });
const body = try self.createNode(else_start, .{ .block = .{ .stmts = try stmts.toOwnedSlice(self.allocator), .produces_value = true } });
try arms.append(self.allocator, .{ .pattern = null, .body = body, .is_break = false });
}
try self.expect(.r_brace);
@@ -3692,6 +3731,42 @@ test "parse minimal main" {
try std.testing.expectEqual(@as(i64, 42), body.data.block.stmts[0].data.int_literal.value);
}
test "block value: trailing expr without `;` produces a value" {
    const source = "f :: () -> s32 { 42 }";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();

    // `42` is the last statement and has no `;`, so the body keeps its value
    // and there is no discarding semicolon to report.
    const fn_block = root.data.root.decls[0].data.fn_decl.body.data.block;
    try std.testing.expect(fn_block.produces_value);
    try std.testing.expect(fn_block.discarded_semi == null);
}
test "block value: trailing `;` discards the value" {
    const source = "f :: () -> s32 { 42; }";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();

    // The `;` after `42` discards the value: the body is void and the parser
    // remembers where that `;` was.
    const fn_block = root.data.root.decls[0].data.fn_decl.body.data.block;
    try std.testing.expect(!fn_block.produces_value);
    try std.testing.expect(fn_block.discarded_semi != null);
}
test "block value: match arms are exempt (keep `;`, still produce a value)" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), "f :: (n: s32) -> s32 { if n == { case 1: 5; else: 0; } }");
    const root = try parser.parse();
    const body = root.data.root.decls[0].data.fn_decl.body;
    // Function body's trailing match has no `;` → the body is a value.
    try std.testing.expect(body.data.block.produces_value);
    const match = body.data.block.stmts[0];
    try std.testing.expect(match.data == .match_expr);
    const arms = match.data.match_expr.arms;
    // Guard against a vacuous pass: the source has one `case` arm and one
    // `else` arm, so the loop below must actually visit two bodies.
    try std.testing.expectEqual(@as(usize, 2), arms.len);
    // Each arm body (built with `;`) is still value-producing (exempt).
    for (arms) |arm| {
        try std.testing.expect(arm.body.data.block.produces_value);
    }
}
test "parse #run const binding" {
const source = "x :: #run compute(5);";
var arena = std.heap.ArenaAllocator.init(std.testing.allocator);