lang: multi-iterable for loops — drop ':', add '..=', open ranges, arrow bodies

The for header is now a comma-separated list of iterables with a
positional capture group and no ':' separator:

    for xs (x) { }                    // collection
    for 0..n (i) { }                  // range (end exclusive)
    for 1..=5 (a) { }                 // ..= inclusive end
    for xs, 0.. (x, i) { }            // index idiom (replaces the old `for xs: (x, i)`)
    for xs, ys (x, y) { }             // parallel (zip) iteration
    for xs (x) => sum += x;           // arrow body (full statement)

First-iterable-wins: the first iterable's length drives the loop and
must be bounded; the other positions follow by their own cursors (a
non-first range's end is not consulted or evaluated; a shorter
non-first collection is read past its length on mismatch). The old
single-iterable index capture is replaced by the trailing open range.

Capture/call disambiguation is positional: the paren group immediately
before '{' or '=>' is the capture, every earlier top-level group is a
call. 'for zip(a, b) (x, y)' calls zip; 'for f(n) { }' reads (n) as
the capture and errors with a parenthesize/add-capture hint. The old
':' form errors with a migration hint.

Lowering is unified across forms: one cursor slot per position (ranges
start at their start, collections at 0), all advanced together, the
first position's bound terminating. inline for keeps the single
bounded comptime range.

Migrated the full corpus (examples, library modules, issue repros,
in-source test strings). New coverage: examples/0050 (the full feature
surface) and examples/1149-1155 (seven diagnostic faces). specs.md For
Loop section + grammar rewritten; readme teaser updated.
This commit is contained in:
agra
2026-06-10 20:30:55 +03:00
parent c640e88513
commit 116af2359e
75 changed files with 701 additions and 391 deletions

View File

@@ -24,10 +24,13 @@ pub const Parser = struct {
/// a `.compiler_expr` body so the per-method `#compiler` suffix can be
/// omitted.
struct_default_compiler: bool = false,
/// When true, parsePostfix does not treat a trailing `(` as a call. Set
/// while parsing a `for` range bound so `for 0..N (i)` reads `N` as the
/// end and leaves `(i)` for the cursor rather than parsing `N(i)`.
suppress_call: bool = false,
/// When true (set while parsing a `for` header's iterable expressions),
/// a top-level `(` group immediately followed by `{` or `=>` is the loop
/// CAPTURE, never call arguments — `for xs (x) { }` reads `(x)` as the
/// capture, while `for zip(a, b) (x, y) { }` still calls `zip(a, b)`
/// because that group is not the trailing one. Cleared inside any nested
/// bracket/paren/argument context.
in_for_header: bool = false,
/// When true (set while parsing an `onfail` body), a `raise` statement is
/// rejected — an error during cleanup has no propagation target. E1.7
/// extends this to the full {try, return, break, continue} set.
@@ -2477,9 +2480,13 @@ pub const Parser = struct {
var expr = try self.parsePrimary();
while (true) {
if (self.current.tag == .l_paren and !self.suppress_call) {
// Call
if (self.current.tag == .l_paren and !self.parenGroupIsForCapture()) {
// Call. Argument expressions are an ordinary nested context —
// the for-header capture rule does not apply inside them.
self.advance();
const saved_hdr_args = self.in_for_header;
self.in_for_header = false;
defer self.in_for_header = saved_hdr_args;
var args = std.ArrayList(*Node).empty;
while (self.current.tag != .r_paren and self.current.tag != .eof) {
if (args.items.len > 0) {
@@ -2564,10 +2571,10 @@ pub const Parser = struct {
} else if (self.current.tag == .l_bracket) {
// Index or slice access: expr[expr] or expr[start..end]
self.advance();
// Inside `[...]`, calls parse normally even within a range bound.
const saved_suppress_idx = self.suppress_call;
self.suppress_call = false;
defer self.suppress_call = saved_suppress_idx;
// Inside `[...]`, calls parse normally even within a for header.
const saved_hdr_idx = self.in_for_header;
self.in_for_header = false;
defer self.in_for_header = saved_hdr_idx;
if (self.current.tag == .dot_dot) {
// [..end]
self.advance();
@@ -2794,11 +2801,11 @@ pub const Parser = struct {
}
self.advance(); // skip '('
// A `(` here opens a grouping/tuple, not a `for` range bound, so
// calls inside it parse normally even within a range bound.
const saved_suppress_grp = self.suppress_call;
self.suppress_call = false;
defer self.suppress_call = saved_suppress_grp;
// A `(` here opens a grouping/tuple, so calls inside it parse
// normally even within a for header.
const saved_hdr_grp = self.in_for_header;
self.in_for_header = false;
defer self.in_for_header = saved_hdr_grp;
// Check for named tuple: (name: expr, ...)
if (self.current.tag == .identifier and self.peekNext() == .colon) {
@@ -3163,79 +3170,94 @@ pub const Parser = struct {
const start = self.current.loc.start;
self.advance(); // skip 'for'
const iterable = try self.parseExpr();
var iterables = std.ArrayList(ast.ForIterable).empty;
var captures = std.ArrayList(ast.ForCapture).empty;
// Range form: `for start..end (i)? { }`. The `..` only appears here for a
// range (slice ranges live inside `[]`), so it's unambiguous.
var range_end: ?*Node = null;
if (self.current.tag == .dot_dot) {
self.advance(); // skip '..'
const saved_suppress = self.suppress_call;
self.suppress_call = true;
range_end = try self.parseExpr();
self.suppress_call = saved_suppress;
// Header: comma-separated iterables, each a collection expression or
// a range (`a..b`, `a..=b`, open `a..`). Top-level trailing call
// parens are read as the capture (see parenGroupIsForCapture).
const saved_hdr = self.in_for_header;
self.in_for_header = true;
while (true) {
const expr = try self.parseExpr();
var it = ast.ForIterable{ .expr = expr };
if (self.current.tag == .dot_dot or self.current.tag == .dot_dot_eq) {
it.is_range = true;
it.inclusive = self.current.tag == .dot_dot_eq;
self.advance();
// End expression — absent for the open range `a..`, i.e. when
// the header continues (`,`), the body starts (`{` / `=>`),
// or the capture group follows.
const open = switch (self.current.tag) {
.comma, .l_brace, .fat_arrow => true,
.l_paren => self.parenGroupIsForCapture(),
else => false,
};
if (open) {
if (it.inclusive) return self.fail("'..=' requires an end expression — the open form is 'a..'");
} else {
it.range_end = try self.parseExpr();
}
}
try iterables.append(self.allocator, it);
if (self.current.tag != .comma) break;
self.advance();
}
self.in_for_header = saved_hdr;
// Migration aid for the pre-multi-iterable syntax.
if (self.current.tag == .colon) {
return self.fail("for-loop syntax: the ':' before the capture was removed — write `for xs (x) { }` (index via `for xs, 0.. (x, i)`)");
}
var capture_name: []const u8 = "";
var capture_span: ?ast.Span = null;
var capture_is_raw = false;
var index_name: ?[]const u8 = null;
var index_span: ?ast.Span = null;
var index_is_raw = false;
var capture_by_ref = false;
if (range_end != null) {
// Optional cursor, introduced by `:` for symmetry with the
// collection form: `for 0..N: (i)` (or `for 0..N` with no cursor).
// The colon is required when a cursor is present.
if (self.current.tag == .colon) {
self.advance();
try self.expect(.l_paren);
if (self.current.tag != .identifier) return self.fail("expected cursor variable name");
capture_name = self.tokenSlice(self.current);
capture_span = .{ .start = self.current.loc.start, .end = self.current.loc.end };
capture_is_raw = self.current.is_raw;
self.advance();
try self.expect(.r_paren);
}
} else {
// Collection form: `: (capture, index?)`. A leading `*` on the
// capture (`(*x)`) binds it by pointer into the collection.
try self.expect(.colon);
try self.expect(.l_paren);
if (self.current.tag == .star) {
capture_by_ref = true;
self.advance();
}
if (self.current.tag != .identifier) return self.fail("expected capture variable name");
capture_name = self.tokenSlice(self.current);
capture_span = .{ .start = self.current.loc.start, .end = self.current.loc.end };
capture_is_raw = self.current.is_raw;
// Capture group: `(x)`, `(*x)`, `(a, b, ...)` — positional, one
// capture per iterable.
if (self.current.tag == .l_paren) {
self.advance();
if (self.current.tag == .comma) {
while (true) {
var cap = ast.ForCapture{ .name = "" };
if (self.current.tag == .star) {
cap.by_ref = true;
self.advance();
}
if (self.current.tag != .identifier) return self.fail("expected capture variable name (a call iterable also needs a capture: `for f(n) (x) { }`)");
cap.name = self.tokenSlice(self.current);
cap.span = .{ .start = self.current.loc.start, .end = self.current.loc.end };
cap.is_raw = self.current.is_raw;
self.advance();
if (self.current.tag != .identifier) return self.fail("expected index variable name");
index_name = self.tokenSlice(self.current);
index_span = .{ .start = self.current.loc.start, .end = self.current.loc.end };
index_is_raw = self.current.is_raw;
try captures.append(self.allocator, cap);
if (self.current.tag != .comma) break;
self.advance();
}
try self.expect(.r_paren);
}
const body = try self.parseBlock();
if (captures.items.len != 0 and captures.items.len != iterables.items.len) {
return self.fail("for capture count must match the iterable count — one capture per iterable");
}
if (iterables.items[0].is_range and iterables.items[0].range_end == null) {
return self.fail("the first iterable must have a bounded length (it drives the loop) — an open range 'a..' may only follow it");
}
for (iterables.items, 0..) |it, i| {
if (it.is_range and i < captures.items.len and captures.items[i].by_ref) {
return self.fail("a range element cannot be captured by reference");
}
}
// Body: a block, or the arrow form `=> stmt` (a full statement, so
// assignments like `=> s += x;` work; parseStmt owns the `;`).
var body: *Node = undefined;
if (self.current.tag == .fat_arrow) {
self.advance();
body = try self.parseStmt();
} else {
body = try self.parseBlock();
}
return try self.createNode(start, .{ .for_expr = .{
.iterable = iterable,
.iterables = try iterables.toOwnedSlice(self.allocator),
.captures = try captures.toOwnedSlice(self.allocator),
.body = body,
.capture_name = capture_name,
.capture_span = capture_span,
.capture_is_raw = capture_is_raw,
.index_name = index_name,
.index_span = index_span,
.index_is_raw = index_is_raw,
.range_end = range_end,
.capture_by_ref = capture_by_ref,
} });
}
@@ -3780,6 +3802,35 @@ pub const Parser = struct {
return tok.tag;
}
/// Lookahead helper: assuming `current` is an opening `(`, reports the tag
/// of the first token that follows the matching `)`. The scan runs on a
/// throwaway copy of the lexer, so nothing is consumed from the parser's
/// own lexer. Nesting is tracked by counting parens only; every other
/// token is skipped. Returns `.eof` if the input ends before the group
/// is closed.
fn tagAfterParenGroup(self: *Parser) Tag {
    var scan = self.lexer;
    // The group's opening `(` is already `current`, so one paren is open.
    var open_parens: u32 = 1;
    while (open_parens != 0) {
        const tag = scan.next().tag;
        if (tag == .eof) return .eof;
        if (tag == .l_paren) {
            open_parens += 1;
        } else if (tag == .r_paren) {
            open_parens -= 1;
        }
    }
    // The matching `)` was just read; the next token is the answer.
    return scan.next().tag;
}
/// Decides whether the `(` group starting at `current` is the loop capture
/// of a `for` header rather than call arguments. That is the case only
/// while `in_for_header` is set and the token right after the group's
/// closing `)` is `{` or `=>`. Outside a for header this is always false
/// and no lookahead is performed.
fn parenGroupIsForCapture(self: *Parser) bool {
    if (self.in_for_header) {
        return switch (self.tagAfterParenGroup()) {
            .l_brace, .fat_arrow => true,
            else => false,
        };
    }
    return false;
}
fn advance(self: *Parser) void {
self.prev_end = self.current.loc.end;
self.current = self.lexer.next();
@@ -4541,7 +4592,7 @@ test "E1.7 try rejected inside an onfail body" {
test "E1.7 break rejected inside a defer body (transitive through a loop)" {
var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
defer arena.deinit();
// NOTE(review): the two `var parser` lines below appear to be the removed
// (old `for 0..1: (i)` syntax) and added (new `for 0..1 (i)` syntax) sides
// of one diff line — only one of them exists in the actual file.
var parser = Parser.init(arena.allocator(), "f :: () { defer { for 0..1: (i) { break; } } }");
var parser = Parser.init(arena.allocator(), "f :: () { defer { for 0..1 (i) { break; } } }");
// The `break` sits in a `for` loop nested inside a `defer` body, so
// parsing is expected to fail with a ParseError.
try std.testing.expectError(error.ParseError, parser.parse());
}