From 27c88d4d26ca82396fc80f0e25ea91cd9e8f23ec Mon Sep 17 00:00:00 2001 From: agra Date: Fri, 29 May 2026 21:36:17 +0300 Subject: [PATCH] lang F1: range-based for + inline-for unroll over packs Add range loop syntax: - runtime for start..end (i) { } counting loop, cursor optional, end exclusive - comptime inline for start..end (i) { } comptime-unrolled body The inline form binds the cursor as an int_val comptime constant per iteration, so xs[i] over a heterogeneous pack substitutes the concrete per-position element -- the canonical's pack-iteration vehicle (inline for 0..sources.len (i) { sources[i].addListener(...) }). - AST: ForExpr.range_end, ForExpr.is_inline - parser: parseForExpr range vs collection form; suppress_call flag so N (i) is not read as a call N(i) while parsing a range bound - lower: lowerRuntimeRangeFor / lowerInlineRangeFor; evalComptimeInt; comptimeIndexOf extends pack-index resolution beyond int literals Revises spec's inline for i in 0..N to the no-in, range-first, paren-cursor form, and drops the inline for x in xs element-form row from the spec table. Regression: examples/200-for-range.sx. 
--- examples/200-for-range.sx | 39 +++++++ specs.md | 22 +++- src/ast.zig | 7 ++ src/ir/lower.zig | 177 ++++++++++++++++++++++++++++-- src/parser.zig | 78 ++++++++++--- tests/expected/200-for-range.exit | 1 + tests/expected/200-for-range.txt | 10 ++ 7 files changed, 305 insertions(+), 29 deletions(-) create mode 100644 examples/200-for-range.sx create mode 100644 tests/expected/200-for-range.exit create mode 100644 tests/expected/200-for-range.txt diff --git a/examples/200-for-range.sx b/examples/200-for-range.sx new file mode 100644 index 0000000..369eeb2 --- /dev/null +++ b/examples/200-for-range.sx @@ -0,0 +1,39 @@ +// Range-based for loops: `for start..end (i) { }` (cursor optional, `end` +// exclusive) is a runtime counting loop; `inline for start..end (i) { }` +// is comptime-unrolled — the cursor is a compile-time constant each +// iteration, so `xs[i]` over a heterogeneous pack substitutes the concrete +// per-position element (this is what drives pack iteration). + +#import "modules/std.sx"; + +Show :: protocol { + show :: () -> string; +} +A :: struct { x: s64; } +B :: struct { s: string; } +impl Show for A { show :: (self: *A) -> string => "A"; } +impl Show for B { show :: (self: *B) -> string => "B"; } + +// Comptime-unrolled iteration over a pack; cursor `i` indexes the pack. +each :: (..xs: Show) -> void { + inline for 0..xs.len (i) { + print("[{}]={}\n", i, xs[i].show()); + } +} + +main :: () -> s32 { + // Runtime range, cursor used. + for 0..3 (i) { print("i={}\n", i); } + + // Runtime range, no cursor — body runs `end - start` times. + n := 0; + for 0..5 { n = n + 1; } + print("n={}\n", n); + + // Non-zero start. + for 2..5 (j) { print("j={}\n", j); } + + // Inline unroll over a heterogeneous pack. + each(A.{ x = 1 }, B.{ s = "hi" }, A.{ x = 3 }); + 0; +} diff --git a/specs.md b/specs.md index f9fffcc..d6654c8 100644 --- a/specs.md +++ b/specs.md @@ -1006,8 +1006,7 @@ may do, regardless of the concrete arg types at any particular call site. 
|---|---|---| | Length | `xs.len` | comptime int (field-style, not `len(xs)`) | | Index | `xs[i]` | i-th element; `i` must be comptime | -| Comptime unroll (index) | `inline for i in 0..xs.len { ... }` | unrolled loop; not `#for` | -| Comptime unroll (element) | `inline for x in xs { ... }` | desugars to index form; `x`'s type varies per iteration | +| Comptime unroll (index) | `inline for 0..xs.len (i) { ... }` | unrolled loop; cursor `i` is a comptime constant per iteration; not `#for` | | Projection | `xs.field` | see "Pack projection" | | Spread → call args | `..xs` / `..xs.field` | expands to N positional args | | Spread → tuple value | `(..xs)` / `(..xs.field)` | materializes a tuple | @@ -1057,7 +1056,7 @@ value-requiring position is a compile error with a tailored suggestion: - storing/binding it (`let x = xs;`, `self.f = xs;`) → suggest `(..xs)`; - passing to a non-pack-taking call (`f(xs)`) → suggest `..xs`; - returning it (`return xs;`) → suggest a tuple return with `(..xs)`; -- iterating at runtime (`for x in xs`, `xs[runtime_i]`) → suggest `inline for`. +- iterating at runtime (`for xs : (x)`, `xs[runtime_i]`) → suggest `inline for`. 
#### Storage and protocol conformance @@ -1089,7 +1088,7 @@ map :: (mapper: Closure(..sources.T) -> $R, ..sources: ValueListenable) c.own_allocator = context.allocator; c.mapper = mapper; c.sources = (..sources); // pack-to-tuple materialization - inline for i in 0..sources.len { // comptime unroll over the pack + inline for 0..sources.len (i) { // comptime unroll over the pack sources[i].addListener((_) => c.recompute()); } c.value = mapper(..sources.value); // pack spread + projection in a call @@ -1510,6 +1509,21 @@ while i < 10 { ``` ### For Loop + +#### Range form +```sx +for start..end (i) { } // counting loop, cursor `i` (s64), `end` exclusive +for start..end { } // no cursor — body runs `end - start` times +inline for start..end (i) { } // comptime-unrolled; `i` is a comptime constant per iteration +``` +`start` and `end` are `s64` expressions; the loop counts `start, start+1, …, end-1`. +The cursor parens are optional — omit them when the body doesn't need the index. +The `inline` variant requires comptime-known bounds and unrolls the body once per +value, binding the cursor as a compile-time constant (so it can index a pack: +`inline for 0..xs.len (i) { xs[i].m() }`). `break;` / `continue;` work in the +runtime form. Because a `(` directly after the end bound opens the cursor, a call in the end bound must be wrapped in parentheses or sit inside `[...]`: write `for 0..(count()) (i) { }`, not `for 0..count() (i) { }`. + +#### Collection form ```sx for iterable: (elem) { } // element alias (no copy) for iterable: (elem, ix) { } // element + index diff --git a/src/ast.zig b/src/ast.zig index 51f5fc8..0323c19 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -482,6 +482,13 @@ pub const ForExpr = struct { body: *Node, capture_name: []const u8, index_name: ?[]const u8 = null, + /// Range form `for start..end (i) { }`: `iterable` is the start, `range_end` + /// the (exclusive) end. Null for the iterate-a-collection form + /// (`for coll : (x) { }`). For the range form `capture_name` is the cursor + /// (empty when omitted, `for 0..N { }`). + range_end: ?*Node = null, + /// `inline for` — comptime-unrolled (range bounds must be comptime). 
+ is_inline: bool = false, }; pub const SpreadExpr = struct { diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 5a9673d..51ab210 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -3201,6 +3201,11 @@ pub const Lowering = struct { } fn lowerFor(self: *Lowering, fe: *const ast.ForExpr) Ref { + if (fe.range_end) |end_node| { + if (fe.is_inline) return self.lowerInlineRangeFor(fe, end_node); + return self.lowerRuntimeRangeFor(fe, end_node); + } + // Lower iterable const iterable = self.lowerExpr(fe.iterable); @@ -3277,6 +3282,145 @@ pub const Lowering = struct { return self.builder.constInt(0, .void); } + /// Runtime counting loop `for start..end (i) { }` — `i` (optional) is the + /// cursor, `end` is exclusive. Lowers to the same header/inc/exit shape as + /// the collection form, minus the element fetch. + fn lowerRuntimeRangeFor(self: *Lowering, fe: *const ast.ForExpr, end_node: *Node) Ref { + const start = self.lowerExpr(fe.iterable); + const end = self.lowerExpr(end_node); + + const idx_slot = self.builder.alloca(.s64); + self.builder.store(idx_slot, start); + + const header_bb = self.freshBlock("for.hdr"); + const body_bb = self.freshBlock("for.body"); + const inc_bb = self.freshBlock("for.inc"); + const exit_bb = self.freshBlock("for.exit"); + + self.builder.br(header_bb, &.{}); + + self.builder.switchToBlock(header_bb); + const idx_val = self.builder.load(idx_slot, .s64); + const cmp = self.builder.cmpLt(idx_val, end); + self.builder.condBr(cmp, body_bb, &.{}, exit_bb, &.{}); + + self.builder.switchToBlock(body_bb); + var body_scope = Scope.init(self.alloc, self.scope); + const old_scope = self.scope; + self.scope = &body_scope; + if (fe.capture_name.len > 0) { + body_scope.put(fe.capture_name, .{ .ref = idx_val, .ty = .s64, .is_alloca = false }); + } + + const old_break = self.break_target; + const old_continue = self.continue_target; + self.break_target = exit_bb; + self.continue_target = inc_bb; + + self.lowerBlock(fe.body); + + self.break_target = 
old_break; + self.continue_target = old_continue; + self.scope = old_scope; + body_scope.deinit(); + + if (!self.currentBlockHasTerminator()) { + self.builder.br(inc_bb, &.{}); + } + + self.builder.switchToBlock(inc_bb); + { + const cur_idx = self.builder.load(idx_slot, .s64); + const one = self.builder.constInt(1, .s64); + const next_idx = self.builder.add(cur_idx, one, .s64); + self.builder.store(idx_slot, next_idx); + self.builder.br(header_bb, &.{}); + } + + self.builder.switchToBlock(exit_bb); + return self.builder.constInt(0, .void); + } + + /// Comptime-unrolled `inline for start..end (i) { }`. `start`/`end` must be + /// comptime-known. The body is lowered `end - start` times with the cursor + /// bound as an `int_val` comptime constant, so `xs[i]` over a pack + /// substitutes the concrete per-position argument each iteration. + fn lowerInlineRangeFor(self: *Lowering, fe: *const ast.ForExpr, end_node: *Node) Ref { + const start = self.evalComptimeInt(fe.iterable) orelse { + if (self.diagnostics) |d| d.addFmt(.err, fe.iterable.span, "inline for: range start is not a compile-time integer", .{}); + return self.builder.constInt(0, .void); + }; + const end = self.evalComptimeInt(end_node) orelse { + if (self.diagnostics) |d| d.addFmt(.err, end_node.span, "inline for: range end is not a compile-time integer", .{}); + return self.builder.constInt(0, .void); + }; + + var i: i64 = start; + while (i < end) : (i += 1) { + var body_scope = Scope.init(self.alloc, self.scope); + const old_scope = self.scope; + self.scope = &body_scope; + + // Bind the cursor both as a runtime value (constInt, for uses like + // `print(i)`) and as a comptime constant (for `xs[i]` substitution). 
+ var had_prev = false; + var prev: ComptimeValue = undefined; + if (fe.capture_name.len > 0) { + body_scope.put(fe.capture_name, .{ .ref = self.builder.constInt(i, .s64), .ty = .s64, .is_alloca = false }); + if (self.comptime_constants.get(fe.capture_name)) |p| { + had_prev = true; + prev = p; + } + self.comptime_constants.put(fe.capture_name, .{ .int_val = i }) catch {}; + } + + self.lowerBlock(fe.body); + + if (fe.capture_name.len > 0) { + if (had_prev) { + self.comptime_constants.put(fe.capture_name, prev) catch {}; + } else { + _ = self.comptime_constants.remove(fe.capture_name); + } + } + + self.scope = old_scope; + body_scope.deinit(); + + if (self.currentBlockHasTerminator()) break; + } + + return self.builder.constInt(0, .void); + } + + /// Evaluate a node to a comptime integer: literal, comptime-constant + /// identifier, or `.len` (resolves to the monomorphised arity). + fn evalComptimeInt(self: *Lowering, node: *const Node) ?i64 { + switch (node.data) { + .int_literal => |lit| return lit.value, + .identifier => |id| { + if (self.comptime_constants.get(id.name)) |cv| { + switch (cv) { + .int_val => |iv| return iv, + else => return null, + } + } + return null; + }, + .field_access => |fa| { + if (self.pack_param_count) |ppc| { + if (fa.object.data == .identifier and std.mem.eql(u8, fa.field, "len")) { + if (ppc.get(fa.object.data.identifier.name)) |n| { + return @as(i64, @intCast(n)); + } + } + } + return null; + }, + else => return null, + } + } + fn lowerMatch(self: *Lowering, me: *const ast.MatchExpr) Ref { // inline if match: evaluate at compile time, only lower the matching arm if (me.is_comptime) { @@ -4664,14 +4808,32 @@ pub const Lowering = struct { const pan = self.pack_arg_nodes orelse return null; if (ie.object.data != .identifier) return null; const arg_nodes = pan.get(ie.object.data.identifier.name) orelse return null; - if (ie.index.data != .int_literal) return null; - const raw: i64 = ie.index.data.int_literal.value; + const raw: i64 = 
self.comptimeIndexOf(ie.index) orelse return null; if (raw < 0) return null; const i: usize = @intCast(raw); if (i >= arg_nodes.len) return null; return arg_nodes[i]; } + /// Resolve an index expression to a comptime-known integer: a literal, + /// or an identifier bound to an `int_val` in `comptime_constants` (e.g. + /// the cursor of an `inline for 0..N (i)` unroll). Otherwise null. + fn comptimeIndexOf(self: *Lowering, index: *const Node) ?i64 { + switch (index.data) { + .int_literal => |lit| return lit.value, + .identifier => |id| { + if (self.comptime_constants.get(id.name)) |cv| { + switch (cv) { + .int_val => |iv| return iv, + else => return null, + } + } + return null; + }, + else => return null, + } + } + fn lowerSliceExpr(self: *Lowering, se: *const ast.SliceExpr) Ref { const obj = self.lowerExpr(se.object); const lo = if (se.start) |s| self.lowerExpr(s) else self.builder.constInt(0, .s64); @@ -12882,12 +13044,13 @@ pub const Lowering = struct { // would otherwise lose the type when the mono's // scope isn't set up yet (generic-`$R` pre-inference). if (self.pack_arg_types) |pat| { - if (ie.object.data == .identifier and ie.index.data == .int_literal) { + if (ie.object.data == .identifier) { if (pat.get(ie.object.data.identifier.name)) |arg_tys| { - const raw: i64 = ie.index.data.int_literal.value; - if (raw >= 0) { - const i: usize = @intCast(raw); - if (i < arg_tys.len) return arg_tys[i]; + if (self.comptimeIndexOf(ie.index)) |raw| { + if (raw >= 0) { + const i: usize = @intCast(raw); + if (i < arg_tys.len) return arg_tys[i]; + } } } } diff --git a/src/parser.zig b/src/parser.zig index f0e2193..a8f3ccd 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -23,6 +23,10 @@ pub const Parser = struct { /// a `.compiler_expr` body so the per-method `#compiler` suffix can be /// omitted. struct_default_compiler: bool = false, + /// When true, parsePostfix does not treat a trailing `(` as a call. 
Set + /// while parsing a `for` range bound so `for 0..N (i)` reads `N` as the + /// end and leaves `(i)` for the cursor rather than parsing `N(i)`. + suppress_call: bool = false, pub fn init(allocator: std.mem.Allocator, source: [:0]const u8) Parser { var lexer = Lexer.init(source); @@ -1995,6 +1999,13 @@ pub const Parser = struct { try self.expectSemicolonAfter(expr); return expr; } + if (self.peekNext() == .kw_for) { + self.advance(); // skip 'inline' + const expr = try self.parseForExpr(); + expr.data.for_expr.is_inline = true; + try self.expectSemicolonAfter(expr); + return expr; + } } // Block-form if/while/for as statements — parse directly to prevent @@ -2187,7 +2198,7 @@ pub const Parser = struct { var expr = try self.parsePrimary(); while (true) { - if (self.current.tag == .l_paren) { + if (self.current.tag == .l_paren and !self.suppress_call) { // Call self.advance(); var args = std.ArrayList(*Node).empty; @@ -2274,6 +2285,10 @@ pub const Parser = struct { } else if (self.current.tag == .l_bracket) { // Index or slice access: expr[expr] or expr[start..end] self.advance(); + // Inside `[...]`, calls parse normally even within a range bound. + const saved_suppress_idx = self.suppress_call; + self.suppress_call = false; + defer self.suppress_call = saved_suppress_idx; if (self.current.tag == .dot_dot) { // [..end] self.advance(); @@ -2458,6 +2473,12 @@ pub const Parser = struct { } self.advance(); // skip '(' + // A `(` here opens a grouping/tuple, not a `for` range bound, so + // calls inside it parse normally even within a range bound. + const saved_suppress_grp = self.suppress_call; + self.suppress_call = false; + defer self.suppress_call = saved_suppress_grp; + // Check for named tuple: (name: expr, ...) 
if (self.current.tag == .identifier and self.peekNext() == .colon) { return self.parseTupleLiteralNamed(start); @@ -2803,25 +2824,45 @@ pub const Parser = struct { const iterable = try self.parseExpr(); - // Expect ': (' capture clause - try self.expect(.colon); - try self.expect(.l_paren); - - // Capture variable name - if (self.current.tag != .identifier) return self.fail("expected capture variable name"); - const capture_name = self.tokenSlice(self.current); - self.advance(); - - // Optional ', index_name' - var index_name: ?[]const u8 = null; - if (self.current.tag == .comma) { - self.advance(); - if (self.current.tag != .identifier) return self.fail("expected index variable name"); - index_name = self.tokenSlice(self.current); - self.advance(); + // Range form: `for start..end (i)? { }`. The `..` only appears here for a + // range (slice ranges live inside `[]`), so it's unambiguous. + var range_end: ?*Node = null; + if (self.current.tag == .dot_dot) { + self.advance(); // skip '..' + const saved_suppress = self.suppress_call; + self.suppress_call = true; + defer self.suppress_call = saved_suppress; // restore on the error path too, matching the `[...]` and `(...)` sites + range_end = try self.parseExpr(); + } + + var capture_name: []const u8 = ""; + var index_name: ?[]const u8 = null; + + if (range_end != null) { + // Range capture is the optional cursor: `(i)` or nothing. + if (self.current.tag == .l_paren) { + self.advance(); + if (self.current.tag != .identifier) return self.fail("expected cursor variable name"); + capture_name = self.tokenSlice(self.current); + self.advance(); + try self.expect(.r_paren); + } + } else { + // Collection form: `: (capture, index?)`. 
+ try self.expect(.colon); + try self.expect(.l_paren); + if (self.current.tag != .identifier) return self.fail("expected capture variable name"); + capture_name = self.tokenSlice(self.current); + self.advance(); + if (self.current.tag == .comma) { + self.advance(); + if (self.current.tag != .identifier) return self.fail("expected index variable name"); + index_name = self.tokenSlice(self.current); + self.advance(); + } + try self.expect(.r_paren); } - try self.expect(.r_paren); const body = try self.parseBlock(); return try self.createNode(start, .{ .for_expr = .{ @@ -2829,6 +2870,7 @@ pub const Parser = struct { .body = body, .capture_name = capture_name, .index_name = index_name, + .range_end = range_end, } }); } diff --git a/tests/expected/200-for-range.exit b/tests/expected/200-for-range.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/tests/expected/200-for-range.exit @@ -0,0 +1 @@ +0 diff --git a/tests/expected/200-for-range.txt b/tests/expected/200-for-range.txt new file mode 100644 index 0000000..d1b6cb9 --- /dev/null +++ b/tests/expected/200-for-range.txt @@ -0,0 +1,10 @@ +i=0 +i=1 +i=2 +n=5 +j=2 +j=3 +j=4 +[0]=A +[1]=B +[2]=A