From 185df9afb7157dadaabd5685bebe2955e43d814b Mon Sep 17 00:00:00 2001 From: agra Date: Sun, 31 May 2026 10:29:16 +0300 Subject: [PATCH] lang: for-loop by-ref element capture (for xs: (*x)) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (*x) binds x to a pointer into the collection (index_gep) instead of a per-element value copy: passing it on (e.g. to a *T param) is zero-copy and mutations write back. In a value position x auto-derefs — a binary-op operand loads the element, a pointer-typed slot keeps the pointer, and an 'if x == {...}' match derefs the pointee for its tag/payload. Arrays GEP through their storage so writes hit the original. Regression test: examples/for-by-ref-capture.sx. --- examples/for-by-ref-capture.sx | 25 +++++++++++ src/ast.zig | 3 ++ src/ir/lower.zig | 57 ++++++++++++++++++++++---- src/parser.zig | 9 +++- tests/expected/for-by-ref-capture.exit | 1 + tests/expected/for-by-ref-capture.txt | 3 ++ 6 files changed, 88 insertions(+), 10 deletions(-) create mode 100644 examples/for-by-ref-capture.sx create mode 100644 tests/expected/for-by-ref-capture.exit create mode 100644 tests/expected/for-by-ref-capture.txt diff --git a/examples/for-by-ref-capture.sx b/examples/for-by-ref-capture.sx new file mode 100644 index 0000000..7178ebd --- /dev/null +++ b/examples/for-by-ref-capture.sx @@ -0,0 +1,25 @@ +// `for xs: (*x)` binds each element by pointer — no per-element copy. +// Mutations write back, and a pointer subject matches through the deref. +#import "modules/std.sx"; + +Shape :: enum { + circle: f32; + none; +} + +main :: () -> s32 { + // By-ref mutation writes back into the array (impossible with a value copy). + xs : [3]s64 = .[1, 2, 3]; + for xs: (*x) { x.* = x + 100; } + print("{} {} {}\n", xs[0], xs[1], xs[2]); + + // Pointer subject matches through the deref; payload reads through the ref. + shapes : [2]Shape = .[.circle(2.0), .none]; + for shapes: (*s) { + if s == { + case .circle: (r) { print("circle {}\n", r); } + case .none: { print("none\n"); } + } + } + 0; +} diff --git a/src/ast.zig b/src/ast.zig index 25c75ea..e79d64d 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -492,6 +492,9 @@ pub const ForExpr = struct { range_end: ?*Node = null, /// `inline for` — comptime-unrolled (range bounds must be comptime). is_inline: bool = false, + /// `for xs: (*x)` — bind `x` to a pointer into the collection (no per-element + /// copy) rather than a value copy of each element. + capture_by_ref: bool = false, }; pub const SpreadExpr = struct { diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 31c92da..62e9d2e 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -38,6 +38,7 @@ const Binding = struct { ref: Ref, ty: TypeId, is_alloca: bool, // true if ref is a pointer that needs load + is_ref_capture: bool = false, // `for xs: (*x)` — `ref` is `*elem`; auto-deref in value positions }; const Scope = struct { @@ -2542,6 +2543,17 @@ pub const Lowering = struct { }; } + /// If `node` names a `for xs: (*x)` by-ref capture (an `*elem`), returns + /// the element (pointee) type so a value-position use can auto-deref it. + fn refCapturePointee(self: *Lowering, node: *const Node) ?TypeId { + if (node.data != .identifier) return null; + const scope = self.scope orelse return null; + const binding = scope.lookup(node.data.identifier.name) orelse return null; + if (!binding.is_ref_capture or binding.ty.isBuiltin()) return null; + const info = self.module.types.get(binding.ty); + return if (info == .pointer) info.pointer.pointee else null; + } + fn lowerBinaryOp(self: *Lowering, bop: *const ast.BinaryOp) Ref { // Short-circuit: `a and b` → if a then b else false if (bop.op == .and_op) { @@ -2656,10 +2668,15 @@ pub const Lowering = struct { } } var lhs = self.lowerExpr(bop.lhs); + // A `for xs: (*x)` capture is a pointer; in a value position (here, an + // operand) it auto-derefs to the element. + const lhs_ref_pointee = self.refCapturePointee(bop.lhs); + if (lhs_ref_pointee) |p| lhs = self.builder.load(lhs, p); // Set target_type from LHS so enum literals on RHS resolve correctly. // When the LHS isn't statically inferable (e.g. `#objc_call(...)`), use // the lowered operand's concrete type rather than a guess. const lhs_ty = blk: { + if (lhs_ref_pointee) |p| break :blk p; const it = self.inferExprType(bop.lhs); break :blk if (it == .unresolved) self.builder.getRefType(lhs) else it; }; @@ -2675,6 +2692,8 @@ pub const Lowering = struct { } } var rhs = self.lowerExpr(bop.rhs); + const rhs_ref_pointee = self.refCapturePointee(bop.rhs); + if (rhs_ref_pointee) |p| rhs = self.builder.load(rhs, p); self.target_type = saved_tt; // Infer result type from LHS operand (covers float, bool, etc.) var ty = lhs_ty; @@ -2682,7 +2701,7 @@ pub const Lowering = struct { // Promote int×float → float (e.g., s64 * f32 → f32) // Only for scalar int LHS — don't affect vectors or structs. { - const rhs_inferred = self.inferExprType(bop.rhs); + const rhs_inferred = rhs_ref_pointee orelse self.inferExprType(bop.rhs); const l_int = isInt(ty); const r_float = (rhs_inferred == .f32 or rhs_inferred == .f64); if (l_int and r_float) { @@ -2698,7 +2717,7 @@ pub const Lowering = struct { lhs = self.builder.emit(.{ .optional_unwrap = .{ .operand = lhs } }, ty); } } - const rhs_ty = self.inferExprType(bop.rhs); + const rhs_ty = rhs_ref_pointee orelse self.inferExprType(bop.rhs); if (!rhs_ty.isBuiltin()) { const rhs_info = self.module.types.get(rhs_ty); if (rhs_info == .optional) { @@ -3365,16 +3384,26 @@ pub const Lowering = struct { // Body self.builder.switchToBlock(body_bb); - // Bind element — resolve element type from iterable + // Bind element — resolve element type from iterable. `for xs: (*x)` + // binds a pointer into the collection (no per-element copy); `(x)` + // binds a value copy. const iterable_ty = self.inferExprType(fe.iterable); const elem_ty = self.getElementType(iterable_ty); - const elem = self.builder.emit(.{ .index_get = .{ .lhs = iterable, .rhs = idx_val } }, elem_ty); + const bind_ty = if (fe.capture_by_ref) self.module.types.ptrTo(elem_ty) else elem_ty; + const elem = if (fe.capture_by_ref) blk: { + // A slice value carries its backing pointer, so GEP on it writes + // through. An array is a value — GEP needs its storage (alloca) or + // mutations would hit a copy. + const is_array = !iterable_ty.isBuiltin() and self.module.types.get(iterable_ty) == .array; + const base = if (is_array) (self.getExprAlloca(fe.iterable) orelse iterable) else iterable; + break :blk self.builder.emit(.{ .index_gep = .{ .lhs = base, .rhs = idx_val } }, bind_ty); + } else self.builder.emit(.{ .index_get = .{ .lhs = iterable, .rhs = idx_val } }, bind_ty); var body_scope = Scope.init(self.alloc, self.scope); const old_scope = self.scope; self.scope = &body_scope; - body_scope.put(fe.capture_name, .{ .ref = elem, .ty = elem_ty, .is_alloca = false }); + body_scope.put(fe.capture_name, .{ .ref = elem, .ty = bind_ty, .is_alloca = false, .is_ref_capture = fe.capture_by_ref }); // Bind index if requested if (fe.index_name) |iname| { @@ -3563,10 +3592,20 @@ pub const Lowering = struct { } const is_type_match = isTypeCategoryMatch(me); - const subject = self.lowerExpr(me.subject); - - // Detect optional subject type - const subject_ty = self.inferExprType(me.subject); + var subject = self.lowerExpr(me.subject); + var subject_ty = self.inferExprType(me.subject); + // A pointer subject (e.g. a `for xs: (*x)` element capture) — deref to + // the pointed-to union/enum so tag/payload extraction works. + if (!subject_ty.isBuiltin()) { + const sinfo = self.module.types.get(subject_ty); + if (sinfo == .pointer and !sinfo.pointer.pointee.isBuiltin()) { + const pinfo = self.module.types.get(sinfo.pointer.pointee); + if (pinfo == .tagged_union or pinfo == .@"enum") { + subject = self.builder.load(subject, sinfo.pointer.pointee); + subject_ty = sinfo.pointer.pointee; + } + } + } const is_optional_match = blk: { if (!subject_ty.isBuiltin()) { const info = self.module.types.get(subject_ty); diff --git a/src/parser.zig b/src/parser.zig index 6ef1920..41614b4 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -2864,6 +2864,7 @@ pub const Parser = struct { var capture_name: []const u8 = ""; var index_name: ?[]const u8 = null; + var capture_by_ref = false; if (range_end != null) { // Range capture is the optional cursor: `(i)` or nothing. @@ -2875,9 +2876,14 @@ pub const Parser = struct { try self.expect(.r_paren); } } else { - // Collection form: `: (capture, index?)`. + // Collection form: `: (capture, index?)`. A leading `*` on the + // capture (`(*x)`) binds it by pointer into the collection. try self.expect(.colon); try self.expect(.l_paren); + if (self.current.tag == .star) { + capture_by_ref = true; + self.advance(); + } if (self.current.tag != .identifier) return self.fail("expected capture variable name"); capture_name = self.tokenSlice(self.current); self.advance(); @@ -2898,6 +2904,7 @@ pub const Parser = struct { .capture_name = capture_name, .index_name = index_name, .range_end = range_end, + .capture_by_ref = capture_by_ref, } }); } diff --git a/tests/expected/for-by-ref-capture.exit b/tests/expected/for-by-ref-capture.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/tests/expected/for-by-ref-capture.exit @@ -0,0 +1 @@ +0 diff --git a/tests/expected/for-by-ref-capture.txt b/tests/expected/for-by-ref-capture.txt new file mode 100644 index 0000000..ff30b1f --- /dev/null +++ b/tests/expected/for-by-ref-capture.txt @@ -0,0 +1,3 @@ +101 102 103 +circle 2.000000 +none