From b710a0a42a45682c652eb6a93bbc96caadb18abb Mon Sep 17 00:00:00 2001 From: agra Date: Mon, 25 May 2026 15:23:13 +0300 Subject: [PATCH] lang: `xx <lvalue>` borrows the operand's storage instead of heap-copying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `xx <lvalue>` used to heap-copy the value through context.allocator. The protocol value's `ctx` pointed at the heap copy; the original local was left behind, untouched. Mutations through the protocol never reached the original, and direct reads of the original never saw protocol mutations. Two-fork bug, silent, easy to write by mistake. New rule (Option 3 in the discussion): - `xx <lvalue>` — identifier, field access, index expression, deref — borrows the operand's storage. No heap copy, no `free` needed. - `xx <rvalue>` — struct literal, function-call result, arithmetic, etc. — heap-copies through context.allocator. Unchanged from today. - `xx @ptr` and `xx <pointer>` — borrows the pointee. Unchanged. Single switch in `buildProtocolErasure` ([lower.zig:10334](src/ir/lower.zig#L10334)) gated by a new `isLvalueExpr` helper ([lower.zig:10322](src/ir/lower.zig#L10322)). Struct-typed operand: if the AST shape is identifier/field/index/deref, emit `lowerExprAsPtr(operand_node)` and skip the heap-copy; otherwise keep the alloca-store-heap_copy path. specs.md §3 ownership table extended to three rows (rvalue, lvalue, pointer) with examples and rationale per row. Regressions: - `examples/130-xx-value-routes-through-context-allocator.sx` — the Phase 1.1 witness for heap-copy-via-context-allocator. Previous shape (`xx <lvalue>`) is now a borrow under Option 3 and no longer exercises the heap-copy path. Rewritten to use a struct literal (`xx ByValue.{...}`) which still heap-copies through context.allocator — Tracer.count = 1 as before. - `examples/135-xx-lvalue-borrows.sx` — new test. 
Dereferences a TrackingAllocator into a stack value, does `xx tracker` inside a push Context, and asserts alloc_count/dealloc_count on the LOCAL go up. Under old semantics this would have stayed at 0 (heap copy got the increments, local stayed stale). 157/157 example tests pass; chess clean on macOS / iOS sim / Android (`tools/verify-step.sh` ran green immediately before this work). --- current/CHECKPOINT-MEM.md | 36 ++++++++++++++++- ...-value-routes-through-context-allocator.sx | 15 ++++--- examples/135-xx-lvalue-borrows.sx | 28 +++++++++++++ specs.md | 40 ++++++++++++++----- src/ir/lower.zig | 36 ++++++++++++++--- tests/expected/135-xx-lvalue-borrows.exit | 1 + tests/expected/135-xx-lvalue-borrows.txt | 2 + 7 files changed, 135 insertions(+), 23 deletions(-) create mode 100644 examples/135-xx-lvalue-borrows.sx create mode 100644 tests/expected/135-xx-lvalue-borrows.exit create mode 100644 tests/expected/135-xx-lvalue-borrows.txt diff --git a/current/CHECKPOINT-MEM.md b/current/CHECKPOINT-MEM.md index 9d9b322..2aee981 100644 --- a/current/CHECKPOINT-MEM.md +++ b/current/CHECKPOINT-MEM.md @@ -5,6 +5,28 @@ Tracking checkpoint for the mem.sx Zig-aligned implementation ## Last completed step +- **`xx <lvalue>` borrows the operand's storage** (Option 3 in the + protocol-erasure design discussion). Today's behavior — `xx + <lvalue>` heap-copies the value — was a silent footgun: + the protocol value pointed at the heap copy, the original local + stayed stale, mutations through the protocol weren't visible to the + original (and vice versa). Under the new rule, when the operand + names existing storage (identifier, field access, index expression, + dereferenced pointer), `xx` takes its address and the protocol + borrows. Heap-copy is reserved for `xx <rvalue>` — struct literals, + function-call results, arithmetic expressions, anything without its + own storage. + + Single point of change at `buildProtocolErasure` in `lower.zig:10334`, + via a new `isLvalueExpr` helper at `lower.zig:10322`. 
specs.md §3 + ownership table updated. The `examples/130-...` regression that + previously tested heap-copy on `xx <lvalue>` now tests `xx + <rvalue>` (still the heap-copy path); new regression + `examples/135-xx-lvalue-borrows.sx` witnesses the borrow path via + TrackingAllocator. 157/157 example tests + chess clean across all + three platforms (`tools/verify-step.sh` gate ran green right + before this work landed). + - **Phase 1.4 — `valueToLLVMConst` upgraded to handle every interp `Value` variant.** The serializer at `emit_llvm.zig:734` used to collapse anything past int/float/boolean into `LLVMConstNull(ty)` — @@ -220,7 +242,19 @@ Allocator value naturally. ## Log -- **2026-05-25 (latest)** — Phase 1.4 shipped. `valueToLLVMConst` +- **2026-05-25 (latest)** — `xx <lvalue>` semantics changed to borrow. + Single change at `lower.zig:10334` (`buildProtocolErasure`) gated by + new `isLvalueExpr` helper at `lower.zig:10322`. specs.md §3 + ownership table extended (three modes: rvalue / lvalue / pointer). + `examples/130-xx-value-routes-through-context-allocator.sx` updated + to use a struct literal (rvalue) as the operand — the heap-copy + routing through `context.allocator` is what Phase 1.1 actually + proves, and that path is still active for rvalues. New regression + at `examples/135-xx-lvalue-borrows.sx` witnesses the borrow path + via TrackingAllocator counts on the local. 157/157 + chess green + on all three platforms (`tools/verify-step.sh` ran green + immediately before this). +- **2026-05-25 (penultimate)** — Phase 1.4 shipped. 
`valueToLLVMConst` (`emit_llvm.zig:734`) replaced the primitive-only switch with a full serializer covering null_val, void_val, undef, func_ref, string, and aggregate (struct + array via diff --git a/examples/130-xx-value-routes-through-context-allocator.sx b/examples/130-xx-value-routes-through-context-allocator.sx index f72cc3e..112846d 100644 --- a/examples/130-xx-value-routes-through-context-allocator.sx +++ b/examples/130-xx-value-routes-through-context-allocator.sx @@ -1,8 +1,13 @@ -// Phase 1.1 — the compiler-internal heap-copy that backs `xx value` +// Phase 1.1 — the compiler-internal heap-copy that backs `xx <rvalue>` // protocol erasure must dispatch through `context.allocator`, not call // libc malloc directly. So when a `push Context.{ allocator = tracer }` -// block is active, a `xx struct_value` inside it MUST be allocated by -// the tracker. +// block is active, a `xx StructLiteral.{}` inside it MUST be allocated +// by the tracker. +// +// Note: `xx` only heap-copies for RVALUES (struct literals, call results). +// `xx <lvalue>` (an identifier, field access, index, or deref) borrows +// the operand's storage, so it never allocates and never reaches this +// path. See specs.md §3 — Protocol value ownership and lifetime. #import "modules/std.sx"; Tracer :: struct { @@ -30,8 +35,8 @@ ByValue :: struct { x: s64; y: s64; } main :: () -> s32 { tracer := Tracer.init(); push Context.{ allocator = xx tracer, data = null } { - bv : ByValue = .{ x = 1, y = 2 }; - ignore : Allocator = xx bv; + // Struct-literal operand: rvalue → heap-copy through context.allocator. + ignore : Allocator = xx ByValue.{ x = 1, y = 2 }; _ = ignore; } print("Tracer.count = {}\n", tracer.count); diff --git a/examples/135-xx-lvalue-borrows.sx b/examples/135-xx-lvalue-borrows.sx new file mode 100644 index 0000000..f630572 --- /dev/null +++ b/examples/135-xx-lvalue-borrows.sx @@ -0,0 +1,28 @@ +// Option 3 — `xx <lvalue>` borrows the operand's storage instead of +// heap-copying. 
The protocol value's `ctx` points directly at the local; +// mutations through the protocol are visible to the original. +// +// The witness is TrackingAllocator: incrementing the parent allocator's +// counter happens through the Allocator protocol value. If `xx tracker` +// heap-copied the Tracker, the parent counter would land in the copy +// and the local would stay at zero. With Option 3 the local sees the +// increments because they ARE the local. +#import "modules/std.sx"; +#import "modules/allocators.sx"; + +main :: () -> s32 { + gpa := GPA.init(); + tracker_ptr := TrackingAllocator.init(xx gpa); + tracker := tracker_ptr.*; // dereference into a stack-local VALUE + + // xx tracker — operand is an identifier (lvalue), so the protocol + // borrows tracker's storage. No heap copy. Mutations propagate. + push Context.{ allocator = xx tracker, data = null } { + p := context.allocator.alloc(128); + context.allocator.dealloc(p); + } + + print("alloc_count = {}\n", tracker.alloc_count); + print("dealloc_count = {}\n", tracker.dealloc_count); + return 0; +} diff --git a/specs.md b/specs.md index 312a525..06ae692 100644 --- a/specs.md +++ b/specs.md @@ -372,32 +372,50 @@ allocators : [2]Allocator = .[xx gpa, xx arena]; // protocol values in array #### Ownership and Lifetime -Protocol values have two ownership modes depending on how they are created: +Protocol values have two ownership modes. The mode is selected by the +shape of the operand to `xx`: -| Conversion | `ctx` points to | Lifetime | Who frees | -|------------|----------------|----------|-----------| -| `xx value` | Heap-allocated copy | Until `free(p)` | Caller | -| `xx @ptr` | Original pointee | Tied to pointee | Caller manages pointee | +| Operand shape | `ctx` points to | Lifetime | Who frees | +|---|---|---|---| +| `xx <rvalue>` (struct literal, call result, etc.) 
| Heap-allocated copy | Until `free(p)` | Caller | +| `xx <lvalue>` (identifier, field, index, deref) | The named storage | Tied to that storage's scope | Caller manages the storage | +| `xx <pointer>` / `xx @ptr` | Original pointee | Tied to pointee | Caller manages pointee | -**`xx value`** — the concrete data is heap-copied so the protocol value is self-contained. -It can be stored in containers, returned from functions, and outlives the scope where it was created. -Call `free(p)` to release the backing memory when done: +**`xx <rvalue>`** — when the operand has no storage of its own (struct +literal, function-call result, arithmetic expression, etc.) the concrete +data is heap-copied through `context.allocator` so the protocol value is +self-contained. It can be stored in containers, returned from functions, +and outlives the scope where it was created. Call `free(p)` to release +the backing memory when done: ```sx s : Sizable = xx Widget.{ value = 42 }; // heap-copies Widget print("{}\n", s.size()); free(s); // frees the heap-allocated Widget copy ``` -**`xx @ptr`** — the protocol borrows the pointer. The protocol value is only valid as long as -the pointee is alive. Mutations through the protocol are visible through the original pointer: +**`xx <lvalue>`** — when the operand names existing storage (a local +variable, struct field, array element, or dereferenced pointer) the +protocol borrows that storage directly. No heap copy, no allocation, +no `free` needed; mutations through the protocol are visible to the +original. The protocol value is only valid while the named storage is +alive: ```sx w := Widget.{ value = 0 }; -s : Sizable = xx @w; // borrows &w +s : Sizable = xx w; // borrows w's storage; no copy s.add(5); // modifies w through ctx print("{}\n", w.value); // 5 // do NOT free(s) — w owns the data ``` +**`xx @ptr`** is equivalent to `xx <lvalue>` for the dereferenced +pointee — the protocol borrows. 
It's mostly redundant under the +lvalue rule above but stays valid for explicit clarity when the +operand is a pointer you want to make obvious is being borrowed: +```sx +w := Widget.{ value = 0 }; +s : Sizable = xx @w; // identical to `xx w` — borrows w +``` + **Vtables** are global constants — shared across all protocol values of the same `(Protocol, ConcreteType)` pair. They are never allocated or freed at runtime. #### Default Methods diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 56d6bf1..87cc0d7 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -10318,6 +10318,18 @@ pub const Lowering = struct { return self.builder.call(fid, final_args, ret_ty); } + /// True for expression shapes that name an addressable storage location + /// (variables, fields, array elements, dereferenced pointers). Used by + /// `xx <operand>` to decide between borrow (lvalue → take the + /// address) and heap-copy (rvalue → allocate a fresh copy). + fn isLvalueExpr(self: *Lowering, node: *const Node) bool { + _ = self; + return switch (node.data) { + .identifier, .field_access, .index_expr, .deref_expr => true, + else => false, + }; + } + /// Build a protocol value from a concrete value via xx conversion. fn buildProtocolErasure(self: *Lowering, operand: Ref, operand_node: *const Node, src_ty: TypeId, dst_ty: TypeId) Ref { const dst_info = self.module.types.get(dst_ty); @@ -10339,14 +10351,26 @@ pub const Lowering = struct { concrete_ty = pointee; heap_copy = false; } else if (src_info == .@"struct") { - // xx acc — operand is a value, need to take address + heap-copy + // Struct-typed operand. Split on lvalue-ness: + // - lvalue (identifier, field, index, deref): borrow the + // storage the operand already names. No heap copy; the + // protocol value's ctx points at the caller's slot, and + // mutations through the protocol are visible to the + // original. Lifetime is the caller's responsibility. 
+ // - rvalue (struct literal, call result, etc.): heap-copy + // into a fresh allocation so the protocol value is + // self-contained and outlives this expression. concrete_type_name = self.module.types.getString(src_info.@"struct".name); concrete_ty = src_ty; - heap_copy = true; - // Alloca + store to get a pointer (will be heap-copied in buildProtocolValue) - const slot = self.builder.alloca(src_ty); - self.builder.store(slot, operand); - concrete_ptr = slot; + if (self.isLvalueExpr(operand_node)) { + concrete_ptr = self.lowerExprAsPtr(operand_node); + heap_copy = false; + } else { + heap_copy = true; + const slot = self.builder.alloca(src_ty); + self.builder.store(slot, operand); + concrete_ptr = slot; + } } } diff --git a/tests/expected/135-xx-lvalue-borrows.exit b/tests/expected/135-xx-lvalue-borrows.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/tests/expected/135-xx-lvalue-borrows.exit @@ -0,0 +1 @@ +0 diff --git a/tests/expected/135-xx-lvalue-borrows.txt b/tests/expected/135-xx-lvalue-borrows.txt new file mode 100644 index 0000000..51b4cfe --- /dev/null +++ b/tests/expected/135-xx-lvalue-borrows.txt @@ -0,0 +1,2 @@ +alloc_count = 1 +dealloc_count = 1