From 625ba0fb27bc7f0e67ac5e8fcb1e13b2608f9efb Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 18 Jun 2026 17:51:49 +0300 Subject: [PATCH] comptime VM: memory = arena of stable host allocations; Addr = real host pointer (Phase 4D.0) Replace the growable ArrayList(u8) flat buffer (reallocs/MOVES on growth) with a std.heap.ArenaAllocator. Each allocBytes is a separate arena allocation that never moves and is freed wholesale on deinit -- no per-object free, no cap, no fixed buffer. Addr is now the allocation's ABSOLUTE host pointer (@intFromPtr), not an offset, so a flat-memory pointer and an FFI-returned host pointer are the same kind of value -- the FFI bridge (4D.1) passes them to/from libc with zero translation and no per-call pinning (the moving-buffer hazard is gone by construction). readWord/writeWord/bytes deref the absolute pointer with a null-check bail (the malformed-IR / null-deref safety contract). Dropped the offset-based upper-bounds check (can't bound an absolute pointer; Frame.bad_ref still catches the dominant malformed-IR vector) and the test-only mark/reset (arena has no reset-to-mark; the VM never used them outside tests). 697/0 both gates + all unit tests (rewrote the two Machine tests). Pure refactor, no comptime behavior change. --- current/CHECKPOINT-COMPILER-API.md | 17 +++++ src/ir/comptime_vm.test.zig | 37 +++++------ src/ir/comptime_vm.zig | 101 +++++++++++++---------------- 3 files changed, 80 insertions(+), 75 deletions(-) diff --git a/current/CHECKPOINT-COMPILER-API.md b/current/CHECKPOINT-COMPILER-API.md index 8ec7f0a8..eea1144d 100644 --- a/current/CHECKPOINT-COMPILER-API.md +++ b/current/CHECKPOINT-COMPILER-API.md @@ -352,6 +352,23 @@ when reached (sentinels or accessor fns; see the design doc Risks). `List` growth; orthogonal, see `current/CHECKPOINT-METATYPE.md`.) 
## Log +- **Phase 4D.0 (VM plan) — comptime VM memory = an ARENA of stable host allocations; `Addr` = real host pointer (2026-06-18).** + Replaced the growable `ArrayList(u8)` flat buffer (which reallocs/MOVES on growth) with a + `std.heap.ArenaAllocator`: each `allocBytes` is a separate arena allocation that never moves and + is freed wholesale on `deinit` (no per-object free, no cap, no fixed buffer). **`Addr` is now the + allocation's absolute host pointer** (`@intFromPtr`), not an offset — so a flat-memory pointer and + an FFI-returned host pointer are the SAME kind of value, and the FFI bridge (4D.1) can pass them + to/from libc with ZERO translation and no per-call pinning (the original moving-buffer hazard is + gone by construction). `Machine.readWord/writeWord/bytes` deref the absolute pointer directly, + keeping the null-check bail (the malformed-IR / null-deref safety contract). Dropped the + offset-based upper-bounds check (can't bound an absolute pointer; the `Frame.bad_ref` guard still + catches the dominant malformed-IR vector) and the test-only `mark`/`reset` (the arena has no + cheap reset-to-mark; the VM never used them outside tests). Decision rationale (user): use a + GPA-like allocator, no artificial buffer limits. **697/0 BOTH gates + all unit tests** (rewrote + the two Machine tests: null-deref bail + arena-stability-across-grows). Pure refactor, no + comptime behavior change. **Next (4D.1):** extern-call dispatch in `Vm.invoke` — marshal args + (scalars by value, pointers as the host pointer they already are), call via `host_ffi` + trampolines, return scalars/pointers; a new `#run` libc example as the corpus guard. 
 - **Phase 4A.1 (VM plan) — `box_any`/`unbox_any` on the VM + `.any` as a 16-byte aggregate (2026-06-18).**
   Ported the Any-boxing conversion pair: `box_any` allocates the 16-byte `{ type_tag@0, value@8 }` box
   (tag = source TypeId index, matching the legacy comptime interp), writing a word source's
diff --git a/src/ir/comptime_vm.test.zig b/src/ir/comptime_vm.test.zig
index fb28eba8..3b2ed7ec 100644
--- a/src/ir/comptime_vm.test.zig
+++ b/src/ir/comptime_vm.test.zig
@@ -1396,24 +1396,21 @@ test "comptime_vm: a malformed operand TYPE ref bails (refTy), not a panic" {
     try std.testing.expectError(error.Unsupported, v.run(&fb.func, &.{}));
 }
 
-test "comptime_vm: hardened accessors return OutOfBounds, not a panic" {
+test "comptime_vm: hardened accessors return OutOfBounds on null, not a panic" {
     var m = vm.Machine.init(std.testing.allocator);
     defer m.deinit();
 
     const addr = m.allocBytes(8, 8);
+    try std.testing.expect(addr != vm.null_addr);
 
-    // Null address (reserved guard) → OutOfBounds on every accessor.
+    // Null address → OutOfBounds on every accessor (the malformed-IR / null-deref
+    // safety contract `tryEval` relies on — bail, never crash).
     try std.testing.expectError(error.OutOfBounds, m.readWord(vm.null_addr, 8));
     try std.testing.expectError(error.OutOfBounds, m.writeWord(vm.null_addr, 8, 0));
     try std.testing.expectError(error.OutOfBounds, m.bytes(vm.null_addr, 4));
 
-    // Past the end of allocated memory → OutOfBounds.
-    const past = m.mark() + 64;
-    try std.testing.expectError(error.OutOfBounds, m.readWord(@intCast(past), 1));
-    try std.testing.expectError(error.OutOfBounds, m.bytes(@intCast(past), 1));
-
-    // Straddling the end (last valid byte + an oversized read) → OutOfBounds.
-    try std.testing.expectError(error.OutOfBounds, m.readWord(addr + 4, 8));
+    // An oversized scalar read (> 8 bytes) → OutOfBounds, rejected before any deref.
+    try std.testing.expectError(error.OutOfBounds, m.readWord(addr, 16));
 
     // A zero-length view is always valid (no memory touched), even at null.
     try std.testing.expectEqual(@as(usize, 0), (try m.bytes(vm.null_addr, 0)).len);
@@ -1438,20 +1435,22 @@ test "comptime_vm tryEval: deref of a null pointer bails (null, not a crash)" {
     try std.testing.expect(vm.tryEval(alloc, &module, bad_id) == null);
 }
 
-test "comptime_vm: mark/reset reclaims the stack region" {
+test "comptime_vm: arena allocations are aligned, non-null, and stable across grows" {
     var m = vm.Machine.init(std.testing.allocator);
     defer m.deinit();
 
-    _ = m.allocBytes(16, 8);
-    const top = m.mark();
-    const reclaimed = m.allocBytes(64, 8);
-    try std.testing.expect(m.mark() > top);
-    m.reset(top);
-    try std.testing.expectEqual(top, m.mark());
+    const a = m.allocBytes(16, 8);
+    try std.testing.expect(a != vm.null_addr);
+    try std.testing.expectEqual(@as(u64, 0), a % 8);
+    try m.writeWord(a, 8, 0xCAFEBABE);
 
-    // After reset the freed region is handed back out again (same address).
-    const reused = m.allocBytes(64, 8);
-    try std.testing.expectEqual(reclaimed, reused);
+    // A later (much larger) allocation must NOT move or clobber the first — the
+    // arena never relocates an existing allocation (the property the FFI bridge
+    // relies on).
+    const b = m.allocBytes(1 << 20, 16);
+    try std.testing.expect(b != vm.null_addr);
+    try std.testing.expectEqual(@as(u64, 0), b % 16);
+    try std.testing.expectEqual(@as(u64, 0xCAFEBABE), try m.readWord(a, 8));
 }
 
 test "comptime_vm: Frame register file round-trips (no stack reclaim)" {
diff --git a/src/ir/comptime_vm.zig b/src/ir/comptime_vm.zig
index ce3a299d..75044171 100644
--- a/src/ir/comptime_vm.zig
+++ b/src/ir/comptime_vm.zig
@@ -45,9 +45,13 @@ extern fn sx_trace_push(frame: u64) void;
 extern fn sx_trace_clear() void;
 const Span = inst_mod.Span;
 
-/// A byte offset into the machine's flat memory. `null_addr` (0) is reserved as a
-/// never-allocated sentinel, so a zeroed register reads as null rather than a
-/// valid object — mirroring how the legacy `Value` model distinguishes `null_val`.
+/// A comptime memory address — a REAL host pointer (`@intFromPtr`), since the
+/// machine allocates each object from an arena that never moves it. `null_addr` (0)
+/// is the null sentinel (no allocation is ever at address 0), so a zeroed register
+/// reads as null — mirroring how the legacy `Value` model distinguishes `null_val`.
+/// Because addresses are absolute host pointers, a flat-memory pointer and an
+/// FFI-returned host pointer are the SAME kind of value: the FFI bridge hands them
+/// to / from real libc with no translation (Phase 4D).
 pub const Addr = u64;
 pub const null_addr: Addr = 0;
 
@@ -55,85 +59,70 @@ pub const null_addr: Addr = 0;
 /// type tells the executor which.
 pub const Reg = u64;
 
-/// The flat-memory machine: one linear byte buffer serving as both the comptime
-/// stack and heap, with a bump allocator and stack-mark reclamation.
+/// The comptime memory machine: an ARENA of host allocations serving as the
+/// comptime stack + heap. Each `allocBytes` is a separate arena allocation that
+/// NEVER moves and is freed wholesale on `deinit` (no per-object free — comptime is
+/// short-lived). There is NO fixed buffer and NO size cap: the arena grows through
+/// its backing allocator on demand. `Addr` is the allocation's REAL host pointer,
+/// so a flat-memory pointer and an FFI-returned host pointer are interchangeable —
+/// the FFI bridge passes them to / from libc untouched (Phase 4D).
 pub const Machine = struct {
-    mem: std.ArrayList(u8),
-    gpa: std.mem.Allocator,
+    arena: std.heap.ArenaAllocator,
 
-    /// Reserve a small guard prefix so `allocBytes` never returns `null_addr` (0)
-    /// — a zeroed register must read as null, not as a real object at offset 0.
     pub fn init(gpa: std.mem.Allocator) Machine {
-        var m = Machine{ .mem = .empty, .gpa = gpa };
-        m.mem.appendNTimes(gpa, 0, 8) catch @panic("comptime VM: out of memory reserving guard");
-        return m;
+        return .{ .arena = std.heap.ArenaAllocator.init(gpa) };
     }
 
     pub fn deinit(self: *Machine) void {
-        self.mem.deinit(self.gpa);
+        self.arena.deinit();
     }
 
-    /// Bump-allocate `size` bytes aligned to `alignment` (zero-initialised);
-    /// returns the address. `size == 0` still returns a valid (aligned) address
-    /// distinct from `null_addr`. Allocations are reclaimed wholesale by
-    /// `reset(mark())` — there is no per-object free (comptime is short-lived).
+    /// Allocate `size` ZEROED bytes aligned to `alignment`; returns the address (a
+    /// stable host pointer). `size == 0` still yields a valid, non-null address.
+    /// Over-allocates to honor a RUNTIME alignment (`Allocator.alignedAlloc` needs a
+    /// comptime alignment) and aligns the base up within the block.
     pub fn allocBytes(self: *Machine, size: usize, alignment: usize) Addr {
         const a = if (alignment == 0) 1 else alignment;
-        const cur = self.mem.items.len;
-        const aligned = std.mem.alignForward(usize, cur, a);
-        const pad = aligned - cur;
-        self.mem.appendNTimes(self.gpa, 0, pad + size) catch @panic("comptime VM: out of memory");
+        const n = @max(size, 1);
+        const raw = self.arena.allocator().alloc(u8, n + a - 1) catch @panic("comptime VM: out of memory");
+        @memset(raw, 0);
+        const aligned = std.mem.alignForward(usize, @intFromPtr(raw.ptr), a);
         return @intCast(aligned);
     }
 
-    /// Current stack high-water mark — pair with `reset` to reclaim a region.
-    pub fn mark(self: *const Machine) usize {
-        return self.mem.items.len;
-    }
-
-    /// Reclaim everything allocated after `m` (a prior `mark()`), keeping the
-    /// backing capacity for reuse.
-    pub fn reset(self: *Machine, m: usize) void {
-        std.debug.assert(m <= self.mem.items.len);
-        self.mem.shrinkRetainingCapacity(m);
-    }
-
     /// Read a `size`-byte (1/2/4/8) little-endian scalar at `addr` into a register
-    /// word (zero-extended). Bounds- and null-checked: a null / out-of-range /
-    /// oversized access returns `error.OutOfBounds` (NOT a debug panic) so a
-    /// malformed comptime run BAILS to the legacy fallback instead of crashing the
-    /// compiler. This is the safety contract `tryEval` relies on for arbitrary funcs.
-    pub fn readWord(self: *const Machine, addr: Addr, size: usize) error{OutOfBounds}!Reg {
+    /// word (zero-extended). A null / oversized access, or an `addr` that does not
+    /// fit a host `usize`, returns `error.OutOfBounds` (NOT a panic) so a malformed
+    /// comptime run BAILS to the legacy fallback. (Addresses are absolute host
+    /// pointers, so there is no upper-bound check — a non-null wild address would
+    /// fault; the `Frame` `bad_ref` guard catches the dominant malformed-IR vector.)
+    pub fn readWord(_: *const Machine, addr: Addr, size: usize) error{OutOfBounds}!Reg {
         if (addr == null_addr or size > 8) return error.OutOfBounds;
-        const a: usize = std.math.cast(usize, addr) orelse return error.OutOfBounds;
-        if (a >= self.mem.items.len or size > self.mem.items.len - a) return error.OutOfBounds;
+        const p: [*]const u8 = @ptrFromInt(std.math.cast(usize, addr) orelse return error.OutOfBounds);
         var buf: [8]u8 = @splat(0);
-        @memcpy(buf[0..size], self.mem.items[a .. a + size]);
+        @memcpy(buf[0..size], p[0..size]);
         return std.mem.readInt(u64, &buf, .little);
     }
 
-    /// Write the low `size` bytes (1/2/4/8) of register word `val` little-endian
-    /// at `addr`. Bounds- and null-checked → `error.OutOfBounds` (not a panic).
-    pub fn writeWord(self: *Machine, addr: Addr, size: usize, val: Reg) error{OutOfBounds}!void {
+    /// Write the low `size` bytes (1/2/4/8) of register word `val` little-endian at
+    /// `addr`. Null- and `usize`-range-checked → `error.OutOfBounds` (not a panic).
+    pub fn writeWord(_: *Machine, addr: Addr, size: usize, val: Reg) error{OutOfBounds}!void {
         if (addr == null_addr or size > 8) return error.OutOfBounds;
-        const a: usize = std.math.cast(usize, addr) orelse return error.OutOfBounds;
-        if (a >= self.mem.items.len or size > self.mem.items.len - a) return error.OutOfBounds;
+        const p: [*]u8 = @ptrFromInt(std.math.cast(usize, addr) orelse return error.OutOfBounds);
         var buf: [8]u8 = undefined;
         std.mem.writeInt(u64, &buf, val, .little);
-        @memcpy(self.mem.items[a .. a + size], buf[0..size]);
+        @memcpy(p[0..size], buf[0..size]);
     }
 
     /// A mutable byte view of `len` bytes at `addr` (for aggregate copies / slice
-    /// payloads). Bounds- and null-checked → `error.OutOfBounds` (not a panic). A
-    /// zero-length view is always valid (no memory is touched). The slice is
-    /// invalidated by any subsequent `allocBytes` that grows the backing — re-fetch
-    /// after allocating.
-    pub fn bytes(self: *Machine, addr: Addr, len: usize) error{OutOfBounds}![]u8 {
-        if (len == 0) return self.mem.items[0..0];
+    /// payloads). Null- and `usize`-range-checked → `error.OutOfBounds`. A zero-length
+    /// view is always valid. The view stays valid across later `allocBytes` — the
+    /// arena never moves an allocation.
+    pub fn bytes(_: *Machine, addr: Addr, len: usize) error{OutOfBounds}![]u8 {
+        if (len == 0) return &[_]u8{};
         if (addr == null_addr) return error.OutOfBounds;
-        const a: usize = std.math.cast(usize, addr) orelse return error.OutOfBounds;
-        if (a >= self.mem.items.len or len > self.mem.items.len - a) return error.OutOfBounds;
-        return self.mem.items[a .. a + len];
+        const p: [*]u8 = @ptrFromInt(std.math.cast(usize, addr) orelse return error.OutOfBounds);
+        return p[0..len];
     }
 };
 