comptime VM: memory = arena of stable host allocations; Addr = real host pointer (Phase 4D.0)

Replace the growable ArrayList(u8) flat buffer (reallocs/MOVES on growth) with a
std.heap.ArenaAllocator. Each allocBytes is a separate arena allocation that never
moves and is freed wholesale on deinit -- no per-object free, no cap, no fixed
buffer. Addr is now the allocation's ABSOLUTE host pointer (@intFromPtr), not an
offset, so a flat-memory pointer and an FFI-returned host pointer are the same kind
of value -- the FFI bridge (4D.1) passes them to/from libc with zero translation and
no per-call pinning (the moving-buffer hazard is gone by construction).

readWord/writeWord/bytes deref the absolute pointer with a null-check bail (the
malformed-IR / null-deref safety contract). Dropped the offset-based upper-bounds
check (can't bound an absolute pointer; Frame.bad_ref still catches the dominant
malformed-IR vector) and the test-only mark/reset (arena has no reset-to-mark; the
VM never used them outside tests).

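697/0 both gates + all unit tests (rewrote the two Machine tests). Pure refactor: no
comptime behavior change for well-formed IR (the one difference is that a wild non-null
address now faults instead of bailing with OutOfBounds, per the dropped bounds check above).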
This commit is contained in:
agra
2026-06-18 17:51:49 +03:00
parent 1526d198e2
commit 625ba0fb27
3 changed files with 80 additions and 75 deletions

View File

@@ -352,6 +352,23 @@ when reached (sentinels or accessor fns; see the design doc Risks).
`List` growth; orthogonal, see `current/CHECKPOINT-METATYPE.md`.)
## Log
- **Phase 4D.0 (VM plan) — comptime VM memory = an ARENA of stable host allocations; `Addr` = real host pointer (2026-06-18).**
Replaced the growable `ArrayList(u8)` flat buffer (which reallocs/MOVES on growth) with a
`std.heap.ArenaAllocator`: each `allocBytes` is a separate arena allocation that never moves and
is freed wholesale on `deinit` (no per-object free, no cap, no fixed buffer). **`Addr` is now the
allocation's absolute host pointer** (`@intFromPtr`), not an offset — so a flat-memory pointer and
an FFI-returned host pointer are the SAME kind of value, and the FFI bridge (4D.1) can pass them
to/from libc with ZERO translation and no per-call pinning (the original moving-buffer hazard is
gone by construction). `Machine.readWord/writeWord/bytes` deref the absolute pointer directly,
keeping the null-check bail (the malformed-IR / null-deref safety contract). Dropped the
offset-based upper-bounds check (can't bound an absolute pointer; the `Frame.bad_ref` guard still
catches the dominant malformed-IR vector) and the test-only `mark`/`reset` (the arena has no
cheap reset-to-mark; the VM never used them outside tests). Decision rationale (user): use a
GPA-like allocator, no artificial buffer limits. **697/0 BOTH gates + all unit tests** (rewrote
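the two Machine tests: null-deref bail + arena-stability-across-grows). Pure refactor: no
comptime behavior change for well-formed IR (a wild non-null address now faults instead of
bailing, per the dropped bounds check). **Next (4D.1):** extern-call dispatch in `Vm.invoke` — marshal args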
(scalars by value, pointers as the host pointer they already are), call via `host_ffi`
trampolines, return scalars/pointers; a new `#run` libc example as the corpus guard.
- **Phase 4A.1 (VM plan) — `box_any`/`unbox_any` on the VM + `.any` as a 16-byte aggregate (2026-06-18).**
Ported the Any-boxing conversion pair: `box_any` allocates the 16-byte `{ type_tag@0, value@8 }`
box (tag = source TypeId index, matching the legacy comptime interp), writing a word source's

View File

@@ -1396,24 +1396,21 @@ test "comptime_vm: a malformed operand TYPE ref bails (refTy), not a panic" {
try std.testing.expectError(error.Unsupported, v.run(&fb.func, &.{}));
}
test "comptime_vm: hardened accessors return OutOfBounds, not a panic" {
test "comptime_vm: hardened accessors return OutOfBounds on null, not a panic" {
var m = vm.Machine.init(std.testing.allocator);
defer m.deinit();
const addr = m.allocBytes(8, 8);
try std.testing.expect(addr != vm.null_addr);
// Null address (reserved guard) → OutOfBounds on every accessor.
// Null address → OutOfBounds on every accessor (the malformed-IR / null-deref
// safety contract `tryEval` relies on — bail, never crash).
try std.testing.expectError(error.OutOfBounds, m.readWord(vm.null_addr, 8));
try std.testing.expectError(error.OutOfBounds, m.writeWord(vm.null_addr, 8, 0));
try std.testing.expectError(error.OutOfBounds, m.bytes(vm.null_addr, 4));
// Past the end of allocated memory → OutOfBounds.
const past = m.mark() + 64;
try std.testing.expectError(error.OutOfBounds, m.readWord(@intCast(past), 1));
try std.testing.expectError(error.OutOfBounds, m.bytes(@intCast(past), 1));
// Straddling the end (last valid byte + an oversized read) → OutOfBounds.
try std.testing.expectError(error.OutOfBounds, m.readWord(addr + 4, 8));
// An oversized scalar read (> 8 bytes) → OutOfBounds.
try std.testing.expectError(error.OutOfBounds, m.readWord(addr, 16));
// A zero-length view is always valid (no memory touched), even at null.
try std.testing.expectEqual(@as(usize, 0), (try m.bytes(vm.null_addr, 0)).len);
@@ -1438,20 +1435,22 @@ test "comptime_vm tryEval: deref of a null pointer bails (null, not a crash)" {
try std.testing.expect(vm.tryEval(alloc, &module, bad_id) == null);
}
// Verifies the allocator contract of `vm.Machine.allocBytes`: results are non-null,
// honor the requested alignment, start zeroed, and keep their address and contents
// when later allocations force the arena to grow.
test "comptime_vm: arena allocations are aligned, non-null, and stable across grows" {
    var m = vm.Machine.init(std.testing.allocator);
    defer m.deinit();

    // First allocation: non-null, 8-byte aligned, and zero-initialised.
    const a = m.allocBytes(16, 8);
    try std.testing.expect(a != vm.null_addr);
    try std.testing.expectEqual(@as(u64, 0), a % 8);
    try std.testing.expectEqual(@as(u64, 0), try m.readWord(a, 8));
    try m.writeWord(a, 8, 0xCAFEBABE);

    // A later (much larger) allocation must NOT move or clobber the first — the
    // arena never relocates an existing allocation (the property the FFI bridge
    // relies on).
    const b = m.allocBytes(1 << 20, 16);
    try std.testing.expect(b != vm.null_addr);
    try std.testing.expect(b != a);
    try std.testing.expectEqual(@as(u64, 0), b % 16);
    try std.testing.expectEqual(@as(u64, 0xCAFEBABE), try m.readWord(a, 8));
}
test "comptime_vm: Frame register file round-trips (no stack reclaim)" {

View File

@@ -45,9 +45,13 @@ extern fn sx_trace_push(frame: u64) void;
extern fn sx_trace_clear() void;
const Span = inst_mod.Span;
/// A comptime memory address: a REAL host pointer stored as an integer
/// (`@intFromPtr`), not an offset into a buffer. The machine allocates each object
/// from an arena that never moves it, so an address stays valid until the machine is
/// torn down. Because addresses are absolute host pointers, a flat-memory pointer and
/// an FFI-returned host pointer are the SAME kind of value: the FFI bridge hands them
/// to / from real libc with no translation (Phase 4D).
pub const Addr = u64;
/// The null sentinel. No allocation is ever at address 0, so a zeroed register reads
/// as null rather than as a valid object — mirroring how the legacy `Value` model
/// distinguishes `null_val`.
pub const null_addr: Addr = 0;
@@ -55,85 +59,70 @@ pub const null_addr: Addr = 0;
/// type tells the executor which.
pub const Reg = u64;
/// The comptime memory machine: an ARENA of host allocations serving as the
/// comptime stack + heap. Each `allocBytes` is a separate arena allocation that
/// NEVER moves and is freed wholesale on `deinit` (no per-object free — comptime is
/// short-lived). There is NO fixed buffer and NO size cap: the arena grows through
/// its backing allocator on demand. `Addr` is the allocation's REAL host pointer,
/// so a flat-memory pointer and an FFI-returned host pointer are interchangeable —
/// the FFI bridge passes them to / from libc untouched (Phase 4D).
pub const Machine = struct {
    /// Owns every byte handed out by `allocBytes`; released as a whole by `deinit`.
    arena: std.heap.ArenaAllocator,

    /// Create a machine whose arena draws its memory from `gpa`.
    pub fn init(gpa: std.mem.Allocator) Machine {
        return .{ .arena = std.heap.ArenaAllocator.init(gpa) };
    }

    /// Release every allocation at once. All previously returned addresses and
    /// `bytes` views are dangling afterwards.
    pub fn deinit(self: *Machine) void {
        self.arena.deinit();
    }

    /// Allocate `size` ZEROED bytes aligned to `alignment`; returns the address (a
    /// stable host pointer). `alignment == 0` is treated as 1; otherwise it is
    /// handed to `std.mem.alignForward` and must satisfy that function's
    /// requirements. `size == 0` still yields a valid, non-null address (one byte
    /// is reserved for it).
    ///
    /// Over-allocates by `alignment - 1` bytes to honor a RUNTIME alignment
    /// (`Allocator.alignedAlloc` needs a comptime alignment) and aligns the base up
    /// within the block.
    ///
    /// Panics with "comptime VM: out of memory" when the request cannot be
    /// satisfied, including when the padded size would overflow `usize`.
    pub fn allocBytes(self: *Machine, size: usize, alignment: usize) Addr {
        const oom = "comptime VM: out of memory";
        const a = if (alignment == 0) 1 else alignment;
        const n = @max(size, 1);
        // Checked add: an absurd size/alignment pair fails like any other OOM
        // instead of tripping an integer-overflow trap.
        const padded = std.math.add(usize, n, a - 1) catch @panic(oom);
        const raw = self.arena.allocator().alloc(u8, padded) catch @panic(oom);
        @memset(raw, 0);
        const aligned = std.mem.alignForward(usize, @intFromPtr(raw.ptr), a);
        return @intCast(aligned);
    }

    /// Turn `addr` into a host byte pointer, or `error.OutOfBounds` when it is null
    /// or does not fit the host's pointer width. The width check is a CHECKED cast
    /// on purpose: a 64-bit `Addr` that exceeds `usize` must bail, not panic.
    fn hostPtr(addr: Addr) error{OutOfBounds}![*]u8 {
        if (addr == null_addr) return error.OutOfBounds;
        const host = std.math.cast(usize, addr) orelse return error.OutOfBounds;
        const p: [*]u8 = @ptrFromInt(host);
        return p;
    }

    /// Read a `size`-byte (1/2/4/8) little-endian scalar at `addr` into a register
    /// word (zero-extended). A null / unrepresentable / oversized (`size > 8`)
    /// access returns `error.OutOfBounds` (NOT a panic) so a malformed comptime run
    /// BAILS to the legacy fallback rather than crashing. (Addresses are absolute
    /// host pointers, so there is no upper-bound check — a non-null wild address
    /// would fault; the `Frame` `bad_ref` guard catches the dominant malformed-IR
    /// vector before any such deref.)
    pub fn readWord(_: *const Machine, addr: Addr, size: usize) error{OutOfBounds}!Reg {
        if (size > 8) return error.OutOfBounds;
        const src: [*]const u8 = try hostPtr(addr);
        // Zero-filled staging buffer: bytes beyond `size` stay 0, which is what
        // zero-extends the value.
        var word: [8]u8 = @splat(0);
        @memcpy(word[0..size], src[0..size]);
        return std.mem.readInt(u64, &word, .little);
    }

    /// Write the low `size` bytes (1/2/4/8) of register word `val` little-endian at
    /// `addr`. A null / unrepresentable / oversized (`size > 8`) access returns
    /// `error.OutOfBounds` (not a panic). No upper-bound check is possible on an
    /// absolute pointer.
    pub fn writeWord(_: *Machine, addr: Addr, size: usize, val: Reg) error{OutOfBounds}!void {
        if (size > 8) return error.OutOfBounds;
        const dst = try hostPtr(addr);
        var word: [8]u8 = undefined;
        std.mem.writeInt(u64, &word, val, .little);
        @memcpy(dst[0..size], word[0..size]);
    }

    /// A mutable byte view of `len` bytes at `addr` (for aggregate copies / slice
    /// payloads). A zero-length view is always valid, even at `null_addr`, because
    /// no memory is touched. Otherwise a null / unrepresentable address returns
    /// `error.OutOfBounds`. The view stays valid across later `allocBytes` — the
    /// arena never moves an allocation — and dangles after `deinit`.
    pub fn bytes(_: *Machine, addr: Addr, len: usize) error{OutOfBounds}![]u8 {
        if (len == 0) return &[_]u8{};
        const base = try hostPtr(addr);
        return base[0..len];
    }
};