comptime VM: general host-FFI escape — call any extern libc fn via dlsym + host_ffi (Phase 4D.1)

Replace the "extern not ported -> bail" stub in Vm.invoke with callHostExtern:
resolve the symbol via host_ffi.lookupSymbol (dlsym RTLD_DEFAULT) and dispatch
through the host_ffi trampolines, like the legacy interp.callExtern.

Marshalling is trivial now that Addr is a real host pointer (4D.0): every WORD-kind
arg passes as usize verbatim (a scalar's bits OR a pointer, no translation), and a
pointer return is a valid Addr. Picks callPtrRet (void*-ABI) for pointer-ish
returns, callIntRet (i64-ABI) otherwise; honors variadic. Non-word
(aggregate/string/float) args+returns bail loudly (4D.2 adds them). One general
mechanism for all externs, not per-builtin special cases.

New example 0636-comptime-extern-libc (#run toupper(97)/tolower(90) -> 65/122) runs
HANDLED on the VM, output byte-matching legacy. 698/0 both gates.
This commit is contained in:
agra
2026-06-18 18:00:07 +03:00
parent 625ba0fb27
commit e7a8708287
6 changed files with 106 additions and 3 deletions

View File

@@ -352,6 +352,21 @@ when reached (sentinels or accessor fns; see the design doc Risks).
`List` growth; orthogonal, see `current/CHECKPOINT-METATYPE.md`.)
## Log
- **Phase 4D.1 (VM plan) — general host-FFI escape: the VM calls any extern libc fn via dlsym + host_ffi (2026-06-18).**
Replaced the "extern not ported → bail" stub in `Vm.invoke` with `callHostExtern`: resolve the
symbol via `host_ffi.lookupSymbol` (dlsym RTLD_DEFAULT) and dispatch through the `host_ffi`
trampolines, exactly like the legacy `interp.callExtern`. **Marshalling is now trivial because
`Addr` is a real host pointer (4D.0):** every WORD-kind arg passes as `usize` verbatim — a
scalar's bits OR a pointer, no translation — and a pointer return is a valid `Addr`. Picks
`callPtrRet` (void*-ABI) for pointer-ish returns, `callIntRet` (i64-ABI) otherwise; honors
variadic (`is_variadic and args.len > fixed`). Non-word (aggregate/string/float) args+returns bail
loudly (no silent miscall — 4D.2 adds NUL-term cstring marshalling + float). NOT per-builtin: ONE
general mechanism for all externs. New example `0636-comptime-extern-libc` (`#run toupper(97)`/
`tolower(90)` fold to 65/122) runs **HANDLED on the VM**, output byte-matching legacy. (`abs`
doesn't dlsym-resolve on macOS — a compiler builtin — and the VM fails identically to legacy,
confirming parity.) **698/0 BOTH gates** (one new example). On `reify`. **Next (4D.2):**
string/aggregate extern args (string→NUL-term cstring) + float args/returns, then `compiler_call`
(#compiler hooks, 4D.3).
- **Phase 4D.0 (VM plan) — comptime VM memory = an ARENA of stable host allocations; `Addr` = real host pointer (2026-06-18).**
Replaced the growable `ArrayList(u8)` flat buffer (which reallocs/MOVES on growth) with a
`std.heap.ArenaAllocator`: each `allocBytes` is a separate arena allocation that never moves and

View File

@@ -0,0 +1,17 @@
// Comptime host-FFI: a `#run` that calls real libc functions (`toupper`/`tolower`)
// at compile time. The comptime VM resolves the symbol via dlsym and dispatches
// through the host_ffi trampolines (Phase 4D) — a scalar arg in, a scalar return
// out — folding the result into a constant. (The legacy interpreter does the same
// via its own dlsym path; both agree.)
#import "modules/std.sx";
// Bindings to libc: declared `extern libc`, so no body is given here. Both take
// and return a single `i32`.
toupper :: (c: i32) -> i32 extern libc;
tolower :: (c: i32) -> i32 extern libc;
// Each `#run` performs the extern call during compilation and binds its result
// to a constant.
UP :: #run toupper(97); // 'a' -> 'A' = 65
LO :: #run tolower(90); // 'Z' -> 'z' = 122
// Prints the two folded constants (`toupper(97)=65 tolower(90)=122`) and returns 0.
main :: () -> i32 {
print("toupper(97)={} tolower(90)={}\n", UP, LO);
return 0;
}

View File

@@ -0,0 +1 @@
0

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1 @@
toupper(97)=65 tolower(90)=122

View File

@@ -27,6 +27,7 @@ const inst_mod = @import("inst.zig");
const types = @import("types.zig");
const mod_mod = @import("module.zig");
const interp_mod = @import("interp.zig");
const host_ffi = @import("host_ffi.zig");
const Value = interp_mod.Value;
const Inst = inst_mod.Inst;
const Ref = inst_mod.Ref;
@@ -1029,9 +1030,12 @@ pub const Vm = struct {
if (callee.compiler_welded) {
if (try self.callCompilerFn(name, args, frame, ref_types)) |r| return r;
}
// Any other extern bails → the legacy interpreter's dlsym path.
self.detail = "comptime VM: call to an extern/builtin function not yet ported";
return error.Unsupported;
// General host-FFI escape: any other extern resolves via dlsym and is
// dispatched through the host_ffi trampolines. Because `Addr` is a real
// host pointer, args pass as `usize` untouched (a scalar's bits OR a
// pointer) and a pointer return comes back as a valid `Addr` — no
// translation. Aggregate/float args+returns aren't marshaled yet (4D.2).
return self.callHostExtern(callee, name, args, frame, ref_types);
}
const argbuf = self.gpa.alloc(Reg, args.len) catch @panic("comptime VM: out of memory (call args)");
defer self.gpa.free(argbuf);
@@ -1041,6 +1045,57 @@ pub const Vm = struct {
return self.run(callee, argbuf);
}
/// Host-FFI escape for the comptime VM: invoke a real extern function by name.
///
/// The symbol is looked up with `host_ffi.lookupSymbol` and called through the
/// `host_ffi` trampolines. Each argument's register value is handed over as one
/// `usize` (an integer's bits or a host pointer, unchanged), so only WORD-kind
/// argument and return types are accepted; anything else fails with a message
/// rather than being miscalled.
///
/// - `callee` supplies the declared parameter count, `is_variadic`, and `ret`.
/// - `args` are the call's operand refs; their values are read from `frame` and
///   their types from `ref_types`.
///
/// Returns the result word (`0` for a `void`/`noreturn` extern). Fails via
/// `self.failMsg` when there are more than 8 args, the lookup errors or finds
/// nothing, a non-word arg/return is involved, or a trampoline reports an error.
///
/// NOTE(review): an integer result is taken as the full 64-bit word returned by
/// `callIntRet*`; for externs declared with a narrower return (e.g. `i32`) the
/// upper bits are whatever the trampoline leaves there — confirm in `host_ffi`.
fn callHostExtern(self: *Vm, callee: *const Function, name: []const u8, args: []const Ref, frame: *Frame, ref_types: []const TypeId) Error!Reg {
    const table = try self.requireTable();
    if (args.len > 8) {
        return self.failMsg("comptime extern call: more than 8 args (host_ffi trampolines max out at 8)");
    }

    // Resolve the symbol first: a lookup error and a missing symbol are
    // reported with distinct messages.
    const found = host_ffi.lookupSymbol(self.gpa, name) catch {
        return self.failMsg("comptime extern call: dlsym error looking up symbol");
    };
    const symbol = found orelse {
        return self.failMsg("comptime extern call: symbol not found via dlsym (target-specific binding called at compile time?)");
    };

    // Pack every argument into one machine word; only the first `args.len`
    // slots of the fixed buffer are ever written or read.
    var words: [8]usize = undefined;
    const argv = words[0..args.len];
    for (args, argv) |arg, *slot| {
        const arg_ty = try self.refTy(ref_types, arg);
        if (kindOf(table, arg_ty) != .word) {
            return self.failMsg("comptime extern call: non-word (aggregate/string/float) arg not yet marshaled on the VM");
        }
        slot.* = @intCast(frame.get(arg.index())); // scalar bits OR host pointer
    }

    // The variadic trampolines are used only when the call actually passes
    // more arguments than the callee declares.
    const fixed_count = callee.params.len;
    const is_vararg_call = callee.is_variadic and args.len > fixed_count;
    const ret_ty = callee.ret;

    // No result to marshal: make the call and hand back a zero register.
    if (ret_ty == .void or ret_ty == .noreturn) {
        if (is_vararg_call) {
            host_ffi.callVoidRetVar(symbol, fixed_count, argv) catch return self.failMsg("comptime extern call failed (void)");
        } else {
            host_ffi.callVoidRet(symbol, argv) catch return self.failMsg("comptime extern call failed (void)");
        }
        return @as(Reg, 0);
    }

    if (kindOf(table, ret_ty) != .word) {
        return self.failMsg("comptime extern call: non-word (aggregate/string/float) return not yet supported on the VM");
    }

    // Pointer-shaped returns use the pointer-returning trampolines; the
    // returned word is used as-is.
    if (isPointerish(table, ret_ty)) {
        const addr: u64 = if (is_vararg_call)
            host_ffi.callPtrRetVar(symbol, fixed_count, argv) catch return self.failMsg("comptime extern call failed (ptr)")
        else
            host_ffi.callPtrRet(symbol, argv) catch return self.failMsg("comptime extern call failed (ptr)");
        return @as(Reg, addr);
    }

    // Everything else uses the integer-returning trampolines; the result is
    // reinterpreted bit-for-bit as an unsigned word.
    const signed = if (is_vararg_call)
        host_ffi.callIntRetVar(symbol, fixed_count, argv) catch return self.failMsg("comptime extern call failed (int)")
    else
        host_ffi.callIntRet(symbol, argv) catch return self.failMsg("comptime extern call failed (int)");
    const bits: u64 = @bitCast(signed);
    return @as(Reg, bits);
}
/// Largest single comptime allocation the VM will service natively. A bogus /
/// pathological comptime `malloc` above this bails to the legacy path (which
/// calls real libc) rather than OOM-panicking the compiler via `allocBytes`.
@@ -1723,6 +1778,19 @@ pub const Vm = struct {
};
}
/// Reports whether `ty` is pointer-shaped, which selects the pointer-returning
/// extern trampoline (`callPtrRet`) instead of the integer-returning one.
///
/// True for `cstring`, for any `pointer` / `many_pointer` / `function` type, and
/// for an optional whose child satisfies `optChildIsPtr`. Every other builtin and
/// every other table entry is not pointer-shaped.
fn isPointerish(table: *const types.TypeTable, ty: TypeId) bool {
    // `cstring` is tested before the builtin guard, so it is accepted whether
    // or not `isBuiltin` covers it.
    if (ty == .cstring) return true;
    if (ty.isBuiltin()) return false;

    switch (table.get(ty)) {
        .pointer, .many_pointer, .function => return true,
        .optional => |opt| return optChildIsPtr(table, opt.child),
        else => return false,
    }
}
/// A `?T` whose child is a pointer/many-pointer/function is represented as a
/// bare pointer (null == 0), not a `{T, i1}` aggregate — mirrors `typeSizeBytes`.
fn optChildIsPtr(table: *const types.TypeTable, child: TypeId) bool {