comptime VM: general host-FFI escape — call any extern libc fn via dlsym + host_ffi (Phase 4D.1)

Replace the "extern not ported -> bail" stub in Vm.invoke with callHostExtern:
resolve the symbol via host_ffi.lookupSymbol (dlsym RTLD_DEFAULT) and dispatch
through the host_ffi trampolines, like the legacy interp.callExtern.

Marshalling is trivial now that Addr is a real host pointer (4D.0): every WORD-kind
arg passes as usize verbatim (a scalar's bits OR a pointer, no translation), and a
pointer return is a valid Addr. Picks callPtrRet (void*-ABI) for pointer-ish
returns, callIntRet (i64-ABI) otherwise; honors variadic. Non-word
(aggregate/string/float) args+returns bail loudly (4D.2 adds them). One general
mechanism for all externs, not per-builtin special cases.

New example 0636-comptime-extern-libc (#run toupper(97)/tolower(90) -> 65/122) runs
HANDLED on the VM, output byte-matching legacy. 698/0 both gates.
This commit is contained in:
agra
2026-06-18 18:00:07 +03:00
parent 625ba0fb27
commit e7a8708287
6 changed files with 106 additions and 3 deletions

View File

@@ -27,6 +27,7 @@ const inst_mod = @import("inst.zig");
const types = @import("types.zig");
const mod_mod = @import("module.zig");
const interp_mod = @import("interp.zig");
const host_ffi = @import("host_ffi.zig");
const Value = interp_mod.Value;
const Inst = inst_mod.Inst;
const Ref = inst_mod.Ref;
@@ -1029,9 +1030,12 @@ pub const Vm = struct {
if (callee.compiler_welded) {
if (try self.callCompilerFn(name, args, frame, ref_types)) |r| return r;
}
// Any other extern bails → the legacy interpreter's dlsym path.
self.detail = "comptime VM: call to an extern/builtin function not yet ported";
return error.Unsupported;
// General host-FFI escape: any other extern resolves via dlsym and is
// dispatched through the host_ffi trampolines. Because `Addr` is a real
// host pointer, args pass as `usize` untouched (a scalar's bits OR a
// pointer) and a pointer return comes back as a valid `Addr` — no
// translation. Aggregate/float args+returns aren't marshaled yet (4D.2).
return self.callHostExtern(callee, name, args, frame, ref_types);
}
const argbuf = self.gpa.alloc(Reg, args.len) catch @panic("comptime VM: out of memory (call args)");
defer self.gpa.free(argbuf);
@@ -1041,6 +1045,57 @@ pub const Vm = struct {
return self.run(callee, argbuf);
}
/// Host-FFI escape for the comptime VM: invoke a real extern (libc / host)
/// function by resolving `name` through `host_ffi.lookupSymbol` and dispatching
/// via the `host_ffi` call trampolines (the counterpart of the legacy
/// `interp.callExtern`).
///
/// Contract, as implemented below:
///   * at most 8 arguments are accepted;
///   * every argument must be of `.word` kind — its register value is handed to
///     the trampoline as a `usize` with no translation (scalar bits or a host
///     pointer alike);
///   * the `...Var` trampolines are used only when `callee.is_variadic` and more
///     arguments than declared parameters were supplied;
///   * a `void` / `noreturn` return yields register value 0;
///   * a pointer-ish return (see `isPointerish`) uses the pointer-return
///     trampoline, any other `.word` return the integer-return trampoline, whose
///     result is bit-cast to unsigned.
/// Anything else (aggregate/string/float args or returns, lookup failure, a
/// failed call) is reported through `self.failMsg` instead of being miscalled.
fn callHostExtern(self: *Vm, callee: *const Function, name: []const u8, args: []const Ref, frame: *Frame, ref_types: []const TypeId) Error!Reg {
    const type_table = try self.requireTable();

    // The trampolines take a bounded argument list; reject before touching dlsym.
    const max_args = 8;
    if (args.len > max_args) return self.failMsg("comptime extern call: more than 8 args (host_ffi trampolines max out at 8)");

    // Lookup has two failure modes: the lookup itself erroring, and a null result.
    const maybe_sym = host_ffi.lookupSymbol(self.gpa, name) catch
        return self.failMsg("comptime extern call: dlsym error looking up symbol");
    const sym = maybe_sym orelse
        return self.failMsg("comptime extern call: symbol not found via dlsym (target-specific binding called at compile time?)");

    // Marshal each argument register into its word slot, in order.
    var words: [max_args]usize = undefined;
    const call_args = words[0..args.len];
    for (args, call_args) |arg_ref, *slot| {
        const arg_ty = try self.refTy(ref_types, arg_ref);
        if (kindOf(type_table, arg_ty) != .word)
            return self.failMsg("comptime extern call: non-word (aggregate/string/float) arg not yet marshaled on the VM");
        slot.* = @intCast(frame.get(arg_ref.index())); // scalar bits OR host pointer
    }

    const fixed_count = callee.params.len;
    const use_variadic = callee.is_variadic and args.len > fixed_count;
    const ret_ty = callee.ret;

    // No result to carry back: make the call and hand the caller a zero register.
    if (ret_ty == .void or ret_ty == .noreturn) {
        if (use_variadic) {
            host_ffi.callVoidRetVar(sym, fixed_count, call_args) catch return self.failMsg("comptime extern call failed (void)");
        } else {
            host_ffi.callVoidRet(sym, call_args) catch return self.failMsg("comptime extern call failed (void)");
        }
        return @as(Reg, 0);
    }

    if (kindOf(type_table, ret_ty) != .word)
        return self.failMsg("comptime extern call: non-word (aggregate/string/float) return not yet supported on the VM");

    // Pointer-ish results take the pointer-return trampoline; the returned
    // address is used as the register value directly.
    if (isPointerish(type_table, ret_ty)) {
        const addr: u64 = if (use_variadic)
            host_ffi.callPtrRetVar(sym, fixed_count, call_args) catch return self.failMsg("comptime extern call failed (ptr)")
        else
            host_ffi.callPtrRet(sym, call_args) catch return self.failMsg("comptime extern call failed (ptr)");
        return @as(Reg, addr);
    }

    // Every remaining word-kind result takes the integer-return trampoline; its
    // result is reinterpreted bit-for-bit as an unsigned word.
    const signed = if (use_variadic)
        host_ffi.callIntRetVar(sym, fixed_count, call_args) catch return self.failMsg("comptime extern call failed (int)")
    else
        host_ffi.callIntRet(sym, call_args) catch return self.failMsg("comptime extern call failed (int)");
    const bits: u64 = @bitCast(signed);
    return @as(Reg, bits);
}
/// Largest single comptime allocation the VM will service natively. A bogus /
/// pathological comptime `malloc` above this bails to the legacy path (which
/// calls real libc) rather than OOM-panicking the compiler via `allocBytes`.
@@ -1723,6 +1778,19 @@ pub const Vm = struct {
};
}
/// Reports whether `ty` is pointer-shaped, which is how the extern-call path
/// chooses the `void*`-ABI return trampoline (`callPtrRet`) over the `i64`-ABI
/// one. True for `cstring` and for any `pointer` / `many_pointer` / `function`
/// type; an `optional` is pointer-shaped exactly when `optChildIsPtr` says its
/// child is. Every other type — including all remaining builtins — is not.
fn isPointerish(table: *const types.TypeTable, ty: TypeId) bool {
    // `cstring` is accepted up front, before the builtin filter and without
    // consulting the type table.
    if (ty != .cstring) {
        if (ty.isBuiltin()) return false;
        switch (table.get(ty)) {
            .optional => |opt| return optChildIsPtr(table, opt.child),
            // Pointer-like kinds fall through to the shared `true` below.
            .pointer, .many_pointer, .function => {},
            else => return false,
        }
    }
    return true;
}
/// A `?T` whose child is a pointer/many-pointer/function is represented as a
/// bare pointer (null == 0), not a `{T, i1}` aggregate — mirrors `typeSizeBytes`.
fn optChildIsPtr(table: *const types.TypeTable, child: TypeId) bool {