From e7a87082877b51f3e1326af0c9cf313ae5d15f09 Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 18 Jun 2026 18:00:07 +0300 Subject: [PATCH] =?UTF-8?q?comptime=20VM:=20general=20host-FFI=20escape=20?= =?UTF-8?q?=E2=80=94=20call=20any=20extern=20libc=20fn=20via=20dlsym=20+?= =?UTF-8?q?=20host=5Fffi=20(Phase=204D.1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the "extern not ported -> bail" stub in Vm.invoke with callHostExtern: resolve the symbol via host_ffi.lookupSymbol (dlsym RTLD_DEFAULT) and dispatch through the host_ffi trampolines, like the legacy interp.callExtern. Marshalling is trivial now that Addr is a real host pointer (4D.0): every WORD-kind arg passes as usize verbatim (a scalar's bits OR a pointer, no translation), and a pointer return is a valid Addr. Picks callPtrRet (void*-ABI) for pointer-ish returns, callIntRet (i64-ABI) otherwise; honors variadic. Non-word (aggregate/string/float) args+returns bail loudly (4D.2 adds them). One general mechanism for all externs, not per-builtin special cases. New example 0636-comptime-extern-libc (#run toupper(97)/tolower(90) -> 65/122) runs HANDLED on the VM, output byte-matching legacy. 698/0 both gates. 
--- current/CHECKPOINT-COMPILER-API.md | 15 ++++ examples/0636-comptime-extern-libc.sx | 17 +++++ .../expected/0636-comptime-extern-libc.exit | 1 + .../expected/0636-comptime-extern-libc.stderr | 1 + .../expected/0636-comptime-extern-libc.stdout | 1 + src/ir/comptime_vm.zig | 74 ++++++++++++++++++- 6 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 examples/0636-comptime-extern-libc.sx create mode 100644 examples/expected/0636-comptime-extern-libc.exit create mode 100644 examples/expected/0636-comptime-extern-libc.stderr create mode 100644 examples/expected/0636-comptime-extern-libc.stdout diff --git a/current/CHECKPOINT-COMPILER-API.md b/current/CHECKPOINT-COMPILER-API.md index eea1144d..37757205 100644 --- a/current/CHECKPOINT-COMPILER-API.md +++ b/current/CHECKPOINT-COMPILER-API.md @@ -352,6 +352,21 @@ when reached (sentinels or accessor fns; see the design doc Risks). `List` growth; orthogonal, see `current/CHECKPOINT-METATYPE.md`.) ## Log +- **Phase 4D.1 (VM plan) — general host-FFI escape: the VM calls any extern libc fn via dlsym + host_ffi (2026-06-18).** + Replaced the "extern not ported → bail" stub in `Vm.invoke` with `callHostExtern`: resolve the + symbol via `host_ffi.lookupSymbol` (dlsym RTLD_DEFAULT) and dispatch through the `host_ffi` + trampolines, exactly like the legacy `interp.callExtern`. **Marshalling is now trivial because + `Addr` is a real host pointer (4D.0):** every WORD-kind arg passes as `usize` verbatim — a + scalar's bits OR a pointer, no translation — and a pointer return is a valid `Addr`. Picks + `callPtrRet` (void*-ABI) for pointer-ish returns, `callIntRet` (i64-ABI) otherwise; honors + variadic (`is_variadic and args > fixed`). Non-word (aggregate/string/float) args+returns bail + loudly (no silent miscall — 4D.2 adds NUL-term cstring marshalling + float). NOT per-builtin: ONE + general mechanism for all externs. 
New example `0636-comptime-extern-libc` (`#run toupper(97)`/ + `tolower(90)` fold to 65/122) runs **HANDLED on the VM**, output byte-matching legacy. (`abs` + doesn't dlsym-resolve on macOS — a compiler builtin — and the VM fails identically to legacy, + confirming parity.) **698/0 BOTH gates** (one new example). On `reify`. **Next (4D.2):** + string/aggregate extern args (string→NUL-term cstring) + float args/returns, then `compiler_call` + (#compiler hooks, 4D.3). - **Phase 4D.0 (VM plan) — comptime VM memory = an ARENA of stable host allocations; `Addr` = real host pointer (2026-06-18).** Replaced the growable `ArrayList(u8)` flat buffer (which reallocs/MOVES on growth) with a `std.heap.ArenaAllocator`: each `allocBytes` is a separate arena allocation that never moves and diff --git a/examples/0636-comptime-extern-libc.sx b/examples/0636-comptime-extern-libc.sx new file mode 100644 index 00000000..7bf419e5 --- /dev/null +++ b/examples/0636-comptime-extern-libc.sx @@ -0,0 +1,17 @@ +// Comptime host-FFI: a `#run` that calls real libc functions (`toupper`/`tolower`) +// at compile time. The comptime VM resolves the symbol via dlsym and dispatches +// through the host_ffi trampolines (Phase 4D) — a scalar arg in, a scalar return +// out — folding the result into a constant. (The legacy interpreter does the same +// via its own dlsym path; both agree.) 
+#import "modules/std.sx"; + +toupper :: (c: i32) -> i32 extern libc; +tolower :: (c: i32) -> i32 extern libc; + +UP :: #run toupper(97); // 'a' -> 'A' = 65 +LO :: #run tolower(90); // 'Z' -> 'z' = 122 + +main :: () -> i32 { + print("toupper(97)={} tolower(90)={}\n", UP, LO); + return 0; +} diff --git a/examples/expected/0636-comptime-extern-libc.exit b/examples/expected/0636-comptime-extern-libc.exit new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/examples/expected/0636-comptime-extern-libc.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0636-comptime-extern-libc.stderr b/examples/expected/0636-comptime-extern-libc.stderr new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/expected/0636-comptime-extern-libc.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0636-comptime-extern-libc.stdout b/examples/expected/0636-comptime-extern-libc.stdout new file mode 100644 index 00000000..a2619de4 --- /dev/null +++ b/examples/expected/0636-comptime-extern-libc.stdout @@ -0,0 +1 @@ +toupper(97)=65 tolower(90)=122 diff --git a/src/ir/comptime_vm.zig b/src/ir/comptime_vm.zig index 75044171..f99f2bb1 100644 --- a/src/ir/comptime_vm.zig +++ b/src/ir/comptime_vm.zig @@ -27,6 +27,7 @@ const inst_mod = @import("inst.zig"); const types = @import("types.zig"); const mod_mod = @import("module.zig"); const interp_mod = @import("interp.zig"); +const host_ffi = @import("host_ffi.zig"); const Value = interp_mod.Value; const Inst = inst_mod.Inst; const Ref = inst_mod.Ref; @@ -1029,9 +1030,12 @@ pub const Vm = struct { if (callee.compiler_welded) { if (try self.callCompilerFn(name, args, frame, ref_types)) |r| return r; } - // Any other extern bails → the legacy interpreter's dlsym path. - self.detail = "comptime VM: call to an extern/builtin function not yet ported"; - return error.Unsupported; + // General host-FFI escape: any other extern resolves via dlsym and is + // dispatched through the host_ffi trampolines. 
/// Host-FFI escape for the comptime VM: call a real extern (libc / host)
/// function by resolving `name` with `host_ffi.lookupSymbol` and dispatching
/// through the `host_ffi` trampolines.
///
/// Every argument must be WORD-kind; its register value is handed to the
/// trampoline as a `usize` without translation. At most 8 arguments are
/// accepted. The call is made through the variadic trampolines when the callee
/// is declared variadic and more arguments were supplied than it has fixed
/// parameters.
///
/// Returns the callee's single-word result as a `Reg` (0 for `void` /
/// `noreturn` callees). Fails via `failMsg` when there are too many arguments,
/// the symbol cannot be resolved, an argument or the return type is not
/// WORD-kind, or the trampoline itself reports an error.
fn callHostExtern(self: *Vm, callee: *const Function, name: []const u8, args: []const Ref, frame: *Frame, ref_types: []const TypeId) Error!Reg {
    const table = try self.requireTable();
    if (args.len > 8) return self.failMsg("comptime extern call: more than 8 args (host_ffi trampolines max out at 8)");

    // Two distinct failures: the lookup itself erroring, and the lookup
    // succeeding but finding no such symbol.
    const lookup = host_ffi.lookupSymbol(self.gpa, name) catch
        return self.failMsg("comptime extern call: dlsym error looking up symbol");
    const symbol = lookup orelse
        return self.failMsg("comptime extern call: symbol not found via dlsym (target-specific binding called at compile time?)");

    // Pack each argument into one machine word. Anything that is not WORD-kind
    // is rejected here rather than passed with a wrong layout.
    var words: [8]usize = undefined;
    for (args, words[0..args.len]) |arg, *slot| {
        const arg_ty = try self.refTy(ref_types, arg);
        if (kindOf(table, arg_ty) != .word)
            return self.failMsg("comptime extern call: non-word (aggregate/string/float) arg not yet marshaled on the VM");
        slot.* = @intCast(frame.get(arg.index()));
    }
    const argv = words[0..args.len];

    const fixed = callee.params.len;
    const variadic = callee.is_variadic and args.len > fixed;
    const ret = callee.ret;

    // No result to read back: run the call for its effect and yield 0.
    const returns_nothing = ret == .void or ret == .noreturn;
    if (returns_nothing) {
        if (variadic) {
            host_ffi.callVoidRetVar(symbol, fixed, argv) catch return self.failMsg("comptime extern call failed (void)");
        } else {
            host_ffi.callVoidRet(symbol, argv) catch return self.failMsg("comptime extern call failed (void)");
        }
        return @as(Reg, 0);
    }

    if (kindOf(table, ret) != .word)
        return self.failMsg("comptime extern call: non-word (aggregate/string/float) return not yet supported on the VM");

    // Pointer-shaped results use the pointer-return trampolines; the returned
    // word is stored in the register as-is.
    if (isPointerish(table, ret)) {
        const ptr_word: u64 = if (variadic)
            host_ffi.callPtrRetVar(symbol, fixed, argv) catch return self.failMsg("comptime extern call failed (ptr)")
        else
            host_ffi.callPtrRet(symbol, argv) catch return self.failMsg("comptime extern call failed (ptr)");
        return @as(Reg, ptr_word);
    }

    // Everything else uses the integer-return trampolines; the result is
    // reinterpreted bit-for-bit as an unsigned word.
    // NOTE(review): a callee declared to return fewer than 64 bits (e.g. `i32`)
    // is still read through this path; whether the upper bits are normalized
    // depends on `host_ffi` — confirm.
    const int_word = if (variadic)
        host_ffi.callIntRetVar(symbol, fixed, argv) catch return self.failMsg("comptime extern call failed (int)")
    else
        host_ffi.callIntRet(symbol, argv) catch return self.failMsg("comptime extern call failed (int)");
    return @as(Reg, @as(u64, @bitCast(int_word)));
}
/// Reports whether `ty` is a pointer-shaped type, which is what selects the
/// pointer-return extern trampoline (`callPtrRet`) over the integer-return one.
///
/// Pointer-shaped means: `cstring`; any `pointer`, `many_pointer` or
/// `function` type; or an `optional` whose child `optChildIsPtr` accepts.
/// Every other builtin or table type is not pointer-shaped.
fn isPointerish(table: *const types.TypeTable, ty: TypeId) bool {
    if (ty != .cstring) {
        // Apart from `cstring`, no builtin is pointer-shaped, and builtins
        // are not looked up in the table.
        if (ty.isBuiltin()) return false;
        switch (table.get(ty)) {
            .pointer, .many_pointer, .function => {},
            // An optional counts only when its child is itself pointer-shaped.
            .optional => |opt| return optChildIsPtr(table, opt.child),
            else => return false,
        }
    }
    return true;
}