ffi 1.6: objc_msg_send IR opcode + per-call-site LLVM fn type

102/102 regression tests pass; chess Android + iOS-sim still build
clean. `ffi-objc-call-04-primitive-returns` flips from xfail to
passing with both nil-recv and real-recv flavors of *void / s64
returns exercised.

Key change: a new `objc_msg_send` IR opcode bundles (recv, sel,
extra args) and carries the return type via the `Inst.ty` field.
emit_llvm.zig builds a per-call-site LLVM function type from the
argument Refs' IR types (recv/sel as ptr; extra args through
abiCoerceParamType) and dispatches with LLVMBuildCall2. One
declared `@objc_msgSend` symbol is reused across every return
type — opaque pointers make the function value type-erased, so
each call site picks its own ABI.

  before:  one (recv, sel) -> ptr LLVM declaration whose function
           type was shared by every call site; only the void
           return was wired in 1.3.
  after:   same declaration, but each call site passes its own
           fn-type to LLVMBuildCall2 → s64 / *void / bool / f64
           returns all dispatch correctly without separate FuncIds.

Selector init mechanism: stayed with the @llvm.global_ctors
constructor. Investigated clang's
`__DATA,__objc_selrefs` + `externally_initialized` shape — works
for fully-linked binaries (dyld substitutes the SEL at load
time) but **LLVM ORC JIT** (the engine behind `sx run`) doesn't
process Mach-O Obj-C metadata sections, so the slot keeps its
initial value (the method-name string pointer) and dispatch
crashes with "<null selector>". The portable choice: keep the
constructor AND inject a direct call to it at `main`'s entry —
idempotent under dyld (sel_registerName returns the same SEL on
re-registration), required for ORC JIT.

Files touched:
  src/ir/inst.zig    | new ObjcMsgSend struct + opcode
  src/ir/lower.zig   | drop the void-only restriction; emit the
                       new opcode; remove the orphaned
                       getObjcMsgSendFid path (objc_msgSend
                       declaration moved to emit_llvm)
  src/ir/emit_llvm.zig | objc_msg_send arm (per-call-site
                       LLVMBuildCall2); lazy `@objc_msgSend`
                       declaration via getObjcMsgSendValue;
                       emitObjcSelectorInit refactored to inject
                       the ctor call at main's entry
  src/ir/{print,interp}.zig | switch arms for the new opcode

`ffi-objc-call-03-selector-sharing.ir` snapshot updates to
reflect the new shape (the `call ... @objc_msgSend` call sites
no longer mention a typed wrapper).
This commit is contained in:
agra
2026-05-19 18:39:10 +03:00
parent baeab179c3
commit d43385112c
9 changed files with 221 additions and 110 deletions

View File

@@ -95,8 +95,7 @@ pub const Lowering = struct {
module_scopes: ?*std.StringHashMap(std.StringHashMap(void)) = null, // per-module visible names (from import resolution)
import_graph: ?*std.StringHashMap(std.StringHashMap(void)) = null, // module path → set of directly imported paths (used by param_impl_map visibility filter)
current_source_file: ?[]const u8 = null, // source file of function currently being lowered
objc_msg_send_fid: ?FuncId = null, // lazily-declared `objc_msgSend` extern (for #objc_call lowering)
sel_register_name_fid: ?FuncId = null, // lazily-declared `sel_registerName` extern
sel_register_name_fid: ?FuncId = null, // lazily-declared `sel_registerName` extern (non-literal selector fallback)
type_bindings: ?std.StringHashMap(TypeId) = null, // generic type param bindings ($T → concrete TypeId)
current_match_tags: ?[]const u64 = null, // type tags for current match arm (for runtime dispatch)
force_block_value: bool = false, // set by lowerBlockValue to extract if-else values
@@ -3745,11 +3744,6 @@ pub const Lowering = struct {
fn internObjcSelector(self: *Lowering, sel_str: []const u8) inst_mod.GlobalId {
if (self.module.lookupObjcSelector(sel_str)) |gid| return gid;
// First interned selector → ensure `sel_registerName` is declared
// so emit_llvm.zig's constructor pass can find it and populate
// every cached SEL slot at module load.
_ = self.getSelRegisterNameFid();
// Mangle selector: replace colons with underscores. Apple's
// toolchain does the same (foo:bar: → foo_bar_).
var mangled = std.ArrayList(u8).empty;
@@ -3789,26 +3783,6 @@ pub const Lowering = struct {
return fid;
}
/// Lazily declare `objc_msgSend(recv: *void, sel: *void) -> *void`.
/// Cast at the call site by the LLVM lowering (the `coerceArg` /
/// type-equivalence path). For Phase 1.3 the only return shape
/// exercised is void; the *void return is discarded.
fn getObjcMsgSendFid(self: *Lowering) FuncId {
if (self.objc_msg_send_fid) |fid| return fid;
var params = std.ArrayList(inst_mod.Function.Param).empty;
const recv_str = self.module.types.internString("recv");
const sel_str = self.module.types.internString("sel");
const vptr = self.module.types.ptrTo(.void);
params.append(self.alloc, .{ .name = recv_str, .ty = vptr }) catch unreachable;
params.append(self.alloc, .{ .name = sel_str, .ty = vptr }) catch unreachable;
const fn_name = self.module.types.internString("objc_msgSend");
const fid = self.builder.declareExtern(fn_name, params.toOwnedSlice(self.alloc) catch unreachable, vptr);
const func = self.module.getFunctionMut(fid);
func.call_conv = .c;
self.objc_msg_send_fid = fid;
return fid;
}
/// Lower `#objc_call(T)(recv, "sel:", args...)` to:
/// %sel = call ptr @sel_registerName(<"sel:">)
/// %ret = call <ABI(T)> @objc_msgSend(recv, %sel, args...)
@@ -3833,13 +3807,9 @@ pub const Lowering = struct {
// Resolve the return type from the syntactic slot.
const ret_ty = self.resolveType(fic.return_type);
// For Phase 1.3 the only supported return-type / arity combo is
// (void, recv + selector). Anything else falls through to undef
// for now — the next phase-1 steps fill these in one shape at
// a time.
if (ret_ty != .void or fic.args.len != 2) {
if (fic.args.len < 2) {
if (self.diagnostics) |d| {
d.add(.err, "#objc_call: only `void` return + (recv, selector) is lowered today; non-void / arg-bearing arities land in later phase-1 steps", null);
d.add(.err, "#objc_call requires at least a receiver and a selector", null);
}
return Ref.none;
}
@@ -3847,12 +3817,12 @@ pub const Lowering = struct {
// Receiver expression.
const recv = self.lowerExpr(fic.args[0]);
// Selector. If it's a literal at parse time, intern into a
// module-scoped `SEL*` slot that emit_llvm.zig populates once
// at module init (Phase 1.5). Per call site collapses to a
// single load — matches clang's `@selector(...)` lowering.
// Non-literal selectors keep the per-call sel_registerName
// fallback for now.
// Selector. Literal selectors get interned into a module-
// scoped `SEL*` slot, so each call site collapses to a single
// load. emit_llvm.zig populates the slots from an
// `@llvm.global_ctors` constructor that is also called at
// `main`'s entry; clang's `__DATA,__objc_selrefs` shape is NOT
// used because ORC JIT (`sx run`) does not process it.
// Non-literal selectors keep the per-call `sel_registerName`
// fallback.
const sel_arg_node = fic.args[1];
const vptr_ty = self.module.types.ptrTo(.void);
const sel = blk: {
@@ -3862,7 +3832,6 @@ pub const Lowering = struct {
const slot_ptr = self.builder.emit(.{ .global_addr = slot_gid }, self.module.types.ptrTo(vptr_ty));
break :blk self.builder.emit(.{ .load = .{ .operand = slot_ptr } }, vptr_ty);
}
// Fallback: non-literal selector → runtime lookup per call.
const sel_ref = self.lowerExpr(sel_arg_node);
const sel_fid = self.getSelRegisterNameFid();
var sel_args = std.ArrayList(Ref).empty;
@@ -3871,17 +3840,19 @@ pub const Lowering = struct {
break :blk self.builder.emit(.{ .call = .{ .callee = sel_fid, .args = sel_owned } }, vptr_ty);
};
// Dispatch through objc_msgSend.
const msg_fid = self.getObjcMsgSendFid();
var call_args = std.ArrayList(Ref).empty;
call_args.append(self.alloc, recv) catch unreachable;
call_args.append(self.alloc, sel) catch unreachable;
const owned = call_args.toOwnedSlice(self.alloc) catch unreachable;
// Result type is `*void` here (objc_msgSend's declared shape).
// For `void` user-facing returns we just discard the Ref —
// the IR keeps the side-effecting call instruction either way.
_ = self.builder.emit(.{ .call = .{ .callee = msg_fid, .args = owned } }, self.module.types.ptrTo(.void));
return Ref.none;
// Additional args after recv + selector.
var extra = std.ArrayList(Ref).empty;
var ai: usize = 2;
while (ai < fic.args.len) : (ai += 1) {
extra.append(self.alloc, self.lowerExpr(fic.args[ai])) catch unreachable;
}
const extra_owned = extra.toOwnedSlice(self.alloc) catch unreachable;
return self.builder.emit(.{ .objc_msg_send = .{
.recv = recv,
.sel = sel,
.args = extra_owned,
} }, ret_ty);
}
// ── Calls ───────────────────────────────────────────────────────