From b8a412ddc7aea7f845e9f290d655a9ad9482f924 Mon Sep 17 00:00:00 2001 From: agra Date: Tue, 19 May 2026 13:09:34 +0300 Subject: [PATCH] =?UTF-8?q?ffi=201.5:=20intern=20Obj-C=20selectors=20?= =?UTF-8?q?=E2=80=94=20one=20static=20SEL=20slot=20per=20unique=20name?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 101/101 regression tests pass; the IR snapshot for the selector-sharing test flips from four per-call `sel_registerName` calls to two (one per unique selector) routed through a module-init constructor — matching what clang emits for `@selector(...)`. Hot-path cost collapses from a libobjc hashtable lookup per call to a single load of a static `SEL*` slot: Before (Phase 1.3): %sel = call ptr @sel_registerName(<"init">) call ptr @objc_msgSend(<recv>, %sel) After (Phase 1.5): %sel = load ptr, ptr @OBJC_SELECTOR_REFERENCES_init call ptr @objc_msgSend(<recv>, %sel) + @OBJC_SELECTOR_REFERENCES_init = internal global ptr null + @OBJC_SELECTOR_REFERENCES_release = internal global ptr null + define internal void @__sx_objc_selector_init() { + %sel = call ptr @sel_registerName(ptr @OBJC_METH_VAR_NAME_) + store ptr %sel, ptr @OBJC_SELECTOR_REFERENCES_init + %sel1 = call ptr @sel_registerName(ptr @OBJC_METH_VAR_NAME_.2) + store ptr %sel1, ptr @OBJC_SELECTOR_REFERENCES_release + ret void + } + @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] + [{ ..., ptr @__sx_objc_selector_init, ptr null }] Implementation: module.zig | new `objc_selector_cache: ArrayList(ObjcSelectorEntry)` with `lookupObjcSelector` / `appendObjcSelector`. List (not hashmap) keeps emit order stable across builds so the IR snapshot doesn't flicker on rehash. lower.zig | `internObjcSelector(sel)` creates the slot on first use, returns the same `GlobalId` on every subsequent call to the same selector. lowerFfiIntrinsicCall now emits `global_addr + load` for literal selectors. Non-literal selectors keep the `sel_registerName` fallback. 
`sel_registerName` is declared lazily on first intern so emit_llvm finds it for the constructor body. emit_llvm.zig | new `emitObjcSelectorInit` pass synthesizes a void constructor that loops over the cache, calls `sel_registerName` for each unique selector string, stores the result in the slot. Constructor is registered in `@llvm.global_ctors` with default priority (65535) so dyld runs it before main. The `@OBJC_METH_VAR_NAME_` private string globals and unnamed-addr flag match clang's exact emission shape — picked up by the system linker into the right Mach-O sections on macOS / iOS. Chess Android + iOS-sim still build clean (no `#objc_call` in chess yet — phase-3 migration will start exercising this). --- src/ir/emit_llvm.zig | 81 +++++++++++++++++++ src/ir/lower.zig | 68 +++++++++++++--- src/ir/module.zig | 25 ++++++ .../ffi-objc-call-03-selector-sharing.ir | 46 ++++++----- 4 files changed, 189 insertions(+), 31 deletions(-) diff --git a/src/ir/emit_llvm.zig b/src/ir/emit_llvm.zig index a6805e0..49307fd 100644 --- a/src/ir/emit_llvm.zig +++ b/src/ir/emit_llvm.zig @@ -201,10 +201,91 @@ pub const LLVMEmitter = struct { self.emitFunction(&func, @intCast(i)); } + // Pass 2.5: Emit Obj-C selector init constructor (Phase 1.5). + self.emitObjcSelectorInit(); + // Pass 3: Verify typeSizeBytes matches LLVM's ABI sizes self.verifySizes(); } + /// Synthesize a module constructor that populates each interned + /// Obj-C selector slot via `sel_registerName`, once at module load. + /// Registered in `@llvm.global_ctors` so dyld / ld.so runs it + /// before main. Per `#objc_call` site collapses to a single load + /// from the slot — matches clang's `@selector(...)` lowering. + fn emitObjcSelectorInit(self: *LLVMEmitter) void { + if (self.ir_mod.objc_selector_cache.items.len == 0) return; + + // Look up the `sel_registerName` extern that the lowerer already + // declared. 
If for some reason it's absent (shouldn't happen — + // every interned selector got there via the same lowering path), + // bail out and let the per-call fallback run. + const sel_reg_name = "sel_registerName"; + const sel_reg_z = self.alloc.dupeZ(u8, sel_reg_name) catch unreachable; + defer self.alloc.free(sel_reg_z); + const sel_reg_fn = c.LLVMGetNamedFunction(self.llvm_module, sel_reg_z.ptr); + if (sel_reg_fn == null) return; + const sel_reg_ty = c.LLVMGlobalGetValueType(sel_reg_fn); + + // Create the constructor: void __sx_objc_selector_init(). + const void_ty = self.cached_void; + var no_params: [0]c.LLVMTypeRef = .{}; + const ctor_ty = c.LLVMFunctionType(void_ty, &no_params, 0, 0); + const ctor = c.LLVMAddFunction(self.llvm_module, "__sx_objc_selector_init", ctor_ty); + c.LLVMSetLinkage(ctor, c.LLVMInternalLinkage); + + const entry = c.LLVMAppendBasicBlockInContext(self.context, ctor, "entry"); + c.LLVMPositionBuilderAtEnd(self.builder, entry); + + // For each (selector_str, slot_global): emit + // %sel = call ptr @sel_registerName(<"selector:">) + // store ptr %sel, ptr @OBJC_SELECTOR_REFERENCES_ + for (self.ir_mod.objc_selector_cache.items) |entry_kv| { + const sel_str = entry_kv.sel; + const slot_gid = entry_kv.slot; + const slot_global = self.global_map.get(@intCast(slot_gid.index())) orelse continue; + + // Selector string constant. Make it private so multiple + // constructors don't clash. `i8` array with NUL terminator. 
+ const sel_str_z = self.alloc.allocSentinel(u8, sel_str.len, 0) catch continue; + defer self.alloc.free(sel_str_z); + @memcpy(sel_str_z[0..sel_str.len], sel_str); + const str_const = c.LLVMConstStringInContext(self.context, sel_str_z.ptr, @intCast(sel_str.len), 0); + const str_global = c.LLVMAddGlobal(self.llvm_module, c.LLVMTypeOf(str_const), "OBJC_METH_VAR_NAME_"); + c.LLVMSetInitializer(str_global, str_const); + c.LLVMSetLinkage(str_global, c.LLVMPrivateLinkage); + c.LLVMSetGlobalConstant(str_global, 1); + c.LLVMSetUnnamedAddress(str_global, c.LLVMGlobalUnnamedAddr); + + var sel_args: [1]c.LLVMValueRef = .{str_global}; + const sel_val = c.LLVMBuildCall2(self.builder, sel_reg_ty, sel_reg_fn, &sel_args, 1, "sel"); + _ = c.LLVMBuildStore(self.builder, sel_val, slot_global); + } + + _ = c.LLVMBuildRetVoid(self.builder); + + // Register in @llvm.global_ctors. Layout per LLVM Language + // Reference: `[N x { i32, void()*, i8* }]`. Priority 65535 = + // default; the third field carries an "associated data" + // pointer (null for our case). + const i32_ty = self.cached_i32; + const ptr_ty = self.cached_ptr; + var ctor_field_types: [3]c.LLVMTypeRef = .{ i32_ty, ptr_ty, ptr_ty }; + const ctor_struct_ty = c.LLVMStructTypeInContext(self.context, &ctor_field_types, 3, 0); + var ctor_fields: [3]c.LLVMValueRef = .{ + c.LLVMConstInt(i32_ty, 65535, 0), + ctor, + c.LLVMConstNull(ptr_ty), + }; + const ctor_entry = c.LLVMConstNamedStruct(ctor_struct_ty, &ctor_fields, 3); + const ctors_arr_ty = c.LLVMArrayType2(ctor_struct_ty, 1); + var ctor_entries: [1]c.LLVMValueRef = .{ctor_entry}; + const ctors_init = c.LLVMConstArray2(ctor_struct_ty, &ctor_entries, 1); + const ctors_global = c.LLVMAddGlobal(self.llvm_module, ctors_arr_ty, "llvm.global_ctors"); + c.LLVMSetInitializer(ctors_global, ctors_init); + c.LLVMSetLinkage(ctors_global, c.LLVMAppendingLinkage); + } + /// Compare IR typeSizeBytes against LLVMABISizeOfType for all user-defined types. 
fn verifySizes(self: *LLVMEmitter) void { // Skip for wasm32: 4-byte pointers vs IR's assumed 8-byte, diff --git a/src/ir/lower.zig b/src/ir/lower.zig index be0eedf..956a43a 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -3734,6 +3734,43 @@ pub const Lowering = struct { // ── FFI intrinsics (#objc_call / #jni_call / #jni_static_call) ─ + /// Intern an Obj-C selector string into a module-scoped `SEL*` slot. + /// First call creates the global; subsequent calls return the same + /// `GlobalId`. emit_llvm.zig walks `module.objc_selector_cache` and + /// synthesizes a constructor that populates each slot via + /// `sel_registerName` exactly once at module load. + /// + /// Slot name matches clang's convention: `OBJC_SELECTOR_REFERENCES_` + /// with `:` replaced by `_` to keep the symbol name valid. + fn internObjcSelector(self: *Lowering, sel_str: []const u8) inst_mod.GlobalId { + if (self.module.lookupObjcSelector(sel_str)) |gid| return gid; + + // First interned selector → ensure `sel_registerName` is declared + // so emit_llvm.zig's constructor pass can find it and populate + // every cached SEL slot at module load. + _ = self.getSelRegisterNameFid(); + + // Mangle selector: replace colons with underscores. Apple's + // toolchain does the same (foo:bar: → foo_bar_). + var mangled = std.ArrayList(u8).empty; + defer mangled.deinit(self.alloc); + mangled.appendSlice(self.alloc, "OBJC_SELECTOR_REFERENCES_") catch unreachable; + for (sel_str) |ch| { + mangled.append(self.alloc, if (ch == ':') '_' else ch) catch unreachable; + } + const slot_name = self.module.types.internString(mangled.items); + const vptr_ty = self.module.types.ptrTo(.void); + const gid = self.module.addGlobal(.{ + .name = slot_name, + .ty = vptr_ty, + .init_val = .null_val, + .is_extern = false, + .is_const = false, + }); + self.module.appendObjcSelector(sel_str, gid); + return gid; + } + /// Lazily declare `sel_registerName(name: *u8) -> *void` as an extern. 
/// Cached per Lowering instance so multiple `#objc_call` sites share /// one declaration. @@ -3810,25 +3847,30 @@ pub const Lowering = struct { // Receiver expression. const recv = self.lowerExpr(fic.args[0]); - // Selector must be a literal string at parse time so we can - // intern it (Phase 1.5 will cache the SEL too). For Phase 1.3 - // we accept any expression that lowers to a string Ref. + // Selector. If it's a literal at parse time, intern into a + // module-scoped `SEL*` slot that emit_llvm.zig populates once + // at module init (Phase 1.5). Per call site collapses to a + // single load — matches clang's `@selector(...)` lowering. + // Non-literal selectors keep the per-call sel_registerName + // fallback for now. const sel_arg_node = fic.args[1]; - const sel_ref = blk: { + const vptr_ty = self.module.types.ptrTo(.void); + const sel = blk: { if (sel_arg_node.data == .string_literal) { const raw = sel_arg_node.data.string_literal.raw; - break :blk self.builder.constString(self.module.types.internString(raw)); + const slot_gid = self.internObjcSelector(raw); + const slot_ptr = self.builder.emit(.{ .global_addr = slot_gid }, self.module.types.ptrTo(vptr_ty)); + break :blk self.builder.emit(.{ .load = .{ .operand = slot_ptr } }, vptr_ty); } - break :blk self.lowerExpr(sel_arg_node); + // Fallback: non-literal selector → runtime lookup per call. + const sel_ref = self.lowerExpr(sel_arg_node); + const sel_fid = self.getSelRegisterNameFid(); + var sel_args = std.ArrayList(Ref).empty; + sel_args.append(self.alloc, sel_ref) catch unreachable; + const sel_owned = sel_args.toOwnedSlice(self.alloc) catch unreachable; + break :blk self.builder.emit(.{ .call = .{ .callee = sel_fid, .args = sel_owned } }, vptr_ty); }; - // Resolve selector via the runtime — per call site for now. 
- const sel_fid = self.getSelRegisterNameFid(); - var sel_args = std.ArrayList(Ref).empty; - sel_args.append(self.alloc, sel_ref) catch unreachable; - const sel_owned = sel_args.toOwnedSlice(self.alloc) catch unreachable; - const sel = self.builder.emit(.{ .call = .{ .callee = sel_fid, .args = sel_owned } }, self.module.types.ptrTo(.void)); - // Dispatch through objc_msgSend. const msg_fid = self.getObjcMsgSendFid(); var call_args = std.ArrayList(Ref).empty; diff --git a/src/ir/module.zig b/src/ir/module.zig index fe92b24..3aebb1c 100644 --- a/src/ir/module.zig +++ b/src/ir/module.zig @@ -26,14 +26,24 @@ pub const Module = struct { globals: std.ArrayList(Global), /// Maps (protocol_ty, concrete_ty) → list of method FuncIds. impl_table: ImplTable, + /// Interned Obj-C selectors. Kept as an insertion-ordered list of + /// (selector_string, slot_GlobalId) so emit_llvm.zig produces the + /// init constructor in a stable order across builds (the + /// selector-sharing IR snapshot would otherwise flicker on + /// hashtable rehash). `#objc_call` lowering uses + /// `lookupObjcSelector` / `appendObjcSelector` to read/write it. + objc_selector_cache: std.ArrayList(ObjcSelectorEntry), alloc: Allocator, + pub const ObjcSelectorEntry = struct { sel: []const u8, slot: GlobalId }; + pub fn init(alloc: Allocator) Module { return .{ .types = TypeTable.init(alloc), .functions = std.ArrayList(Function).empty, .globals = std.ArrayList(Global).empty, .impl_table = ImplTable.init(alloc), + .objc_selector_cache = std.ArrayList(ObjcSelectorEntry).empty, .alloc = alloc, }; } @@ -45,9 +55,24 @@ pub const Module = struct { self.functions.deinit(self.alloc); self.globals.deinit(self.alloc); self.impl_table.deinit(); + self.objc_selector_cache.deinit(self.alloc); self.types.deinit(); } + /// Linear scan — N is the count of UNIQUE selectors per program, + /// not the count of call sites. Real programs hit dozens, not + /// millions; a hashmap would be premature here. 
+ pub fn lookupObjcSelector(self: *const Module, sel: []const u8) ?GlobalId { + for (self.objc_selector_cache.items) |entry| { + if (std.mem.eql(u8, entry.sel, sel)) return entry.slot; + } + return null; + } + + pub fn appendObjcSelector(self: *Module, sel: []const u8, slot: GlobalId) void { + self.objc_selector_cache.append(self.alloc, .{ .sel = sel, .slot = slot }) catch unreachable; + } + pub fn addFunction(self: *Module, func: Function) FuncId { const id = FuncId.fromIndex(@intCast(self.functions.items.len)); self.functions.append(self.alloc, func) catch unreachable; diff --git a/tests/expected/ffi-objc-call-03-selector-sharing.ir b/tests/expected/ffi-objc-call-03-selector-sharing.ir index e1100f2..26fa046 100644 --- a/tests/expected/ffi-objc-call-03-selector-sharing.ir +++ b/tests/expected/ffi-objc-call-03-selector-sharing.ir @@ -3,12 +3,13 @@ @OS = internal global i64 0 @ARCH = internal global i64 0 @POINTER_SIZE = internal global i64 8 -@str = private unnamed_addr constant [5 x i8] c"init\00", align 1 -@str.1 = private unnamed_addr constant [5 x i8] c"init\00", align 1 -@str.2 = private unnamed_addr constant [5 x i8] c"init\00", align 1 -@str.3 = private unnamed_addr constant [8 x i8] c"release\00", align 1 -@str.4 = private unnamed_addr constant [4 x i8] c"ok\0A\00", align 1 -@str.5 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 +@OBJC_SELECTOR_REFERENCES_init = internal global ptr null +@OBJC_SELECTOR_REFERENCES_release = internal global ptr null +@str = private unnamed_addr constant [4 x i8] c"ok\0A\00", align 1 +@str.1 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 +@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [5 x i8] c"init\00" +@OBJC_METH_VAR_NAME_.2 = private unnamed_addr constant [8 x i8] c"release\00" +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__sx_objc_selector_init, ptr null }] ; Function Attrs: nounwind declare void @out(ptr) #0 @@ -225,27 
+226,27 @@ entry: %siN = insertvalue { { ptr, ptr, ptr }, ptr } undef, { ptr, ptr, ptr } %siN, 0 %siN = insertvalue { { ptr, ptr, ptr }, ptr } %siN, ptr null, 1 store { { ptr, ptr, ptr }, ptr } %siN, ptr @context, align 8 - %call = call ptr @sel_registerName(ptr @str) - %callN = call ptr @objc_msgSend(ptr null, ptr %call) - %callN = call ptr @sel_registerName(ptr @str.1) - %callN = call ptr @objc_msgSend(ptr null, ptr %callN) - %callN = call ptr @sel_registerName(ptr @str.2) - %callN = call ptr @objc_msgSend(ptr null, ptr %callN) - %callN = call ptr @sel_registerName(ptr @str.3) - %callN = call ptr @objc_msgSend(ptr null, ptr %callN) + %load = load ptr, ptr @OBJC_SELECTOR_REFERENCES_init, align 8 + %call = call ptr @objc_msgSend(ptr null, ptr %load) + %loadN = load ptr, ptr @OBJC_SELECTOR_REFERENCES_init, align 8 + %callN = call ptr @objc_msgSend(ptr null, ptr %loadN) + %loadN = load ptr, ptr @OBJC_SELECTOR_REFERENCES_init, align 8 + %callN = call ptr @objc_msgSend(ptr null, ptr %loadN) + %loadN = load ptr, ptr @OBJC_SELECTOR_REFERENCES_release, align 8 + %callN = call ptr @objc_msgSend(ptr null, ptr %loadN) %allocaN = alloca { ptr, i64 }, align 8 %gep = getelementptr inbounds { ptr, i64 }, ptr %allocaN, i32 0, i32 0 store ptr null, ptr %gep, align 8 %gepN = getelementptr inbounds { ptr, i64 }, ptr %allocaN, i32 0, i32 1 store i64 0, ptr %gepN, align 8 %allocaN = alloca { ptr, i64 }, align 8 - store { ptr, i64 } { ptr @str.4, i64 3 }, ptr %allocaN, align 8 + store { ptr, i64 } { ptr @str, i64 3 }, ptr %allocaN, align 8 %allocaN = alloca { ptr, i64 }, align 8 - store { ptr, i64 } { ptr @str.5, i64 0 }, ptr %allocaN, align 8 - %load = load { ptr, i64 }, ptr %allocaN, align 8 + store { ptr, i64 } { ptr @str.1, i64 0 }, ptr %allocaN, align 8 + %loadN = load { ptr, i64 }, ptr %allocaN, align 8 %loadN = load { ptr, i64 }, ptr %allocaN, align 8 %callN = call { ptr, i64 } @substr({ ptr, i64 } %loadN, i64 0, i64 3) - %callN = call { ptr, i64 } @concat({ ptr, i64 } %load, { 
ptr, i64 } %callN) + %callN = call { ptr, i64 } @concat({ ptr, i64 } %loadN, { ptr, i64 } %callN) store { ptr, i64 } %callN, ptr %allocaN, align 8 %loadN = load { ptr, i64 }, ptr %allocaN, align 8 %str.ptr = extractvalue { ptr, i64 } %loadN, 0 @@ -275,3 +276,12 @@ declare ptr @sel_registerName(ptr) #0 declare ptr @objc_msgSend(ptr, ptr) #0 declare i64 @write(i32, ptr, i64) + +define internal void @__sx_objc_selector_init() { +entry: + %sel = call ptr @sel_registerName(ptr @OBJC_METH_VAR_NAME_) + store ptr %sel, ptr @OBJC_SELECTOR_REFERENCES_init, align 8 + %selN = call ptr @sel_registerName(ptr @OBJC_METH_VAR_NAME_.2) + store ptr %selN, ptr @OBJC_SELECTOR_REFERENCES_release, align 8 + ret void +}