From a1736f321316f27f26187882aa94320898024216 Mon Sep 17 00:00:00 2001 From: agra Date: Mon, 25 May 2026 23:17:30 +0300 Subject: [PATCH] ffi M1.2 A.5: synthesized +alloc IMP + ensureCRuntimeDecl helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For every sx-defined #objc_class, emit a C-callconv +alloc IMP that the Obj-C runtime calls when '[Cls alloc]' fires (from sx code, UIKit instantiation, Info.plist principal class, etc.): +alloc IMP (cls: Class, _cmd: SEL) -> id instance = class_createInstance(cls, 0) state = malloc(STATE_SIZE) memset(state, 0, STATE_SIZE) object_setIvar(instance, load(@__<Cls>_state_ivar), state) return instance STATE_SIZE = max(typeSizeBytes(state struct), 1) — always at least one byte, because malloc(0) may return NULL and the ivar must be non-null after a successful +alloc (the malloc result itself is not checked). The IMP is registered on the METACLASS (class methods live there — every Class object's isa points to the metaclass) in emit_llvm's class-pair init constructor: metaclass = object_getClass(cls) sel_alloc = sel_registerName("alloc") class_addMethod(metaclass, sel_alloc, alloc_imp, "@@:") That override wins over NSObject's default +alloc; runtime instantiations get the __sx_state ivar bound automatically. Per-instance allocator binding (the plan's full design — store the Allocator value in the state struct so -dealloc frees through the same one) is deferred. libc malloc/free is fine for v1; we'll upgrade once Month 4's autoreleasepool + ARC ops shake out. REFACTOR: replaced the one-'getFid'-helper-plus-cache-field-per-function pattern with a single 'ensureCRuntimeDecl(name, params, ret) -> FuncId', now used for object_getIvar, object_setIvar, class_createInstance, malloc and memset. The helper checks for an existing decl by name first (avoids the 'class_createInstance.1' duplicate-symbol crash when stdlib's '#foreign' decl is already in the module). One helper instead of one-per-function saves roughly 150 lines of boilerplate across those five functions; against the parent commit only the existing object_getIvar helper, its cache field and one call site are actually removed (the 14 deletions in the diffstat). 
object_getIvar / object_setIvar added to stdlib std/objc.sx so user code can use them too (146 exercises object_getIvar to verify __sx_state was bound to a non-null state pointer after +alloc). 146-objc-class-alloc-roundtrip.sx end-to-end against macOS: '[SxFoo alloc]' returns non-null AND object_getIvar(instance, __sx_state) returns the state ptr. Real Obj-C runtime, no mocks. 175 example tests pass (+1). zig build test green. --- examples/146-objc-class-alloc-roundtrip.sx | 54 +++++++ library/modules/std/objc.sx | 2 + src/ir/emit_llvm.zig | 34 ++++ src/ir/lower.zig | 150 ++++++++++++++++-- .../142-objc-class-method-lowering.ir | 24 +++ .../146-objc-class-alloc-roundtrip.exit | 1 + .../146-objc-class-alloc-roundtrip.txt | 1 + 7 files changed, 252 insertions(+), 14 deletions(-) create mode 100644 examples/146-objc-class-alloc-roundtrip.sx create mode 100644 tests/expected/146-objc-class-alloc-roundtrip.exit create mode 100644 tests/expected/146-objc-class-alloc-roundtrip.txt diff --git a/examples/146-objc-class-alloc-roundtrip.sx b/examples/146-objc-class-alloc-roundtrip.sx new file mode 100644 index 0000000..424b63e --- /dev/null +++ b/examples/146-objc-class-alloc-roundtrip.sx @@ -0,0 +1,54 @@ +// M1.2 A.5 — synthesized `+alloc` IMP allocates an Obj-C +// instance AND a hidden state-struct, bound via the `__sx_state` +// ivar. +// +// Round-trip below: +// 1. objc_msgSend(SxFoo, sel_registerName("alloc")) — invokes +// the synthesized +alloc IMP via the metaclass. +// 2. Returned instance is non-null AND has `__sx_state` set to +// a non-null pointer (the freshly-malloc'd state struct). +// 3. The IMP also memset's the state to zero; this test does not +// read the bytes back — it only checks the pointer is non-null. +// +// Once A.6 lands (-dealloc) and A.7 opens the dispatch gate, +// sx-side `SxFoo.alloc().init()` and method calls will exercise +// the full lifecycle. 
+ +#import "modules/std.sx"; +#import "modules/compiler.sx"; +#import "modules/std/objc.sx"; + +class_getInstanceVariable :: (cls: *void, name: [*]u8) -> *void #foreign objc; + +SxFoo :: #objc_class("SxFoo") { + counter: s32; + + bump :: (self: *Self) { + self.counter += 1; + } +} + +main :: () -> s32 { + inline if OS == .macos { + cls : Class = objc_getClass("SxFoo".ptr); + if cls == null { print("FAIL: SxFoo not registered\n"); return 1; } + + // [SxFoo alloc] — invokes the synthesized +alloc IMP. + sel_alloc : SEL = sel_registerName("alloc".ptr); + msg_fn : (cls: *void, sel: *void) -> *void callconv(.c) = xx objc_msgSend; + instance : *void = msg_fn(cls, sel_alloc); + if instance == null { print("FAIL: +alloc returned null\n"); return 1; } + + // Verify __sx_state was set on the new instance. + ivar := class_getInstanceVariable(cls, "__sx_state".ptr); + if ivar == null { print("FAIL: __sx_state ivar missing\n"); return 1; } + state := object_getIvar(instance, ivar); + if state == null { print("FAIL: __sx_state not bound to state ptr\n"); return 1; } + + print("alloc: ok, state bound\n"); + } + inline if OS != .macos { + print("alloc: ok, state bound\n"); + } + 0; +} diff --git a/library/modules/std/objc.sx b/library/modules/std/objc.sx index 0b4cb0c..f6b1c2b 100644 --- a/library/modules/std/objc.sx +++ b/library/modules/std/objc.sx @@ -40,6 +40,8 @@ objc_lookUpClass :: (name: [*]u8) -> *void #foreign objc; sel_registerName :: (name: [*]u8) -> *void #foreign objc; class_createInstance :: (cls: *void, extra: usize) -> *void #foreign objc; object_getClass :: (obj: *void) -> *void #foreign objc; +object_getIvar :: (obj: *void, ivar: *void) -> *void #foreign objc; +object_setIvar :: (obj: *void, ivar: *void, val: *void) #foreign objc; // Declared with the simplest non-variadic shape. Cast per call site. 
objc_msgSend :: (recv: *void, sel: *void) -> *void #foreign objc; diff --git a/src/ir/emit_llvm.zig b/src/ir/emit_llvm.zig index 049607c..5b227a7 100644 --- a/src/ir/emit_llvm.zig +++ b/src/ir/emit_llvm.zig @@ -648,6 +648,40 @@ pub const LLVMEmitter = struct { var reg_args: [1]c.LLVMValueRef = .{cls_val}; _ = c.LLVMBuildCall2(self.builder, register_ty, register_fn, &reg_args, 1, ""); + // M1.2 A.5 — register the synthesized `+alloc` IMP on the + // metaclass. Class methods live on the metaclass (every + // Class object's `isa` points to the metaclass), so we + // resolve it via `object_getClass(cls)` and `class_addMethod` + // the IMP there. Encoding `@@:` = returns id, takes Class, + // then SEL — Apple's standard `+alloc` shape. This override + // wins over NSObject's default +alloc; runtime instantiations + // (UIKit, Info.plist, NSCoder) go through our IMP and get the + // __sx_state ivar bound. + const alloc_imp_name = std.fmt.allocPrint(self.alloc, "__{s}_alloc_imp", .{class_name}) catch continue; + defer self.alloc.free(alloc_imp_name); + const alloc_imp_z = self.alloc.dupeZ(u8, alloc_imp_name) catch continue; + defer self.alloc.free(alloc_imp_z); + const alloc_imp_fn = c.LLVMGetNamedFunction(self.llvm_module, alloc_imp_z.ptr); + if (alloc_imp_fn != null) { + // metaclass = object_getClass(cls). (Distinct from + // objc_getClass: the latter takes a NAME string and is + // for class-object lookup. object_getClass takes an + // instance pointer — a Class IS itself an instance of + // its metaclass — and returns the isa.) 
+ const obj_get_class_fn, const obj_get_class_ty = self.lazyDeclareCRuntime("object_getClass", &[_]c.LLVMTypeRef{ptr_ty}, ptr_ty, 0); + var ogc_args: [1]c.LLVMValueRef = .{cls_val}; + const metaclass_val = c.LLVMBuildCall2(self.builder, obj_get_class_ty, obj_get_class_fn, &ogc_args, 1, "metacls"); + + const alloc_sel_global = self.emitPrivateCString("alloc", "OBJC_METH_VAR_NAME_"); + const alloc_enc_global = self.emitPrivateCString("@@:", "OBJC_METH_VAR_TYPE_"); + + var sel_args: [1]c.LLVMValueRef = .{alloc_sel_global}; + const sel_val = c.LLVMBuildCall2(self.builder, sel_reg_ty, sel_reg_fn, &sel_args, 1, "sel_alloc"); + + var add_args: [4]c.LLVMValueRef = .{ metaclass_val, sel_val, alloc_imp_fn, alloc_enc_global }; + _ = c.LLVMBuildCall2(self.builder, add_method_ty, add_method_fn, &add_args, 4, ""); + } + // Cache the ivar handle in the per-class global so trampolines // can read the __sx_state ivar without re-looking-it-up. The // global is declared by lower.zig (M1.2 A.4b.i) and starts as diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 6de6200..68dc1b8 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -108,7 +108,6 @@ pub const Lowering = struct { implicit_ctx_enabled: bool = false, current_ctx_ref: Ref = Ref.none, sel_register_name_fid: ?FuncId = null, // lazily-declared `sel_registerName` extern (non-literal selector fallback) - objc_object_get_ivar_fid: ?FuncId = null, // lazily-declared `object_getIvar` extern (M1.2 A.4b IMP trampoline body) jni_env_stack: std.ArrayList(Ref) = std.ArrayList(Ref).empty, // lexical `#jni_env(env)` Ref stack — top is current scope's env for omitted-env `#jni_call` jni_env_stack_base: usize = 0, // index above which the currently-lowering fn's `#jni_env` scopes live; outer-fn Refs aren't valid in this fn's instruction stream jni_env_tl_get_fid: ?FuncId = null, // extern `sx_jni_env_tl_get` (from library/vendors/sx_jni_runtime/sx_jni_env_tl.c) @@ -11508,19 +11507,31 @@ pub const Lowering = struct { 
self.emitObjcDefinedClassImps(); } - /// Lazily declare `object_getIvar(obj: *void, ivar: *void) -> *void` - /// as an extern. Cached so multiple IMP trampolines share one decl. - fn getObjcObjectGetIvarFid(self: *Lowering) FuncId { - if (self.objc_object_get_ivar_fid) |fid| return fid; - const ptr_void = self.module.types.ptrTo(.void); + /// Get a FuncId for an external C-callconv function. If a function + /// with this exported name already exists in the module (e.g. + /// declared by stdlib `#foreign` decl), return it; otherwise + /// declare it fresh with the given signature. + /// + /// One helper instead of a `getFid` per runtime function — + /// avoids per-function cache fields and per-function boilerplate. + fn ensureCRuntimeDecl(self: *Lowering, name: []const u8, param_tys: []const TypeId, ret_ty: TypeId) FuncId { + const name_id = self.module.types.internString(name); + for (self.module.functions.items, 0..) |f, i| { + if (f.name == name_id) return FuncId.fromIndex(@intCast(i)); + } var params = std.ArrayList(inst_mod.Function.Param).empty; - params.append(self.alloc, .{ .name = self.module.types.internString("obj"), .ty = ptr_void }) catch unreachable; - params.append(self.alloc, .{ .name = self.module.types.internString("ivar"), .ty = ptr_void }) catch unreachable; - const fn_name = self.module.types.internString("object_getIvar"); - const fid = self.builder.declareExtern(fn_name, params.toOwnedSlice(self.alloc) catch unreachable, ptr_void); - const func = self.module.getFunctionMut(fid); - func.call_conv = .c; - self.objc_object_get_ivar_fid = fid; + for (param_tys, 0..) |pty, i| { + // Param names don't matter at the LLVM ABI boundary — + // synthesize generic ones (`a0`, `a1`, ...) so we don't + // need a parallel name list per call site. 
+ const synth = std.fmt.allocPrint(self.alloc, "a{d}", .{i}) catch unreachable; + params.append(self.alloc, .{ + .name = self.module.types.internString(synth), + .ty = pty, + }) catch unreachable; + } + const fid = self.builder.declareExtern(name_id, params.toOwnedSlice(self.alloc) catch unreachable, ret_ty); + self.module.getFunctionMut(fid).call_conv = .c; return fid; } @@ -11540,6 +11551,9 @@ pub const Lowering = struct { fn emitObjcDefinedClassImps(self: *Lowering) void { for (self.module.objc_defined_class_cache.items) |entry| { const fcd = entry.decl; + // Synthesize +alloc (M1.2 A.5) before per-method IMPs. emit_llvm + // registers it on the metaclass after objc_registerClassPair. + self.emitObjcDefinedClassAllocImp(fcd); for (fcd.members) |m| { const method = switch (m) { .method => |md| md, @@ -11614,7 +11628,7 @@ pub const Lowering = struct { const ivar_handle = self.builder.load(ivar_addr, ptr_void); // (2) state = object_getIvar(obj, ivar_handle). - const get_ivar_fid = self.getObjcObjectGetIvarFid(); + const get_ivar_fid = self.ensureCRuntimeDecl("object_getIvar", &.{ ptr_void, ptr_void }, ptr_void); const obj_ref = Ref.fromIndex(0); const get_ivar_args = self.alloc.alloc(Ref, 2) catch return; get_ivar_args[0] = obj_ref; @@ -11671,6 +11685,114 @@ pub const Lowering = struct { self.builder.finalize(); } + /// Synthesize the `+alloc` IMP for an sx-defined `#objc_class`. + /// Class method (registered on the metaclass by emit_llvm) — when + /// `[SxFoo alloc]` runs (from sx, UIKit, Info.plist, ...), this + /// IMP fires and returns a fully-initialised instance whose + /// `__sx_state` ivar points at a zero-init state struct. 
+ /// + /// C-ABI: `(cls: id, _cmd: SEL) -> id` + /// + /// Body: + /// %instance = class_createInstance(cls, 0) + /// %state = malloc(STATE_SIZE) + /// memset(state, 0, STATE_SIZE) + /// %iv = load @___state_ivar + /// object_setIvar(instance, iv, state) + /// ret instance + /// + /// STATE_SIZE = max(typeSizeBytes(__State), 1) — we always + /// allocate at least one byte so the ivar is never null. State + /// is freed in `-dealloc` (M1.2 A.6). + fn emitObjcDefinedClassAllocImp(self: *Lowering, fcd: *const ast.ForeignClassDecl) void { + // Save+restore builder state. + const saved_func = self.builder.func; + const saved_block = self.builder.current_block; + const saved_counter = self.builder.inst_counter; + defer { + self.builder.func = saved_func; + self.builder.current_block = saved_block; + self.builder.inst_counter = saved_counter; + } + + const imp_name = std.fmt.allocPrint(self.alloc, "__{s}_alloc_imp", .{fcd.name}) catch return; + const name_id = self.module.types.internString(imp_name); + const ptr_void = self.module.types.ptrTo(.void); + + var params = std.ArrayList(inst_mod.Function.Param).empty; + params.append(self.alloc, .{ .name = self.module.types.internString("cls"), .ty = ptr_void }) catch return; + params.append(self.alloc, .{ .name = self.module.types.internString("_cmd"), .ty = ptr_void }) catch return; + const params_slice = params.toOwnedSlice(self.alloc) catch return; + + _ = self.builder.beginFunction(name_id, params_slice, ptr_void); + const func = self.builder.currentFunc(); + func.linkage = .external; + func.call_conv = .c; + func.has_implicit_ctx = false; + + const entry_name = self.module.types.internString("entry"); + const entry = self.builder.appendBlock(entry_name, &.{}); + self.builder.switchToBlock(entry); + + // (1) %instance = class_createInstance(cls, 0) + const cls_ref = Ref.fromIndex(0); + const create_fid = self.ensureCRuntimeDecl("class_createInstance", &.{ ptr_void, .u64 }, ptr_void); + const create_args = 
self.alloc.alloc(Ref, 2) catch return; + create_args[0] = cls_ref; + create_args[1] = self.builder.constInt(0, .u64); + const instance = self.builder.emit(.{ .call = .{ + .callee = create_fid, + .args = create_args, + } }, ptr_void); + + // STATE_SIZE — compute the layout size of the state struct. + // Always at least 1 so we have a non-null pointer to bind. + const state_struct_ty = self.objcDefinedStateStructType(fcd); + const raw_size = self.module.types.typeSizeBytes(state_struct_ty); + const state_size: u64 = if (raw_size == 0) 1 else @intCast(raw_size); + const size_const = self.builder.constInt(@intCast(state_size), .u64); + + // (2) %state = malloc(STATE_SIZE) + const malloc_fid = self.ensureCRuntimeDecl("malloc", &.{.u64}, ptr_void); + const malloc_args = self.alloc.alloc(Ref, 1) catch return; + malloc_args[0] = size_const; + const state = self.builder.emit(.{ .call = .{ + .callee = malloc_fid, + .args = malloc_args, + } }, ptr_void); + + // (3) memset(state, 0, STATE_SIZE) + const memset_fid = self.ensureCRuntimeDecl("memset", &.{ ptr_void, .s32, .u64 }, ptr_void); + const memset_args = self.alloc.alloc(Ref, 3) catch return; + memset_args[0] = state; + memset_args[1] = self.builder.constInt(0, .s32); + memset_args[2] = size_const; + _ = self.builder.emit(.{ .call = .{ + .callee = memset_fid, + .args = memset_args, + } }, ptr_void); + + // (4) object_setIvar(instance, load(@___state_ivar), state) + const ivar_global_name = std.fmt.allocPrint(self.alloc, "__{s}_state_ivar", .{fcd.name}) catch return; + defer self.alloc.free(ivar_global_name); + const ivar_global_id = self.lookupGlobalIdByName(ivar_global_name) orelse return; + const ivar_addr = self.builder.emit(.{ .global_addr = ivar_global_id }, ptr_void); + const ivar_handle = self.builder.load(ivar_addr, ptr_void); + const set_ivar_fid = self.ensureCRuntimeDecl("object_setIvar", &.{ ptr_void, ptr_void, ptr_void }, .void); + const set_args = self.alloc.alloc(Ref, 3) catch return; + set_args[0] = 
instance; + set_args[1] = ivar_handle; + set_args[2] = state; + _ = self.builder.emit(.{ .call = .{ + .callee = set_ivar_fid, + .args = set_args, + } }, .void); + + // (5) ret instance + self.builder.ret(instance, ptr_void); + self.builder.finalize(); + } + /// Linear scan over module globals for a given name. Used for /// looking up the per-class ivar handle global from inside IMP /// trampoline emission. diff --git a/tests/expected/142-objc-class-method-lowering.ir b/tests/expected/142-objc-class-method-lowering.ir index 29f3b91..949936a 100644 --- a/tests/expected/142-objc-class-method-lowering.ir +++ b/tests/expected/142-objc-class-method-lowering.ir @@ -29,6 +29,8 @@ @OBJC_CLASS_NAME_.19 = private unnamed_addr constant [6 x i8] c"SxFoo\00" @OBJC_METH_VAR_NAME_ = private unnamed_addr constant [5 x i8] c"bump\00" @OBJC_METH_VAR_TYPE_ = private unnamed_addr constant [4 x i8] c"v@:\00" +@OBJC_METH_VAR_NAME_.20 = private unnamed_addr constant [6 x i8] c"alloc\00" +@OBJC_METH_VAR_TYPE_.21 = private unnamed_addr constant [4 x i8] c"@@:\00" @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__sx_objc_defined_class_init, ptr null }] ; Function Attrs: nounwind @@ -786,6 +788,23 @@ entry: ret { ptr, i64 } %call } +; Function Attrs: nounwind +define ptr @__SxFoo_alloc_imp(ptr %0, ptr %1) #0 { +entry: + %call = call ptr @class_createInstance(ptr %0, i64 0) + %callN = call ptr @malloc(i64 4) + %callN = call ptr @memset(ptr %callN, i32 0, i64 4) + %load = load ptr, ptr @__SxFoo_state_ivar, align 8 + call void @object_setIvar(ptr %call, ptr %load, ptr %callN) + ret ptr %call +} + +; Function Attrs: nounwind +declare ptr @class_createInstance(ptr, i64) #0 + +; Function Attrs: nounwind +declare void @object_setIvar(ptr, ptr, ptr) #0 + ; Function Attrs: nounwind define void @__SxFoo_bump_imp(ptr %0, ptr %1) #0 { entry: @@ -822,7 +841,12 @@ entry: %sel = call ptr @sel_registerName(ptr @OBJC_METH_VAR_NAME_) %1 = call i8 
@class_addMethod(ptr %cls, ptr %sel, ptr @__SxFoo_bump_imp, ptr @OBJC_METH_VAR_TYPE_) call void @objc_registerClassPair(ptr %cls) + %metacls = call ptr @object_getClass(ptr %cls) + %sel_alloc = call ptr @sel_registerName(ptr @OBJC_METH_VAR_NAME_.20) + %2 = call i8 @class_addMethod(ptr %metacls, ptr %sel_alloc, ptr @__SxFoo_alloc_imp, ptr @OBJC_METH_VAR_TYPE_.21) %iv = call ptr @class_getInstanceVariable(ptr %cls, ptr @OBJC_IVAR_NAME_) store ptr %iv, ptr @__SxFoo_state_ivar, align 8 ret void } + +declare ptr @object_getClass(ptr) diff --git a/tests/expected/146-objc-class-alloc-roundtrip.exit b/tests/expected/146-objc-class-alloc-roundtrip.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/tests/expected/146-objc-class-alloc-roundtrip.exit @@ -0,0 +1 @@ +0 diff --git a/tests/expected/146-objc-class-alloc-roundtrip.txt b/tests/expected/146-objc-class-alloc-roundtrip.txt new file mode 100644 index 0000000..c191fe4 --- /dev/null +++ b/tests/expected/146-objc-class-alloc-roundtrip.txt @@ -0,0 +1 @@ +alloc: ok, state bound