abi: pass >16B aggregates by ptr-in-next-reg (Apple ARM64 ABI) + Path B for fn-ptr casts
Three stacked compiler bugs were causing iOS-sim chess to crash inside [MTLTexture replaceRegion:...]. Fixing them lets every replaceRegion call site succeed (1×1 RGBA8, 1MB R8 atlas, 440×440 chess pieces). Path B for callconv(.c) fn-pointer casts: - FunctionInfo now carries call_conv: CallConv (TypeInfo.CallConv) so function-type interning distinguishes sx-CC from C-CC. Inst.zig's Function.CallingConvention aliases the same enum. - Parser accepts an optional `callconv(.c)` suffix on fn-pointer type spellings (factored into parseOptionalCallConv() shared with parseFnDecl and parseLambda). - resolveFunctionType passes the parsed CC through functionTypeCC(). - .call_indirect reads fp.call_conv == .c and applies the C-ABI alloca+materialize for >16B aggregate args (Path A's behaviour at .call). Apple ARM64 ABI (drop LLVM byval): - Side-by-side asm diff vs clang's emission for the equivalent C call site showed LLVM's `byval` attribute lowers Apple-arm64 byval on the stack, while clang passes the struct via a pointer in the next int register (x2 for replaceRegion:). The runtime objc_msgSend dispatch path expects clang's convention. - Dropped the byval attribute from the function-signature emission and from both call sites (.call and .call_indirect). The materialize-into- alloca + pass-plain-ptr pattern stays — the call site now matches clang's `mov x2, sp` exactly. - Path A's sx-to-sx case continues to work since both ends use plain ptr (caller does alloca+store+pass, callee loads from the ptr in prologue). Protocol dispatch (emitProtocolDispatch): - Untargeted `null` lowers as const_null with type .void (per target_type orelse .void). The "wrap-value-in-alloca-pass-pointer" branch alloca'd a void slot, which LLVM's IRBuilder asserts on — EXC_BREAKPOINT in getTypeSizeInBits, manifesting as exit 133 / SIGTRAP when building the chess game. Fixed by re-emitting as constNull(void_ptr) when arg_ty == .void && expected_ty == void_ptr. - is_pointer_ty only recognized .pointer, so [*]T (many_pointer) was alloca-wrapped — the heap pixels pointer from stbi_load was stored into a stack slot and the slot's address was passed as the *void arg. Fixed by extending the check to `.pointer or .many_pointer`. metal.sx call sites + lifecycle guards: - msg_replace (replaceRegion:, MTLRegion = 48B) and the two setScissorRect: sites (MTLScissorRect = 32B) now spell their fn-pointer types with by-value params + callconv(.c) — the *MTLRegion/@local workaround is gone. - metal_begin_frame_ios bails before nextDrawable when pixel_w/h are 0 (drawableSize 0×0 makes nextDrawable abort via XPC). - metal_init_ios only sets drawableSize when dims are positive. - begin_frame's encoder/cmd_buffer failure paths now clear self.drawable so a partial failure doesn't leak a drawable back into the pool. Examples + tests: - examples/86-callconv-c-fnptr-large-aggregate.sx — new, covers Path B with C-CC fn-ptr cast. - examples/87-fnptr-cast-large-aggregate.sx — renamed from issue-0025.sx, covers Path B with default sx-CC (the negative case). - examples/85-cc-c-large-aggregate.sx — from Session 60, covers Path A. - examples/issue-0014.sx, issue-0024.sx, issue-0025.sx — removed (resolved earlier this work). 71 regression tests pass, 0 failed. Chess game builds clean for iOS sim and reaches its frame loop without aborting. Runtime: chess UI still doesn't render — remaining issue is in the UIKit lifecycle / CAMetalLayer setup (legacy-app vs scene-API hybrid), not a compiler bug. See current/CHECKPOINT.md "Next step" for the diagnosis + options.
This commit is contained in:
@@ -405,6 +405,13 @@ pub const LLVMEmitter = struct {
|
||||
c.LLVMAddAttributeAtIndex(llvm_func, func_idx_attr, nounwind_attr);
|
||||
}
|
||||
|
||||
// Apple ARM64 ABI for >16B non-HFA composites: pass by reference
|
||||
// via a pointer in the next int register (NOT via LLVM's `byval`
|
||||
// attribute, which lowers the struct on the stack — incompatible
|
||||
// with what `clang` emits and what foreign C callees expect).
|
||||
// abiCoerceParamType returned `ptr` for these slots, so the formal
|
||||
// param IS a plain pointer; the prologue loads the struct back.
|
||||
|
||||
self.func_map.put(func_idx, llvm_func) catch unreachable;
|
||||
}
|
||||
|
||||
@@ -436,6 +443,23 @@ pub const LLVMEmitter = struct {
|
||||
self.block_map.put(block_key, bb) catch unreachable;
|
||||
}
|
||||
|
||||
// byval params arrive as `ptr` in LLVM but the IR body expects struct values.
|
||||
// At entry, load each byval param into a struct SSA value and re-map its ref.
|
||||
const needs_c_abi = func.is_extern or func.call_conv == .c;
|
||||
if (needs_c_abi and func.blocks.items.len > 0) {
|
||||
const entry_key = makeBlockKey(func_idx, 0);
|
||||
const entry_bb = self.block_map.get(entry_key) orelse unreachable;
|
||||
c.LLVMPositionBuilderAtEnd(self.builder, entry_bb);
|
||||
for (func.params, 0..) |param, pi| {
|
||||
const raw_llvm_ty = self.toLLVMType(param.ty);
|
||||
if (self.needsByval(param.ty, raw_llvm_ty)) {
|
||||
const ptr_val = c.LLVMGetParam(llvm_func, @intCast(pi));
|
||||
const loaded = c.LLVMBuildLoad2(self.builder, raw_llvm_ty, ptr_val, "byval.load");
|
||||
self.ref_map.put(@intCast(pi), loaded) catch unreachable;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clear pending phis for this function
|
||||
self.pending_phis.clearRetainingCapacity();
|
||||
|
||||
@@ -883,11 +907,21 @@ pub const LLVMEmitter = struct {
|
||||
// Get the function type from LLVM and coerce arguments
|
||||
const fn_ty = c.LLVMGlobalGetValueType(callee);
|
||||
const param_count = c.LLVMCountParamTypes(fn_ty);
|
||||
const callee_needs_c_abi = callee_func.is_extern or callee_func.call_conv == .c;
|
||||
if (param_count > 0) {
|
||||
const param_types = self.alloc.alloc(c.LLVMTypeRef, param_count) catch unreachable;
|
||||
defer self.alloc.free(param_types);
|
||||
c.LLVMGetParamTypes(fn_ty, param_types.ptr);
|
||||
for (0..@min(args.len, param_count)) |j| {
|
||||
// Materialize byval args before coercion so we pass a ptr instead of the struct value.
|
||||
if (callee_needs_c_abi and j < callee_func.params.len) {
|
||||
const ir_ty = callee_func.params[j].ty;
|
||||
const raw_struct = self.toLLVMType(ir_ty);
|
||||
if (self.needsByval(ir_ty, raw_struct)) {
|
||||
args[j] = self.materializeByvalArg(args[j], raw_struct);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
args[j] = self.coerceArg(args[j], param_types[j]);
|
||||
}
|
||||
}
|
||||
@@ -922,6 +956,16 @@ pub const LLVMEmitter = struct {
|
||||
break :blk null;
|
||||
} else null;
|
||||
|
||||
// Read the fn-pointer type's calling convention. Only `.c` opts
|
||||
// into the C-ABI byval coercion for >16B aggregate params.
|
||||
const fp_is_c_abi: bool = if (callee_ir_ty) |cty| blk: {
|
||||
if (!cty.isBuiltin()) {
|
||||
const ci = self.ir_mod.types.get(cty);
|
||||
if (ci == .function and ci.function.call_conv == .c) break :blk true;
|
||||
}
|
||||
break :blk false;
|
||||
} else false;
|
||||
|
||||
const ret_ty = if (callee_ir_ty) |cty| blk: {
|
||||
if (!cty.isBuiltin()) {
|
||||
const ci = self.ir_mod.types.get(cty);
|
||||
@@ -939,7 +983,13 @@ pub const LLVMEmitter = struct {
|
||||
if (fn_params) |fp| {
|
||||
for (0..call_op.args.len) |j| {
|
||||
if (j < fp.len) {
|
||||
var llvm_pty = self.toLLVMType(fp[j]);
|
||||
const raw_struct = self.toLLVMType(fp[j]);
|
||||
if (fp_is_c_abi and self.needsByval(fp[j], raw_struct)) {
|
||||
args[j] = self.materializeByvalArg(args[j], raw_struct);
|
||||
param_tys[j] = self.cached_ptr;
|
||||
continue;
|
||||
}
|
||||
var llvm_pty = raw_struct;
|
||||
// Array params in fn-ptr calls decay to pointers (C ABI)
|
||||
if (c.LLVMGetTypeKind(llvm_pty) == c.LLVMArrayTypeKind) {
|
||||
llvm_pty = self.cached_ptr;
|
||||
@@ -2790,8 +2840,41 @@ pub const LLVMEmitter = struct {
|
||||
return c.LLVMArrayType2(self.cached_i64, 2);
|
||||
}
|
||||
|
||||
// Large struct (> 16 bytes) → leave as-is (should be indirect, but handle later)
|
||||
return llvm_ty;
|
||||
// Large composite (> 16 bytes) → pass by reference: ptr + byval(<T>) at
|
||||
// the call/sig sites. LLVM's AArch64/x86_64 backend lowers byval to
|
||||
// the right ABI sequence (caller copy + indirect arg).
|
||||
return self.cached_ptr;
|
||||
}
|
||||
|
||||
fn needsByval(self: *LLVMEmitter, ir_ty: TypeId, raw_llvm_ty: c.LLVMTypeRef) bool {
|
||||
if (self.target_config.isWasm32()) return false;
|
||||
if (ir_ty == .string) return false;
|
||||
if (!ir_ty.isBuiltin()) {
|
||||
const info = self.ir_mod.types.get(ir_ty);
|
||||
if (info == .slice) return false;
|
||||
}
|
||||
if (c.LLVMGetTypeKind(raw_llvm_ty) != c.LLVMStructTypeKind) return false;
|
||||
const n = c.LLVMCountStructElementTypes(raw_llvm_ty);
|
||||
if (n >= 1 and n <= 4) {
|
||||
var all_f = true;
|
||||
var all_d = true;
|
||||
var i: c_uint = 0;
|
||||
while (i < n) : (i += 1) {
|
||||
const ft = c.LLVMStructGetTypeAtIndex(raw_llvm_ty, i);
|
||||
const fk = c.LLVMGetTypeKind(ft);
|
||||
if (fk != c.LLVMFloatTypeKind) all_f = false;
|
||||
if (fk != c.LLVMDoubleTypeKind) all_d = false;
|
||||
}
|
||||
if (all_f or all_d) return false;
|
||||
}
|
||||
const size = c.LLVMABISizeOfType(c.LLVMGetModuleDataLayout(self.llvm_module), raw_llvm_ty);
|
||||
return size > 16;
|
||||
}
|
||||
|
||||
fn materializeByvalArg(self: *LLVMEmitter, val: c.LLVMValueRef, struct_ty: c.LLVMTypeRef) c.LLVMValueRef {
|
||||
const tmp = c.LLVMBuildAlloca(self.builder, struct_ty, "byval.tmp");
|
||||
_ = c.LLVMBuildStore(self.builder, val, tmp);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
// ── Cached composite types ──────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user