abi: pass >16B aggregates by ptr-in-next-reg (Apple ARM64 ABI) + Path B for fn-ptr casts

Three stacked compiler bugs were causing iOS-sim chess to crash inside
[MTLTexture replaceRegion:...]. Fixing them lets every replaceRegion call
site succeed (1×1 RGBA8, 1MB R8 atlas, 440×440 chess pieces).

Path B for callconv(.c) fn-pointer casts:
- FunctionInfo now carries call_conv: CallConv (TypeInfo.CallConv) so
  function-type interning distinguishes sx-CC from C-CC. Inst.zig's
  Function.CallingConvention aliases the same enum.
- Parser accepts an optional `callconv(.c)` suffix on fn-pointer type
  spellings (factored into parseOptionalCallConv() shared with parseFnDecl
  and parseLambda).
- resolveFunctionType passes the parsed CC through functionTypeCC().
- .call_indirect reads fp.call_conv == .c and applies the C-ABI
  alloca+materialize for >16B aggregate args (Path A's behaviour at .call).

Apple ARM64 ABI (drop LLVM byval):
- Side-by-side asm diff vs clang's emission for the equivalent C call site
  showed that LLVM lowers a `byval` argument on Apple arm64 by copying the
  struct onto the stack, while clang passes the struct via a pointer in the
  next int register (x2 for replaceRegion:). The runtime objc_msgSend
  dispatch path expects clang's convention.
- Dropped the byval attribute from the function-signature emission and
  from both call sites (.call and .call_indirect). The materialize-into-
  alloca + pass-plain-ptr pattern stays — the call site now matches
  clang's `mov x2, sp` exactly.
- Path A's sx-to-sx case continues to work since both ends use plain ptr
  (caller does alloca+store+pass, callee loads from the ptr in prologue).

Protocol dispatch (emitProtocolDispatch):
- Untargeted `null` lowers as const_null with type .void (per
  target_type orelse .void). The "wrap-value-in-alloca-pass-pointer"
  branch alloca'd a void slot, which LLVM's IRBuilder asserts on —
  EXC_BREAKPOINT in getTypeSizeInBits, manifesting as exit 133 / SIGTRAP
  when building the chess game. Fixed by re-emitting as
  constNull(void_ptr) when arg_ty == .void && expected_ty == void_ptr.
- is_pointer_ty only recognized .pointer, so [*]T (many_pointer) was
  alloca-wrapped — the heap pixels pointer from stbi_load was stored
  into a stack slot and the slot's address was passed as the *void arg.
  Fixed by extending the check to `.pointer or .many_pointer`.

metal.sx call sites + lifecycle guards:
- msg_replace (replaceRegion:, MTLRegion = 48B) and the two setScissorRect:
  sites (MTLScissorRect = 32B) now spell their fn-pointer types with
  by-value params + callconv(.c) — the *MTLRegion/@local workaround is
  gone.
- metal_begin_frame_ios bails before nextDrawable when pixel_w/h are 0
  (drawableSize 0×0 makes nextDrawable abort via XPC).
- metal_init_ios only sets drawableSize when dims are positive.
- begin_frame's encoder/cmd_buffer failure paths now clear self.drawable
  so a partial failure doesn't leak a drawable back into the pool.

Examples + tests:
- examples/86-callconv-c-fnptr-large-aggregate.sx — new, covers Path B
  with C-CC fn-ptr cast.
- examples/87-fnptr-cast-large-aggregate.sx — renamed from issue-0025.sx,
  covers Path B with default sx-CC (the negative case).
- examples/85-cc-c-large-aggregate.sx — from Session 60, covers Path A.
- examples/issue-0014.sx, issue-0024.sx, issue-0025.sx — removed
  (resolved earlier in this work; issue-0025.sx lives on as example 87).

71 regression tests pass, 0 failed. Chess game builds clean for iOS sim
and reaches its frame loop without aborting. Runtime: chess UI still
doesn't render — remaining issue is in the UIKit lifecycle / CAMetalLayer
setup (legacy-app vs scene-API hybrid), not a compiler bug. See
current/CHECKPOINT.md "Next step" for the diagnosis + options.
This commit is contained in:
agra
2026-05-18 00:11:23 +03:00
parent a1647eab9b
commit 63565e41ff
20 changed files with 260 additions and 258 deletions

View File

@@ -457,6 +457,7 @@ pub const FunctionTypeExpr = struct {
param_types: []const *Node,
param_names: ?[]const ?[]const u8 = null, // optional documentation names
return_type: ?*Node, // null = void return
call_conv: CallingConvention = .default,
};
pub const ClosureTypeExpr = struct {

View File

@@ -405,6 +405,13 @@ pub const LLVMEmitter = struct {
c.LLVMAddAttributeAtIndex(llvm_func, func_idx_attr, nounwind_attr);
}
// Apple ARM64 ABI for >16B non-HFA composites: pass by reference
// via a pointer in the next int register (NOT via LLVM's `byval`
// attribute, which lowers the struct on the stack — incompatible
// with what `clang` emits and what foreign C callees expect).
// abiCoerceParamType returned `ptr` for these slots, so the formal
// param IS a plain pointer; the prologue loads the struct back.
self.func_map.put(func_idx, llvm_func) catch unreachable;
}
@@ -436,6 +443,23 @@ pub const LLVMEmitter = struct {
self.block_map.put(block_key, bb) catch unreachable;
}
// Large-aggregate C-ABI params arrive as a plain `ptr` in LLVM (no `byval` attribute), but the IR body expects struct values.
// At entry, load each such param into a struct SSA value and re-map its ref.
const needs_c_abi = func.is_extern or func.call_conv == .c;
if (needs_c_abi and func.blocks.items.len > 0) {
const entry_key = makeBlockKey(func_idx, 0);
const entry_bb = self.block_map.get(entry_key) orelse unreachable;
c.LLVMPositionBuilderAtEnd(self.builder, entry_bb);
for (func.params, 0..) |param, pi| {
const raw_llvm_ty = self.toLLVMType(param.ty);
if (self.needsByval(param.ty, raw_llvm_ty)) {
const ptr_val = c.LLVMGetParam(llvm_func, @intCast(pi));
const loaded = c.LLVMBuildLoad2(self.builder, raw_llvm_ty, ptr_val, "byval.load");
self.ref_map.put(@intCast(pi), loaded) catch unreachable;
}
}
}
// Clear pending phis for this function
self.pending_phis.clearRetainingCapacity();
@@ -883,11 +907,21 @@ pub const LLVMEmitter = struct {
// Get the function type from LLVM and coerce arguments
const fn_ty = c.LLVMGlobalGetValueType(callee);
const param_count = c.LLVMCountParamTypes(fn_ty);
const callee_needs_c_abi = callee_func.is_extern or callee_func.call_conv == .c;
if (param_count > 0) {
const param_types = self.alloc.alloc(c.LLVMTypeRef, param_count) catch unreachable;
defer self.alloc.free(param_types);
c.LLVMGetParamTypes(fn_ty, param_types.ptr);
for (0..@min(args.len, param_count)) |j| {
// Materialize byval args before coercion so we pass a ptr instead of the struct value.
if (callee_needs_c_abi and j < callee_func.params.len) {
const ir_ty = callee_func.params[j].ty;
const raw_struct = self.toLLVMType(ir_ty);
if (self.needsByval(ir_ty, raw_struct)) {
args[j] = self.materializeByvalArg(args[j], raw_struct);
continue;
}
}
args[j] = self.coerceArg(args[j], param_types[j]);
}
}
@@ -922,6 +956,16 @@ pub const LLVMEmitter = struct {
break :blk null;
} else null;
// Read the fn-pointer type's calling convention. Only `.c` opts
// into the C-ABI byval coercion for >16B aggregate params.
const fp_is_c_abi: bool = if (callee_ir_ty) |cty| blk: {
if (!cty.isBuiltin()) {
const ci = self.ir_mod.types.get(cty);
if (ci == .function and ci.function.call_conv == .c) break :blk true;
}
break :blk false;
} else false;
const ret_ty = if (callee_ir_ty) |cty| blk: {
if (!cty.isBuiltin()) {
const ci = self.ir_mod.types.get(cty);
@@ -939,7 +983,13 @@ pub const LLVMEmitter = struct {
if (fn_params) |fp| {
for (0..call_op.args.len) |j| {
if (j < fp.len) {
var llvm_pty = self.toLLVMType(fp[j]);
const raw_struct = self.toLLVMType(fp[j]);
if (fp_is_c_abi and self.needsByval(fp[j], raw_struct)) {
args[j] = self.materializeByvalArg(args[j], raw_struct);
param_tys[j] = self.cached_ptr;
continue;
}
var llvm_pty = raw_struct;
// Array params in fn-ptr calls decay to pointers (C ABI)
if (c.LLVMGetTypeKind(llvm_pty) == c.LLVMArrayTypeKind) {
llvm_pty = self.cached_ptr;
@@ -2790,8 +2840,41 @@ pub const LLVMEmitter = struct {
return c.LLVMArrayType2(self.cached_i64, 2);
}
// Large struct (> 16 bytes) → leave as-is (should be indirect, but handle later)
return llvm_ty;
// Large composite (> 16 bytes) → pass by reference as a plain `ptr`,
// with no `byval` attribute at the call/sig sites: the caller copies the
// value into an alloca and passes its address; the callee loads it back.
return self.cached_ptr;
}
/// Reports whether a parameter of IR type `ir_ty` (whose un-coerced LLVM type
/// is `raw_llvm_ty`) is a large aggregate that gets the
/// pass-by-pointer treatment at C-ABI call sites and signatures.
///
/// Returns `false` for wasm32 targets, for `.string`, for slices, for
/// anything that is not an LLVM struct, and for structs of 1..4 fields that
/// are all `float` or all `double`. Otherwise returns whether the ABI size
/// of the struct exceeds 16 bytes.
///
/// NOTE(review): the float/double test only looks at direct fields, so a
/// struct whose fields are themselves structs or arrays of floats is not
/// recognized by it — confirm this matches abiCoerceParamType.
/// NOTE(review): the all-float/all-double exemption is applied regardless of
/// target architecture — confirm that is intended for non-AArch64 targets.
fn needsByval(self: *LLVMEmitter, ir_ty: TypeId, raw_llvm_ty: c.LLVMTypeRef) bool {
    if (self.target_config.isWasm32()) return false;

    // Strings and slices never take the indirect path.
    if (ir_ty == .string) return false;
    if (!ir_ty.isBuiltin() and self.ir_mod.types.get(ir_ty) == .slice) return false;

    // Only LLVM struct types are candidates.
    if (c.LLVMGetTypeKind(raw_llvm_ty) != c.LLVMStructTypeKind) return false;

    const field_count = c.LLVMCountStructElementTypes(raw_llvm_ty);
    if (field_count >= 1 and field_count <= 4) {
        // Count how many direct fields are `float` and how many are `double`;
        // a struct made up entirely of one of them is exempt.
        var float_fields: c_uint = 0;
        var double_fields: c_uint = 0;
        var idx: c_uint = 0;
        while (idx < field_count) : (idx += 1) {
            const field_kind = c.LLVMGetTypeKind(c.LLVMStructGetTypeAtIndex(raw_llvm_ty, idx));
            if (field_kind == c.LLVMFloatTypeKind) {
                float_fields += 1;
            } else if (field_kind == c.LLVMDoubleTypeKind) {
                double_fields += 1;
            }
        }
        if (float_fields == field_count or double_fields == field_count) return false;
    }

    // Everything else is decided purely by ABI size.
    const layout = c.LLVMGetModuleDataLayout(self.llvm_module);
    return c.LLVMABISizeOfType(layout, raw_llvm_ty) > 16;
}
/// Spills `val` into a stack slot of type `struct_ty` and returns the slot's
/// address, so a large aggregate can be handed to the callee as a plain
/// pointer.
///
/// The slot is allocated in the enclosing function's entry block rather than
/// at the current insertion point: an alloca emitted inside a loop body
/// reserves fresh stack space on every iteration, and LLVM only promotes or
/// splits allocas that live in the entry block. The store is still emitted at
/// the current insertion point, so the slot is refreshed before each call.
fn materializeByvalArg(self: *LLVMEmitter, val: c.LLVMValueRef, struct_ty: c.LLVMTypeRef) c.LLVMValueRef {
    const tmp = blk: {
        const cur_bb = c.LLVMGetInsertBlock(self.builder);
        // No insertion block to derive the function from: keep the old behaviour.
        if (cur_bb == null) break :blk c.LLVMBuildAlloca(self.builder, struct_ty, "byval.tmp");

        const entry_bb = c.LLVMGetEntryBasicBlock(c.LLVMGetBasicBlockParent(cur_bb));

        // Use a throw-away builder so the main builder's position is untouched.
        const entry_builder = c.LLVMCreateBuilderInContext(c.LLVMGetTypeContext(struct_ty));
        defer c.LLVMDisposeBuilder(entry_builder);

        const first_inst = c.LLVMGetFirstInstruction(entry_bb);
        if (first_inst != null) {
            c.LLVMPositionBuilderBefore(entry_builder, first_inst);
        } else {
            c.LLVMPositionBuilderAtEnd(entry_builder, entry_bb);
        }
        break :blk c.LLVMBuildAlloca(entry_builder, struct_ty, "byval.tmp");
    };
    _ = c.LLVMBuildStore(self.builder, val, tmp);
    return tmp;
}
// ── Cached composite types ──────────────────────────────────────

View File

@@ -424,10 +424,7 @@ pub const Function = struct {
private,
};
pub const CallingConvention = enum {
default,
c,
};
pub const CallingConvention = types.TypeInfo.CallConv;
pub fn init(name: StringId, params: []const Param, ret: TypeId) Function {
return .{

View File

@@ -7913,8 +7913,22 @@ pub const Lowering = struct {
for (args, 0..) |a, i| {
const expected_ty = if (i < mi.param_types.len) mi.param_types[i] else void_ptr;
const arg_ty = self.builder.getRefType(a);
// If protocol method expects *void but we have a value (struct or primitive), convert to pointer
const is_pointer_ty = if (!arg_ty.isBuiltin()) self.module.types.get(arg_ty) == .pointer else false;
// Untargeted `null` lowers as const_null with type .void. Re-emit it
// as a null of the expected pointer type instead of alloca'ing void.
if (arg_ty == .void and expected_ty == void_ptr) {
call_args.append(self.alloc, self.builder.constNull(void_ptr)) catch unreachable;
continue;
}
// A protocol method that expects `*void` accepts any single-pointer
// value directly (`*T`, `[*]T`). Only wrap non-pointer values in an
// alloca-slot — wrapping a pointer would pass the stack slot's
// address instead of the actual pointer, and the callee would read
// 8 bytes of pointer plus garbage from beyond the stack.
const is_pointer_ty = if (!arg_ty.isBuiltin()) blk: {
const info = self.module.types.get(arg_ty);
break :blk info == .pointer or info == .many_pointer;
} else false;
if (expected_ty == void_ptr and arg_ty != void_ptr and !is_pointer_ty) {
const slot = self.builder.alloca(arg_ty);
self.builder.store(slot, a);

View File

@@ -265,7 +265,8 @@ fn resolveFunctionType(ft: *const ast.FunctionTypeExpr, table: *TypeTable) TypeI
param_ids.append(alloc, resolveAstType(pt, table)) catch unreachable;
}
const ret_id = if (ft.return_type) |rt| resolveAstType(rt, table) else TypeId.void;
return table.functionType(param_ids.items, ret_id);
const cc: ir_types.TypeInfo.CallConv = if (ft.call_conv == .c) .c else .default;
return table.functionTypeCC(param_ids.items, ret_id, cc);
}
fn resolveClosureType(ct: *const ast.ClosureTypeExpr, table: *TypeTable) TypeId {

View File

@@ -129,8 +129,11 @@ pub const TypeInfo = union(enum) {
pub const FunctionInfo = struct {
params: []const TypeId,
ret: TypeId,
call_conv: CallConv = .default,
};
pub const CallConv = enum { default, c };
pub const ClosureInfo = struct {
params: []const TypeId,
ret: TypeId,
@@ -337,8 +340,12 @@ pub const TypeTable = struct {
}
/// Interns a function type with the given parameter and return types, using
/// the default calling convention. Delegates to `functionTypeCC`.
pub fn functionType(self: *TypeTable, params: []const TypeId, ret: TypeId) TypeId {
    const default_cc: TypeInfo.CallConv = .default;
    return self.functionTypeCC(params, ret, default_cc);
}
/// Interns a function type with parameter types `params`, return type `ret`
/// and calling convention `cc`, and returns its `TypeId`.
pub fn functionTypeCC(self: *TypeTable, params: []const TypeId, ret: TypeId, cc: TypeInfo.CallConv) TypeId {
    // Copy the parameter list so the interned entry does not alias the caller's slice.
    const owned_params = self.alloc.dupe(TypeId, params) catch unreachable;
    // `call_conv` must be part of the interned value; a return that omits it
    // would silently fold every C-CC function type into the default-CC one.
    return self.intern(.{ .function = .{ .params = owned_params, .ret = ret, .call_conv = cc } });
}
pub fn closureType(self: *TypeTable, params: []const TypeId, ret: TypeId) TypeId {
@@ -653,6 +660,8 @@ fn hashTypeInfo(h: *std.hash.Wyhash, info: TypeInfo) void {
.function => |f| {
for (f.params) |p| h.update(std.mem.asBytes(&p));
h.update(std.mem.asBytes(&f.ret));
const cc_byte: u8 = @intFromEnum(f.call_conv);
h.update(&.{cc_byte});
},
.closure => |c| {
for (c.params) |p| h.update(std.mem.asBytes(&p));
@@ -692,6 +701,7 @@ fn typeInfoEql(a: TypeInfo, b: TypeInfo) bool {
for (f.params, g.params) |fp, gp| {
if (fp != gp) return false;
}
if (f.call_conv != g.call_conv) return false;
return f.ret == g.ret;
},
.closure => |c| {

View File

@@ -471,10 +471,12 @@ pub const Parser = struct {
// '->' present: function type
self.advance(); // skip '->'
const return_type = try self.parseTypeExpr();
const call_conv = try self.parseOptionalCallConv();
return try self.createNode(start, .{ .function_type_expr = .{
.param_types = try param_types.toOwnedSlice(self.allocator),
.param_names = if (has_names) try param_names.toOwnedSlice(self.allocator) else null,
.return_type = return_type,
.call_conv = call_conv,
} });
}
// No '->': tuple type (even for single element)
@@ -1236,22 +1238,7 @@ pub const Parser = struct {
}
// Optional calling convention: callconv(.c)
var call_conv: ast.CallingConvention = .default;
if (self.current.tag == .kw_callconv) {
self.advance();
try self.expect(.l_paren);
try self.expect(.dot);
if (self.current.tag != .identifier)
return self.fail("expected calling convention name after '.'");
const cc_name = self.tokenSlice(self.current);
if (std.mem.eql(u8, cc_name, "c")) {
call_conv = .c;
} else {
return self.fail("unknown calling convention");
}
self.advance();
try self.expect(.r_paren);
}
const call_conv = try self.parseOptionalCallConv();
// Body: block `{ ... }`, arrow `=> expr;`, #builtin, #compiler, or #foreign marker
var is_arrow = false;
@@ -2370,22 +2357,7 @@ pub const Parser = struct {
}
// Optional calling convention: callconv(.c)
var call_conv: ast.CallingConvention = .default;
if (self.current.tag == .kw_callconv) {
self.advance();
try self.expect(.l_paren);
try self.expect(.dot);
if (self.current.tag != .identifier)
return self.fail("expected calling convention name after '.'");
const cc_name = self.tokenSlice(self.current);
if (std.mem.eql(u8, cc_name, "c")) {
call_conv = .c;
} else {
return self.fail("unknown calling convention");
}
self.advance();
try self.expect(.r_paren);
}
const call_conv = try self.parseOptionalCallConv();
// Two body forms:
// (params) => expr — expression lambda
@@ -2423,6 +2395,20 @@ pub const Parser = struct {
return tag == .l_brace or tag == .arrow or tag == .hash_builtin or tag == .hash_compiler or tag == .hash_foreign or tag == .fat_arrow or tag == .kw_callconv;
}
/// Parses an optional `callconv(.<name>)` suffix at the current token.
///
/// Returns `.default` without consuming anything when the current token is
/// not `callconv`. The only convention name accepted is `c`; a missing name
/// or any other name is reported through `self.fail`.
fn parseOptionalCallConv(self: *Parser) anyerror!ast.CallingConvention {
    if (self.current.tag != .kw_callconv) return .default;

    self.advance(); // skip 'callconv'
    try self.expect(.l_paren);
    try self.expect(.dot);

    if (self.current.tag != .identifier) {
        return self.fail("expected calling convention name after '.'");
    }

    const name = self.tokenSlice(self.current);
    if (!std.mem.eql(u8, name, "c")) {
        return self.fail("unknown calling convention");
    }

    self.advance(); // skip the convention name
    try self.expect(.r_paren);
    return .c;
}
fn isAssignOp(self: *const Parser) bool {
return switch (self.current.tag) {
.equal, .plus_equal, .minus_equal, .star_equal, .slash_equal, .percent_equal,