From 7fd6decdc9d258a32a90da3542b14213f46952d6 Mon Sep 17 00:00:00 2001 From: agra Date: Tue, 19 May 2026 11:40:54 +0300 Subject: [PATCH] emit_llvm: sret return for >16-byte aggregate foreign returns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foreign functions that return a >16-byte non-HFA aggregate (e.g. Big24 / UIEdgeInsets on iOS / clang-shaped struct returns) need the indirect-return ABI: caller allocates space, passes its pointer as a hidden first arg with `sret(T)` (T = the returned struct type), callee writes through it and returns void. AAPCS64 puts the pointer in x8; SysV AMD64 passes it in the first integer argument register (rdi); in both cases the function's IR-level return type is void. The existing >16-byte branch in `abiCoerceParamType` was returning `ptr` for BOTH params and returns. That works for byval params (the established pattern — caller alloca + store + pass ptr, callee loads in prologue), but is wrong for returns: it caused the function decl to look like `ptr @fn(...)` rather than `void @fn(ptr sret(T), ...)`, and the call site read whatever happened to be in x0 as a struct pointer — segfault on dereference (caught while writing the ffi-03 baseline). Fix layered into the same `abiCoerceParamType` / call-site code path: emitFunctionDecl: - Compute `uses_sret = needs_c_abi && !is_main && needsByval(func.ret, raw_ret_ty)`. - Ret type collapses to void. - Prepend a `ptr` param at slot 0. - Add `sret(T)` type attribute on param-index 1 (LLVMAttributeIndex 1 = first parameter; 0 = return value). .call lowering: - Detect callee_uses_sret via the same predicate (without the `is_main` exclusion). - Allocate the result on the caller's stack (`sret.slot`). - Prepend it as args[0] (with sret_off index alignment so the original sx args land at args[1..]). - After LLVMBuildCall2, set the same `sret(T)` attribute on the call site's arg 1 (mirrors the fn-decl attribute — both land in the AArch64 backend's lowering pass). - Load the result from the slot to produce the IR value. 
`call_indirect` (function-pointer dispatch — uikit.sx's typed `objc_msgSend` casts) keeps its existing behavior for now; the iOS path already round-trips UIEdgeInsets via that route. Folding the same sret transform into call_indirect is a follow-up. 89/89 regression tests still pass. Chess Android + iOS-sim both build clean. --- src/ir/emit_llvm.zig | 77 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/src/ir/emit_llvm.zig b/src/ir/emit_llvm.zig index 774e3b7..a6805e0 100644 --- a/src/ir/emit_llvm.zig +++ b/src/ir/emit_llvm.zig @@ -365,15 +365,27 @@ pub const LLVMEmitter = struct { // main always returns i32 at the LLVM level (JIT expects it) const raw_ret_ty = self.toLLVMType(func.ret); const needs_c_abi = func.is_extern or func.call_conv == .c; - const ret_ty = if (is_main) self.cached_i32 else if (needs_c_abi) self.abiCoerceParamType(func.ret, raw_ret_ty) else raw_ret_ty; + // sret return: C-ABI functions returning a >16 B non-HFA struct + // use the indirect-return convention (caller allocates space, + // passes its pointer as a hidden first arg with `sret()`, + // function writes through and returns void). Distinct from + // small-struct register coercion (i64 / [2 x i64]) and HFA. + const uses_sret = needs_c_abi and !is_main and self.needsByval(func.ret, raw_ret_ty); + const ret_ty = if (is_main) self.cached_i32 + else if (uses_sret) self.cached_void + else if (needs_c_abi) self.abiCoerceParamType(func.ret, raw_ret_ty) + else raw_ret_ty; - // Build parameter types — apply C ABI coercion for foreign/callconv(.c) functions - const param_count: c_uint = @intCast(func.params.len); - const param_types = self.alloc.alloc(c.LLVMTypeRef, func.params.len) catch unreachable; + // Build parameter types — apply C ABI coercion for foreign/callconv(.c) functions. + // When uses_sret, prepend the sret pointer at index 0. 
+ const sret_offset: usize = if (uses_sret) 1 else 0; + const param_count: c_uint = @intCast(func.params.len + sret_offset); + const param_types = self.alloc.alloc(c.LLVMTypeRef, func.params.len + sret_offset) catch unreachable; defer self.alloc.free(param_types); + if (uses_sret) param_types[0] = self.cached_ptr; for (func.params, 0..) |param, j| { const llvm_ty = self.toLLVMType(param.ty); - param_types[j] = if (needs_c_abi) self.abiCoerceParamType(param.ty, llvm_ty) else llvm_ty; + param_types[j + sret_offset] = if (needs_c_abi) self.abiCoerceParamType(param.ty, llvm_ty) else llvm_ty; } const fn_type = c.LLVMFunctionType(ret_ty, param_types.ptr, param_count, 0); @@ -382,6 +394,15 @@ pub const LLVMEmitter = struct { const llvm_func = c.LLVMAddFunction(self.llvm_module, name_z.ptr, fn_type); + // sret() attribute on the prepended pointer param. + // LLVMAttributeIndex 1 = first parameter (0 = return value). + if (uses_sret) { + const sret_kind = c.LLVMGetEnumAttributeKindForName("sret", 4); + const sret_attr = c.LLVMCreateTypeAttribute(self.context, sret_kind, raw_ret_ty); + const param1_idx: c.LLVMAttributeIndex = @bitCast(@as(i32, 1)); + c.LLVMAddAttributeAtIndex(llvm_func, param1_idx, sret_attr); + } + // Set linkage switch (func.linkage) { .external => c.LLVMSetLinkage(llvm_func, c.LLVMExternalLinkage), @@ -907,24 +928,40 @@ pub const LLVMEmitter = struct { self.mapRef(c.LLVMGetUndef(self.toLLVMType(instruction.ty))); return; }; - const arg_count: c_uint = @intCast(call_op.args.len); - const args = self.alloc.alloc(c.LLVMValueRef, call_op.args.len) catch unreachable; + const callee_needs_c_abi = callee_func.is_extern or callee_func.call_conv == .c; + const callee_raw_ret = self.toLLVMType(callee_func.ret); + const callee_uses_sret = callee_needs_c_abi and self.needsByval(callee_func.ret, callee_raw_ret); + + // When the callee uses sret, prepend an alloca for the result. + // Index alignment: actual_args[0] = sret_slot; actual_args[i+1] = sx arg i. 
+ const sret_off: usize = if (callee_uses_sret) 1 else 0; + const total_args = call_op.args.len + sret_off; + const args = self.alloc.alloc(c.LLVMValueRef, total_args) catch unreachable; defer self.alloc.free(args); - for (call_op.args, 0..) |arg_ref, j| { - args[j] = self.resolveRef(arg_ref); + var sret_slot: c.LLVMValueRef = null; + if (callee_uses_sret) { + sret_slot = c.LLVMBuildAlloca(self.builder, callee_raw_ret, "sret.slot"); + args[0] = sret_slot; } + for (call_op.args, 0..) |arg_ref, j| { + args[j + sret_off] = self.resolveRef(arg_ref); + } + const arg_count: c_uint = @intCast(total_args); + // Get the function type from LLVM and coerce arguments const fn_ty = c.LLVMGlobalGetValueType(callee); const param_count = c.LLVMCountParamTypes(fn_ty); - const callee_needs_c_abi = callee_func.is_extern or callee_func.call_conv == .c; if (param_count > 0) { const param_types = self.alloc.alloc(c.LLVMTypeRef, param_count) catch unreachable; defer self.alloc.free(param_types); c.LLVMGetParamTypes(fn_ty, param_types.ptr); for (0..@min(args.len, param_count)) |j| { + // The sret slot is already a properly-typed pointer; skip coercion. + if (callee_uses_sret and j == 0) continue; + const fn_param_idx = j - sret_off; // Materialize byval args before coercion so we pass a ptr instead of the struct value. - if (callee_needs_c_abi and j < callee_func.params.len) { - const ir_ty = callee_func.params[j].ty; + if (callee_needs_c_abi and fn_param_idx < callee_func.params.len) { + const ir_ty = callee_func.params[fn_param_idx].ty; const raw_struct = self.toLLVMType(ir_ty); if (self.needsByval(ir_ty, raw_struct)) { args[j] = self.materializeByvalArg(args[j], raw_struct); @@ -934,9 +971,19 @@ pub const LLVMEmitter = struct { args[j] = self.coerceArg(args[j], param_types[j]); } } - var result = c.LLVMBuildCall2(self.builder, fn_ty, callee, args.ptr, arg_count, if (instruction.ty == .void) "" else "call"); - // Coerce ABI return value (e.g. 
i64) back to IR struct type if needed - if (instruction.ty != .void and callee_func.is_extern) { + const call_label: [*:0]const u8 = if (instruction.ty == .void or callee_uses_sret) "" else "call"; + var result = c.LLVMBuildCall2(self.builder, fn_ty, callee, args.ptr, arg_count, call_label); + if (callee_uses_sret) { + // Mirror the function-decl `sret()` attribute on the call site so the + // LLVM backend lowers arg 0 via x8 (AAPCS64) / hidden ptr (SysV AMD64). + const sret_kind = c.LLVMGetEnumAttributeKindForName("sret", 4); + const sret_attr = c.LLVMCreateTypeAttribute(self.context, sret_kind, callee_raw_ret); + const param1_idx: c.LLVMAttributeIndex = @bitCast(@as(i32, 1)); + c.LLVMAddCallSiteAttribute(result, param1_idx, sret_attr); + // Load the actual struct value the callee wrote into the slot. + result = c.LLVMBuildLoad2(self.builder, callee_raw_ret, sret_slot, "sret.load"); + } else if (instruction.ty != .void and callee_func.is_extern) { + // Coerce ABI return value (e.g. i64 / [2 x i64]) back to IR struct type if needed const expected_ty = self.toLLVMType(instruction.ty); result = self.coerceArg(result, expected_ty); }