emit_llvm: sret return for >16-byte aggregate foreign returns

Foreign functions that return a >16-byte non-HFA aggregate (e.g.
Big24 / UIEdgeInsets on iOS / clang-shaped struct returns) need the
indirect-return ABI: caller allocates space, passes its pointer as a
hidden first arg with `sret(<T>)`, callee writes through it and
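returns void. AAPCS64 puts the pointer in x8; SysV AMD64 passes it in
the first integer argument register (rdi) and the callee hands the same
pointer back in rax. At the LLVM IR level both are expressed as a
`void` return plus a leading `ptr sret(<T>)` parameter.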

The existing >16-byte branch in `abiCoerceParamType` was returning
`ptr` for BOTH params and returns. That works for byval params (the
established pattern — caller alloca + store + pass ptr, callee loads
in prologue), but is wrong for returns: it caused the function decl
to look like `ptr @fn(...)` rather than `void @fn(ptr sret(<T>), ...)`,
and the call site read whatever happened to be in x0 as a struct
pointer — segfault on dereference (caught while writing the ffi-03
baseline).

Fix layered into the same `abiCoerceParamType` / call-site code path:

  emitFunctionDecl:
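    - Compute `uses_sret = needs_c_abi && !is_main &&
      needsByval(func.ret, raw_ret_ty)` (the IR return type, not the
      already-coerced `ret_ty`; `main` is excluded because it always
      returns i32).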
    - Ret type collapses to void.
    - Prepend a `ptr` param at slot 0.
    - Add `sret(<RetType>)` type attribute on param-index 1
      (LLVMAttributeIndex 1 = first parameter; 0 = return value).

  .call lowering:
    - Detect `callee_uses_sret` via the matching callee-side predicate
      (`callee_needs_c_abi && needsByval(callee_func.ret, callee_raw_ret)`).
    - Allocate the result on the caller's stack (`sret.slot`).
    - Prepend it as args[0] (with sret_off index alignment so the
      original sx args land at args[1..]).
    - After LLVMBuildCall2, set the same `sret(<T>)` attribute on
      the call site at LLVMAttributeIndex 1, i.e. on args[0], the
      sret slot (mirrors the fn-decl attribute — both land in the
      AArch64 backend's lowering pass).
    - Load the result from the slot to produce the IR value.

`call_indirect` (function-pointer dispatch — uikit.sx's typed
`objc_msgSend` casts) keeps its existing behavior for now; the iOS
path already round-trips UIEdgeInsets via that route. Folding the
same sret transform into call_indirect is a follow-up.

89/89 regression tests still pass. Chess Android + iOS-sim both
build clean.
This commit is contained in:
agra
2026-05-19 11:40:54 +03:00
parent edd8689fb2
commit 7fd6decdc9

View File

@@ -365,15 +365,27 @@ pub const LLVMEmitter = struct {
// main always returns i32 at the LLVM level (JIT expects it)
const raw_ret_ty = self.toLLVMType(func.ret);
const needs_c_abi = func.is_extern or func.call_conv == .c;
const ret_ty = if (is_main) self.cached_i32 else if (needs_c_abi) self.abiCoerceParamType(func.ret, raw_ret_ty) else raw_ret_ty;
// sret return: C-ABI functions returning a >16 B non-HFA struct
// use the indirect-return convention (caller allocates space,
// passes its pointer as a hidden first arg with `sret(<T>)`,
// function writes through and returns void). Distinct from
// small-struct register coercion (i64 / [2 x i64]) and HFA.
const uses_sret = needs_c_abi and !is_main and self.needsByval(func.ret, raw_ret_ty);
const ret_ty = if (is_main) self.cached_i32
else if (uses_sret) self.cached_void
else if (needs_c_abi) self.abiCoerceParamType(func.ret, raw_ret_ty)
else raw_ret_ty;
// Build parameter types — apply C ABI coercion for foreign/callconv(.c) functions
const param_count: c_uint = @intCast(func.params.len);
const param_types = self.alloc.alloc(c.LLVMTypeRef, func.params.len) catch unreachable;
// Build parameter types — apply C ABI coercion for foreign/callconv(.c) functions.
// When uses_sret, prepend the sret pointer at index 0.
const sret_offset: usize = if (uses_sret) 1 else 0;
const param_count: c_uint = @intCast(func.params.len + sret_offset);
const param_types = self.alloc.alloc(c.LLVMTypeRef, func.params.len + sret_offset) catch unreachable;
defer self.alloc.free(param_types);
if (uses_sret) param_types[0] = self.cached_ptr;
for (func.params, 0..) |param, j| {
const llvm_ty = self.toLLVMType(param.ty);
param_types[j] = if (needs_c_abi) self.abiCoerceParamType(param.ty, llvm_ty) else llvm_ty;
param_types[j + sret_offset] = if (needs_c_abi) self.abiCoerceParamType(param.ty, llvm_ty) else llvm_ty;
}
const fn_type = c.LLVMFunctionType(ret_ty, param_types.ptr, param_count, 0);
@@ -382,6 +394,15 @@ pub const LLVMEmitter = struct {
const llvm_func = c.LLVMAddFunction(self.llvm_module, name_z.ptr, fn_type);
// sret(<RetType>) attribute on the prepended pointer param.
// LLVMAttributeIndex 1 = first parameter (0 = return value).
if (uses_sret) {
const sret_kind = c.LLVMGetEnumAttributeKindForName("sret", 4);
const sret_attr = c.LLVMCreateTypeAttribute(self.context, sret_kind, raw_ret_ty);
const param1_idx: c.LLVMAttributeIndex = @bitCast(@as(i32, 1));
c.LLVMAddAttributeAtIndex(llvm_func, param1_idx, sret_attr);
}
// Set linkage
switch (func.linkage) {
.external => c.LLVMSetLinkage(llvm_func, c.LLVMExternalLinkage),
@@ -907,24 +928,40 @@ pub const LLVMEmitter = struct {
self.mapRef(c.LLVMGetUndef(self.toLLVMType(instruction.ty)));
return;
};
const arg_count: c_uint = @intCast(call_op.args.len);
const args = self.alloc.alloc(c.LLVMValueRef, call_op.args.len) catch unreachable;
const callee_needs_c_abi = callee_func.is_extern or callee_func.call_conv == .c;
const callee_raw_ret = self.toLLVMType(callee_func.ret);
const callee_uses_sret = callee_needs_c_abi and self.needsByval(callee_func.ret, callee_raw_ret);
// When the callee uses sret, prepend an alloca for the result.
// Index alignment: actual_args[0] = sret_slot; actual_args[i+1] = sx arg i.
const sret_off: usize = if (callee_uses_sret) 1 else 0;
const total_args = call_op.args.len + sret_off;
const args = self.alloc.alloc(c.LLVMValueRef, total_args) catch unreachable;
defer self.alloc.free(args);
for (call_op.args, 0..) |arg_ref, j| {
args[j] = self.resolveRef(arg_ref);
var sret_slot: c.LLVMValueRef = null;
if (callee_uses_sret) {
sret_slot = c.LLVMBuildAlloca(self.builder, callee_raw_ret, "sret.slot");
args[0] = sret_slot;
}
for (call_op.args, 0..) |arg_ref, j| {
args[j + sret_off] = self.resolveRef(arg_ref);
}
const arg_count: c_uint = @intCast(total_args);
// Get the function type from LLVM and coerce arguments
const fn_ty = c.LLVMGlobalGetValueType(callee);
const param_count = c.LLVMCountParamTypes(fn_ty);
const callee_needs_c_abi = callee_func.is_extern or callee_func.call_conv == .c;
if (param_count > 0) {
const param_types = self.alloc.alloc(c.LLVMTypeRef, param_count) catch unreachable;
defer self.alloc.free(param_types);
c.LLVMGetParamTypes(fn_ty, param_types.ptr);
for (0..@min(args.len, param_count)) |j| {
// The sret slot is already a properly-typed pointer; skip coercion.
if (callee_uses_sret and j == 0) continue;
const fn_param_idx = j - sret_off;
// Materialize byval args before coercion so we pass a ptr instead of the struct value.
if (callee_needs_c_abi and j < callee_func.params.len) {
const ir_ty = callee_func.params[j].ty;
if (callee_needs_c_abi and fn_param_idx < callee_func.params.len) {
const ir_ty = callee_func.params[fn_param_idx].ty;
const raw_struct = self.toLLVMType(ir_ty);
if (self.needsByval(ir_ty, raw_struct)) {
args[j] = self.materializeByvalArg(args[j], raw_struct);
@@ -934,9 +971,19 @@ pub const LLVMEmitter = struct {
args[j] = self.coerceArg(args[j], param_types[j]);
}
}
var result = c.LLVMBuildCall2(self.builder, fn_ty, callee, args.ptr, arg_count, if (instruction.ty == .void) "" else "call");
// Coerce ABI return value (e.g. i64) back to IR struct type if needed
if (instruction.ty != .void and callee_func.is_extern) {
const call_label: [*:0]const u8 = if (instruction.ty == .void or callee_uses_sret) "" else "call";
var result = c.LLVMBuildCall2(self.builder, fn_ty, callee, args.ptr, arg_count, call_label);
if (callee_uses_sret) {
// Mirror the function-decl `sret(<T>)` attribute on the call site so the
// LLVM backend lowers arg 0 via x8 (AAPCS64) / hidden ptr (SysV AMD64).
const sret_kind = c.LLVMGetEnumAttributeKindForName("sret", 4);
const sret_attr = c.LLVMCreateTypeAttribute(self.context, sret_kind, callee_raw_ret);
const param1_idx: c.LLVMAttributeIndex = @bitCast(@as(i32, 1));
c.LLVMAddCallSiteAttribute(result, param1_idx, sret_attr);
// Load the actual struct value the callee wrote into the slot.
result = c.LLVMBuildLoad2(self.builder, callee_raw_ret, sret_slot, "sret.load");
} else if (instruction.ty != .void and callee_func.is_extern) {
// Coerce ABI return value (e.g. i64 / [2 x i64]) back to IR struct type if needed
const expected_ty = self.toLLVMType(instruction.ty);
result = self.coerceArg(result, expected_ty);
}