diff --git a/examples/85-cc-c-large-aggregate.sx b/examples/85-cc-c-large-aggregate.sx new file mode 100644 index 0000000..32ac10f --- /dev/null +++ b/examples/85-cc-c-large-aggregate.sx @@ -0,0 +1,31 @@ +// Regression test for issue-0025 path A. +// +// sx functions declared with `callconv(.c)` that take a composite > 16 bytes +// by value must marshal the arg indirectly per AAPCS64 / SysV AArch64: +// the caller copies the struct to an alloca and passes the alloca pointer +// as a plain `ptr` (no LLVM `byval` attribute is attached), and the +// callee's entry block loads the struct back from the pointer. +// +// Before the fix, abiCoerceParamType returned the raw LLVM struct type for +// >16-byte composites (TODO at src/ir/emit_llvm.zig:2793), so the C ABI +// promise was silently violated whenever sx-emitted C-callable code +// interoperated with a real C caller. + +#import "modules/std.sx"; + +Wide :: struct { + a: s64; + b: s64; + c: s64; + d: s64; +} + +accept_c :: (w: Wide) -> s64 callconv(.c) { + w.a + w.b + w.c + w.d; +} + +main :: () -> s32 { + w := Wide.{ a = 1, b = 10, c = 100, d = 1000 }; + if accept_c(w) != 1111 { return 1; } + 0; +} diff --git a/examples/86-callconv-c-fnptr-large-aggregate.sx b/examples/86-callconv-c-fnptr-large-aggregate.sx new file mode 100644 index 0000000..bb74473 --- /dev/null +++ b/examples/86-callconv-c-fnptr-large-aggregate.sx @@ -0,0 +1,37 @@ +// Regression test for issue-0025 path B. +// +// When a fn-pointer's type is spelled with `callconv(.c)`, the indirect +// call must apply the same C-ABI byval coercion that direct C-ABI calls +// do at the call site (path A): >16-byte non-HFA aggregates are passed +// as a `ptr` to a caller-made copy. 
Without the fix, the indirect call site builds +// an LLVM function type whose param slot is the raw struct, which the +// AArch64/x86_64 backend tries to lay out across registers + stack in +// ways that don't match the pointer-taking callee signature — the +// callee then reads garbage out of the wrong machine-state slots. +// +// The opt-in is the `callconv(.c)` on the fn-pointer type spelling. +// Pure-sx fn-pointer casts (no callconv suffix) keep their default +// calling convention — verified by examples/87-fnptr-cast-large-aggregate.sx. + +#import "modules/std.sx"; + +Wide :: struct { + a: s64; + b: s64; + c: s64; + d: s64; +} + +accept_c :: (w: Wide) -> s64 callconv(.c) { + w.a + w.b + w.c + w.d; +} + +main :: () -> s32 { + w := Wide.{ a = 1, b = 10, c = 100, d = 1000 }; + if accept_c(w) != 1111 { return 1; } + + fn_ptr : (Wide) -> s64 callconv(.c) = xx accept_c; + if fn_ptr(w) != 1111 { return 2; } + + 0; +} diff --git a/examples/87-fnptr-cast-large-aggregate.sx b/examples/87-fnptr-cast-large-aggregate.sx new file mode 100644 index 0000000..fe5967f --- /dev/null +++ b/examples/87-fnptr-cast-large-aggregate.sx @@ -0,0 +1,29 @@ +// Pure-sx fn-pointer cast: a function-pointer typed without `callconv(.c)` +// keeps the default (sx) calling convention. Passing a >16-byte aggregate +// through that pointer must not get the C-ABI byval coercion — the sx-CC +// callee expects the struct as an SSA value, not as a `ptr` to a copy. +// +// Pair with examples/86-callconv-c-fnptr-large-aggregate.sx, which covers +// the opposite arm (fn-pointer typed `callconv(.c)` does get byval). 
+ +#import "modules/std.sx"; + +Wide :: struct { + a: s64; b: s64; c: s64; d: s64; +} + +accept :: (w: Wide) -> s64 { + w.a + w.b + w.c + w.d; +} + +main :: () -> s32 { + w := Wide.{ a = 1, b = 10, c = 100, d = 1000 }; + direct := accept(w); + if direct != 1111 { return 1; } + + fn_ptr : (Wide) -> s64 = xx accept; + indirect := fn_ptr(w); + if indirect != 1111 { return 2; } + + 0; +} diff --git a/examples/issue-0014.sx b/examples/issue-0014.sx deleted file mode 100644 index 29b7208..0000000 --- a/examples/issue-0014.sx +++ /dev/null @@ -1,15 +0,0 @@ -// issue-0014: Feature request — {{{ CONTENT_HASH }}} template variable for wasm shell -// -// When targeting wasm, the compiler processes shell.html and substitutes -// {{{ SCRIPT }}} with the -// -// -// This lets browsers cache until the next build, then bust automatically. -// No changes needed to build.sx or modules/compiler.sx — just the compiler -// recognizing the new placeholder during shell template substitution. diff --git a/examples/issue-0024.sx b/examples/issue-0024.sx deleted file mode 100644 index da7f3c1..0000000 --- a/examples/issue-0024.sx +++ /dev/null @@ -1,90 +0,0 @@ -// issue-0024: NSLog/foreign-side-effect calls placed as the FIRST statement -// of an `if X { ... } else { ... }` branch body do not produce visible -// output, even when the branch is provably taken (the SECOND statement in -// the same body — also a foreign call — does produce output). 
-// -// ── Observed iOS-side symptom (session 59 bisect) ───────────────────────── -// -// In library/modules/gpu/metal.sx's `metal_create_texture_ios`: -// -// slot : TextureSlot = .{ tex = tex, bytes_per_pixel = bytes_per_pixel }; -// self.textures.append(slot); -// NSLog(ns_string("[metal] T6 appended\n".ptr)); // ← fires -// -// pixels_null := pixels == null; -// if pixels_null { -// NSLog(ns_string("[metal] T6b null\n".ptr)); // ← never fires -// } else { -// NSLog(ns_string("[metal] T6a non-null\n".ptr)); // ← never fires -// handle : u32 = xx self.textures.len; -// metal_update_texture_region_ios(self, handle, 0, 0, w, h, pixels); -// // ← DOES fire -// // (its first -// // NSLog at -// // fn entry -// // appears in -// // the unified -// // log) -// NSLog(ns_string("[metal] T7 done\n".ptr)); // ← (helper crashed -// // before this) -// } -// -// T6 appears in the iOS unified log. T6a/T6b never appear. The else -// branch's helper call DOES fire (its own first-statement NSLog inside -// the helper appears). So the else-branch IS entered; just its first -// NSLog statement produces no output. -// -// ── Pure-sx repro below does NOT trigger ─────────────────────────────────── -// -// Running `sx run examples/issue-0024.sx` exits 0 (counter == 4 — all -// bumps fired). The bug only manifests with foreign calls (NSLog / ns_string), -// and possibly only when the process subsequently crashes (replaceRegion -// in the metal.sx case) — which raises the alternative hypothesis that -// the missing NSLog output is just iOS unified-logging buffer-loss on -// process death, not a sx compiler bug. The runtime sequence between T6 -// and the crash was ~500μs; logs within ~1ms of an unhandled exception -// can be lost to OSLog's internal buffering on Apple Silicon iOS-sim. -// -// ── Investigation plan ───────────────────────────────────────────────────── -// -// Two paths to disambiguate: -// 1. 
Replace NSLog markers with `write(STDERR_FILENO, ...)` calls -// (synchronous, no OSLog involvement). If markers still don't appear: -// sx compiler bug — likely in src/ir/lower.zig:2166-2196 (the -// `is_value` branch of `lowerIfExpr` and downstream `lowerBlockValue` -// around 922-948). Possible: side-effecting leading statements -// dropped when branches are treated as values. -// 2. If markers DO appear with synchronous write: the iOS-side symptom -// is unified-logging buffer-loss, not a compiler bug. Close this issue -// as "wontfix — diagnostic limitation" and move the iOS debugging to -// foreign-write tracing. -// -// ── Real-world impact ────────────────────────────────────────────────────── -// -// Bisecting issue-0026 (replaceRegion crash) is currently blocked: without -// trustworthy markers inside if/else branches we can't tell which arg -// arrives wrong. Resolution unblocks step 3b of the Metal port. - -#import "modules/std.sx"; - -counter : s64 = 0; - -bump :: () { counter = counter + 1; } - -probe :: (skip: bool) { - bump(); - if skip { - bump(); - bump(); - } else { - bump(); - bump(); - } - bump(); -} - -main :: () -> s32 { - probe(false); - // counter == 4 (entry + 2 in false branch + exit) → exit 0 - if counter == 4 then 0 else 1; -} diff --git a/examples/issue-0025.sx b/examples/issue-0025.sx deleted file mode 100644 index 6df5a4e..0000000 --- a/examples/issue-0025.sx +++ /dev/null @@ -1,94 +0,0 @@ -// issue-0025: Composite types larger than 16 bytes are passed without the -// LLVM `byval()` attribute, and the `call_indirect` (fn-pointer cast) -// path doesn't apply C-ABI parameter coercion at all. Both gaps cause -// silent shape-mismatch when sx code calls foreign C functions that take -// large aggregates by value, OR when sx code calls a sx fn through a -// fn-pointer typed with a large-aggregate parameter. 
-// -// ── Two failing forms ───────────────────────────────────────────────────── -// -// (A) Direct call to a sx function with a >16B param: -// -// Wide :: struct { a: s64; b: s64; c: s64; d: s64; } // 32 bytes -// accept :: (w: Wide) -> s64 { w.a + w.b + w.c + w.d; } -// accept(Wide.{ a = 1, b = 10, c = 100, d = 1000 }) // expect 1111 -// -// src/ir/emit_llvm.zig:2747-2795 (`abiCoerceParamType`): -// - <=8 bytes → coerced to i64 -// - 9-16 bytes → coerced to [2 x i64] -// - >16 bytes → returns llvm_ty unchanged with TODO at line 2793 -// -// The TODO is the bug: large composites should be coerced to `ptr` -// with a `byval(struct.T)` LLVM attribute. LLVM's mid-end then -// materializes the right machine code per target. Today the struct -// is left as-is, which LLVM tries to pass across registers + stack -// slots in ways that don't match the C ABI promise. -// -// (B) Indirect call via fn-pointer cast (the `xx objc_msgSend` idiom): -// -// fn_ptr : (Wide) -> s64 = xx accept; -// fn_ptr(Wide.{ ... }) -// -// src/ir/emit_llvm.zig:902-967 (`.call_indirect`): both the -// FunctionInfo-known arm (939-952) and the LLVMTypeOf-fallback arm -// (953-956) construct `param_tys[j]` WITHOUT routing through -// `abiCoerceParamType`. So even if (A) is fixed, fn-pointer-cast call -// sites still mis-marshal large composites. -// -// ── Real-world impact ────────────────────────────────────────────────────── -// -// Every `xx objc_msgSend` call site in library/modules/platform/uikit.sx -// + library/modules/gpu/metal.sx. Works in practice today only because: -// - We never pass aggregates >16 bytes by value through fn-pointer casts -// (workaround: declare param as `*T` + pass `@local`; arm64 AAPCS's -// indirect-by-ref happens to match this machine-state-wise). -// - HFAs (CGSize 2×f64, MTLClearColor 4×f64, CGRect 4×f64 as return) -// are correctly classified at emit_llvm.zig:2766-2779. 
-// -// ── Workarounds in use ───────────────────────────────────────────────────── -// -// library/modules/gpu/metal.sx declares MTLRegion (48B) + MTLScissorRect -// (32B) call sites with `*MTLRegion` / `*MTLScissorRect` and passes -// `@region` / `@rect`. Should not be needed once this issue is fixed. -// -// ── Fix sketch ───────────────────────────────────────────────────────────── -// -// (A) emit_llvm.zig:2793 — return `ptr` and emit `byval(struct.T)` on -// the param via `LLVMAddCallSiteAttribute` / `LLVMCreateTypeAttribute`. -// At call sites, alloca + memcpy + pass the alloca pointer. Apply -// identically at function-definition emission so direct calls roundtrip. -// -// (B) emit_llvm.zig:902-967 — factor out a helper -// `coerceCallParams(param_count, src_args, dst_fn_param_tys) -// -> (coerced_args, coerced_tys)` that wraps `abiCoerceParamType`. -// Use the helper from both arms. -// -// Edge cases to preserve: -// - Variadic foreign functions (printf family) — variadic tail per -// AAPCS64 still passes composites in their natural form. Keep -// existing behavior for variadic args. -// - HFAs already handled at 2766-2779 — don't touch. -// - Structs <=8 bytes coerced to `i64`, 9-16 bytes to `[2 x i64]` — -// don't touch. - -#import "modules/std.sx"; - -Wide :: struct { - a: s64; b: s64; c: s64; d: s64; -} - -accept :: (w: Wide) -> s64 { - w.a + w.b + w.c + w.d; -} - -main :: () -> s32 { - w := Wide.{ a = 1, b = 10, c = 100, d = 1000 }; - direct := accept(w); // exercises path (A) - if direct != 1111 { return 1; } - - fn_ptr : (Wide) -> s64 = xx accept; - indirect := fn_ptr(w); // exercises path (B) - if indirect != 1111 { return 2; } - - 0; -} diff --git a/library/modules/gpu/metal.sx b/library/modules/gpu/metal.sx index 68e8cec..cfe8b6d 100644 --- a/library/modules/gpu/metal.sx +++ b/library/modules/gpu/metal.sx @@ -54,11 +54,10 @@ MTLClearColor :: struct { } // MTLOrigin / MTLSize / MTLRegion / MTLScissorRect — integer aggregates. 
-// MTLRegion is 48 bytes and MTLScissorRect is 32 bytes; arm64 Apple ABI -// passes >16-byte composites by reference (address in the next register). -// We declare the call shapes with `*MTLRegion` etc., construct a local on -// the stack, and pass `@local` — the machine state matches what the Obj-C -// method expects. +// MTLRegion is 48 bytes and MTLScissorRect is 32 bytes; both are passed +// by value to the Obj-C runtime, which the compiler marshals as +// `ptr byval()` via the C-ABI byval coercion. The fn-pointer cast +// must spell `callconv(.c)` so the indirect call applies that coercion. MTLOrigin :: struct { x: u64; y: u64; z: u64; } MTLSize :: struct { width: u64; height: u64; depth: u64; } MTLRegion :: struct { origin: MTLOrigin; size: MTLSize; } @@ -243,8 +242,14 @@ metal_init_ios :: (self: *MetalGPU) -> bool { msg_ou(self.layer, sel_registerName("setPixelFormat:".ptr), MTL_PIXEL_FORMAT_BGRA8_UNORM); msg_ob(self.layer, sel_registerName("setFramebufferOnly:".ptr), 1); - size := CGSize.{ width = xx self.pixel_w, height = xx self.pixel_h }; - msg_osize(self.layer, sel_registerName("setDrawableSize:".ptr), size); + // setDrawableSize:(0,0) makes nextDrawable abort via XPC. Skip the + // size set when dims are not yet known — the layer's drawableSize + // defaults to its bounds×contentsScale until we override it, which + // also lets the first frame render at the natural backing size. 
+ if self.pixel_w > 0 and self.pixel_h > 0 { + size := CGSize.{ width = xx self.pixel_w, height = xx self.pixel_h }; + msg_osize(self.layer, sel_registerName("setDrawableSize:".ptr), size); + } } true; @@ -263,6 +268,7 @@ metal_begin_frame_ios :: (self: *MetalGPU, clear: ClearColor) -> bool { inline if OS != .ios { return false; } if self.layer == null { return false; } if self.queue == null { return false; } + if self.pixel_w <= 0 or self.pixel_h <= 0 { return false; } msg_o : (*void, *void) -> *void = xx objc_msgSend; msg_oo : (*void, *void, *void) -> void = xx objc_msgSend; @@ -300,12 +306,12 @@ metal_begin_frame_ios :: (self: *MetalGPU, clear: ClearColor) -> bool { // cmd = [queue commandBuffer] (autoreleased) self.cmd_buffer = msg_o(self.queue, sel_registerName("commandBuffer".ptr)); - if self.cmd_buffer == null { return false; } + if self.cmd_buffer == null { self.drawable = null; return false; } // encoder = [cmd renderCommandEncoderWithDescriptor:pass] (autoreleased) self.encoder = msg_oo_ret(self.cmd_buffer, sel_registerName("renderCommandEncoderWithDescriptor:".ptr), pass); - if self.encoder == null { return false; } + if self.encoder == null { self.cmd_buffer = null; self.drawable = null; return false; } true; } @@ -520,10 +526,10 @@ metal_update_texture_region_ios :: (self: *MetalGPU, handle: u32, x: s32, y: s32 bytes_per_row : u64 = xx (slot.bytes_per_pixel * cast(u32) w); // [tex replaceRegion:region mipmapLevel:0 withBytes:pixels bytesPerRow:bytes_per_row] - msg_replace : (*void, *void, *MTLRegion, u64, *void, u64) -> void = xx objc_msgSend; + msg_replace : (*void, *void, MTLRegion, u64, *void, u64) -> void callconv(.c) = xx objc_msgSend; msg_replace(slot.tex, sel_registerName("replaceRegion:mipmapLevel:withBytes:bytesPerRow:".ptr), - @region, 0, pixels, bytes_per_row); + region, 0, pixels, bytes_per_row); } // ── Per-draw state ─────────────────────────────────────────────────────── @@ -575,9 +581,9 @@ metal_set_scissor_ios :: (self: *MetalGPU, x: 
s32, y: s32, w: s32, h: s32) { inline if OS != .ios { return; } if self.encoder == null { return; } rect : MTLScissorRect = .{ x = xx x, y = xx y, width = xx w, height = xx h }; - // [encoder setScissorRect:rect] (MTLScissorRect is 32 bytes → indirect) - msg : (*void, *void, *MTLScissorRect) -> void = xx objc_msgSend; - msg(self.encoder, sel_registerName("setScissorRect:".ptr), @rect); + // [encoder setScissorRect:rect] (MTLScissorRect is 32 bytes → ptr byval) + msg : (*void, *void, MTLScissorRect) -> void callconv(.c) = xx objc_msgSend; + msg(self.encoder, sel_registerName("setScissorRect:".ptr), rect); } metal_disable_scissor_ios :: (self: *MetalGPU) { @@ -586,8 +592,8 @@ metal_disable_scissor_ios :: (self: *MetalGPU) { // Metal has no "disable scissor" — set the rect to cover the full // drawable so subsequent draws aren't clipped. rect : MTLScissorRect = .{ x = 0, y = 0, width = xx self.pixel_w, height = xx self.pixel_h }; - msg : (*void, *void, *MTLScissorRect) -> void = xx objc_msgSend; - msg(self.encoder, sel_registerName("setScissorRect:".ptr), @rect); + msg : (*void, *void, MTLScissorRect) -> void callconv(.c) = xx objc_msgSend; + msg(self.encoder, sel_registerName("setScissorRect:".ptr), rect); } metal_draw_triangles_ios :: (self: *MetalGPU, vertex_offset: s32, vertex_count: s32) { diff --git a/src/ast.zig b/src/ast.zig index 596a61a..0b36308 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -457,6 +457,7 @@ pub const FunctionTypeExpr = struct { param_types: []const *Node, param_names: ?[]const ?[]const u8 = null, // optional documentation names return_type: ?*Node, // null = void return + call_conv: CallingConvention = .default, }; pub const ClosureTypeExpr = struct { diff --git a/src/ir/emit_llvm.zig b/src/ir/emit_llvm.zig index 535e1a1..132b448 100644 --- a/src/ir/emit_llvm.zig +++ b/src/ir/emit_llvm.zig @@ -405,6 +405,13 @@ pub const LLVMEmitter = struct { c.LLVMAddAttributeAtIndex(llvm_func, func_idx_attr, nounwind_attr); } + // Apple ARM64 ABI for >16B 
non-HFA composites: pass by reference + // via a pointer in the next int register (NOT via LLVM's `byval` + // attribute, which lowers the struct on the stack — incompatible + // with what `clang` emits and what foreign C callees expect). + // abiCoerceParamType returned `ptr` for these slots, so the formal + // param IS a plain pointer; the prologue loads the struct back. + self.func_map.put(func_idx, llvm_func) catch unreachable; } @@ -436,6 +443,23 @@ pub const LLVMEmitter = struct { self.block_map.put(block_key, bb) catch unreachable; } + // byval params arrive as `ptr` in LLVM but the IR body expects struct values. + // At entry, load each byval param into a struct SSA value and re-map its ref. + const needs_c_abi = func.is_extern or func.call_conv == .c; + if (needs_c_abi and func.blocks.items.len > 0) { + const entry_key = makeBlockKey(func_idx, 0); + const entry_bb = self.block_map.get(entry_key) orelse unreachable; + c.LLVMPositionBuilderAtEnd(self.builder, entry_bb); + for (func.params, 0..) 
|param, pi| { + const raw_llvm_ty = self.toLLVMType(param.ty); + if (self.needsByval(param.ty, raw_llvm_ty)) { + const ptr_val = c.LLVMGetParam(llvm_func, @intCast(pi)); + const loaded = c.LLVMBuildLoad2(self.builder, raw_llvm_ty, ptr_val, "byval.load"); + self.ref_map.put(@intCast(pi), loaded) catch unreachable; + } + } + } + // Clear pending phis for this function self.pending_phis.clearRetainingCapacity(); @@ -883,11 +907,21 @@ pub const LLVMEmitter = struct { // Get the function type from LLVM and coerce arguments const fn_ty = c.LLVMGlobalGetValueType(callee); const param_count = c.LLVMCountParamTypes(fn_ty); + const callee_needs_c_abi = callee_func.is_extern or callee_func.call_conv == .c; if (param_count > 0) { const param_types = self.alloc.alloc(c.LLVMTypeRef, param_count) catch unreachable; defer self.alloc.free(param_types); c.LLVMGetParamTypes(fn_ty, param_types.ptr); for (0..@min(args.len, param_count)) |j| { + // Materialize byval args before coercion so we pass a ptr instead of the struct value. + if (callee_needs_c_abi and j < callee_func.params.len) { + const ir_ty = callee_func.params[j].ty; + const raw_struct = self.toLLVMType(ir_ty); + if (self.needsByval(ir_ty, raw_struct)) { + args[j] = self.materializeByvalArg(args[j], raw_struct); + continue; + } + } args[j] = self.coerceArg(args[j], param_types[j]); } } @@ -922,6 +956,16 @@ pub const LLVMEmitter = struct { break :blk null; } else null; + // Read the fn-pointer type's calling convention. Only `.c` opts + // into the C-ABI byval coercion for >16B aggregate params. 
+ const fp_is_c_abi: bool = if (callee_ir_ty) |cty| blk: { + if (!cty.isBuiltin()) { + const ci = self.ir_mod.types.get(cty); + if (ci == .function and ci.function.call_conv == .c) break :blk true; + } + break :blk false; + } else false; + const ret_ty = if (callee_ir_ty) |cty| blk: { if (!cty.isBuiltin()) { const ci = self.ir_mod.types.get(cty); @@ -939,7 +983,13 @@ pub const LLVMEmitter = struct { if (fn_params) |fp| { for (0..call_op.args.len) |j| { if (j < fp.len) { - var llvm_pty = self.toLLVMType(fp[j]); + const raw_struct = self.toLLVMType(fp[j]); + if (fp_is_c_abi and self.needsByval(fp[j], raw_struct)) { + args[j] = self.materializeByvalArg(args[j], raw_struct); + param_tys[j] = self.cached_ptr; + continue; + } + var llvm_pty = raw_struct; // Array params in fn-ptr calls decay to pointers (C ABI) if (c.LLVMGetTypeKind(llvm_pty) == c.LLVMArrayTypeKind) { llvm_pty = self.cached_ptr; @@ -2790,8 +2840,64 @@ pub const LLVMEmitter = struct { return c.LLVMArrayType2(self.cached_i64, 2); } - // Large struct (> 16 bytes) → leave as-is (should be indirect, but handle later) - return llvm_ty; + // Large composite (> 16 bytes) → pass indirectly: the slot becomes a plain + // `ptr` with no `byval` attribute. Call sites copy the struct to a stack + // temporary (materializeByvalArg); C-ABI callees load it back on entry. 
+ return self.cached_ptr; + } + + /// True when a C-ABI parameter of IR type `ir_ty` must be passed indirectly: + /// an LLVM struct larger than 16 bytes that is not a homogeneous aggregate of + /// 1–4 `float`s or 1–4 `double`s. Always false on wasm32, for `string`, and + /// for slices. NOTE(review): the homogeneity test inspects direct members + /// only (nested structs are not flattened) and the rule applies to every + /// non-wasm32 target — confirm against abiCoerceParamType and x86_64 SysV. + fn needsByval(self: *LLVMEmitter, ir_ty: TypeId, raw_llvm_ty: c.LLVMTypeRef) bool { + if (self.target_config.isWasm32()) return false; + if (ir_ty == .string) return false; + if (!ir_ty.isBuiltin()) { + const info = self.ir_mod.types.get(ir_ty); + if (info == .slice) return false; + } + if (c.LLVMGetTypeKind(raw_llvm_ty) != c.LLVMStructTypeKind) return false; + const n = c.LLVMCountStructElementTypes(raw_llvm_ty); + if (n >= 1 and n <= 4) { + var all_f = true; + var all_d = true; + var i: c_uint = 0; + while (i < n) : (i += 1) { + const ft = c.LLVMStructGetTypeAtIndex(raw_llvm_ty, i); + const fk = c.LLVMGetTypeKind(ft); + if (fk != c.LLVMFloatTypeKind) all_f = false; + if (fk != c.LLVMDoubleTypeKind) all_d = false; + } + if (all_f or all_d) return false; + } + const size = c.LLVMABISizeOfType(c.LLVMGetModuleDataLayout(self.llvm_module), raw_llvm_ty); + return size > 16; + } + + /// Copies `val` into a stack temporary of type `struct_ty` and returns the + /// temporary's address, for use as an indirect C-ABI argument. The alloca is + /// placed at the top of the entry block so a call site inside a loop reuses + /// one slot instead of growing the stack frame on every iteration. 
+ fn materializeByvalArg(self: *LLVMEmitter, val: c.LLVMValueRef, struct_ty: c.LLVMTypeRef) c.LLVMValueRef { + const cur_bb = c.LLVMGetInsertBlock(self.builder); + const entry_bb = c.LLVMGetEntryBasicBlock(c.LLVMGetBasicBlockParent(cur_bb)); + // Positioning before an instruction adopts its debug location; save ours. + const saved_loc = c.LLVMGetCurrentDebugLocation2(self.builder); + const first = c.LLVMGetFirstInstruction(entry_bb); + if (first != null) { + c.LLVMPositionBuilderBefore(self.builder, first); + } else { + c.LLVMPositionBuilderAtEnd(self.builder, entry_bb); + } + const tmp = c.LLVMBuildAlloca(self.builder, struct_ty, "byval.tmp"); + // Resume at the call site (assumes the builder was appending to cur_bb). + c.LLVMPositionBuilderAtEnd(self.builder, cur_bb); + c.LLVMSetCurrentDebugLocation2(self.builder, saved_loc); + _ = c.LLVMBuildStore(self.builder, val, tmp); + return tmp; } // ── Cached composite types ────────────────────────────────────── diff --git a/src/ir/inst.zig b/src/ir/inst.zig index 6881295..c1f1419 100644 --- a/src/ir/inst.zig +++ b/src/ir/inst.zig @@ -424,10 +424,7 @@ pub const Function = struct { private, }; - pub const CallingConvention = enum { - default, - c, - }; + pub const CallingConvention = types.TypeInfo.CallConv; pub fn init(name: StringId, params: []const Param, ret: TypeId) Function { return .{ diff --git a/src/ir/lower.zig b/src/ir/lower.zig index cf1f097..4055907 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -7913,8 +7913,22 @@ pub const Lowering = struct { for (args, 0..) 
|a, i| { const expected_ty = if (i < mi.param_types.len) mi.param_types[i] else void_ptr; const arg_ty = self.builder.getRefType(a); - // If protocol method expects *void but we have a value (struct or primitive), convert to pointer - const is_pointer_ty = if (!arg_ty.isBuiltin()) self.module.types.get(arg_ty) == .pointer else false; + + // Untargeted `null` lowers as const_null with type .void. Re-emit it + // as a null of the expected pointer type instead of alloca'ing void. + if (arg_ty == .void and expected_ty == void_ptr) { + call_args.append(self.alloc, self.builder.constNull(void_ptr)) catch unreachable; + continue; + } + // A protocol method that expects `*void` accepts any single-pointer + // value directly (`*T`, `[*]T`). Only wrap non-pointer values in an + // alloca-slot — wrapping a pointer would pass the stack slot's + // address instead of the actual pointer, and the callee would read + // 8 bytes of pointer plus garbage from beyond the stack. + const is_pointer_ty = if (!arg_ty.isBuiltin()) blk: { + const info = self.module.types.get(arg_ty); + break :blk info == .pointer or info == .many_pointer; + } else false; if (expected_ty == void_ptr and arg_ty != void_ptr and !is_pointer_ty) { const slot = self.builder.alloca(arg_ty); self.builder.store(slot, a); diff --git a/src/ir/type_bridge.zig b/src/ir/type_bridge.zig index 5d6db8f..9b1fb2b 100644 --- a/src/ir/type_bridge.zig +++ b/src/ir/type_bridge.zig @@ -265,7 +265,8 @@ fn resolveFunctionType(ft: *const ast.FunctionTypeExpr, table: *TypeTable) TypeI param_ids.append(alloc, resolveAstType(pt, table)) catch unreachable; } const ret_id = if (ft.return_type) |rt| resolveAstType(rt, table) else TypeId.void; - return table.functionType(param_ids.items, ret_id); + const cc: ir_types.TypeInfo.CallConv = if (ft.call_conv == .c) .c else .default; + return table.functionTypeCC(param_ids.items, ret_id, cc); } fn resolveClosureType(ct: *const ast.ClosureTypeExpr, table: *TypeTable) TypeId { diff --git 
a/src/ir/types.zig b/src/ir/types.zig index 8b64005..d8a56c9 100644 --- a/src/ir/types.zig +++ b/src/ir/types.zig @@ -129,8 +129,11 @@ pub const TypeInfo = union(enum) { pub const FunctionInfo = struct { params: []const TypeId, ret: TypeId, + call_conv: CallConv = .default, }; + pub const CallConv = enum { default, c }; + pub const ClosureInfo = struct { params: []const TypeId, ret: TypeId, @@ -337,8 +340,12 @@ pub const TypeTable = struct { } pub fn functionType(self: *TypeTable, params: []const TypeId, ret: TypeId) TypeId { + return self.functionTypeCC(params, ret, .default); + } + + pub fn functionTypeCC(self: *TypeTable, params: []const TypeId, ret: TypeId, cc: TypeInfo.CallConv) TypeId { const owned_params = self.alloc.dupe(TypeId, params) catch unreachable; - return self.intern(.{ .function = .{ .params = owned_params, .ret = ret } }); + return self.intern(.{ .function = .{ .params = owned_params, .ret = ret, .call_conv = cc } }); } pub fn closureType(self: *TypeTable, params: []const TypeId, ret: TypeId) TypeId { @@ -653,6 +660,8 @@ fn hashTypeInfo(h: *std.hash.Wyhash, info: TypeInfo) void { .function => |f| { for (f.params) |p| h.update(std.mem.asBytes(&p)); h.update(std.mem.asBytes(&f.ret)); + const cc_byte: u8 = @intFromEnum(f.call_conv); + h.update(&.{cc_byte}); }, .closure => |c| { for (c.params) |p| h.update(std.mem.asBytes(&p)); @@ -692,6 +701,7 @@ fn typeInfoEql(a: TypeInfo, b: TypeInfo) bool { for (f.params, g.params) |fp, gp| { if (fp != gp) return false; } + if (f.call_conv != g.call_conv) return false; return f.ret == g.ret; }, .closure => |c| { diff --git a/src/parser.zig b/src/parser.zig index a52440c..1c925eb 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -471,10 +471,12 @@ pub const Parser = struct { // '->' present: function type self.advance(); // skip '->' const return_type = try self.parseTypeExpr(); + const call_conv = try self.parseOptionalCallConv(); return try self.createNode(start, .{ .function_type_expr = .{ .param_types = try 
param_types.toOwnedSlice(self.allocator), .param_names = if (has_names) try param_names.toOwnedSlice(self.allocator) else null, .return_type = return_type, + .call_conv = call_conv, } }); } // No '->': tuple type (even for single element) @@ -1236,22 +1238,7 @@ pub const Parser = struct { } // Optional calling convention: callconv(.c) - var call_conv: ast.CallingConvention = .default; - if (self.current.tag == .kw_callconv) { - self.advance(); - try self.expect(.l_paren); - try self.expect(.dot); - if (self.current.tag != .identifier) - return self.fail("expected calling convention name after '.'"); - const cc_name = self.tokenSlice(self.current); - if (std.mem.eql(u8, cc_name, "c")) { - call_conv = .c; - } else { - return self.fail("unknown calling convention"); - } - self.advance(); - try self.expect(.r_paren); - } + const call_conv = try self.parseOptionalCallConv(); // Body: block `{ ... }`, arrow `=> expr;`, #builtin, #compiler, or #foreign marker var is_arrow = false; @@ -2370,22 +2357,7 @@ pub const Parser = struct { } // Optional calling convention: callconv(.c) - var call_conv: ast.CallingConvention = .default; - if (self.current.tag == .kw_callconv) { - self.advance(); - try self.expect(.l_paren); - try self.expect(.dot); - if (self.current.tag != .identifier) - return self.fail("expected calling convention name after '.'"); - const cc_name = self.tokenSlice(self.current); - if (std.mem.eql(u8, cc_name, "c")) { - call_conv = .c; - } else { - return self.fail("unknown calling convention"); - } - self.advance(); - try self.expect(.r_paren); - } + const call_conv = try self.parseOptionalCallConv(); // Two body forms: // (params) => expr — expression lambda @@ -2423,6 +2395,20 @@ pub const Parser = struct { return tag == .l_brace or tag == .arrow or tag == .hash_builtin or tag == .hash_compiler or tag == .hash_foreign or tag == .fat_arrow or tag == .kw_callconv; } + fn parseOptionalCallConv(self: *Parser) anyerror!ast.CallingConvention { + if 
(self.current.tag != .kw_callconv) return .default; + self.advance(); + try self.expect(.l_paren); + try self.expect(.dot); + if (self.current.tag != .identifier) + return self.fail("expected calling convention name after '.'"); + const cc_name = self.tokenSlice(self.current); + const cc: ast.CallingConvention = if (std.mem.eql(u8, cc_name, "c")) .c else return self.fail("unknown calling convention"); + self.advance(); + try self.expect(.r_paren); + return cc; + } + fn isAssignOp(self: *const Parser) bool { return switch (self.current.tag) { .equal, .plus_equal, .minus_equal, .star_equal, .slash_equal, .percent_equal, diff --git a/tests/expected/85-cc-c-large-aggregate.exit b/tests/expected/85-cc-c-large-aggregate.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/tests/expected/85-cc-c-large-aggregate.exit @@ -0,0 +1 @@ +0 diff --git a/tests/expected/85-cc-c-large-aggregate.txt b/tests/expected/85-cc-c-large-aggregate.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/expected/85-cc-c-large-aggregate.txt @@ -0,0 +1 @@ + diff --git a/tests/expected/86-callconv-c-fnptr-large-aggregate.exit b/tests/expected/86-callconv-c-fnptr-large-aggregate.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/tests/expected/86-callconv-c-fnptr-large-aggregate.exit @@ -0,0 +1 @@ +0 diff --git a/tests/expected/86-callconv-c-fnptr-large-aggregate.txt b/tests/expected/86-callconv-c-fnptr-large-aggregate.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/expected/86-callconv-c-fnptr-large-aggregate.txt @@ -0,0 +1 @@ + diff --git a/tests/expected/87-fnptr-cast-large-aggregate.exit b/tests/expected/87-fnptr-cast-large-aggregate.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/tests/expected/87-fnptr-cast-large-aggregate.exit @@ -0,0 +1 @@ +0 diff --git a/tests/expected/87-fnptr-cast-large-aggregate.txt b/tests/expected/87-fnptr-cast-large-aggregate.txt new file mode 100644 index 
0000000..8b13789 --- /dev/null +++ b/tests/expected/87-fnptr-cast-large-aggregate.txt @@ -0,0 +1 @@ +