From d6078c2e6bcdc996107356a47239fae25d209292 Mon Sep 17 00:00:00 2001 From: agra Date: Wed, 3 Jun 2026 08:53:51 +0300 Subject: [PATCH] test(backend): lock LLVM type/ABI shapes before A7.1 extraction (A7.1 scaffolding step 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test-first scaffolding for LLVM backend modularization (Phase A7.1) before the type/ABI helpers move into src/backend/llvm/{types,abi}.zig. Visibility-only change to the targets — no behavior change. Closes the ARCH-SAFETY "no generic ABI snapshot" gap. - 2 new emit_llvm.test.zig tests: - abiCoerceParamType across every C-ABI size bucket: <=8 -> i64, 9-16 -> [2 x i64], >16 -> ptr, HFA (all-float/all-double, <=4 fields) -> unchanged, string -> ptr, slice -> ptr, scalar -> unchanged. Built via a local internStruct helper (field slice in the module arena -> no testing-allocator leak); asserts against emitter.cached_* + LLVMArrayType2. - needsByval: true only for >16-byte non-HFA struct; false for <=16 / HFA / string / slice / non-struct. - 1 new .ir snapshot: 1202-ffi-cc-c-large-aggregate (the canonical callconv(.c) >16-byte byval example that directly documents abiCoerceParamType) — pins the byval param path end-to-end (5 byval + entry reload + 2 sret, one each from Arena.init and TrackingAllocator.init). Path-free + idempotent (verified across two captures). Suite count unchanged (snapshot added to an existing example). - Widened abiCoerceParamType + needsByval to pub (visibility only; abiCoerceParamTypeEx/materializeByvalArg/verifySizes stay private — move with callers in sub-step 2). No logic touched. - Recorded the A7.1 coverage inventory + residual gaps (wasm32 usize->i32 branch, fn-ptr large-aggregate 1203/1204) in ARCH-SAFETY.md. Gate: zig build, zig build test, bash tests/run_examples.sh -> 361/0 (no churn beyond the new 1202 .ir). 
--- .../expected/1202-ffi-cc-c-large-aggregate.ir | 182 ++++++++++++++++++ src/ir/emit_llvm.test.zig | 77 ++++++++ src/ir/emit_llvm.zig | 4 +- 3 files changed, 261 insertions(+), 2 deletions(-) create mode 100644 examples/expected/1202-ffi-cc-c-large-aggregate.ir diff --git a/examples/expected/1202-ffi-cc-c-large-aggregate.ir b/examples/expected/1202-ffi-cc-c-large-aggregate.ir new file mode 100644 index 0000000..1f60847 --- /dev/null +++ b/examples/expected/1202-ffi-cc-c-large-aggregate.ir @@ -0,0 +1,182 @@ + +@__sx_default_context = internal global { { ptr, ptr, ptr }, ptr } { { ptr, ptr, ptr } { ptr null, ptr @__thunk_CAllocator_Allocator_alloc, ptr @__thunk_CAllocator_Allocator_dealloc }, ptr null } + +; Function Attrs: nounwind +declare void @out(ptr) #0 + +declare ptr @malloc(i64) + +declare void @free(ptr) + +declare ptr @memcpy(ptr, ptr, i64) + +declare ptr @memset(ptr, i32, i64) + +; Function Attrs: nounwind +define internal ptr @CAllocator.alloc(ptr %0, ptr %1, i64 %2) #0 { +entry: + %alloca = alloca ptr, align 8 + store ptr %1, ptr %alloca, align 8 + %allocaN = alloca i64, align 8 + store i64 %2, ptr %allocaN, align 8 + %load = load i64, ptr %allocaN, align 8 + %call = call ptr @malloc(i64 %load) + ret ptr %call +} + +; Function Attrs: nounwind +define internal void @CAllocator.dealloc(ptr %0, ptr %1, ptr %2) #0 { +entry: + %alloca = alloca ptr, align 8 + store ptr %1, ptr %alloca, align 8 + %allocaN = alloca ptr, align 8 + store ptr %2, ptr %allocaN, align 8 + %load = load ptr, ptr %allocaN, align 8 + call void @free(ptr %load) + ret void +} + +; Function Attrs: nounwind +declare i64 @GPA.init(ptr) #0 + +; Function Attrs: nounwind +declare ptr @GPA.alloc(ptr, ptr, i64) #0 + +; Function Attrs: nounwind +declare void @GPA.dealloc(ptr, ptr, ptr) #0 + +; Function Attrs: nounwind +declare void @Arena.add_chunk(ptr, ptr, i64) #0 + +; Function Attrs: nounwind +declare void @Arena.init(ptr sret({ ptr, i64, { ptr, ptr, ptr } }), ptr, ptr, i64) #0 + +; Function 
Attrs: nounwind +declare void @Arena.reset(ptr, ptr) #0 + +; Function Attrs: nounwind +declare void @Arena.deinit(ptr, ptr) #0 + +; Function Attrs: nounwind +declare ptr @Arena.alloc(ptr, ptr, i64) #0 + +; Function Attrs: nounwind +declare void @Arena.dealloc(ptr, ptr, ptr) #0 + +; Function Attrs: nounwind +declare ptr @BufAlloc.init(ptr, ptr, i64) #0 + +; Function Attrs: nounwind +declare void @BufAlloc.reset(ptr, ptr) #0 + +; Function Attrs: nounwind +declare ptr @BufAlloc.alloc(ptr, ptr, i64) #0 + +; Function Attrs: nounwind +declare void @BufAlloc.dealloc(ptr, ptr, ptr) #0 + +; Function Attrs: nounwind +declare void @TrackingAllocator.init(ptr sret({ { ptr, ptr, ptr }, i64, i64, i64 }), ptr, ptr) #0 + +; Function Attrs: nounwind +declare i64 @TrackingAllocator.leak_count(ptr, ptr) #0 + +; Function Attrs: nounwind +declare void @TrackingAllocator.report(ptr, ptr) #0 + +; Function Attrs: nounwind +declare ptr @TrackingAllocator.alloc(ptr, ptr, i64) #0 + +; Function Attrs: nounwind +declare void @TrackingAllocator.dealloc(ptr, ptr, ptr) #0 + +; Function Attrs: nounwind +declare ptr @cstring(ptr, i64) #0 + +; Function Attrs: nounwind +declare ptr @int_to_string(ptr, i64) #0 + +; Function Attrs: nounwind +declare ptr @bool_to_string(ptr, i1) #0 + +; Function Attrs: nounwind +declare ptr @float_to_string(ptr, double) #0 + +; Function Attrs: nounwind +declare void @hex_group(ptr, ptr, i64, i64) #0 + +; Function Attrs: nounwind +declare ptr @int_to_hex_string(ptr, i64) #0 + +; Function Attrs: nounwind +declare ptr @concat(ptr, ptr, ptr) #0 + +; Function Attrs: nounwind +declare ptr @substr(ptr, ptr, i64, i64) #0 + +; Function Attrs: nounwind +declare ptr @xml_escape(ptr, ptr) #0 + +; Function Attrs: nounwind +declare ptr @path_join(ptr, ptr) #0 + +; Function Attrs: nounwind +declare ptr @any_to_string(ptr, [2 x i64]) #0 + +; Function Attrs: nounwind +declare ptr @build_format(ptr, ptr) #0 + +; Function Attrs: nounwind +define internal i64 @accept_c(ptr %0) #0 { +entry: 
+ %byval.load = load { i64, i64, i64, i64 }, ptr %0, align 8 + %alloca = alloca { i64, i64, i64, i64 }, align 8 + store { i64, i64, i64, i64 } %byval.load, ptr %alloca, align 8 + %load = load { i64, i64, i64, i64 }, ptr %alloca, align 8 + %sg = extractvalue { i64, i64, i64, i64 } %load, 0 + %loadN = load { i64, i64, i64, i64 }, ptr %alloca, align 8 + %sgN = extractvalue { i64, i64, i64, i64 } %loadN, 1 + %add = add i64 %sg, %sgN + %loadN = load { i64, i64, i64, i64 }, ptr %alloca, align 8 + %sgN = extractvalue { i64, i64, i64, i64 } %loadN, 2 + %addN = add i64 %add, %sgN + %loadN = load { i64, i64, i64, i64 }, ptr %alloca, align 8 + %sgN = extractvalue { i64, i64, i64, i64 } %loadN, 3 + %addN = add i64 %addN, %sgN + ret i64 %addN +} + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %alloca = alloca { i64, i64, i64, i64 }, align 8 + store { i64, i64, i64, i64 } { i64 1, i64 10, i64 100, i64 1000 }, ptr %alloca, align 8 + %load = load { i64, i64, i64, i64 }, ptr %alloca, align 8 + %byval.tmp = alloca { i64, i64, i64, i64 }, align 8 + store { i64, i64, i64, i64 } %load, ptr %byval.tmp, align 8 + %call = call i64 @accept_c(ptr %byval.tmp) + %icmp = icmp ne i64 %call, 1111 + br i1 %icmp, label %if.then.0, label %if.merge.1 + +if.then.0: ; preds = %entry + ret i32 1 + +if.merge.1: ; preds = %entry + ret i32 0 +} + +; Function Attrs: nounwind +define internal ptr @__thunk_CAllocator_Allocator_alloc(ptr %0, ptr %1, i64 %2) #0 { +entry: + %call = call ptr @CAllocator.alloc(ptr %0, ptr %1, i64 %2) + ret ptr %call +} + +; Function Attrs: nounwind +define internal void @__thunk_CAllocator_Allocator_dealloc(ptr %0, ptr %1, ptr %2) #0 { +entry: + call void @CAllocator.dealloc(ptr %0, ptr %1, ptr %2) + ret void +} + + diff --git a/src/ir/emit_llvm.test.zig b/src/ir/emit_llvm.test.zig index 2406f53..8d9426f 100644 --- a/src/ir/emit_llvm.test.zig +++ b/src/ir/emit_llvm.test.zig @@ -353,6 +353,83 @@ test "emit: type conversion toLLVMType" { _ = 
emitter.toLLVMType(.noreturn); } +// ── A7.1 scaffolding: ABI param coercion ──────────────────────────── +// Lock the C-ABI struct-coercion buckets (abiCoerceParamType / needsByval), +// which feed callconv(.c) / #foreign signatures, before they move to +// src/backend/llvm/abi.zig in A7.1 sub-step 2. + +const llvm = @import("../llvm_api.zig"); +const cc = llvm.c; + +fn internStruct(module: *Module, name: []const u8, field_tys: []const TypeId) TypeId { + var fields = std.ArrayList(types.TypeInfo.StructInfo.Field).empty; + defer fields.deinit(std.testing.allocator); + for (field_tys, 0..) |fty, i| { + var nb: [8]u8 = undefined; + const fname = std.fmt.bufPrint(&nb, "f{d}", .{i}) catch unreachable; + fields.append(std.testing.allocator, .{ .name = str(module, fname), .ty = fty }) catch unreachable; + } + // Dupe into the module arena so the interned struct's field slice lives for + // the module's lifetime (freed at module.deinit) — no testing-allocator leak. + const owned = module.slice_arena.allocator().dupe(types.TypeInfo.StructInfo.Field, fields.items) catch unreachable; + return module.types.intern(.{ .@"struct" = .{ .name = str(module, name), .fields = owned } }); +} + +test "emit: abiCoerceParamType coerces C-ABI structs by size bucket" { + const alloc = std.testing.allocator; + var module = Module.init(alloc); + defer module.deinit(); + + // Intern the shapes before building the emitter (toLLVMType reads live). 
+ const small = internStruct(&module, "Small", &.{ .s32, .s32 }); // 8 bytes + const mid = internStruct(&module, "Mid", &.{ .s64, .s64 }); // 16 bytes + const big = internStruct(&module, "Big", &.{ .s64, .s64, .s64 }); // 24 bytes + const hfa_f = internStruct(&module, "HfaF", &.{ .f32, .f32, .f32, .f32 }); // 16, all-float + const hfa_d = internStruct(&module, "HfaD", &.{ .f64, .f64 }); // 16, all-double + const sl = module.types.sliceOf(.s32); + + var emitter = LLVMEmitter.init(alloc, &module, "test_abi", .{}); + defer emitter.deinit(); + + // ≤ 8 bytes → i64. + try std.testing.expect(emitter.abiCoerceParamType(small, emitter.toLLVMType(small)) == emitter.cached_i64); + // 9–16 bytes → [2 x i64]. + try std.testing.expect(emitter.abiCoerceParamType(mid, emitter.toLLVMType(mid)) == cc.LLVMArrayType2(emitter.cached_i64, 2)); + // > 16 bytes → ptr (passed byval at the call/sig sites). + try std.testing.expect(emitter.abiCoerceParamType(big, emitter.toLLVMType(big)) == emitter.cached_ptr); + // HFA (all-float / all-double, ≤ 4 fields) → unchanged. + try std.testing.expect(emitter.abiCoerceParamType(hfa_f, emitter.toLLVMType(hfa_f)) == emitter.toLLVMType(hfa_f)); + try std.testing.expect(emitter.abiCoerceParamType(hfa_d, emitter.toLLVMType(hfa_d)) == emitter.toLLVMType(hfa_d)); + // string / slice collapse to ptr at the C-API boundary (len dropped). + try std.testing.expect(emitter.abiCoerceParamType(.string, emitter.toLLVMType(.string)) == emitter.cached_ptr); + try std.testing.expect(emitter.abiCoerceParamType(sl, emitter.toLLVMType(sl)) == emitter.cached_ptr); + // Scalars pass through unchanged. 
+ try std.testing.expect(emitter.abiCoerceParamType(.s32, emitter.toLLVMType(.s32)) == emitter.toLLVMType(.s32)); +} + +test "emit: needsByval only for > 16-byte non-HFA structs" { + const alloc = std.testing.allocator; + var module = Module.init(alloc); + defer module.deinit(); + + const small = internStruct(&module, "Small", &.{ .s32, .s32 }); + const mid = internStruct(&module, "Mid", &.{ .s64, .s64 }); + const big = internStruct(&module, "Big", &.{ .s64, .s64, .s64 }); + const hfa_d = internStruct(&module, "HfaD", &.{ .f64, .f64 }); + const sl = module.types.sliceOf(.s32); + + var emitter = LLVMEmitter.init(alloc, &module, "test_byval", .{}); + defer emitter.deinit(); + + try std.testing.expect(emitter.needsByval(big, emitter.toLLVMType(big))); // > 16 + try std.testing.expect(!emitter.needsByval(small, emitter.toLLVMType(small))); + try std.testing.expect(!emitter.needsByval(mid, emitter.toLLVMType(mid))); // exactly 16 + try std.testing.expect(!emitter.needsByval(hfa_d, emitter.toLLVMType(hfa_d))); // HFA + try std.testing.expect(!emitter.needsByval(.string, emitter.toLLVMType(.string))); + try std.testing.expect(!emitter.needsByval(sl, emitter.toLLVMType(sl))); + try std.testing.expect(!emitter.needsByval(.s32, emitter.toLLVMType(.s32))); // non-struct +} + // ── Struct/Enum/Union tests ───────────────────────────────────────── test "emit: struct_init and struct_get" { diff --git a/src/ir/emit_llvm.zig b/src/ir/emit_llvm.zig index 45c8be5..a0c2deb 100644 --- a/src/ir/emit_llvm.zig +++ b/src/ir/emit_llvm.zig @@ -4718,7 +4718,7 @@ pub const LLVMEmitter = struct { // - Small integer struct (≤ 8 bytes, non-HFA) → i64 // - HFA (homogeneous float aggregate) → leave as-is (LLVM handles it) - fn abiCoerceParamType(self: *LLVMEmitter, ir_ty: TypeId, llvm_ty: c.LLVMTypeRef) c.LLVMTypeRef { + pub fn abiCoerceParamType(self: *LLVMEmitter, ir_ty: TypeId, llvm_ty: c.LLVMTypeRef) c.LLVMTypeRef { return self.abiCoerceParamTypeEx(ir_ty, llvm_ty, true); } @@ -4787,7 +4787,7 
@@ pub const LLVMEmitter = struct { return self.cached_ptr; } - fn needsByval(self: *LLVMEmitter, ir_ty: TypeId, raw_llvm_ty: c.LLVMTypeRef) bool { + pub fn needsByval(self: *LLVMEmitter, ir_ty: TypeId, raw_llvm_ty: c.LLVMTypeRef) bool { if (self.target_config.isWasm32()) return false; if (ir_ty == .string) return false; if (!ir_ty.isBuiltin()) {