From 736382d39c04a23271959d0a2d23235bf992a8cb Mon Sep 17 00:00:00 2001
From: agra <alex@swipelab.co>
Date: Tue, 19 May 2026 11:44:43 +0300
Subject: [PATCH] =?UTF-8?q?ffi=200.4:=20focused=20FP-aggregate=20(HFA)=20b?=
 =?UTF-8?q?aseline=20=E2=80=94=20FQuad=20+=20DQuad?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

91/91 regression tests pass (+ffi-04-fp-struct).

Single-file regression net for the all-float / all-double aggregate
ABI path:

  FQuad — 16 B, 4×f32   (same slot as ffi-02's Vec4f)
  DQuad — 32 B, 4×f64   (UIEdgeInsets-shape — the f32-vs-f64 landmine)

Already nominally covered by ffi-02's Vec4f, but pinning it as a
focused single-file test means a future ABI rule change that breaks
the HFA path fails *this* test directly without a noisy drag-in from
the multi-shape baseline.

DQuad at 32 B sits exactly at the AAPCS64 HFA limit for f64 (≤4 members
of the same FP type, so ≤32 B for f64 and ≤64 B overall); it stays as a
struct value passed through v0..v3 rather than going indirect. The
snapshot confirms the values arrive intact.
---
 examples/ffi-04-fp-struct.sx          | 49 +++++++++++++++++++++++++++
 tests/expected/ffi-04-fp-struct.exit  |  1 +
 tests/expected/ffi-04-fp-struct.txt   |  6 ++++
 vendors/ffi_fp_struct/ffi_fp_struct.c | 29 ++++++++++++++++
 vendors/ffi_fp_struct/ffi_fp_struct.h | 20 +++++++++++
 5 files changed, 105 insertions(+)
 create mode 100644 examples/ffi-04-fp-struct.sx
 create mode 100644 tests/expected/ffi-04-fp-struct.exit
 create mode 100644 tests/expected/ffi-04-fp-struct.txt
 create mode 100644 vendors/ffi_fp_struct/ffi_fp_struct.c
 create mode 100644 vendors/ffi_fp_struct/ffi_fp_struct.h

diff --git a/examples/ffi-04-fp-struct.sx b/examples/ffi-04-fp-struct.sx
new file mode 100644
index 0000000..67f5499
--- /dev/null
+++ b/examples/ffi-04-fp-struct.sx
@@ -0,0 +1,49 @@
+// Phase 0 baseline (PLAN-FFI.md step 0.4): focused FP-aggregate (HFA)
+// FFI test. On AAPCS64, all-float / all-double aggregates of ≤4 fields
+// stay as struct values in LLVM and travel in v0..v3. On SysV AMD64 only
+// FQuad uses SSE registers (xmm0/xmm1); the 32 B DQuad is class MEMORY.
+// Distinct from the int register-coercion paths (i64 / [2 x i64]).
+//
+//   FQuad — 16 B, four f32     (same slot as ffi-02's Vec4f)
+//   DQuad — 32 B, four f64     (UIEdgeInsets-shape HFA — the
+//                               f32-vs-f64 landmine from this session)
+//
+// Already nominally covered by ffi-02's Vec4f, but pinning it as a
+// focused single-file test means a future ABI rule change that
+// breaks the FP path fails *this* test directly without a noisy
+// drag-in from the multi-shape baseline.
+
+#import "modules/std.sx";
+
+#import c {
+    #source "vendors/ffi_fp_struct/ffi_fp_struct.c";
+};
+
+FQuad :: struct { a: f32; b: f32; c: f32; d: f32; }  // field-for-field mirror of C `FQuad` (four float)
+DQuad :: struct { a: f64; b: f64; c: f64; d: f64; }  // field-for-field mirror of C `DQuad` (four double)
+
+ffi_fquad_make    :: (a: f32, b: f32, c: f32, d: f32) -> FQuad #foreign;  // packs a..d into an FQuad, in order
+ffi_fquad_reverse :: (v: FQuad)                       -> FQuad #foreign;  // returns (v.d, v.c, v.b, v.a)
+ffi_fquad_sum     :: (v: FQuad)                       -> f32   #foreign;  // returns v.a + v.b + v.c + v.d
+
+ffi_dquad_make    :: (a: f64, b: f64, c: f64, d: f64) -> DQuad #foreign;  // packs a..d into a DQuad, in order
+ffi_dquad_reverse :: (v: DQuad)                       -> DQuad #foreign;  // returns (v.d, v.c, v.b, v.a)
+ffi_dquad_sum     :: (v: DQuad)                       -> f64   #foreign;  // returns v.a + v.b + v.c + v.d
+
+main :: () -> s32 {
+    // ── FQuad (16 B, 4×f32 HFA): make, reverse, sum — four distinct values ──
+    f := ffi_fquad_make(1.0, 2.0, 3.0, 4.0);
+    print("fquad make = ({}, {}, {}, {})\n", f.a, f.b, f.c, f.d);
+    g := ffi_fquad_reverse(f);  // struct passed by value and returned by value
+    print("fquad rev  = ({}, {}, {}, {})\n", g.a, g.b, g.c, g.d);
+    print("fquad sum  = {}\n", ffi_fquad_sum(f));  // sums the original `f`, not `g`
+
+    // ── DQuad (32 B, 4×f64 HFA — UIEdgeInsets-shape): same three calls ──────
+    d := ffi_dquad_make(1.5, 2.5, 3.5, 4.5);
+    print("dquad make = ({}, {}, {}, {})\n", d.a, d.b, d.c, d.d);
+    e := ffi_dquad_reverse(d);  // struct passed by value and returned by value
+    print("dquad rev  = ({}, {}, {}, {})\n", e.a, e.b, e.c, e.d);
+    print("dquad sum  = {}\n", ffi_dquad_sum(d));  // sums the original `d`, not `e`
+
+    0;  // NOTE(review): presumably main's s32 result — the expected exit code is 0; confirm
+}
diff --git a/tests/expected/ffi-04-fp-struct.exit b/tests/expected/ffi-04-fp-struct.exit
new file mode 100644
index 0000000..573541a
--- /dev/null
+++ b/tests/expected/ffi-04-fp-struct.exit
@@ -0,0 +1 @@
+0
diff --git a/tests/expected/ffi-04-fp-struct.txt b/tests/expected/ffi-04-fp-struct.txt
new file mode 100644
index 0000000..26b9909
--- /dev/null
+++ b/tests/expected/ffi-04-fp-struct.txt
@@ -0,0 +1,6 @@
+fquad make = (1.000000, 2.000000, 3.000000, 4.000000)
+fquad rev  = (4.000000, 3.000000, 2.000000, 1.000000)
+fquad sum  = 10.000000
+dquad make = (1.500000, 2.500000, 3.500000, 4.500000)
+dquad rev  = (4.500000, 3.500000, 2.500000, 1.500000)
+dquad sum  = 12.000000
diff --git a/vendors/ffi_fp_struct/ffi_fp_struct.c b/vendors/ffi_fp_struct/ffi_fp_struct.c
new file mode 100644
index 0000000..125a5f7
--- /dev/null
+++ b/vendors/ffi_fp_struct/ffi_fp_struct.c
@@ -0,0 +1,29 @@
+#include "ffi_fp_struct.h"
+
+FQuad ffi_fquad_make(float a, float b, float c, float d) {
+    FQuad packed = { .a = a, .b = b, .c = c, .d = d }; /* one argument per field, in order */
+    return packed;
+}
+
+FQuad ffi_fquad_reverse(FQuad v) {
+    FQuad mirrored = { .a = v.d, .b = v.c, .c = v.b, .d = v.a }; /* (a,b,c,d) -> (d,c,b,a) */
+    return mirrored;
+}
+
+float ffi_fquad_sum(FQuad v) {
+    return ((v.a + v.b) + v.c) + v.d; /* explicit left-to-right association */
+}
+
+DQuad ffi_dquad_make(double a, double b, double c, double d) {
+    DQuad packed = { .a = a, .b = b, .c = c, .d = d }; /* one argument per field, in order */
+    return packed;
+}
+
+DQuad ffi_dquad_reverse(DQuad v) {
+    DQuad mirrored = { .a = v.d, .b = v.c, .c = v.b, .d = v.a }; /* (a,b,c,d) -> (d,c,b,a) */
+    return mirrored;
+}
+
+double ffi_dquad_sum(DQuad v) {
+    return ((v.a + v.b) + v.c) + v.d; /* explicit left-to-right association */
+}
diff --git a/vendors/ffi_fp_struct/ffi_fp_struct.h b/vendors/ffi_fp_struct/ffi_fp_struct.h
new file mode 100644
index 0000000..cd4fda2
--- /dev/null
+++ b/vendors/ffi_fp_struct/ffi_fp_struct.h
@@ -0,0 +1,20 @@
+// Focused FP-aggregate (HFA) FFI baselines. Distinct from the int-aggregate
+// register-coercion paths: on AAPCS64, all-float / all-double structs of ≤4
+// fields stay as struct values in LLVM and are passed/returned in v0..v3. On
+// SysV AMD64 only the 16 B FQuad uses xmm0/xmm1; the 32 B DQuad is class
+// MEMORY. This was the `UIEdgeInsets`-as-f32-vs-f64 landmine — pinned here so
+// a future ABI rule change that wrecks the FP path fails this test directly.
+//
+//   FQuad   — 16 B, four float    (small HFA; same slot as Vec4f)
+//   DQuad   — 32 B, four double   (UIEdgeInsets-shape HFA)
+#pragma once
+typedef struct { float a; float b; float c; float d; }     FQuad;  // 16 B; anonymous, so must be defined once per TU
+typedef struct { double a; double b; double c; double d; } DQuad;  // 32 B; anonymous, so must be defined once per TU
+
+FQuad  ffi_fquad_make   (float a, float b, float c, float d);      // packs a..d in order
+FQuad  ffi_fquad_reverse(FQuad v);                                  // (a,b,c,d) -> (d,c,b,a)
+float  ffi_fquad_sum    (FQuad v);                                  // a + b + c + d
+
+DQuad  ffi_dquad_make   (double a, double b, double c, double d);  // packs a..d in order
+DQuad  ffi_dquad_reverse(DQuad v);                                  // (a,b,c,d) -> (d,c,b,a)
+double ffi_dquad_sum    (DQuad v);                                  // a + b + c + d