test(ffi-linkage): xfail export fn called from C via AOT (Phase 2.0)

Phase 2 of the extern/export stream verifies `export` (define + expose a C-ABI sx symbol) end-to-end. C->sx-by-name linkage cannot work under the corpus's `sx run` JIT mode — a JIT-resident symbol is invisible to a dlopen'd C dylib's flat-namespace lookup — so this lands a new AOT execution mode for the corpus: an `expected/<name>.aot` marker switches an example from JIT `sx run` to a `sx build` + execute flow, linking the sx object with its C `#source` companions into a native binary. example/1226 defines `sx_square :: (n: i32) -> i32 export { ... }` and a companion .c that declares `extern int sx_square(int)` and calls it back. RED: with `export` not yet lowered, the AOT link fails with an undefined `_sx_square` (the define path still emits it `internal` + with an implicit ctx slot, and lazy lowering leaves an uncalled export fn as a bodiless declare). Phase 2.1 greens it. Also retires the standalone `tests/run_examples.sh` runner — `zig build test` (src/corpus_run.test.zig) is now the sole corpus runner, and the shell mirror would have needed its own AOT-mode port to stay in lockstep. verify-step.sh drops its redundant step (zig build test already runs the corpus); CLAUDE.md documents the `.aot` mode.
2026-06-14 14:41:33 +03:00
parent 6932426c41
commit 6a539ca057
11 changed files with 138 additions and 202 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -430,15 +430,11 @@ After any compiler change:
     preferred way to update snapshots — no shell script needed.
   - A test is still keyed off its `expected/<name>.exit` marker, so seed an
     empty marker first for a brand-new example (see "Adding a feature").
-3. **Standalone corpus run** (optional): `bash tests/run_examples.sh`
+`zig build test` is the only way to run the corpus — there is no standalone
-   - Runs the corpus independent of `zig build test` (used by
+shell runner (the legacy `tests/run_examples.sh` was removed). An
-     `tools/verify-step.sh`). `--update` still regenerates snapshots and
+`expected/<name>.aot` marker switches an example from JIT `sx run` to a
-     produces byte-identical output to `-Dupdate-goldens`.
+`sx build` + execute flow (needed to exercise a C-ABI symbol exported FROM sx
-   - Every test must show `ok` (currently 626); zero failures, zero timeouts.
+— a JIT-resident symbol is invisible to a dlopen'd C dylib).
-   - Uses GNU `timeout`/`gtimeout` when present (Homebrew coreutils on macOS)
-     and runs without a per-test wall-clock guard when neither is found.
-   - The two normalizers (`normalize`/`normalize_ir` in the script and the
-     mirrors in `src/corpus_run.test.zig`) must stay in lockstep.
 ### Test layout
@@ -495,7 +491,6 @@ There is no monolithic smoke file — each feature is its own focused example.
 | `issues/NNNN-slug.md` | Open-issue / bug-report writeup (mark RESOLVED in a banner when fixed; the `.md` stays). |
 | `issues/NNNN-slug.sx` (+ `issues/NNNN-slug/`) | The issue's minimal repro, co-located with the `.md`. A repro with an `issues/expected/NNNN-slug.exit` marker runs in the suite; unpinned ones don't. |
 | `src/corpus_run.test.zig` | The corpus runner inside `zig build test` — spawns `sx` per example, diffs stdout/stderr/exit (+ optional IR); regenerates snapshots under `-Dupdate-goldens`. |
-| `tests/run_examples.sh` | Standalone shell runner (used by `tools/verify-step.sh`); same compare + `--update` as the Zig test. |
 ### Unit test file convention
--- a/examples/1226-ffi-export-fn.c
+++ b/examples/1226-ffi-export-fn.c
@@ -0,0 +1,8 @@
 #include "1226-ffi-export-fn.h"
 // Defined on the sx side via `export` — a plain C-ABI symbol, no sx context.
 extern int sx_square(int n);
 /* Calls the sx-exported `sx_square` on `n` and returns that result plus 1. */
 int call_sx_square(int n) {
    /* Round-trip through the sx-defined symbol first, then bump the result. */
    int squared = sx_square(n);
    return squared + 1;
 }
--- a/examples/1226-ffi-export-fn.h
+++ b/examples/1226-ffi-export-fn.h
@@ -0,0 +1,7 @@
 #ifndef SX_EXPORT_FN_H
 #define SX_EXPORT_FN_H
 // Calls back into the sx-exported `sx_square` and adds 1.
 int call_sx_square(int n);
 #endif
--- a/examples/1226-ffi-export-fn.sx
+++ b/examples/1226-ffi-export-fn.sx
@@ -0,0 +1,28 @@
 // export function (FFI-linkage stream, Phase 2): define an sx function with
 // the bare `export` linkage modifier — external linkage + C ABI + no sx ctx —
 // so a companion C translation unit can call back into it by its plain symbol
 // name. The C side (`#source`) declares `sx_square` as a normal `extern int`
 // and calls it; sx `main` drives the C side via `call_sx_square`. Mirrors the
 // import-direction `extern` examples (1223–1225) for the define direction.
 //
 // Without `export`, an sx-defined fn is `internal` linkage + carries the
 // implicit `__sx_ctx` slot, so the C object can neither resolve nor correctly
 // call the symbol — this is the gap `export` fills.
 #import "modules/std.sx";
 #import c {
    #include "1226-ffi-export-fn.h";
    #source "1226-ffi-export-fn.c";
 };
 // sx-defined, exported to C: external linkage + C ABI + no implicit ctx.
 // Returns `n * n`. The companion C file reaches it by the plain symbol name
 // `sx_square`, which it declares as `extern int sx_square(int n)`.
 sx_square :: (n: i32) -> i32 export {
    return n * n;
 }
 // Entry point: drives the C companion, which calls back into `sx_square`.
 // The expected stdout snapshot pins the two printed results at 37 and 82.
 main :: () -> i32 {
    // call_sx_square (C) calls back into sx_square, adds 1.
    print("call_sx_square(6) = {}\n", call_sx_square(6));
    print("call_sx_square(9) = {}\n", call_sx_square(9));
    // NOTE(review): the trailing bare `0` is presumably the i32 result (the
    // `.exit` snapshot expects 0) — confirm against the language's return rules.
    0
 }
--- a/examples/expected/1226-ffi-export-fn.aot
+++ b/examples/expected/1226-ffi-export-fn.aot
--- a/examples/expected/1226-ffi-export-fn.exit
+++ b/examples/expected/1226-ffi-export-fn.exit
@@ -0,0 +1 @@
 0
--- a/examples/expected/1226-ffi-export-fn.stderr
+++ b/examples/expected/1226-ffi-export-fn.stderr
--- a/examples/expected/1226-ffi-export-fn.stdout
+++ b/examples/expected/1226-ffi-export-fn.stdout
@@ -0,0 +1,2 @@
 call_sx_square(6) = 37
 call_sx_square(9) = 82
--- a/src/corpus_run.test.zig
+++ b/src/corpus_run.test.zig
@@ -1,11 +1,15 @@
 const std = @import("std");
 const corpus_paths = @import("corpus_paths");
-// End-to-end example/issue regression runner — the pure-Zig replacement for
+// End-to-end example/issue regression runner. For every
-// `tests/run_examples.sh`. For every `<root>/expected/<name>.exit` marker under
+// `<root>/expected/<name>.exit` marker under examples/ and issues/, spawn the
-// examples/ and issues/, spawn the installed `sx` binary on `<name>.sx`, capture
+// installed `sx` binary on `<name>.sx`, capture stdout/stderr/exit, normalize,
-// stdout/stderr/exit, normalize, and diff against the stored snapshot. Optional
+// and diff against the stored snapshot. Optional `<name>.ir` snapshots
-// `<name>.ir` snapshots additionally diff `sx ir` output.
+// additionally diff `sx ir` output; an `<name>.aot` marker switches the
 // example from JIT `sx run` to a `sx build` + execute flow.
 //
 // This is the sole regression runner — `zig build test` is the only way to run
 // the corpus (the legacy standalone `tests/run_examples.sh` was removed).
 //
 // Each example runs in its OWN subprocess (via std.process.run), so a crashing
 // example reports its exit code (or 128+signal, matching a shell's `$?`) instead
@@ -21,9 +25,7 @@ const corpus_paths = @import("corpus_paths");
 // reimplemented here.)
 //
 // Snapshots are regenerated in-build with `zig build test -Dupdate-goldens`
-// (see the update-mode branch below) — no shell script needed. The legacy
+// (see the update-mode branch below) — no shell script needed.
-// `bash tests/run_examples.sh --update` still works and produces byte-identical
-// output; the two normalizers (here and in run_examples.sh) must stay in lockstep.
 const TIMEOUT_SECS = 10;
 const MAX_OUTPUT = 16 * 1024 * 1024;
@@ -50,9 +52,9 @@ fn isLowerHex(c: u8) bool {
    return (c >= '0' and c <= '9') or (c >= 'a' and c <= 'f');
 }
-/// Mirror of `normalize()` in run_examples.sh: collapse `0x` + 4-or-more
+/// Collapse `0x` + 4-or-more lowercase-hex digits to `0xADDR` so heap/fn
-/// lowercase-hex digits to `0xADDR` so heap/fn addresses don't desync snapshots.
+/// addresses don't desync snapshots. (The path-collapse rule is intentionally
-/// (The path-collapse sed rule is intentionally omitted — see file header.)
+/// omitted — see file header.)
 fn normalizeStd(arena: std.mem.Allocator, in: []const u8) ![]u8 {
    var out: std.ArrayList(u8) = .empty;
    var i: usize = 0;
@@ -115,7 +117,8 @@ fn appendIrSubst(arena: std.mem.Allocator, out: *std.ArrayList(u8), line: []cons
    }
 }
-/// Mirror of `normalize_ir()` in run_examples.sh.
+/// Normalize `sx ir` output for snapshot diffing: drop volatile module
 /// header lines and collapse LLVM's auto-suffixed temporaries.
 fn normalizeIr(arena: std.mem.Allocator, in: []const u8) ![]u8 {
    var out: std.ArrayList(u8) = .empty;
    var lines = std.mem.splitScalar(u8, in, '\n');
@@ -215,23 +218,74 @@ fn sweepRoot(
        const err_raw = readOptional(io, a, try std.fmt.allocPrint(a, "{s}/{s}.stderr", .{ exp_dir, name })) orelse "";
        const ir_raw = readOptional(io, a, try std.fmt.allocPrint(a, "{s}/{s}.ir", .{ exp_dir, name }));
-        // --- sx run ---
+        // An `<name>.aot` marker switches the example from JIT `sx run` to a
-        const run_res = std.process.run(a, io, .{
+        // build+execute flow: `sx build` links the sx object with any C
-            .argv = &.{ corpus_paths.sx_exe, "run", rel_path },
+        // `#source` companions into a native binary, which is then executed.
-            .cwd = .{ .path = repo_root },
+        // This is the ONLY way to exercise a C-ABI symbol exported FROM sx
-            .timeout = deadline(io),
+        // (an `export` fn): in JIT mode the sx symbol lives in JIT memory and
-        }) catch |err| {
+        // is invisible to a dlopen'd C dylib's flat-namespace lookup, so a
-            try failures.append(fail_gpa, try std.fmt.allocPrint(fail_gpa, "{s}: `sx run` {s}{s}", .{
+        // C→sx-by-name call can only be linked ahead-of-time.
-                name,
+        const is_aot = readOptional(io, a, try std.fmt.allocPrint(a, "{s}/{s}.aot", .{ exp_dir, name })) != null;
-                @errorName(err),
-                if (err == error.Timeout) " (>10s)" else "",
-            }));
-            continue;
-        };
-        const act_exit = termCode(run_res.term);
+        var act_exit: u32 = undefined;
-        const act_out = trimNl(try normalizeStd(a, run_res.stdout));
+        var act_out: []const u8 = undefined;
-        const act_err = trimNl(try normalizeStd(a, run_res.stderr));
+        var act_err: []const u8 = undefined;
        if (is_aot) {
            // Build a native executable, then run it. The build's own stderr
            // ("compiled: <path>") is intentionally discarded — only the built
            // program's streams are snapshotted. A build failure (e.g. an
            // unresolved exported symbol) surfaces as a non-zero exit with the
            // linker error on stderr.
            const bin_path = try std.fmt.allocPrint(a, "/tmp/sx_aot_{s}", .{name});
            const build_res = std.process.run(a, io, .{
                .argv = &.{ corpus_paths.sx_exe, "build", rel_path, "-o", bin_path },
                .cwd = .{ .path = repo_root },
                .timeout = deadline(io),
            }) catch |err| {
                try failures.append(fail_gpa, try std.fmt.allocPrint(fail_gpa, "{s}: `sx build` {s}{s}", .{
                    name, @errorName(err), if (err == error.Timeout) " (>10s)" else "",
                }));
                continue;
            };
            if (termCode(build_res.term) != 0) {
                act_exit = termCode(build_res.term);
                act_out = "";
                act_err = trimNl(try normalizeStd(a, build_res.stderr));
            } else {
                const exec_res = std.process.run(a, io, .{
                    .argv = &.{bin_path},
                    .cwd = .{ .path = repo_root },
                    .timeout = deadline(io),
                }) catch |err| {
                    try failures.append(fail_gpa, try std.fmt.allocPrint(fail_gpa, "{s}: exec AOT binary {s}{s}", .{
                        name, @errorName(err), if (err == error.Timeout) " (>10s)" else "",
                    }));
                    continue;
                };
                act_exit = termCode(exec_res.term);
                act_out = trimNl(try normalizeStd(a, exec_res.stdout));
                act_err = trimNl(try normalizeStd(a, exec_res.stderr));
            }
        } else {
            // --- sx run ---
            const run_res = std.process.run(a, io, .{
                .argv = &.{ corpus_paths.sx_exe, "run", rel_path },
                .cwd = .{ .path = repo_root },
                .timeout = deadline(io),
            }) catch |err| {
                try failures.append(fail_gpa, try std.fmt.allocPrint(fail_gpa, "{s}: `sx run` {s}{s}", .{
                    name,
                    @errorName(err),
                    if (err == error.Timeout) " (>10s)" else "",
                }));
                continue;
            };
            act_exit = termCode(run_res.term);
            act_out = trimNl(try normalizeStd(a, run_res.stdout));
            act_err = trimNl(try normalizeStd(a, run_res.stderr));
        }
        // --- sx ir (only when a snapshot already exists — update mode never
        // CREATES new .ir files) ---
--- a/tests/run_examples.sh
+++ b/tests/run_examples.sh
@@ -1,158 +0,0 @@
 #!/bin/bash
 # Example/issue regression test runner.
 # Usage: ./tests/run_examples.sh [--update]
 #   --update: regenerate expected output (.exit/.stdout/.stderr, and .ir where present)
 #
 # Layout (per CLAUDE.md): expected output lives in an `expected/` dir that
 # sits NEXT TO the test file, with three streams split out:
 #   <root>/<name>.sx
 #   <root>/expected/<name>.exit     # process exit code
 #   <root>/expected/<name>.stdout   # normalized stdout
 #   <root>/expected/<name>.stderr   # normalized stderr
 #   <root>/expected/<name>.ir       # optional `sx ir` snapshot
 # A test is any <name>.sx that has an <root>/expected/<name>.exit marker.
 # Roots scanned: examples/ and issues/.
 set -uo pipefail
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
 SX="$ROOT_DIR/zig-out/bin/sx"
 ROOTS=("$ROOT_DIR/examples" "$ROOT_DIR/issues")
 PASS=0
 FAIL=0
 SKIP=0
 TIMEOUT_COUNT=0
 UPDATE=0
 TIMEOUT=10
 if [[ "${1:-}" == "--update" ]]; then
    UPDATE=1
 fi
 # Per-test wall-clock guard. GNU `timeout` (or `gtimeout` from Homebrew
 # coreutils) kills a hung test after $TIMEOUT seconds. Neither ships on a
 # bare macOS, so degrade gracefully: when no timeout binary is found, run the
 # command directly (a hang then blocks the suite, but the suite still works).
 TIMEOUT_CMD=()
 if command -v timeout >/dev/null 2>&1; then
    TIMEOUT_CMD=(timeout "$TIMEOUT")
 elif command -v gtimeout >/dev/null 2>&1; then
    TIMEOUT_CMD=(gtimeout "$TIMEOUT")
 fi
 # Run a command under the timeout wrapper if one is available, else directly.
 # The length check (not "${arr[@]}") keeps this safe under bash 3.2 + `set -u`,
 # where expanding an empty array trips "unbound variable".
 run_sx() {
    if [[ ${#TIMEOUT_CMD[@]} -gt 0 ]]; then
        "${TIMEOUT_CMD[@]}" "$@"
    else
        "$@"
    fi
 }
 # Normalize stdout/stderr for snapshot diffing. Applied identically to both
 # expected and actual, so it can only reconcile location/host noise — never
 # desync an otherwise-matching pair. The path rule collapses any absolute
 # `.../examples/` or `.../issues/` prefix (diagnostics embed the source path)
 # down to the repo-relative form, so snapshots are checkout-location independent
 # (canonical tree vs. a git worktree).
 normalize() {
    sed -E \
        -e 's/0x[0-9a-f]{4,}/0xADDR/g' \
        -e 's#(/[^[:space:]]*)?/(examples|issues)/#\2/#g'
 }
 # Normalize `sx ir` output for snapshot diffing (host-specific noise + LLVM
 # auto-suffixed temporaries).
 normalize_ir() {
    sed -E \
        -e '/^; ModuleID =/d' \
        -e '/^source_filename =/d' \
        -e '/^target datalayout =/d' \
        -e '/^target triple =/d' \
        -e '/^attributes #[0-9]+ = \{/d' \
        -e 's/%([a-z]+)[0-9]+/%\1N/g'
 }
 TMP_ERR="$(mktemp)"
 trap 'rm -f "$TMP_ERR"' EXIT
 for root in "${ROOTS[@]}"; do
    expected_dir="$root/expected"
    [[ -d "$expected_dir" ]] || continue
    for exit_file in "$expected_dir"/*.exit; do
        [[ -e "$exit_file" ]] || continue
        name=$(basename "$exit_file" .exit)
        sx_file="$root/${name}.sx"
        out_file="$expected_dir/${name}.stdout"
        err_file="$expected_dir/${name}.stderr"
        ir_file="$expected_dir/${name}.ir"
        if [[ ! -f "$sx_file" ]]; then
            SKIP=$((SKIP + 1))
            continue
        fi
        printf "  %-48s" "$name"
        actual_out=$(run_sx "$SX" run "$sx_file" 2>"$TMP_ERR" | normalize)
        actual_exit=${PIPESTATUS[0]}
        actual_err=$(normalize < "$TMP_ERR")
        if [[ $actual_exit -eq 124 ]]; then
            TIMEOUT_COUNT=$((TIMEOUT_COUNT + 1))
            echo "TIMEOUT (>${TIMEOUT}s)"
            continue
        fi
        has_ir=false
        [[ -f "$ir_file" ]] && has_ir=true
        actual_ir=""
        if $has_ir; then
            actual_ir=$("$SX" ir "$sx_file" 2>&1 | normalize_ir)
        fi
        if [[ $UPDATE -eq 1 ]]; then
            echo "$actual_out" > "$out_file"
            echo "$actual_err" > "$err_file"
            echo "$actual_exit" > "$exit_file"
            $has_ir && echo "$actual_ir" > "$ir_file"
            echo "updated (exit=$actual_exit)"
            continue
        fi
        expected_out=$(normalize < "$out_file" 2>/dev/null)
        expected_err=$(normalize < "$err_file" 2>/dev/null)
        expected_exit=$(cat "$exit_file")
        expected_ir=""
        $has_ir && expected_ir=$(normalize_ir < "$ir_file")
        out_ok=true; err_ok=true; exit_ok=true; ir_ok=true
        [[ "$actual_out" == "$expected_out" ]] || out_ok=false
        [[ "$actual_err" == "$expected_err" ]] || err_ok=false
        [[ "$actual_exit" == "$expected_exit" ]] || exit_ok=false
        if $has_ir && [[ "$actual_ir" != "$expected_ir" ]]; then ir_ok=false; fi
        if $out_ok && $err_ok && $exit_ok && $ir_ok; then
            PASS=$((PASS + 1))
            echo "ok"
        else
            FAIL=$((FAIL + 1))
            echo "FAIL"
            $out_ok || { echo "  --- stdout diff ---"; diff <(echo "$expected_out") <(echo "$actual_out") || true; }
            $err_ok || { echo "  --- stderr diff ---"; diff <(echo "$expected_err") <(echo "$actual_err") || true; }
            $exit_ok || echo "  exit code: expected=$expected_exit actual=$actual_exit"
            $ir_ok || { echo "  --- IR diff ---"; diff <(echo "$expected_ir") <(echo "$actual_ir") || true; }
        fi
    done
 done
 if [[ $UPDATE -eq 1 ]]; then
    echo "Updated all expected output files."
    exit 0
 fi
 echo "$PASS passed, $FAIL failed, $SKIP skipped, $TIMEOUT_COUNT timed out"
 [[ $FAIL -eq 0 && $TIMEOUT_COUNT -eq 0 ]]
--- a/tools/verify-step.sh
+++ b/tools/verify-step.sh
@@ -20,13 +20,12 @@ cd "$ROOT"
 # Step 1 of 4: plain compiler build. The total is 4 (not 5) so this header
 # agrees with the 2/4, 3/4 and 4/4 headers printed by the later steps.
 echo "── 1/4 zig build ─────────────────────────────────────"
 zig build
-echo "── 2/5 zig build test ────────────────────────────────"
+echo "── 2/4 zig build test ────────────────────────────────"
 # Runs the unit tests AND the full example/issue regression corpus
 # (src/corpus_run.test.zig) — a failing example fails the build.
 zig build test
-echo "── 3/5 example regression suite ──────────────────────"
+echo "── 3/4 chess: cross-build for all 3 platforms ────────"
-bash tests/run_examples.sh
-echo "── 4/5 chess: cross-build for all 3 platforms ────────"
 # Builds must be serial — sx writes to .sx-tmp/ which would race in parallel.
 cd "$GAME"
 "$SX" build main.sx                        > /tmp/sx-game-macos-build.log 2>&1 \
@@ -39,7 +38,7 @@ echo "  iOS sim  OK"
    || { echo "Android build failed:"; cat /tmp/sx-game-android-build.log; exit 1; }
 echo "  Android  OK"
-echo "── 5/5 chess: launch + screenshot on each platform ───"
+echo "── 4/4 chess: launch + screenshot on each platform ───"
 # macOS — direct binary launch
 ./sx-out/macos/SxChess > /tmp/sx-game-macos-run.log 2>&1 &
		`@@ -0,0 +1,2 @@`
							`call_sx_square(6) = 37`
							`call_sx_square(9) = 82`