test: run example corpus in zig build test; sx ir → stdout

`zig build test` now runs the full examples/ + issues/ regression corpus
alongside the Zig unit tests, driven by a pure-Zig test
(src/corpus_run.test.zig) — no shell script in the build path. It spawns
the installed `sx` per example (subprocess-isolated, per-run timeout),
diffs stdout/stderr/exit and optional `sx ir` snapshots, and fails the
build on any mismatch. The file list is enumerated at runtime, so new
examples are covered with no test edit.

- `sx ir` / `ir-dump` now write to stdout (fd 1) instead of stderr, so
  the dumps can be piped/redirected.
- `zig build test -Dupdate-goldens` regenerates snapshots in-build,
  byte-identical to the legacy `run_examples.sh --update`; on mismatch
  the runner prints how to regenerate.
- run_examples.sh kept (still used by tools/verify-step.sh) and made
  portable to a bare macOS: timeout/gtimeout fallback, bash 3.2-safe
  empty-array handling.
- CLAUDE.md: document the new workflow.
This commit is contained in:
agra
2026-06-13 09:41:56 +03:00
parent 39488133c9
commit ab3c9202ff
7 changed files with 464 additions and 25 deletions

367
src/corpus_run.test.zig Normal file
View File

@@ -0,0 +1,367 @@
const std = @import("std");
const corpus_paths = @import("corpus_paths");
// End-to-end example/issue regression runner — the pure-Zig replacement for
// `tests/run_examples.sh`. For every `<root>/expected/<name>.exit` marker under
// examples/ and issues/, spawn the installed `sx` binary on `<name>.sx`, capture
// stdout/stderr/exit, normalize, and diff against the stored snapshot. Optional
// `<name>.ir` snapshots additionally diff `sx ir` output.
//
// Each example runs in its OWN subprocess (via std.process.run), so a crashing
// example reports its exit code (or 128+signal, matching a shell's `$?`) instead
// of taking down the test binary. A per-run deadline guards against hangs.
//
// Paths + the `sx` binary path are injected at configure time (build.zig
// `corpus_paths`); the FILE LIST is enumerated at test time, so new examples are
// covered with no edit here. The child runs with cwd = repo root and is handed a
// repo-relative path (e.g. `examples/0001-foo.sx`) — exactly the form the stored
// snapshots are normalized to, and the cwd `tests/fixtures/` imports resolve
// against. (The shell runner passes absolute paths and relies on a sed rule to
// collapse them back; running relatively makes that rule a no-op, so it is not
// reimplemented here.)
//
// Snapshots are regenerated in-build with `zig build test -Dupdate-goldens`
// (see the update-mode branch below) — no shell script needed. The legacy
// `bash tests/run_examples.sh --update` still works and produces byte-identical
// output; the two normalizers (here and in run_examples.sh) must stay in lockstep.
/// Per-subprocess time budget in seconds; `deadline()` converts it into the
/// absolute timeout handed to each spawned `sx` invocation.
const TIMEOUT_SECS = 10;
/// Byte limit (16 MiB) passed to `readFileAlloc` when `readOptional` loads a
/// stored snapshot file.
const MAX_OUTPUT = 16 * 1024 * 1024;
/// Expose the live C `environ` array as a `std.process.Environ`, so children
/// spawned through the test `Io` inherit this process's environment.
///
/// Why this exists: `Io.Threaded`'s default `process_environ` is EMPTY, and a
/// spawn with a null `environ_map` falls back to it. Without this wrapper the
/// child `sx` would run with no PATH, and getenv-based examples (e.g. 1222)
/// would fail spuriously.
///
/// The returned slice aliases the process-lifetime `environ` global; nothing
/// is copied and nothing needs to be freed.
fn currentEnviron() std.process.Environ {
    const c_environ: [*:null]const ?[*:0]const u8 = @ptrCast(std.c.environ);
    const entries = std.mem.span(c_environ);
    return .{ .block = .{ .slice = entries } };
}
/// Backing `Io.Threaded` for `test_io()`; stays null until the first call.
var g_test_threaded: ?std.Io.Threaded = null;

/// Return the `Io` used by the tests in this file. The underlying
/// `Io.Threaded` is created on first use (page allocator, environment taken
/// from `currentEnviron()`) and reused by every later call.
fn test_io() std.Io {
    // Fast path: already initialized — hand out the existing instance.
    if (g_test_threaded) |*threaded| return threaded.io();

    g_test_threaded = std.Io.Threaded.init(std.heap.page_allocator, .{ .environ = currentEnviron() });
    return g_test_threaded.?.io();
}
/// True when `c` is an ASCII digit or a lowercase hex letter (`a`..`f`).
/// Uppercase `A`..`F` is deliberately NOT accepted.
fn isLowerHex(c: u8) bool {
    return switch (c) {
        '0'...'9', 'a'...'f' => true,
        else => false,
    };
}
/// Counterpart of `normalize()` in run_examples.sh: every `0x` followed by at
/// least four lowercase-hex digits is rewritten to the literal `0xADDR`, so
/// heap/function addresses do not desync snapshots. Shorter hex runs (and any
/// other bytes) are copied through unchanged.
/// (The shell's path-collapse sed rule is intentionally not reproduced — see
/// the file header.)
///
/// The result is allocated from `arena`; the caller is expected to release it
/// by resetting/deinitializing that arena.
fn normalizeStd(arena: std.mem.Allocator, in: []const u8) ![]u8 {
    const min_digits = 4;
    var result: std.ArrayList(u8) = .empty;
    var pos: usize = 0;
    scan: while (pos < in.len) {
        if (std.mem.startsWith(u8, in[pos..], "0x")) {
            const digits_start = pos + 2;
            var digits_end = digits_start;
            while (digits_end < in.len and isLowerHex(in[digits_end])) : (digits_end += 1) {}
            if (digits_end - digits_start >= min_digits) {
                // Long enough to be an address: replace the whole token.
                try result.appendSlice(arena, "0xADDR");
                pos = digits_end;
                continue :scan;
            }
        }
        // Not an address token: copy this byte through verbatim.
        try result.append(arena, in[pos]);
        pos += 1;
    }
    return result.items;
}
/// Hand-rolled match for `^attributes #[0-9]+ = \{` — one of the line-drop
/// patterns of normalize_ir. Returns true only when the line begins with
/// `attributes #`, then one or more decimal digits, then ` = {`.
fn isAttributesLine(line: []const u8) bool {
    const prefix = "attributes #";
    if (!std.mem.startsWith(u8, line, prefix)) return false;

    const rest = line[prefix.len..];
    var digit_count: usize = 0;
    while (digit_count < rest.len and std.ascii.isDigit(rest[digit_count])) digit_count += 1;

    // `[0-9]+` needs at least one digit.
    if (digit_count == 0) return false;
    return std.mem.startsWith(u8, rest[digit_count..], " = {");
}
/// Decide whether an IR line is removed during normalization: module header
/// lines (`; ModuleID =`, `source_filename =`, `target datalayout =`,
/// `target triple =`) and `attributes #N = {` lines are dropped.
fn dropIrLine(line: []const u8) bool {
    const header_prefixes = [_][]const u8{
        "; ModuleID =",
        "source_filename =",
        "target datalayout =",
        "target triple =",
    };
    for (header_prefixes) |prefix| {
        if (std.mem.startsWith(u8, line, prefix)) return true;
    }
    return isAttributesLine(line);
}
/// Append `line` to `out` with `s/%([a-z]+)[0-9]+/%\1N/g` applied: a `%`
/// followed by one or more lowercase letters and then one or more digits has
/// its digit run replaced by a single `N` (`%tmp17` -> `%tmpN`), so LLVM's
/// renumbering of auto-suffixed temporaries doesn't desync snapshots.
/// Everything else is copied byte-for-byte. Growth of `out` uses `arena`.
fn appendIrSubst(arena: std.mem.Allocator, out: *std.ArrayList(u8), line: []const u8) !void {
    var pos: usize = 0;
    while (pos < line.len) {
        const c = line[pos];
        if (c == '%') {
            const name_start = pos + 1;
            var name_end = name_start;
            while (name_end < line.len and std.ascii.isLower(line[name_end])) name_end += 1;

            var num_end = name_end;
            while (num_end < line.len and std.ascii.isDigit(line[num_end])) num_end += 1;

            // Both the letter run and the digit run must be non-empty.
            if (name_end != name_start and num_end != name_end) {
                try out.append(arena, '%');
                try out.appendSlice(arena, line[name_start..name_end]);
                try out.append(arena, 'N');
                pos = num_end;
                continue;
            }
        }
        try out.append(arena, c);
        pos += 1;
    }
}
/// Counterpart of `normalize_ir()` in run_examples.sh. Splits `in` on `\n`,
/// discards every line for which `dropIrLine` is true, rewrites the remaining
/// lines with `appendIrSubst`, and joins them back with single `\n`
/// separators (no separator is emitted for a dropped line).
/// The result lives in `arena`.
fn normalizeIr(arena: std.mem.Allocator, in: []const u8) ![]u8 {
    var result: std.ArrayList(u8) = .empty;
    var kept: usize = 0;
    var line_iter = std.mem.splitScalar(u8, in, '\n');
    while (line_iter.next()) |line| {
        if (dropIrLine(line)) continue;
        // Separator goes BEFORE every kept line except the first.
        if (kept != 0) try result.append(arena, '\n');
        kept += 1;
        try appendIrSubst(arena, &result, line);
    }
    return result.items;
}
/// Strip every trailing `\n` from `s` (and nothing else), returning a
/// sub-slice of the input. This matches the shell runner's `$(...)` capture,
/// which removes trailing newlines from both expected and actual output
/// before they are compared.
fn trimNl(s: []const u8) []const u8 {
    var end = s.len;
    while (end > 0 and s[end - 1] == '\n') end -= 1;
    return s[0..end];
}
/// Convert a child's termination state into a single number using bash's
/// `$?` convention: a normal exit yields its exit code; a signal-terminated
/// (or stopped) child yields 128 + the signal number. An `.unknown` status is
/// passed through unchanged.
fn termCode(term: std.process.Child.Term) u32 {
    switch (term) {
        .exited => |code| return code,
        .unknown => |raw_status| return raw_status,
        .signal, .stopped => |sig| {
            const sig_num: u32 = @intCast(@intFromEnum(sig));
            return 128 + sig_num;
        },
    }
}
/// Build the timeout for one subprocess run: an absolute deadline
/// `TIMEOUT_SECS` seconds from now, measured on the `.awake` clock.
/// Call it immediately before each spawn so every run gets a fresh budget.
fn deadline(io: std.Io) std.Io.Timeout {
    const budget = std.Io.Clock.Duration{
        .raw = std.Io.Duration.fromSeconds(TIMEOUT_SECS),
        .clock = .awake,
    };
    const expires_at = std.Io.Clock.Timestamp.fromNow(io, budget);
    return .{ .deadline = expires_at };
}
/// Read the whole file at `abs_path` (at most `MAX_OUTPUT` bytes) into memory
/// allocated from `gpa`, or return null when it cannot be read.
///
/// A missing file is the expected "no snapshot of this kind" case and is
/// silent. Any OTHER failure (permissions, file larger than `MAX_OUTPUT`,
/// out of memory, ...) still returns null — callers rely on the optional —
/// but is reported on stderr, because silently treating e.g. an unreadable
/// `.ir` snapshot as "absent" would skip that check without anyone noticing.
fn readOptional(io: std.Io, gpa: std.mem.Allocator, abs_path: []const u8) ?[]u8 {
    return std.Io.Dir.readFileAlloc(.cwd(), io, abs_path, gpa, .limited(MAX_OUTPUT)) catch |err| {
        if (err != error.FileNotFound) {
            std.debug.print("[corpus-run] warning: cannot read {s}: {s}\n", .{ abs_path, @errorName(err) });
        }
        return null;
    };
}
/// Run every `<root>/expected/*.exit` test. Appends a formatted diagnostic to
/// `failures` (owned by `fail_gpa`) for each mismatch. Returns the number of
/// tests actually run (markers whose `.sx` is missing are skipped).
///
/// Markers are processed in byte-wise sorted name order, so the failure
/// report (and the order snapshots are rewritten in update mode) does not
/// depend on the file system's directory iteration order.
///
/// If the `expected/` directory cannot be opened, nothing is run and 0 is
/// returned. In update mode (`corpus_paths.update_goldens`) snapshots are
/// overwritten instead of compared.
fn sweepRoot(
    fail_gpa: std.mem.Allocator,
    io: std.Io,
    root_dir: []const u8,
    failures: *std.ArrayList([]const u8),
) !usize {
    // Derived from TIMEOUT_SECS so the hint cannot drift from the real budget.
    const timeout_hint: []const u8 = std.fmt.comptimePrint(" (>{d}s)", .{TIMEOUT_SECS});

    // Repo root (parent of examples/ or issues/) is the child's cwd: relative
    // source paths land in diagnostics already-normalized, and tests/fixtures/
    // imports resolve here.
    const repo_root = std.fs.path.dirname(root_dir) orelse ".";
    const root_base = std.fs.path.basename(root_dir); // "examples" | "issues"

    var name_arena_state = std.heap.ArenaAllocator.init(fail_gpa);
    defer name_arena_state.deinit();
    const name_arena = name_arena_state.allocator();

    const expected_dir_path = try std.fs.path.join(name_arena, &.{ root_dir, "expected" });
    var dir = std.Io.Dir.openDirAbsolute(io, expected_dir_path, .{ .iterate = true }) catch return 0;
    defer dir.close(io);

    // Collect marker names first (entry.name is only valid until the next
    // iterate step; spawning subprocesses mid-iteration is asking for trouble).
    var names: std.ArrayList([]const u8) = .empty;
    var it = dir.iterate();
    while (try it.next(io)) |entry| {
        if (entry.kind == .directory) continue; // accept .file and .unknown d_type
        if (!std.mem.endsWith(u8, entry.name, ".exit")) continue;
        const name = entry.name[0 .. entry.name.len - ".exit".len];
        try names.append(name_arena, try name_arena.dupe(u8, name));
    }

    // Directory iteration order is unspecified; sort for reproducible output.
    std.mem.sort([]const u8, names.items, {}, struct {
        fn lessThan(_: void, lhs: []const u8, rhs: []const u8) bool {
            return std.mem.lessThan(u8, lhs, rhs);
        }
    }.lessThan);

    // Scratch arena, reset at the top of every iteration: everything allocated
    // from `a` below lives for exactly one test.
    var work_state = std.heap.ArenaAllocator.init(fail_gpa);
    defer work_state.deinit();

    var ran: usize = 0;
    var skipped: usize = 0;
    var updated: usize = 0;
    for (names.items) |name| {
        _ = work_state.reset(.retain_capacity);
        const a = work_state.allocator();

        const sx_abs = try std.fs.path.join(a, &.{ root_dir, try std.fmt.allocPrint(a, "{s}.sx", .{name}) });
        std.Io.Dir.access(.cwd(), io, sx_abs, .{}) catch { // marker without source
            skipped += 1;
            std.debug.print("[corpus-run] skip {s} (no {s}.sx)\n", .{ name, name });
            continue;
        };
        ran += 1;

        const rel_path = try std.fmt.allocPrint(a, "{s}/{s}.sx", .{ root_base, name });
        const exp_dir = expected_dir_path;
        const exit_raw = readOptional(io, a, try std.fmt.allocPrint(a, "{s}/{s}.exit", .{ exp_dir, name })) orelse "";
        const out_raw = readOptional(io, a, try std.fmt.allocPrint(a, "{s}/{s}.stdout", .{ exp_dir, name })) orelse "";
        const err_raw = readOptional(io, a, try std.fmt.allocPrint(a, "{s}/{s}.stderr", .{ exp_dir, name })) orelse "";
        const ir_raw = readOptional(io, a, try std.fmt.allocPrint(a, "{s}/{s}.ir", .{ exp_dir, name }));

        // --- sx run ---
        const run_res = std.process.run(a, io, .{
            .argv = &.{ corpus_paths.sx_exe, "run", rel_path },
            .cwd = .{ .path = repo_root },
            .timeout = deadline(io),
        }) catch |err| {
            try failures.append(fail_gpa, try std.fmt.allocPrint(fail_gpa, "{s}: `sx run` {s}{s}", .{
                name,
                @errorName(err),
                if (err == error.Timeout) timeout_hint else "",
            }));
            continue;
        };
        const act_exit = termCode(run_res.term);
        const act_out = trimNl(try normalizeStd(a, run_res.stdout));
        const act_err = trimNl(try normalizeStd(a, run_res.stderr));

        // --- sx ir (only when a snapshot already exists; mirrors the shell's
        // `$has_ir` gate — update mode never CREATES new .ir files) ---
        var act_ir: ?[]const u8 = null;
        if (ir_raw != null) {
            const ir_res = std.process.run(a, io, .{
                .argv = &.{ corpus_paths.sx_exe, "ir", rel_path },
                .cwd = .{ .path = repo_root },
                .timeout = deadline(io),
            }) catch |err| {
                try failures.append(fail_gpa, try std.fmt.allocPrint(fail_gpa, "{s}: `sx ir` {s}{s}", .{
                    name,
                    @errorName(err),
                    if (err == error.Timeout) timeout_hint else "",
                }));
                continue;
            };
            // `sx ir` writes IR to stdout; mirror the shell's `2>&1` by appending
            // stderr (empty for a clean dump).
            const merged = try std.fmt.allocPrint(a, "{s}{s}", .{ ir_res.stdout, ir_res.stderr });
            act_ir = trimNl(try normalizeIr(a, merged));
        }

        // --- update mode: overwrite snapshots with freshly-normalized output ---
        if (corpus_paths.update_goldens) {
            try writeGolden(io, a, exp_dir, name, "exit", try std.fmt.allocPrint(a, "{d}", .{act_exit}));
            try writeGolden(io, a, exp_dir, name, "stdout", act_out);
            try writeGolden(io, a, exp_dir, name, "stderr", act_err);
            if (act_ir) |ir| try writeGolden(io, a, exp_dir, name, "ir", ir);
            updated += 1;
            continue;
        }

        // --- verify against stored snapshot ---
        const exp_exit = std.fmt.parseInt(u32, std.mem.trim(u8, exit_raw, " \t\r\n"), 10) catch {
            try failures.append(fail_gpa, try std.fmt.allocPrint(fail_gpa, "{s}: unparseable expected exit '{s}'", .{ name, std.mem.trim(u8, exit_raw, " \t\r\n") }));
            continue;
        };
        // The stored snapshots go through the same normalizer as the live
        // output, so both sides of each comparison are in the same form.
        const exp_out = trimNl(try normalizeStd(a, out_raw));
        const exp_err = trimNl(try normalizeStd(a, err_raw));

        var diag: std.ArrayList(u8) = .empty;
        if (act_exit != exp_exit)
            try diag.appendSlice(a, try std.fmt.allocPrint(a, " exit: expected={d} actual={d}\n", .{ exp_exit, act_exit }));
        if (!std.mem.eql(u8, act_out, exp_out))
            try appendDiff(a, &diag, "stdout", exp_out, act_out);
        if (!std.mem.eql(u8, act_err, exp_err))
            try appendDiff(a, &diag, "stderr", exp_err, act_err);
        if (ir_raw) |ir_expected_raw| {
            // act_ir is non-null here: it was set above whenever ir_raw != null
            // (the only other path out of that branch is `continue`).
            const exp_ir = trimNl(try normalizeIr(a, ir_expected_raw));
            if (!std.mem.eql(u8, act_ir.?, exp_ir))
                try appendDiff(a, &diag, "IR", exp_ir, act_ir.?);
        }
        try recordIfFailed(fail_gpa, failures, name, diag.items);
    }

    if (skipped > 0)
        std.debug.print("[corpus-run] {s}: {d} marker(s) skipped (no matching .sx)\n", .{ root_base, skipped });
    if (corpus_paths.update_goldens)
        std.debug.print("[corpus-run] {s}: {d} snapshot(s) regenerated\n", .{ root_base, updated });
    return ran;
}
/// Write `content` followed by exactly one `\n` to `<exp_dir>/<name>.<ext>`,
/// replacing whatever was there. The single trailing newline matches the
/// shell runner's `echo "$x" > file` (command substitution strips trailing
/// newlines; echo re-adds exactly one). Used by update mode only.
/// `a` supplies scratch memory for the path and the file contents.
fn writeGolden(
    io: std.Io,
    a: std.mem.Allocator,
    exp_dir: []const u8,
    name: []const u8,
    ext: []const u8,
    content: []const u8,
) !void {
    const golden_path = try std.fmt.allocPrint(a, "{s}/{s}.{s}", .{ exp_dir, name, ext });
    const payload = try std.mem.concat(a, u8, &.{ content, "\n" });
    try std.Io.Dir.writeFile(.cwd(), io, .{
        .sub_path = golden_path,
        .data = payload,
    });
}
/// Record one test's accumulated diagnostics as a failure entry.
///
/// Does nothing when `diag` is empty (the test passed). Otherwise appends
/// `"<name>:\n<diag>"` to `failures`; the string is allocated from `fail_gpa`
/// and owned by the list's owner, who must free it with the same allocator.
/// If the append itself fails, the freshly formatted message is released
/// before the error propagates, so nothing leaks on that path.
fn recordIfFailed(
    fail_gpa: std.mem.Allocator,
    failures: *std.ArrayList([]const u8),
    name: []const u8,
    diag: []const u8,
) !void {
    if (diag.len == 0) return;
    const message = try std.fmt.allocPrint(fail_gpa, "{s}:\n{s}", .{ name, diag });
    errdefer fail_gpa.free(message);
    try failures.append(fail_gpa, message);
}
/// Maximum number of bytes of each side (expected / actual) shown in a diff.
const DIFF_CAP = 2000;

/// Append an "expected vs actual" section for `label` to `diag` (memory from
/// `a`). Each side is limited to `DIFF_CAP` bytes; when a side was cut, an
/// explicit truncation note follows it so the reader does not mistake the
/// shortened text for the complete output.
fn appendDiff(a: std.mem.Allocator, diag: *std.ArrayList(u8), label: []const u8, expected: []const u8, actual: []const u8) !void {
    try diag.appendSlice(a, try std.fmt.allocPrint(a, " --- {s}: expected ---\n{s}{s}\n --- {s}: actual ---\n{s}{s}\n", .{
        label, cap(expected), truncationNote(expected),
        label, cap(actual),   truncationNote(actual),
    }));
}

/// Return `s` limited to its first `DIFF_CAP` bytes (a sub-slice, no copy).
fn cap(s: []const u8) []const u8 {
    return if (s.len > DIFF_CAP) s[0..DIFF_CAP] else s;
}

/// Marker appended after a side that `cap` shortened; empty when `s` fits.
fn truncationNote(s: []const u8) []const u8 {
    return if (s.len > DIFF_CAP)
        std.fmt.comptimePrint("\n ... [truncated at {d} bytes]", .{DIFF_CAP})
    else
        "";
}
/// Print the summary for one corpus sweep and turn any recorded failure into
/// a test failure.
///
/// Writes a `ran / failed` line labelled with `label`, then one `FAIL` line
/// per entry of `failures`. When there are failures and the run is not in
/// update mode, a hint on how to regenerate snapshots is printed as well.
/// Returns an error (via `std.testing.expect`) iff `failures` is non-empty.
fn reportFailures(label: []const u8, ran: usize, failures: []const []const u8) !void {
    const failed = failures.len;
    std.debug.print("[corpus-run] {s}: {d} ran, {d} failed\n", .{ label, ran, failed });
    for (failures) |message| {
        std.debug.print("FAIL {s}\n", .{message});
    }

    const show_hint = failed > 0 and !corpus_paths.update_goldens;
    if (show_hint) {
        std.debug.print(
            \\
            \\ ── snapshot mismatch ──────────────────────────────────────────────
            \\ If the new output is CORRECT (intentional change), regenerate snapshots:
            \\ zig build test -Dupdate-goldens
            \\ git diff examples/expected/ issues/expected/ # review before committing
            \\ Otherwise this is a regression — fix the code, don't update the snapshot.
            \\ ───────────────────────────────────────────────────────────────────
            \\
        , .{});
    }

    try std.testing.expect(failed == 0);
}
// Sweep examples/: every marker under examples/expected/ is run and compared
// against its snapshot. At least one example must actually run, so a
// misconfigured path cannot pass vacuously.
test "examples corpus: every examples/*.sx runs and matches its snapshot" {
    const gpa = std.testing.allocator;
    const io = test_io();
    var failures: std.ArrayList([]const u8) = .empty;
    // Registered BEFORE the sweep: if sweepRoot returns an error after it has
    // already appended diagnostics, those strings are still freed.
    defer {
        for (failures.items) |f| gpa.free(f);
        failures.deinit(gpa);
    }
    const ran = try sweepRoot(gpa, io, corpus_paths.examples_dir, &failures);
    try std.testing.expect(ran > 0);
    try reportFailures("examples", ran, failures.items);
}
// Sweep issues/: every marker under issues/expected/ is run and compared
// against its snapshot. Unlike the examples sweep, zero tests run is accepted.
test "issues corpus: every pinned issues/*.sx repro runs and matches its snapshot" {
    const gpa = std.testing.allocator;
    const io = test_io();
    var failures: std.ArrayList([]const u8) = .empty;
    // Registered BEFORE the sweep: if sweepRoot returns an error after it has
    // already appended diagnostics, those strings are still freed.
    defer {
        for (failures.items) |f| gpa.free(f);
        failures.deinit(gpa);
    }
    const ran = try sweepRoot(gpa, io, corpus_paths.issues_dir, &failures);
    try reportFailures("issues", ran, failures.items);
}