Files
sx/tests/run_examples.sh
agra ab3c9202ff test: run example corpus in zig build test; sx ir → stdout
`zig build test` now runs the full examples/ + issues/ regression corpus
alongside the Zig unit tests, driven by a pure-Zig test
(src/corpus_run.test.zig) — no shell script in the build path. It spawns
the installed `sx` per example (subprocess-isolated, per-run timeout),
diffs stdout/stderr/exit and optional `sx ir` snapshots, and fails the
build on any mismatch. The file list is enumerated at runtime, so new
examples are covered with no test edit.

- `sx ir` / `ir-dump` now write to stdout (fd 1) instead of stderr, so
  the dumps can be piped/redirected.
- `zig build test -Dupdate-goldens` regenerates snapshots in-build,
  byte-identical to the legacy `run_examples.sh --update`; on mismatch
  the runner prints how to regenerate.
- run_examples.sh kept (still used by tools/verify-step.sh) and made
  portable to a bare macOS: timeout/gtimeout fallback, bash 3.2-safe
  empty-array handling.
- CLAUDE.md: document the new workflow.
2026-06-13 09:41:56 +03:00

159 lines
5.4 KiB
Bash
Executable File

#!/bin/bash
# Example/issue regression test runner.
# Usage: ./tests/run_examples.sh [--update]
# --update: regenerate expected output (.exit/.stdout/.stderr, and .ir where present)
#
# Layout (per CLAUDE.md): expected output lives in an `expected/` dir that
# sits NEXT TO the test file, with three streams split out:
# <root>/<name>.sx
# <root>/expected/<name>.exit # process exit code
# <root>/expected/<name>.stdout # normalized stdout
# <root>/expected/<name>.stderr # normalized stderr
# <root>/expected/<name>.ir # optional `sx ir` snapshot
# A test is any <name>.sx that has an <root>/expected/<name>.exit marker.
# Roots scanned: examples/ and issues/.
# Shell options: unset variables are errors and a pipeline reports the failure
# of any stage. `-e` is deliberately not enabled here.
set -uo pipefail

# Locations, all derived from where this script lives: the repo root is the
# script directory's parent, and the binary under test sits in zig-out/bin.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
ROOTS=("$ROOT_DIR/examples" "$ROOT_DIR/issues")
SX="$ROOT_DIR/zig-out/bin/sx"

# Result tallies, reported in the final summary line.
PASS=0 FAIL=0 SKIP=0 TIMEOUT_COUNT=0

# Per-test time limit in seconds (only enforced when a timeout binary exists).
TIMEOUT=10

# Mode switch: 0 = compare against snapshots, 1 = regenerate them. Only the
# first argument is inspected, and only the exact flag `--update` enables it.
UPDATE=0
case "${1:-}" in
  --update) UPDATE=1 ;;
esac
# Per-test wall-clock guard. TIMEOUT_CMD becomes the command prefix
# `<timeout-binary> $TIMEOUT`, using the first of GNU `timeout` or `gtimeout`
# (the Homebrew coreutils name) found on PATH. A bare macOS ships neither; in
# that case TIMEOUT_CMD stays empty and tests run unguarded — a hung test then
# blocks the suite, but the suite itself still works.
TIMEOUT_CMD=()
for timeout_bin in timeout gtimeout; do
  if command -v "$timeout_bin" >/dev/null 2>&1; then
    TIMEOUT_CMD=("$timeout_bin" "$TIMEOUT")
    break
  fi
done
unset timeout_bin
# run_sx CMD [ARG...]
# Execute CMD with its arguments, prefixed by the TIMEOUT_CMD wrapper when one
# was detected, and return whatever status that invocation produces.
# The array is probed by length rather than expanded, because bash 3.2 under
# `set -u` reports "unbound variable" when an empty array is expanded.
run_sx() {
  if (( ${#TIMEOUT_CMD[@]} == 0 )); then
    # No wrapper available: run the command as-is.
    "$@"
    return
  fi
  "${TIMEOUT_CMD[@]}" "$@"
}
# normalize: stdin -> stdout filter that scrubs location/host noise from a
# captured stdout or stderr stream before it is diffed against a snapshot.
# The same filter is run over both the expected and the actual text, so it can
# only make two streams agree on noise, never make matching streams differ.
#   * `0x` followed by four or more lowercase hex digits becomes `0xADDR`.
#   * Any absolute prefix ending in `/examples/` or `/issues/` is collapsed to
#     the repo-relative `examples/` / `issues/`. Diagnostics embed the source
#     path, so this keeps snapshots independent of where the checkout lives
#     (canonical tree vs. a git worktree).
normalize() {
  local -a rules=(
    -e 's/0x[0-9a-f]{4,}/0xADDR/g'
    -e 's#(/[^[:space:]]*)?/(examples|issues)/#\2/#g'
  )
  sed -E "${rules[@]}"
}
# normalize_ir: stdin -> stdout filter applied to `sx ir` output before it is
# diffed against an .ir snapshot.
#   * Drops the lines that start with `; ModuleID =`, `source_filename =`,
#     `target datalayout =`, `target triple =`, or `attributes #<n> = {`
#     (host-specific noise).
#   * Rewrites `%<lowercase letters><digits>` to `%<letters>N`, so LLVM's
#     auto-suffixed temporaries compare equal regardless of their number.
normalize_ir() {
  local -a ir_rules=(
    -e '/^; ModuleID =/d'
    -e '/^source_filename =/d'
    -e '/^target datalayout =/d'
    -e '/^target triple =/d'
    -e '/^attributes #[0-9]+ = \{/d'
    -e 's/%([a-z]+)[0-9]+/%\1N/g'
  )
  sed -E "${ir_rules[@]}"
}
# ---------------------------------------------------------------------------
# Main loop. For every <root>/expected/<name>.exit marker whose <root>/<name>.sx
# exists: run `sx run` on it, capture normalized stdout/stderr and the exit
# code, optionally capture a normalized `sx ir` dump (only when an .ir snapshot
# already exists), then either rewrite the snapshots (--update) or compare
# against them and print per-stream diffs on mismatch.
# ---------------------------------------------------------------------------

# Scratch file receiving each test's raw stderr; removed on any exit path.
TMP_ERR="$(mktemp)" || {
  echo "error: mktemp failed; cannot capture test stderr" >&2
  exit 1
}
trap 'rm -f "$TMP_ERR"' EXIT

# Refuse to run without the binary under test. Otherwise every test would
# "fail" with the shell's own not-found error, and --update would overwrite
# every snapshot with that error and its exit code.
if [[ ! -x "$SX" ]]; then
  echo "error: sx binary not found or not executable: $SX" >&2
  exit 1
fi

# timed_out STATUS
# True when STATUS is 124 AND a timeout wrapper is actually in use. Without a
# wrapper, 124 can only be the program's own exit code, not a timeout.
timed_out() {
  [[ ${#TIMEOUT_CMD[@]} -gt 0 && "$1" -eq 124 ]]
}

# show_diff HEADING EXPECTED ACTUAL
# Print HEADING followed by a diff of the two texts. diff's non-zero status on
# differences is expected here and deliberately ignored.
show_diff() {
  echo "$1"
  diff <(printf '%s\n' "$2") <(printf '%s\n' "$3") || true
}

for root in "${ROOTS[@]}"; do
  expected_dir="$root/expected"
  [[ -d "$expected_dir" ]] || continue

  for exit_file in "$expected_dir"/*.exit; do
    # An unmatched glob is left as the literal pattern; skip that.
    [[ -e "$exit_file" ]] || continue

    name=$(basename "$exit_file" .exit)
    sx_file="$root/${name}.sx"
    out_file="$expected_dir/${name}.stdout"
    err_file="$expected_dir/${name}.stderr"
    ir_file="$expected_dir/${name}.ir"

    # A marker without its source file is counted as skipped, not failed.
    if [[ ! -f "$sx_file" ]]; then
      SKIP=$((SKIP + 1))
      continue
    fi

    printf " %-48s" "$name"

    # The pipeline runs inside the command substitution, so the status read
    # on the next line is the substitution's. `set -o pipefail` (enabled at
    # the top of this script) is what lets a non-zero exit from sx show
    # through the trailing sed.
    actual_out=$(run_sx "$SX" run "$sx_file" 2>"$TMP_ERR" | normalize)
    actual_exit=${PIPESTATUS[0]}
    actual_err=$(normalize < "$TMP_ERR")

    if timed_out "$actual_exit"; then
      TIMEOUT_COUNT=$((TIMEOUT_COUNT + 1))
      echo "TIMEOUT (>${TIMEOUT}s)"
      continue
    fi

    # IR is only captured for tests that already have an .ir snapshot. It
    # runs under the same wall-clock guard as `sx run`, so a hung IR dump is
    # reported as a timeout instead of stalling the suite.
    has_ir=false
    actual_ir=""
    if [[ -f "$ir_file" ]]; then
      has_ir=true
      actual_ir=$(run_sx "$SX" ir "$sx_file" 2>&1 | normalize_ir)
      ir_exit=${PIPESTATUS[0]}
      if timed_out "$ir_exit"; then
        TIMEOUT_COUNT=$((TIMEOUT_COUNT + 1))
        echo "TIMEOUT (>${TIMEOUT}s)"
        continue
      fi
    fi

    if [[ $UPDATE -eq 1 ]]; then
      # printf rather than echo: a payload such as "-n" or "-e" must be
      # written verbatim, not interpreted as an echo option.
      printf '%s\n' "$actual_out" > "$out_file"
      printf '%s\n' "$actual_err" > "$err_file"
      printf '%s\n' "$actual_exit" > "$exit_file"
      if $has_ir; then
        printf '%s\n' "$actual_ir" > "$ir_file"
      fi
      echo "updated (exit=$actual_exit)"
      continue
    fi

    # A missing .stdout/.stderr snapshot is treated as empty. Test for the
    # file explicitly: a trailing `2>/dev/null` cannot silence a failed `<`
    # redirection, because redirections are processed left to right.
    expected_out=""
    if [[ -f "$out_file" ]]; then
      expected_out=$(normalize < "$out_file")
    fi
    expected_err=""
    if [[ -f "$err_file" ]]; then
      expected_err=$(normalize < "$err_file")
    fi
    expected_exit=$(cat "$exit_file")
    expected_ir=""
    if $has_ir; then
      expected_ir=$(normalize_ir < "$ir_file")
    fi

    out_ok=true
    err_ok=true
    exit_ok=true
    ir_ok=true
    [[ "$actual_out" == "$expected_out" ]] || out_ok=false
    [[ "$actual_err" == "$expected_err" ]] || err_ok=false
    [[ "$actual_exit" == "$expected_exit" ]] || exit_ok=false
    if $has_ir && [[ "$actual_ir" != "$expected_ir" ]]; then
      ir_ok=false
    fi

    if $out_ok && $err_ok && $exit_ok && $ir_ok; then
      PASS=$((PASS + 1))
      echo "ok"
    else
      FAIL=$((FAIL + 1))
      echo "FAIL"
      $out_ok || show_diff " --- stdout diff ---" "$expected_out" "$actual_out"
      $err_ok || show_diff " --- stderr diff ---" "$expected_err" "$actual_err"
      $exit_ok || echo " exit code: expected=$expected_exit actual=$actual_exit"
      $ir_ok || show_diff " --- IR diff ---" "$expected_ir" "$actual_ir"
    fi
  done
done
# Final report and exit status.
# --update mode: succeed only if every test was actually regenerated. Tests
# that timed out are skipped by the loop above, so claiming "Updated all" with
# exit 0 would hide stale snapshots; report them on stderr and fail instead.
if [[ $UPDATE -eq 1 ]]; then
  if [[ $TIMEOUT_COUNT -gt 0 ]]; then
    echo "Updated expected output files, but $TIMEOUT_COUNT test(s) timed out and were left unchanged." >&2
    exit 1
  fi
  echo "Updated all expected output files."
  exit 0
fi

# Compare mode: print the tallies; the script's exit status is success only
# when nothing failed and nothing timed out (skips do not fail the run).
echo "$PASS passed, $FAIL failed, $SKIP skipped, $TIMEOUT_COUNT timed out"
[[ $FAIL -eq 0 && $TIMEOUT_COUNT -eq 0 ]]