diff --git a/current/PLAN-DIST.md b/current/PLAN-DIST.md new file mode 100644 index 0000000..f44459e --- /dev/null +++ b/current/PLAN-DIST.md @@ -0,0 +1,124 @@ +# PLAN-DIST — bundle `zig` as sx's hermetic link/libc backend + +## Goal + +`sx build` produces a native binary by driving a **bundled `zig`** +(`zig cc`) as the linker, so a distributed sx on Linux needs no system +`cc`/lld/libc/CRT. `sx run` (JIT) is unaffected — it never links. + +This is the "be like Zig" move: reuse Zig's hermetic toolchain (lld + +crt objects + musl/glibc, all bundled in the `zig` distribution) instead +of building our own lld-in-process + libc-from-source pipeline. + +> **Configuration surface** (env vars, flags, resolution order, +> activation truth table, target→ABI map, distribution layout) is +> specified in [../design/bundled-zig-link-backend-design.md](../design/bundled-zig-link-backend-design.md) — the design-of-record +> for how the backend is configured. Keep the two files in sync. + +## Locked decisions + +1. **Default Linux output ABI = static musl** (`x86_64-linux-musl`, + `-static`). Output runs on ANY Linux with zero deps — the property + that makes Zig binaries portable. glibc/dynamic only via explicit + `--target x86_64-linux-gnu`. +2. **Activation = auto** when a bundled/resolvable `zig` exists AND the + user passed no `--linker`. Falls back to system `cc` otherwise. +3. **Dev uses PATH `zig`** (0.16.0 already installed). Defer copying a + vendored toolchain into `libexec/` until Phase 3 packaging. + +## Why `zig cc`, not raw `ld.lld` + +`zig cc` is a clang-compatible driver, so it slots into the **existing** +cc-style argv branch in `src/target.zig` almost unchanged, and supplies +lld + crt objects + musl/glibc automatically per `-target`. Driving +`ld.lld` directly would force us to locate/pass crt1.o/crti.o/libc +ourselves — exactly the work we're avoiding. 
+ +## Key code anchors (verified) + +- Linker selection hook: `TargetConfig.getLinker()` — `src/target.zig:194-196` + (`self.linker orelse "cc"`). +- Unix `cc`-style link branch: `src/target.zig:524-564` (this is where + the zig backend hooks in; `-o`/`-L`/`-l`/extra objects already pass + through clang-compatibly). +- Exe-relative resolution pattern to mirror for finding zig: + `src/imports.zig:204-227` (`discoverStdlibPaths`, `$SX_STDLIB_PATH` + override + `<exe_dir>/..` candidates). +- `--linker` CLI flag parsing: `src/main.zig:87-90`. +- Emit triple (must agree with link target): `src/ir/emit_llvm.zig` + (`LLVMSetTarget`, ~L246-284). + +## Phases + +### Phase 0 — Resolve a bundled/host zig +- New `src/zig_backend.zig`: `discoverZig(alloc) -> ?[]const u8`. + Resolution order: + 1. `$SX_ZIG` env override. + 2. `<exe_dir>/../libexec/zig/zig` (install layout, Phase 3). + 3. `<exe_dir>/../../zig-bundle/zig` (dev vendored layout, Phase 3). + 4. `zig` on `PATH` (dev fallback — active now). +- Add `SX_DEBUG_ZIG` trace, matching existing `SX_DEBUG_*` hooks. +- No behavior change yet; just resolution + a debug/print hook to confirm. + +### Phase 1 — `zig cc` link backend (core change) +- `src/target.zig`: generalize the linker from a single token to a + **driver argv**. Today `getLinker()` returns one string at `argv[0]`; + introduce a `LinkBackend` so the internal backend contributes + `{zigPath, "cc"}` as leading entries. +- In the Unix branch (L524-564), when backend = zig: + - prepend `zig cc`, + - append `-target <triple>`, + - add `-static` for musl, + - everything else (`-o`, `-L`, `-l`, extra objects, extra link flags) + passes through unchanged. +- Add `sxTripleToZig()` mapping (sx shorthand/triple → zig `-target`); + unspecified-on-Linux → `x86_64-linux-musl`. +- Align emit triple: when the zig backend is selected, set the LLVM + module triple in `emit_llvm.zig` to match the link target + (x86_64-linux), so the `.o` links cleanly against musl crt. 
+ +### Phase 2 — Activation +- Auto-enable: if `discoverZig()` succeeds and no `--linker` override, + use the zig backend for `sx build`. System `cc` remains the fallback. +- Optional explicit `--self-contained` / `--no-self-contained` to force. +- Confirm `sx run`/JIT path is untouched (no link step). + +### Phase 3 — Distribution packaging +- `build.zig`: a `dist` step assembling + - `bin/sx` (built with `-Dstatic-llvm`), + - `libexec/zig/` (vendored zig binary **and its `lib/`**, copied from a + pinned ziglang.org release per host arch), + - `library/` (stdlib), + into a relocatable tarball. +- Pin the zig version (currently 0.16.0). + +### Phase 4 — Verify & lock +- Manual first: `sx build hello.sx` (auto zig backend) then `file`/`ldd` + the output → expect "statically linked". +- Honor snapshot-integrity + FFI-cadence rules before adding a corpus + test (host/arch-gated, likely a `.build` sidecar). + +## Risks / watch + +- **Bundle size**: zig + its `lib/` ≈ 50–60 MB. +- **gnu vs musl ABI**: pure codegen objects link fine against musl; + TLS/stack-protector are the only realistic friction. Aligning the emit + triple (Phase 1) covers the common path. +- **macOS/Windows cross** via the same `zig cc -target` is nearly free + after Phase 1, but Apple-SDK linking has caveats — scope to Linux + target first; treat the rest as follow-up. +- **c_import.zig** also shells `cc` for C imports (JIT). Out of scope + here; same backend can absorb it later. + +## Status + +- [x] Phase 0 — resolve zig (`src/zig_backend.zig`) +- [x] Phase 1 — zig cc link backend (`target.zig` + `emit_llvm` triple normalize) +- [x] Phase 2 — activation (`--self-contained`/`--no-self-contained`; auto on bundled zig) +- [ ] Phase 3 — dist packaging (vendor `zig` into `libexec/`) +- [ ] Phase 4 — verify & lock (manual ✓ macOS/Linux/Windows; corpus test pending runner `--self-contained` support) + +Scope landed as **macOS + Linux + Windows** (not Linux-first). 
See the +"Implementation status" section in +[../design/bundled-zig-link-backend-design.md](../design/bundled-zig-link-backend-design.md) +for what refined the original locked decisions. diff --git a/design/bundled-zig-link-backend-design.md b/design/bundled-zig-link-backend-design.md new file mode 100644 index 0000000..1c652e9 --- /dev/null +++ b/design/bundled-zig-link-backend-design.md @@ -0,0 +1,384 @@ +# Bundled `zig` Link Backend for sx — Design Doc & Proposal + +> Status: **core landed (macOS / Linux / Windows).** This is the +> design-of-record for how a distributed sx links native binaries +> hermetically. The phased plan lives in +> [../current/PLAN-DIST.md](../current/PLAN-DIST.md); keep the two in sync. +> User-facing surface is documented in `readme.md` (Cross-Compilation §). + +--- + +## Implementation status (landed) + +The core backend is implemented and verified on a macOS host: + +| Target | Result | Notes | +|--------|--------|-------| +| `--target linux-musl` | static ELF | `zig cc -target x86_64-linux-musl -static` | +| `--target windows-gnu` | PE32+ | `zig cc -target x86_64-windows-gnu` | +| `--target macos` | Mach-O (runs) | `zig cc -target <arch>-macos`, no `-static` | + +What shipped, and where it **refined** the original locked decisions: + +- **Scope = macOS + Linux + Windows** (not Linux-first). iOS/Android/wasm keep + their specialized toolchains. (`TargetConfig.zigBackendInScope`.) +- **Auto-activation = a *bundled* zig is found** (a real distribution, or a + pinned `$SX_ZIG`). A `PATH`-only zig is the dev fallback and engages **only** + under `--self-contained` — so native dev/CI builds are never silently + rerouted, across all three OSes. This is the precise meaning of the §5.5 + "zig found (B)" column: **B = bundled**. 
*(Refinement of "auto when zig + found": PATH-zig does not auto-engage; the musl-only auto gating considered + mid-design was dropped in favor of bundled-vs-PATH, which is OS-agnostic.)* +- **No translation table** (per the triple-scheme decision): sx triples are + passed straight to `zig cc`, and `emit_llvm` runs them through + `LLVMNormalizeTargetTriple` so vendor-less zig triples (e.g. + `x86_64-windows-gnu`) land their OS/env in LLVM's canonical positions — + otherwise "windows" sits in the vendor slot and the object silently falls + back to ELF. The one unavoidable exception is **macOS**: the object must be + emitted from Apple's `apple-darwin` triple (LLVM needs it for Mach-O), but + zig's `-target` parser rejects that scheme, so the *linker* triple alone is + the vendor-less `<arch>-macos`. One OS-specific line, not a table. +- **New shorthands:** `linux-musl`, `linux-musl-arm`, `windows-gnu` (zig + scheme). The existing `linux`/`linux-arm` shorthands were also de-vendored + (`x86_64-linux-gnu`, matching the corpus runner's own expander). + +Files: `src/zig_backend.zig` (discovery), `src/target.zig` +(`selectZigLinker` / `emitZigLinkArgv` / `zigTargetTriple` / dispatch in +`link`), `src/ir/emit_llvm.zig` (triple normalization), `src/main.zig` +(`--self-contained` / `--no-self-contained` + shorthands). + +Not yet done: distribution packaging (Phase 3 — vendoring `zig` into +`libexec/`), and a corpus regression test (needs the runner to thread +`--self-contained`; manual verification only so far). + +The sections below are the original proposal; where they say "Linux-first" or +"follow-up" for macOS/Windows, the table above supersedes them. + +--- + +## 0. TL;DR + feasibility + +**Problem.** A distributed `sx` compiler can run on a Linux box (static-LLVM +binary + relocatable `library/`), but it cannot *finish a build*: the final +link step shells out to the host's `cc`, and relies on the host's libc + CRT +objects. No `cc`/glibc/SDK on the box → no binary. 
That is the gap between +"sx runs here" and "sx is a toolchain here." + +**Proposal.** Bundle a pinned `zig` binary inside the sx distribution and use +`zig cc` as the link backend for `sx build`. `zig cc` brings its own lld, +CRT objects, and libc (musl or glibc) for the chosen target. Default Linux +output is **statically-linked musl**, which runs on any Linux with zero +dependencies — the property that makes Zig's own output portable. + +**Feasibility: high.** The change is contained: +- The linker is selected through a single hook — + `TargetConfig.getLinker()` at `src/target.zig:194-196` — and the final + link argv is built in one place, the Unix `cc`-style branch at + `src/target.zig:524-564`. +- `zig cc` is a clang-compatible driver, so `-o` / `-L` / `-l` / extra + objects pass through that branch unchanged. The backend only has to + prepend `zig cc` and add `-target …` / `-static`. +- Exe-relative resolution (for finding the bundled zig) is already solved + for the stdlib in `src/imports.zig:204-227` and can be mirrored. +- `sx run` is JIT and never links, so it is wholly unaffected. + +The cost is a ~50–60 MB vendored `zig` (binary + its `lib/`) in the +distribution, and version-pinning discipline. + +--- + +## 1. Motivation & background + +### 1.1 Current state + +| Concern | Today | File | +|---------|-------|------| +| Compiler binary | Self-containable via `-Dstatic-llvm` (no system LLVM) | `build.zig:9-10,156-162` | +| Stdlib | Relocatable, found relative to the exe | `src/imports.zig:204-227` | +| **Linking** | **Shells to system `cc`** | `src/target.zig:524-564` | +| **libc / CRT** | **Provided by the host `cc` driver implicitly** | (no `-lc`/crt passed) | + +So two of three legs of a portable toolchain already stand. The third — the +linker and the libc/CRT it pulls in — is the host dependency this design +removes. 
+ +### 1.2 Why this matters for distribution + +The goal is to hand someone a tarball and have `sx build app.sx` produce a +working binary on a stock Linux machine — a fresh container, a minimal CI +image, a box without `build-essential`. Today that fails at the link step. +Zig solved exactly this problem for its own users; since sx is *built with* +Zig, the cleanest fix is to stand on Zig's hermetic toolchain rather than +re-implement it. + +--- + +## 2. Goals & non-goals + +### Goals +- `sx build` produces a native Linux binary with **no host `cc`/ld/libc/SDK**. +- Default Linux output is **portable** (static musl): runs on any Linux. +- **Zero-config in the common case**: a bundled or PATH `zig` is detected and + used automatically; the operator sets nothing. +- A fully-specified, documented configuration surface (this document) for the + cases that *do* need tuning. +- No regression for existing users: system `cc` remains a fallback, and any + explicit `--linker` still wins. + +### Non-goals (this iteration) +- Reimplementing lld in-process or building libc from source (see §7 — + Zig already does both; we reuse it). +- First-class Windows/macOS cross-compilation (nearly free as a follow-up, + but unverified — §11). +- Routing C-import compilation (`src/c_import.zig`, which also shells `cc`) + through the backend. +- Glibc-floor version pinning (`…-gnu.2.28`); exposed only if needed. + +--- + +## 3. How Zig achieves hermetic builds (the model we're borrowing) + +Zig's turnkey cross-compilation rests on bundling the two things sx borrows +from the host: + +1. **In-process lld.** Zig embeds LLVM's lld (ELF/COFF/Mach-O/wasm) and links + without spawning an external linker. +2. **libc as data.** Zig ships musl *source* (builds `libc.a` + `crt*.o` on + demand, cached → static, no dynamic linker → portable output) and glibc + stubs generated from `.abilist` per version. For Windows it ships mingw + `.def` files and synthesizes import libraries. 
+ +`zig cc` exposes all of this behind a clang-compatible driver: `zig cc +-target x86_64-linux-musl -static foo.o -o foo` yields a portable binary on +any host, with nothing installed. **This design consumes that driver rather +than rebuilding its internals** — the whole second column above arrives for +free by vendoring the `zig` binary. + +--- + +## 4. Design overview + +`sx build` gains a **link backend** abstraction with two implementations: + +- `system_cc` — today's behavior (shell `cc`, host libc). +- `bundled_zig` — shell `<zig> cc -target <triple> [-static] …`. + +Selection is automatic (§5.5): if a usable `zig` is discovered and the user +gave no explicit `--linker`, `bundled_zig` is used; otherwise `system_cc`. +The backend plugs into the existing Unix link branch — it contributes the +leading `zig cc` tokens and the `-target`/`-static` flags; the rest of the +argv assembly is unchanged because `zig cc` is clang-compatible. + +One supporting change: when `bundled_zig` is active, the triple handed to +LLVM in `src/ir/emit_llvm.zig` is aligned to the link target (`x86_64-linux`) +so the emitted object links cleanly against the selected musl CRT. + +--- + +## 5. Detailed design (the configuration surface) + +### 5.1 zig discovery — resolution order + +`discoverZig()` (new `src/zig_backend.zig`) returns the first hit: + +1. `$SX_ZIG` — explicit override. +2. `<exe_dir>/../libexec/zig/zig` — **install layout** (§6). +3. `<exe_dir>/../../zig-bundle/zig` — **dev vendored layout** (§6). +4. `zig` on `PATH` — **dev fallback** (the only one active today). + +`<exe_dir>` is resolved exactly as `src/imports.zig` resolves the stdlib. +If none resolve, behavior depends on activation (§5.5): auto-mode silently +falls back to `system_cc`; `--self-contained` errors. + +### 5.2 Environment variables + +| Var | Effect | Default | +|-----|--------|---------| +| `SX_ZIG` | Absolute path to the `zig` used as the link backend. Highest-priority discovery source. 
| unset | +| `ZIG_LIB_DIR` | Path to the bundled zig's `lib/`. Needed **only** if `zig` was relocated away from its `lib/`. In the supported layout (§6) they ship together and zig self-locates — leave unset. | unset | +| `SX_DEBUG_ZIG` | Trace discovery: each candidate path and the chosen one (or "none → cc"). Mirrors `SX_DEBUG_STDLIB`. | unset | +| `SX_DEBUG_LINK` | **Existing.** Prints the full link argv — shows the exact `zig cc …` invocation. | unset | +| `SX_STDLIB_PATH` | **Existing.** Stdlib override; unrelated to linking but noted because a full distribution sets neither and relies on exe-relative discovery for both. | unset | + +### 5.3 CLI flags (`sx build`) + +| Flag | Effect | +|------|--------| +| `--self-contained` | Force `bundled_zig` ON. If no usable zig is found, **error** — do not silently fall back. | +| `--no-self-contained` | Force `system_cc`. | +| `--linker <path>` | **Existing.** Explicit linker; supplying it **disables** auto-activation (user's choice wins). To pin a specific zig, prefer `SX_ZIG` + `--self-contained`. | +| `--target <name>` | **Existing.** Selects target + ABI (§5.4). With `bundled_zig` active and target unspecified on a Linux host → `x86_64-linux-musl` static. | +| `--sysroot <path>` | **Existing.** Forwarded to the linker; rarely needed with `bundled_zig` (zig brings its own sysroot). | + +### 5.4 Target → ABI mapping + +The default (no `--target`) deliberately differs from the legacy `linux` +shorthand, because portable static output is the entire point. + +| `sx` invocation | zig `-target` | Link mode | Portable? 
| +|-----------------|---------------|-----------|-----------| +| *(no `--target`, Linux host)* | `x86_64-linux-musl` | `-static` | ✅ any Linux | +| `--target linux-musl` *(new)* | `x86_64-linux-musl` | `-static` | ✅ | +| `--target linux` / `linux-x86` | `x86_64-linux-gnu` | dynamic | ❌ host glibc, versioned | +| `--target linux-arm` | `aarch64-linux-musl` | `-static` | ✅ | +| `--target windows` | `x86_64-windows-gnu` | per zig | follow-up (§11) | +| `--target macos` / `macos-arm` | `aarch64-macos` | per zig | follow-up (§11) | + +- A **new** `linux-musl` shorthand is added; the existing `linux` shorthand + keeps its current gnu/dynamic meaning for back-compat. +- The LLVM emit triple is aligned to the link target so the `.o` links + cleanly against the selected libc/CRT (§4). + +### 5.5 Activation truth table + +`B` = a usable zig was discovered (§5.1). Subcommand = `sx build`. + +| `--self-contained` | `--no-self-contained` | `--linker` | zig found (B) | Result | +|:---:|:---:|:---:|:---:|--------| +| — | — | no | yes | **bundled_zig** (auto) | +| — | — | no | no | system `cc` (silent fallback) | +| — | — | yes | * | user's `--linker` | +| yes | — | * | yes | **bundled_zig** (forced) | +| yes | — | * | no | **error**: `--self-contained` but no zig | +| — | yes | * | * | system `cc` (forced off) | + +- `--self-contained` + `--linker` together: backend choice goes to + `--self-contained`; treat the literal combination as a usage error + (document, don't guess). +- `sx run` / `sx ir` / `sx asm` never link → backend not consulted. + +### 5.6 Emit-triple alignment + +`src/ir/emit_llvm.zig` (`LLVMSetTarget`, ~L246-284) currently uses the host +default triple when `--target` is unspecified (on Linux, +`x86_64-unknown-linux-gnu`). When `bundled_zig` is active, set the module +triple to match the link target (`x86_64-linux`) so codegen and the musl CRT +agree. 
Pure codegen objects are ABI-compatible across gnu/musl; aligning the +triple removes the edge-case risk (TLS model, stack protector) up front. + +--- + +## 6. Distribution layout (packaging) + +A relocatable tree; everything resolves relative to `bin/sx`, so the whole +directory moves/untars anywhere with no env vars set: + +``` +sx-<version>-<host>/ +├── bin/ +│ └── sx # built -Dstatic-llvm (no system LLVM dep) +├── libexec/ +│ └── zig/ +│ ├── zig # pinned zig binary +│ └── lib/ # zig's lib/ (musl/glibc sources, lld data, …) +└── library/ # sx stdlib (existing discovery) + └── modules/… +``` + +Rules: +- `zig` and its `lib/` **must** ship together under `libexec/zig/` so zig + self-locates `lib/`; splitting them forces `ZIG_LIB_DIR`. +- Pinned zig version: **0.16.0** (matches the build toolchain). Record the + exact version in the release manifest — a mismatched `zig cc` CLI is the + likeliest future breakage. +- Vendor the matching zig release per host os/arch from ziglang.org at + package time. + +--- + +## 7. Alternatives considered + +| Alternative | Why not (now) | +|-------------|---------------| +| **In-process lld + bundled musl sysroot** (sx owns the pipeline; no zig) | Requires a custom LLVM build *with* lld — the Homebrew `llvm@19` here ships none (`liblld*.a`, headers, `ld.lld` all absent) — plus a C++ lld shim and per-arch prebuilt musl. Strictly more work for the same user-visible result. The right *eventual* target if we want zero foreign binaries; tracked as a follow-up. | +| **Full Zig-style: build libc from source on demand** | Most flexible (any arch/libc version, no prebuilt blobs) but the most work; only worth it after the in-process-lld path exists. | +| **Document a hard dependency on system `cc`** | Zero engineering, but defeats the goal — the box still needs `build-essential`. Acceptable only as the current fallback, not the distribution story. 
| +| **Bundle just `ld.lld` + a musl sysroot (no full zig)** | Smaller than a whole zig, but we'd hand-manage crt object selection, dynamic-linker paths, and import libs — i.e. re-derive what `zig cc` already encapsulates. Bundle-size saving doesn't justify the fragility. | + +Vendoring `zig` wins on effort-to-result because sx already builds with Zig: +it's a first-party dependency, not a foreign toolchain, and it unlocks +Windows/macOS targets later for nearly free. + +--- + +## 8. Phasing + +Detail in [../current/PLAN-DIST.md](../current/PLAN-DIST.md). Summary: + +0. **Resolve zig** — `discoverZig()` + `SX_DEBUG_ZIG`; PATH fallback only. +1. **Link backend** — generalize the linker to a driver argv; emit + `zig cc -target … -static`; align the emit triple. +2. **Auto activation** — wire the §5.5 truth table; `cc` fallback intact. +3. **Packaging** — `build.zig` `dist` step assembling the §6 tree. +4. **Verify & lock** — `file`/`ldd` shows "statically linked"; host/arch-gated + corpus test honoring the snapshot-integrity + FFI-cadence rules. + +The minimum end-to-end proof is Phases 0+1 against PATH zig. + +--- + +## 9. Open decisions + +**Locked:** +- Default Linux ABI = **static musl** (portable output). +- Activation = **auto** when a usable zig is found and no `--linker`. +- Dev uses **PATH zig**; vendoring deferred to Phase 3. + +**Still open:** +- Exact spelling of the force flags (`--self-contained` vs e.g. + `--bundled-linker`); name chosen here pending review. +- Whether auto-mode should *warn* on silent `cc` fallback or stay quiet + (leaning quiet, with `SX_DEBUG_ZIG` for diagnosis). +- Whether to gate the Phase-4 corpus test behind a `.build` `target` + sidecar or keep it manual until a Linux CI runner exists. + +--- + +## 10. Risks + +- **Bundle size** ≈ 50–60 MB (zig + `lib/`). Acceptable for a toolchain; + call it out in release notes. +- **zig CLI drift** across versions — pin hard, record in the manifest; + the most likely future breakage. 
+- **gnu vs musl ABI** for the emitted object — covered by the emit-triple + alignment (§5.6); TLS/stack-protector are the only realistic friction. +- **Operator confusion**: default-no-target (musl) diverging from the + `linux` shorthand (gnu). Mitigated by the new `linux-musl` shorthand and + explicit documentation (§5.4). + +--- + +## 11. Out of scope / follow-ups + +- **Windows / macOS targets** via the same `zig cc -target`: nearly free + after the Linux path, but Apple-SDK and Windows specifics need their own + verification — not documented as supported until tested. +- **`src/c_import.zig`** still shells system `cc` for C imports in JIT mode; + route through the backend later. +- **In-process lld** (alternative in §7) as the eventual zero-foreign-binary + endgame. + +--- + +## Appendix — quick recipes (once implemented) + +```sh +# Portable static Linux binary (default when a bundled zig is present): +sx build app.sx -o app +file app # → "ELF 64-bit … statically linked" + +# Force the backend; fail loudly if no zig is bundled: +sx build app.sx --self-contained + +# Use a specific zig: +SX_ZIG=/opt/zig-0.16.0/zig sx build app.sx --self-contained + +# Opt out, use the system toolchain: +sx build app.sx --no-self-contained + +# Dynamic glibc instead of static musl: +sx build app.sx --target linux + +# Debug discovery + the exact link invocation: +SX_DEBUG_ZIG=1 SX_DEBUG_LINK=1 sx build app.sx +``` diff --git a/examples/1660-platform-windows-win32-print.sx b/examples/1660-platform-windows-win32-print.sx new file mode 100644 index 0000000..ad01b95 --- /dev/null +++ b/examples/1660-platform-windows-win32-print.sx @@ -0,0 +1,29 @@ +// Windows x86_64 — print "42" and exit(0) through the Win32 system-call +// boundary. 
The Windows analog of the Linux raw-`syscall` write (see +// 1651): Windows has no stable raw syscall ABI (NtWriteFile's ordinal +// shifts between OS builds), so the documented boundary IS kernel32 — +// `GetStdHandle` + `WriteFile` to print, `ExitProcess` to terminate. +// +// Exercises the bundled-`zig` link backend end to end: built with +// `--target windows-gnu --self-contained`, zig cc (mingw) auto-resolves +// kernel32, producing a PE32+ that prints "42\n" and exits 0. +// +// Pinned `x86_64-windows-gnu` via `.build`: ir-only on this non-Windows +// host (the `.ir` snapshot locks the Win64-ABI lowering of the three +// extern calls); runs end-to-end on a Windows x86_64 runner. + +kernel32 :: #library "kernel32"; + +// DWORD = u32, HANDLE/LPVOID = *void, BOOL = i32. +GetStdHandle :: (n_std_handle: u32) -> *void extern; +WriteFile :: (file: *void, buf: *u8, n: u32, written: *u32, overlapped: *void) -> i32 extern; +ExitProcess :: (code: u32) -> void extern; + +main :: () { + // STD_OUTPUT_HANDLE = (DWORD)-11 = 0xFFFFFFF5. 
+ out := GetStdHandle(0xFFFFFFF5); + msg : [3]u8 = .[52, 50, 10]; // "42\n" + written : u32 = 0; + WriteFile(out, @msg[0], 3, @written, null); + ExitProcess(0); +} diff --git a/examples/expected/1660-platform-windows-win32-print.build b/examples/expected/1660-platform-windows-win32-print.build new file mode 100644 index 0000000..196aeb9 --- /dev/null +++ b/examples/expected/1660-platform-windows-win32-print.build @@ -0,0 +1 @@ +{ "target": "x86_64-windows-gnu" } diff --git a/examples/expected/1660-platform-windows-win32-print.exit b/examples/expected/1660-platform-windows-win32-print.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/1660-platform-windows-win32-print.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/1660-platform-windows-win32-print.ir b/examples/expected/1660-platform-windows-win32-print.ir new file mode 100644 index 0000000..e2d26ea --- /dev/null +++ b/examples/expected/1660-platform-windows-win32-print.ir @@ -0,0 +1,26 @@ + +; Function Attrs: nounwind +declare ptr @GetStdHandle(i32) #0 + +; Function Attrs: nounwind +declare i32 @WriteFile(ptr, ptr, i32, ptr, ptr) #0 + +; Function Attrs: nounwind +declare void @ExitProcess(i32) #0 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %call = call ptr @GetStdHandle(i32 -11) + %alloca = alloca ptr, align 8 + store ptr %call, ptr %alloca, align 8 + %allocaN = alloca [3 x i8], align 1 + store [3 x i8] c"42\0A", ptr %allocaN, align 1 + %allocaN = alloca i32, align 4 + store i32 0, ptr %allocaN, align 4 + %load = load ptr, ptr %alloca, align 8 + %igp.ptr = getelementptr i8, ptr %allocaN, i64 0 + %callN = call i32 @WriteFile(ptr %load, ptr %igp.ptr, i32 3, ptr %allocaN, ptr null) + call void @ExitProcess(i32 0) + ret i32 0 +} diff --git a/examples/expected/1660-platform-windows-win32-print.stderr b/examples/expected/1660-platform-windows-win32-print.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ 
b/examples/expected/1660-platform-windows-win32-print.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/1660-platform-windows-win32-print.stdout b/examples/expected/1660-platform-windows-win32-print.stdout new file mode 100644 index 0000000..d81cc07 --- /dev/null +++ b/examples/expected/1660-platform-windows-win32-print.stdout @@ -0,0 +1 @@ +42 diff --git a/readme.md b/readme.md index bd5e10d..91d176e 100644 --- a/readme.md +++ b/readme.md @@ -685,12 +685,34 @@ if p.json { /* emit ONLY machine output on stdout */ } ## Cross-Compilation ```sh -sx build app.sx --target linux # Linux x86_64 +sx build app.sx --target linux # Linux x86_64 (glibc, dynamic) +sx build app.sx --target linux-musl # Linux x86_64 (musl, static) sx build app.sx --target macos-arm # macOS ARM64 -sx build app.sx --target windows # Windows x86_64 +sx build app.sx --target windows # Windows x86_64 (MSVC) +sx build app.sx --target windows-gnu # Windows x86_64 (MinGW) sx build app.sx --target wasm # WebAssembly ``` +### Self-contained builds (bundled `zig`) + +For macOS / Linux / Windows targets, sx can link with a bundled `zig` as +`zig cc` instead of the host's system linker — it supplies lld, the CRT, and +libc (musl/glibc/mingw), so no `cc`/SDK needs to be installed. The default +Linux output is statically-linked musl, which runs on any Linux. + +```sh +sx build app.sx --target linux-musl --self-contained # static, portable ELF +sx build app.sx --self-contained # host target, hermetic link +SX_ZIG=/path/to/zig sx build app.sx --self-contained # pin a specific zig +sx build app.sx --no-self-contained # force the system toolchain +``` + +`--self-contained` uses a `zig` found via `$SX_ZIG`, a bundled copy next to the +`sx` binary, or `zig` on `PATH`. In a packaged distribution (with a bundled +`zig` alongside `sx`) the backend activates automatically; a `PATH`-only `zig` +is used only when `--self-contained` is passed, so native dev builds are never +silently rerouted. 
Set `SX_DEBUG_ZIG=1` to trace discovery. + ## Acknowledgments - [Jonathan Blow](https://en.wikipedia.org/wiki/Jonathan_Blow) for Jai, the language that inspired this one diff --git a/src/ir/emit_llvm.zig b/src/ir/emit_llvm.zig index 4a83157..75b695c 100644 --- a/src/ir/emit_llvm.zig +++ b/src/ir/emit_llvm.zig @@ -255,10 +255,16 @@ pub const LLVMEmitter = struct { const llvm_module = c.LLVMModuleCreateWithNameInContext(module_name, ctx); const builder = c.LLVMCreateBuilderInContext(ctx); - // Set target triple - const triple_owned = target_config.triple == null; - const triple = target_config.triple orelse c.LLVMGetDefaultTargetTriple(); - defer if (triple_owned) c.LLVMDisposeMessage(@constCast(triple)); + // Set target triple. Normalize first: zig-scheme, vendor-less triples + // (e.g. "x86_64-windows-gnu") would otherwise have "windows" land in + // LLVM's vendor slot under its positional parser, leaving OS=unknown + // and the object format silently falling back to ELF. Normalization is + // LLVM's own reordering — not a hand-maintained translation table. 
+ const raw_owned = target_config.triple == null; + const raw_triple = target_config.triple orelse c.LLVMGetDefaultTargetTriple(); + defer if (raw_owned) c.LLVMDisposeMessage(@constCast(raw_triple)); + const triple = c.LLVMNormalizeTargetTriple(raw_triple); + defer c.LLVMDisposeMessage(triple); c.LLVMSetTarget(llvm_module, triple); diff --git a/src/main.zig b/src/main.zig index 0e89752..b55f168 100644 --- a/src/main.zig +++ b/src/main.zig @@ -50,11 +50,17 @@ pub fn main(init: std.process.Init) !void { else if (std.mem.eql(u8, raw, "macos-x86")) try macosTripleForArch(allocator, "x86_64") else if (std.mem.eql(u8, raw, "linux") or std.mem.eql(u8, raw, "linux-x86")) - "x86_64-unknown-linux-gnu" + "x86_64-linux-gnu" else if (std.mem.eql(u8, raw, "linux-arm")) - "aarch64-unknown-linux-gnu" + "aarch64-linux-gnu" + else if (std.mem.eql(u8, raw, "linux-musl")) + "x86_64-linux-musl" + else if (std.mem.eql(u8, raw, "linux-musl-arm")) + "aarch64-linux-musl" else if (std.mem.eql(u8, raw, "windows")) "x86_64-windows-msvc" + else if (std.mem.eql(u8, raw, "windows-gnu")) + "x86_64-windows-gnu" else if (std.mem.eql(u8, raw, "ios") or std.mem.eql(u8, raw, "ios-arm")) "arm64-apple-ios14.0" else if (std.mem.eql(u8, raw, "ios-sim") or std.mem.eql(u8, raw, "ios-sim-arm")) @@ -88,6 +94,10 @@ pub fn main(init: std.process.Init) !void { i += 1; if (i >= args.len) { std.debug.print("error: --linker requires a value\n", .{}); return; } target_config.linker = args[i]; + } else if (std.mem.eql(u8, arg, "--self-contained")) { + target_config.self_contained = .on; + } else if (std.mem.eql(u8, arg, "--no-self-contained")) { + target_config.self_contained = .off; } else if (std.mem.eql(u8, arg, "--sysroot")) { i += 1; if (i >= args.len) { std.debug.print("error: --sysroot requires a value\n", .{}); return; } diff --git a/src/target.zig b/src/target.zig index fc63b60..0b72c46 100644 --- a/src/target.zig +++ b/src/target.zig @@ -1,6 +1,8 @@ const std = @import("std"); +const builtin = 
@import("builtin"); const llvm = @import("llvm_api.zig"); const c = llvm.c; +const zig_backend = @import("zig_backend.zig"); /// One `#jni_main #jni_class("...")` declaration's Java-source emission. /// Populated by lowering and surfaced to the sx Android bundler in @@ -78,6 +80,13 @@ pub const TargetConfig = struct { /// The object is kept at `.sx-tmp/main.o` (its link-time path, so the /// debug map resolves when lldb is run from the project root). emit_obj: bool = false, + /// Self-contained link backend (bundled `zig cc`). `.auto` uses it when a + /// `zig` is discoverable, the target is Linux, and no explicit `--linker` + /// was given; `.on` forces it (error if no zig / non-Linux target); `.off` + /// uses the system `cc`. See design/bundled-zig-link-backend-design.md. + self_contained: SelfContained = .auto, + + pub const SelfContained = enum { auto, on, off }; pub const OptLevel = enum { none, @@ -194,8 +203,119 @@ pub const TargetConfig = struct { pub fn getLinker(self: TargetConfig) []const u8 { return self.linker orelse "cc"; } + + /// True when this target is in scope for the bundled-`zig` backend: + /// the three desktop OSes (macOS, Linux, Windows). iOS/Android/wasm keep + /// their specialized toolchains. + pub fn zigBackendInScope(self: TargetConfig) bool { + return self.isMacOS() or self.isLinux() or self.isWindows(); + } + + /// The zig `-target` for the bundled-zig link backend. sx triples already + /// use zig's scheme, so this is pure pass-through; only the null + /// (host-default) case synthesizes a portable triple from the host arch + + /// host OS (musl on Linux for static portability, mingw on Windows). + /// Caller owns the returned slice. 
+ pub fn zigTargetTriple(self: TargetConfig, allocator: std.mem.Allocator) ![]const u8 { + if (self.triple) |t| return allocator.dupe(u8, std.mem.span(t)); + const arch: []const u8 = if (self.isAarch64()) "aarch64" else "x86_64"; + const os_abi: []const u8 = switch (builtin.os.tag) { + .linux => "linux-musl", + .macos => "macos-none", + .windows => "windows-gnu", + else => "linux-musl", + }; + return std.fmt.allocPrint(allocator, "{s}-{s}", .{ arch, os_abi }); + } }; +/// Decide whether the link step drives the bundled-`zig` backend +/// (`zig cc -target …`) or the system linker. Returns the zig path to use, +/// or null for the system linker. Errors loudly when a self-contained link is +/// requested but cannot be satisfied — never silently falls back in that case. +/// +/// Auto mode engages ONLY for a *bundled* zig (a real distribution): a +/// PATH-only zig is a dev convenience and never hijacks a native build, so the +/// dev/CI corpus keeps using the system toolchain. `--self-contained` forces +/// the backend with either bundled or PATH zig. +/// See design/bundled-zig-link-backend-design.md §5.5. 
fn selectZigLinker(allocator: std.mem.Allocator, tc: TargetConfig) !?[]const u8 {
    switch (tc.self_contained) {
        // `--no-self-contained`: always the system linker.
        .off => return null,

        // `--self-contained`: the backend is mandatory, so every reason it
        // cannot be used is a hard error rather than a fallback.
        .on => {
            if (!tc.zigBackendInScope()) {
                std.debug.print("error: --self-contained supports macOS/Linux/Windows targets only\n", .{});
                return error.LinkError;
            }
            const found = zig_backend.discoverZig(allocator) orelse {
                std.debug.print("error: --self-contained requested but no usable `zig` was found (set $SX_ZIG or put zig on PATH)\n", .{});
                return error.LinkError;
            };
            // Bundled or PATH-resolved — both are acceptable when forced.
            return found.path;
        },

        .auto => {
            if (!tc.zigBackendInScope()) return null;
            if (tc.linker != null) return null; // explicit --linker wins
            const found = zig_backend.discoverZig(allocator) orelse return null;
            if (!found.bundled) {
                // PATH zig does not auto-engage. The path is allocator-owned
                // and is not handed to the caller on this branch, so release
                // it here instead of leaking it.
                allocator.free(found.path);
                return null;
            }
            return found.path;
        },
    }
}

/// Build the `zig cc` link argv (shared across macOS/Linux/Windows). zig cc is
/// a clang-compatible driver, so `-o`/`-L`/`-l`/`-framework`/extra objects all
/// pass through. `-static` is added only for musl (the portable Linux path);
/// macOS cannot static-link libSystem and Windows uses dynamic mingw.
///
/// Entries are appended to `argv` in this order: `zig cc`, `-target …`
/// (+ `-static` for musl), the main object and `-o <output_bin>`, extra
/// objects, `--sysroot=`, `-L…`, `-l…`, `-framework …` (macOS only), and
/// finally `tc.extra_link_flags` split on spaces.
///
/// Strings formatted here are allocated from `allocator`, stored in `argv`,
/// and not freed by this function. Returns an error if any allocation fails.
fn emitZigLinkArgv(
    argv: *std.ArrayList([]const u8),
    allocator: std.mem.Allocator,
    zig_path: []const u8,
    output_obj: []const u8,
    extra_objects: []const []const u8,
    output_bin: []const u8,
    libraries: []const []const u8,
    frameworks: []const []const u8,
    tc: TargetConfig,
) !void {
    try argv.appendSlice(allocator, &.{ zig_path, "cc" });

    if (tc.isMacOS()) {
        // The object stays Mach-O (emitted from Apple's `apple-darwin` triple,
        // which LLVM needs), but zig's -target parser rejects that scheme — so
        // hand it zig's vendor-less `<arch>-macos`. No -static (libSystem can't
        // be statically linked). Cross-to-macOS needs an SDK (out of scope).
        const arch: []const u8 = if (tc.isAarch64()) "aarch64" else "x86_64";
        const mac_target = try std.fmt.allocPrint(allocator, "{s}-macos", .{arch});
        try argv.appendSlice(allocator, &.{ "-target", mac_target });
    } else {
        const ztriple = try tc.zigTargetTriple(allocator);
        try argv.appendSlice(allocator, &.{ "-target", ztriple });
        // Any musl triple is linked fully static.
        if (std.mem.indexOf(u8, ztriple, "musl") != null) try argv.append(allocator, "-static");
    }

    try argv.appendSlice(allocator, &.{ output_obj, "-o", output_bin });
    for (extra_objects) |eo| try argv.append(allocator, eo);

    if (tc.sysroot) |sr| {
        try argv.append(allocator, try std.fmt.allocPrint(allocator, "--sysroot={s}", .{sr}));
    }
    for (tc.lib_paths) |lp| {
        try argv.append(allocator, try std.fmt.allocPrint(allocator, "-L{s}", .{lp}));
    }
    for (libraries) |lib| {
        try argv.append(allocator, try std.fmt.allocPrint(allocator, "-l{s}", .{lib}));
    }

    // Frameworks are Apple-only; ignored on Linux/Windows.
    if (tc.isMacOS()) {
        for (frameworks) |fw| {
            try argv.appendSlice(allocator, &.{ "-framework", fw });
        }
    }

    // Each configured flag string may carry several space-separated
    // arguments; every token becomes its own argv entry.
    for (tc.extra_link_flags) |flag| {
        var it = std.mem.tokenizeScalar(u8, flag, ' ');
        while (it.next()) |part| try argv.append(allocator, part);
    }
}

/// Execute a precompiled object file in-process using LLVM's ORC JIT.
/// Takes ownership of obj_buf. Returns the exit code from main().
/// `priority_dylibs` are consulted for symbols BEFORE the process-wide @@ -345,7 +465,11 @@ pub fn discoverAppleSdk(allocator: std.mem.Allocator, io: std.Io, sdk_name: []co pub fn link(allocator: std.mem.Allocator, io: std.Io, output_obj: []const u8, extra_objects: []const []const u8, output_bin: []const u8, libraries: []const []const u8, frameworks: []const []const u8, target_config: TargetConfig, has_jni_main: bool) !void { var argv = std.ArrayList([]const u8).empty; - if (target_config.isIOS()) { + if (try selectZigLinker(allocator, target_config)) |zig_path| { + // Bundled-zig backend: macOS/Linux/Windows linked uniformly via + // `zig cc`, which supplies lld + CRT + libc with no host toolchain. + try emitZigLinkArgv(&argv, allocator, zig_path, output_obj, extra_objects, output_bin, libraries, frameworks, target_config); + } else if (target_config.isIOS()) { // iOS: clang driver with -isysroot pointing at the iOS SDK. // -l libraries are generally wrong for iOS (Apple ships system code // as frameworks); user-declared #library still pass through. @@ -652,7 +776,6 @@ pub fn postProcessWasmHtml(allocator: std.mem.Allocator, io: std.Io, html_path: /// Common library paths for the host OS, computed at comptime. pub const host_lib_paths = blk: { - const builtin = @import("builtin"); var paths: []const []const u8 = &.{}; if (builtin.os.tag == .macos) { if (builtin.cpu.arch == .aarch64) { diff --git a/src/zig_backend.zig b/src/zig_backend.zig new file mode 100644 index 0000000..5f379c2 --- /dev/null +++ b/src/zig_backend.zig @@ -0,0 +1,107 @@ +//! Discovery for the bundled-`zig` link backend. +//! +//! When `sx build` links a native binary, it can drive a bundled `zig` as +//! `zig cc` instead of the host's system `cc`. `zig cc` brings its own lld, +//! CRT objects, and libc (musl/glibc/mingw) for the target — making a +//! distributed sx able to finish a build with no host toolchain installed. +//! +//! This module only *locates* a usable `zig`. 
//! The decision of whether to use it, and the construction of the
//! `zig cc -target … -static` argv, live in `target.zig` (which has the
//! TargetConfig it needs). Design-of-record:
//! `design/bundled-zig-link-backend-design.md`.

const std = @import("std");
const builtin = @import("builtin");

extern "c" fn _NSGetExecutablePath(buf: [*]u8, len: *u32) c_int;
extern "c" fn access(path: [*:0]const u8, mode: c_int) c_int;

/// File name of the zig executable looked up on `$PATH` for this host.
const zig_exe_name: []const u8 = if (builtin.os.tag == .windows) "zig.exe" else "zig";

/// Trace discovery when `SX_DEBUG_ZIG` is set (mirrors `SX_DEBUG_STDLIB`).
fn dbg(comptime fmt: []const u8, args: anytype) void {
    if (std.c.getenv("SX_DEBUG_ZIG") != null) std.debug.print("[sx] " ++ fmt, args);
}

/// True when `path` names something that exists, per libc `access(path, F_OK)`.
/// This is an existence check only: it does not verify that the entry is a
/// regular file or that it is executable. Paths of 4096 bytes or more are
/// reported as absent because they do not fit the NUL-terminated scratch buffer.
fn fileExists(path: []const u8) bool {
    var buf: [4096]u8 = undefined;
    if (path.len >= buf.len) return false;
    @memcpy(buf[0..path.len], path);
    buf[path.len] = 0; // `access` needs a C string
    return access(@ptrCast(&buf), 0) == 0; // 0 == F_OK
}

/// Path of the running `sx` binary. Mirrors imports.zig's resolver (no Io
/// dependency): `_NSGetExecutablePath` on Darwin, `/proc/self/exe` on Linux.
/// The result is a slice into `buf`. Any other host OS yields
/// `error.UnsupportedHostOS`.
fn selfExePath(buf: []u8) ![]const u8 {
    switch (builtin.os.tag) {
        .macos, .ios => {
            var len: u32 = @intCast(buf.len);
            // A non-zero return means the path did not fit in `buf`.
            if (_NSGetExecutablePath(buf.ptr, &len) != 0) return error.PathBufferTooSmall;
            return std.mem.sliceTo(buf[0..buf.len], 0);
        },
        .linux => return std.posix.readlink("/proc/self/exe", buf),
        else => return error.UnsupportedHostOS,
    }
}

/// A discovered `zig`. `bundled` distinguishes a distribution-bundled (or
/// deliberately-pinned) zig — which auto-activates the backend — from a
/// PATH-resolved one, which is a dev convenience and only used when forced
/// via `--self-contained`.
pub const Found = struct {
    /// Allocator-owned path to the zig executable.
    path: []const u8,
    /// True for `$SX_ZIG` and exe-relative hits; false for a `$PATH` hit.
    bundled: bool,
};

/// Resolution order (first hit wins):
///   1. $SX_ZIG                           — explicit override    (bundled=true)
///   2. <exe_dir>/../libexec/zig/zig      — install layout       (bundled=true)
///   3. <exe_dir>/../../zig-bundle/zig    — dev vendored layout  (bundled=true)
///   4. `zig` (`zig.exe` on Windows) on $PATH — dev fallback     (bundled=false)
/// `<exe_dir>` is the directory of the running `sx` binary.
/// Returns an allocator-owned path + provenance, or null if none resolve.
/// An allocation failure while copying the `$SX_ZIG` path also yields null.
pub fn discoverZig(allocator: std.mem.Allocator) ?Found {
    // 1. Explicit override — a deliberate pin, treated as bundled.
    if (std.c.getenv("SX_ZIG")) |env| {
        const p = std.mem.span(env);
        if (fileExists(p)) {
            dbg("zig: SX_ZIG={s}\n", .{p});
            return .{ .path = allocator.dupe(u8, p) catch return null, .bundled = true };
        }
        dbg("zig: SX_ZIG={s} (not found, ignoring)\n", .{p});
    }

    // 2 & 3. Exe-relative candidates — a real distribution.
    var buf: [4096]u8 = undefined;
    if (selfExePath(&buf)) |exe| {
        const exe_dir = std.fs.path.dirname(exe) orelse exe;
        const rels = [_][]const u8{ "../libexec/zig/zig", "../../zig-bundle/zig" };
        for (rels) |rel| {
            const cand = std.fs.path.join(allocator, &.{ exe_dir, rel }) catch continue;
            if (fileExists(cand)) {
                dbg("zig: bundled={s}\n", .{cand});
                return .{ .path = cand, .bundled = true };
            }
            dbg("zig: tried {s} (absent)\n", .{cand});
            allocator.free(cand);
        }
    } else |err| {
        // Not fatal: the PATH fallback below still applies. Leave a trace so
        // a missing bundled zig can be diagnosed.
        dbg("zig: cannot resolve own executable path ({s})\n", .{@errorName(err)});
    }

    // 4. $PATH fallback — dev convenience; does not auto-engage.
    if (findOnPath(allocator, zig_exe_name)) |p| {
        dbg("zig: PATH={s}\n", .{p});
        return .{ .path = p, .bundled = false };
    }

    dbg("zig: none found — falling back to system cc\n", .{});
    return null;
}

/// Search each `$PATH` directory, in order, for an entry called `name`.
/// Returns the first existing `<dir>/<name>` as an allocator-owned path, or
/// null when `$PATH` is unset or no directory contains it. Directories are
/// split on the host's PATH-list separator (`std.fs.path.delimiter`: `:` on
/// POSIX hosts, `;` on Windows).
fn findOnPath(allocator: std.mem.Allocator, name: []const u8) ?[]const u8 {
    const path_env = std.c.getenv("PATH") orelse return null;
    var it = std.mem.tokenizeScalar(u8, std.mem.span(path_env), std.fs.path.delimiter);
    while (it.next()) |dir| {
        // A candidate that cannot be allocated is skipped; later ones may fit.
        const cand = std.fs.path.join(allocator, &.{ dir, name }) catch continue;
        if (fileExists(cand)) return cand;
        allocator.free(cand);
    }
    return null;
}