From c562fe236d63235baa623e033afda206f125c082 Mon Sep 17 00:00:00 2001 From: agra Date: Sun, 14 Jun 2026 12:14:16 +0300 Subject: [PATCH] docs(plans): inline-asm design + ASM and FFI-linkage plans/checkpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two new workstreams: - ASM: inline assembly — asm { "tmpl", "=r" -> T, "r" = expr, clobbers(.…) }, multi-return tuples; lowers via the existing llvm_api.c (no shim). - FFI-linkage: add extern/export postfix keywords, migrate every #foreign onto them, then purge 'foreign' from the tree (end-state invariant). Drop current/ from .gitignore so plans + checkpoints are tracked normally (the dir was ignored; only checkpoints had been force-added). Includes docs/inline-asm-design.md. specs.md change left uncommitted. --- .gitignore | 3 +- current/CHECKPOINT-ASM.md | 24 + current/CHECKPOINT-EXTERN-EXPORT.md | 35 + current/PLAN-ASM.md | 82 +++ current/PLAN-EXTERN-EXPORT.md | 198 ++++++ docs/inline-asm-design.md | 1005 +++++++++++++++++++++++++++ 6 files changed, 1345 insertions(+), 2 deletions(-) create mode 100644 current/CHECKPOINT-ASM.md create mode 100644 current/CHECKPOINT-EXTERN-EXPORT.md create mode 100644 current/PLAN-ASM.md create mode 100644 current/PLAN-EXTERN-EXPORT.md create mode 100644 docs/inline-asm-design.md diff --git a/.gitignore b/.gitignore index 87557c3..c0bef33 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,4 @@ zig-out .DS_Store .vscode/ .sx-cache -.sx-tmp -current/ \ No newline at end of file +.sx-tmp \ No newline at end of file diff --git a/current/CHECKPOINT-ASM.md b/current/CHECKPOINT-ASM.md new file mode 100644 index 0000000..2164caa --- /dev/null +++ b/current/CHECKPOINT-ASM.md @@ -0,0 +1,24 @@ +# sx Inline Assembly — Checkpoint (ASM stream) + +Companion to `current/PLAN-ASM.md`; design in +[docs/inline-asm-design.md](../docs/inline-asm-design.md). 
Update after every +commit, one step at a time per the cadence rule (no commit may both add a test +and make it pass). + +## Last completed step +None — plan authored, not yet started. + +## Current state +Design fully converged (`docs/inline-asm-design.md`). Feasibility confirmed: +`llvm_api.c.*` exposes `LLVMGetInlineAsm` / `LLVMBuildCall2` / +`LLVMAppendModuleInlineAsm` (LLVM@19). No code written. + +## Next step +**A.0** — add the `kw_asm` keyword (`src/token.zig` Tag + `StaticStringMap`) and a +unit lex test. Then A.1 (parse `asm { … }` → `AsmExpr`, lowering bails loudly). + +## Log +- (init) Plan + design doc written; ASM stream opened. + +## Known issues +None yet. diff --git a/current/CHECKPOINT-EXTERN-EXPORT.md b/current/CHECKPOINT-EXTERN-EXPORT.md new file mode 100644 index 0000000..3b85886 --- /dev/null +++ b/current/CHECKPOINT-EXTERN-EXPORT.md @@ -0,0 +1,35 @@ +# sx `extern`/`export` + `#foreign` retirement — Checkpoint (FFI-linkage stream) + +Companion to `current/PLAN-EXTERN-EXPORT.md` — one merged plan: **Part A** adds +`extern`/`export`, **Part B** migrates `#foreign` and purges `foreign`. Update after +every commit, one step at a time per the cadence rule. + +## Last completed step +None — plan authored, not yet started. (Merged the former EXTERN-EXPORT + +FOREIGN-MIGRATION plans into this single plan.) + +## Current state +Syntax decided + ratified: bare `extern`/`export`, postfix in the `callconv(.c)` +slot, `extern ⇒ callconv(.c)`, library separate. Touch-points mapped — token +`token.zig:45,282`; parser `1950,3669,316,425,1305`; lowering +`decl.zig:1123,387,2110,2382,2514`; IR/emit already capable (no codegen change). +Export gap = 4 lowering conditions. Part B `foreign` footprint to purge: 643 lines / +~57 identifiers in `src/` + 28 doc lines. End-state invariant: **zero `foreign`** in +the live tree (Phase 9.4 gate). No code written. 
+ +## Next step +**Phase 0.0** — add `kw_extern`/`kw_export` tokens + keyword-map entries (beside +`kw_callconv`) + unit lex test. Then 0.1 (parser plumbing, unconsumed), then Phase 1 +(`extern` working). See the plan's **"Kickoff prompt"** section. + +## Open decisions +Part A ratified (bare / postfix / `⇒ callconv(.c)` / lib-separate). Part B (confirm +before Phase 9): runtime-class rename target — `Runtime*Class*` (recommended); +historical carve-out — keep `issues/*.md` provenance, gate the live tree only. + +## Log +- (init) Plan written; FFI-linkage stream opened. +- (merge) Folded FOREIGN-MIGRATION in as Part B; deleted the split plan + checkpoint. + +## Known issues +None yet. diff --git a/current/PLAN-ASM.md b/current/PLAN-ASM.md new file mode 100644 index 0000000..4a7f20e --- /dev/null +++ b/current/PLAN-ASM.md @@ -0,0 +1,82 @@ +# sx Inline Assembly — Implementation Plan (ASM stream) + +**Design source of truth:** [docs/inline-asm-design.md](../docs/inline-asm-design.md). +This plan turns that doc's §II.7 stage-map + §II.8 phasing into ordered, +commit-sized, testable steps. Read the design doc first — this file is the +*how/when*, not the *what/why*. + +**Surface (decided):** +`asm volatile { "template", "=r" -> T, "r" = expr, clobbers(.cc, .memory) }` +— brace block; `->` output / `=` input; `clobbers(.…)` dot-name list; N `-> Type` +outputs return a tuple; templates are pure AT&T (via LLVM). + +**Feasibility (confirmed):** sx links LLVM@19; `src/llvm_api.zig` `@cImport`s +`llvm-c/Core.h`, so `llvm_api.c.*` already exposes `LLVMGetInlineAsm` (9-arg), +`LLVMInlineAsmDialectATT`, `LLVMBuildCall2`, `LLVMAppendModuleInlineAsm`. No shim. + +**Relationship to other streams:** +- Phases A–E (the inline-asm *expression*) are independent of EXTERN-EXPORT. +- Phase F (global asm) consumes `extern`/`export` to import/expose asm symbols — + do it **after** `PLAN-EXTERN-EXPORT.md` Phase 2. + +## Cadence (IMPASSIBLE) +No commit may both add a test AND make it pass. 
Each feature step is either a +behavior-locking PASSING test, or an xfail test the *next* commit turns green. +Arch-pinned tests live in `examples/16xx-platform-asm-*` (must declare `target=`). +Never regenerate snapshots while red. + +## Phase A — keyword + AST + parser (parses; no codegen) +| Step | Commit | What | Files | +|---|---|---|---| +| A.0 | lock | add `kw_asm` keyword + map entry; unit lex test `asm → kw_asm` | `src/token.zig`, `src/lexer.zig` + `.test.zig` | +| A.1 | xfail | parse `asm { … }` → `AsmExpr`/`AsmOperand` in `parsePrimary`; pin an AST/`sx ir` parse snapshot; lowering still `bailDetail("inline asm codegen unimplemented")` | `src/ast.zig` (:85 union arm, :721 structs), `src/parser.zig` (parsePrimary), `src/ir/interp.zig` | +| A.2 | green | parse-shape snapshot lands green; the unimplemented bail is loud + named | — | + +## Phase B — sema / typing +| Step | Commit | What | Files | +|---|---|---|---| +| B.0 | xfail | result-type rule (0→`void` / 1→`T` / N→named-or-positional tuple) + checklist (no-output⇒`volatile`, layout, comptime-string template) — pin error messages | `src/ir/expr_typer.zig` | +| B.1 | green | typing + diagnostics implemented; `.unresolved` sentinel on failure (no silent default) | `src/ir/expr_typer.zig`, `src/ir/semantic_diagnostics.zig` | + +## Phase C — IR op + lowering +| Step | Commit | What | Files | +|---|---|---|---| +| C.0 | lock | add `inline_asm: InlineAsm` to `Op` + `AsmOperand` (role/name/constraint/operand) + interp `bailDetail` arm; unit tests for the IR shape | `src/ir/inst.zig` (:80), `src/ir/interp.zig` | +| C.1 | xfail→green | `lowerAsmExpr` in `lowerExpr` dispatch — interns template/constraints/clobber-names, lowers input `Ref`s, sets result `TypeId` | `src/ir/lower/expr.zig` | + +## Phase D — LLVM emit (single value-output; the core) +| Step | Commit | What | Files | +|---|---|---|---| +| D.0 | xfail | `examples/16xx-platform-asm-syscall-write.sx` + `…-register-read.sx` + `…-no-output-volatile.sx` + 
`…-missing-volatile.sx` (expected compile error) — all red | examples + `expected/` markers | +| D.1 | green | `emitInlineAsm`: **port `FuncGen.airAssembly`** — constraint-string assembler (outputs `=`/`+`, inputs, `clobbers(.name)`→`~{name}`), `%[name]`→`${N}` / `%%` / `%=` template rewriter, `LLVMGetInlineAsm`+`LLVMBuildCall2`, `sideeffect=volatile`, AT&T dialect | `src/ir/emit_llvm.zig` (emitInst dispatch + handler) | +| D.2 | green | lock the template-rewrite + constraint string via an `expected/*.ir` snapshot on `…-template-subst.sx` | examples | + +**Phase D verification:** `zig build test`; the syscall example runs on +`x86_64-linux`; IR snapshot matches the design doc's worked `sys_write` lowering. + +## Phase E — multi-return tuples + `clobbers(.…)` +| Step | Commit | What | Files | +|---|---|---|---| +| E.0 | xfail | `…-asm-multi-return.sx` (`divmod`→`(quot,rem)`, `cpuid`→4-tuple) red | examples | +| E.1 | green | N `out_value` → LLVM struct return + `extractvalue i` → sx tuple (named when operands named); `clobbers(.name)` dot-name lowering finalized | `src/ir/emit_llvm.zig`, `src/ir/lower/expr.zig` | + +## Phase F — global asm (needs EXTERN-EXPORT Phase 2) +| Step | Commit | What | Files | +|---|---|---|---| +| F.0 | xfail | top-level `asm { … }` decl parsed (reject operands/`volatile`); `…-asm-global.sx` (defines a symbol, imported via `extern`) red | `src/parser.zig`, `src/ast.zig` | +| F.1 | green | lower `asm_global` → `c.LLVMAppendModuleInlineAsm`; comptime-call guard (dlsym-miss is loud); blocks concatenate in source order | `src/ir/lower/decl.zig`, `src/ir/emit_llvm.zig`, `src/ir/interp.zig` | + +## Phase G — later (own steps when scheduled) +`-> @place` write-through + read-write (`"+r" -> @place`) + indirect-memory +(`"=*m"`) outputs · `%=` unique-id · output-to-const rejection · Intel-dialect +opt-in · naked functions (`callconv(.naked)`, coordinate with EXTERN-EXPORT). 
+ +## Open decisions (design doc §II.10) +Dialect (AT&T-only v1, recommended) · `volatile` contextual-keyword (recommended) +· brace separator comma (recommended) · `clobbers(.name)` dot-name sugar now → +checked per-arch `Clobber` enum later (Phase 4 of the design doc). + +## End-to-end verification (per phase) +`zig build && zig build test`; for arch-pinned examples confirm they run on a +matching host or assert on `sx ir`/`.s` snapshots. After intentional output +changes only: `zig build test -Dupdate-goldens`, then review the diff. diff --git a/current/PLAN-EXTERN-EXPORT.md b/current/PLAN-EXTERN-EXPORT.md new file mode 100644 index 0000000..d9a1310 --- /dev/null +++ b/current/PLAN-EXTERN-EXPORT.md @@ -0,0 +1,198 @@ +# sx `extern` / `export` + `#foreign` retirement — Plan (FFI-linkage stream) + +**One stream, two parts.** **Part A** adds `extern`/`export` (the linkage surface); +**Part B** migrates every `#foreign` onto it and purges `foreign` from the tree. +They are *one* plan: Part B can't start until Part A is a behavior-equivalent +superset of `#foreign`, and Part A isn't "done" until Part B reaches the invariant. + +**Design rationale:** [docs/inline-asm-design.md](../docs/inline-asm-design.md) §II.2 +(Deviation 6) + §II.10 #4 + the syntax evaluation. + +**Decided syntax** +```sx +name :: (sig) -> Ret [callconv(.x)] [extern | export] [;|{…}]; // functions +Name :: #objc_class("X") [extern | export] { … }; // aggregates (mirrors `struct #compiler`) +g : Type extern ["csym"]; // extern global +``` +- `extern` = import (no body, external linkage, C ABI, no sx ctx) — `#foreign`'s role. +- `export` = define **and** expose (body + external linkage + C ABI + no ctx) — **new**. +- `extern`/`export` imply `callconv(.c)`; write `callconv` only to override. +- Library stays a separate axis (`#library`/build flags), not folded into `extern`. 
+ +> **END-STATE INVARIANT (hard requirement).** After this stream, `foreign` appears +> **nowhere** in the live tree — not the `#foreign` surface, and **not** internal +> identifiers. The extern AST is **not** named `foreign_expr`. Enforced by the +> Phase 9.4 grep gate. Scope today: 643 `foreign` lines / ~57 identifiers in `src/` +> + 28 in live docs — most of it the objc/jni **runtime-class** machinery. + +**Naming constraint (so we can actually reach the invariant):** introduce +`extern`-named representations only — do **not** reuse or extend +`ForeignExpr`/`foreign_expr`/`VarDecl.is_foreign`. Carry extern/export on a new +`FnDecl.extern_export` modifier with a `;`/`{…}` body (so there is **no** `*_expr` +node for it); add `VarDecl.is_extern`/`extern_name`. The IR is already extern-named +(`Function.is_extern`, `Builder.declareExtern`). + +**Key finding (scopes Part A):** the IR + LLVM emit **already support everything** — +`Function.linkage` (`.external/.internal/.private`), `is_extern`, `call_conv`, and a +raw un-mangled symbol name are all emitted by `declareFunction` +(`emit_llvm.zig:1225-1300`). Part A is a **parser + lowering** job, no codegen change. + +## Cadence (IMPASSIBLE) +No commit may both add a test AND make it pass (xfail-then-green, or a behavior-lock). +`zig build && zig build test` after every step. Never regenerate snapshots while red. 
+ +--- + +# PART A — add `extern` / `export` (alongside `#foreign`) + +## Phase 0 — tokens + parser plumbing +| Step | Commit | What | Files | +|---|---|---|---| +| 0.0 | lock | add `kw_extern`, `kw_export` (Tag enum + `StaticStringMap`, beside `kw_callconv` at `token.zig:45,282`); unit lex test | `src/token.zig` | +| 0.1 | lock | `parseOptionalExternExport()` (mirror `parseOptionalCallConv`, `parser.zig:3669`) + `ast.ExternExportModifier` enum + `FnDecl.extern_export` + `VarDecl.is_extern`/`extern_name` fields; **not yet consumed**; unit AST test | `src/parser.zig`, `src/ast.zig` | + +## Phase 1 — `extern` (import; equivalent to lib-less `#foreign`) +| Step | Commit | What | Files | +|---|---|---|---| +| 1.0 | xfail | accept postfix `extern` after the callconv slot (`parser.zig:1950`); `examples/12xx-ffi-extern-fn.sx` extern-binds a libc symbol — red (lowering not wired) | `src/parser.zig` | +| 1.1 | green | lowering: `extern` ⇒ `is_extern`, `.external`, `callconv(.c)`, no ctx — route through `declareExtern` like a lib-less `#foreign` (anchors `decl.zig:1123,387,2110,2113`). 
Example green | `src/ir/lower/decl.zig` | +| 1.2 | green | optional `extern "csym"` rename + extern-global form `g : T extern;` (`parser.zig:425` path) | `src/parser.zig`, `src/ir/lower/decl.zig` | + +## Phase 2 — `export` (define + expose; the NEW capability) +Fills the four export-gap conditions (all in `src/ir/lower/decl.zig`): +| Gap | Anchor | Fix | +|---|---|---| +| (i) linkage forced `.internal` | `:2382`, `:2514` | also `.external` when `extern_export == .export` | +| (ii) C ABI not promoted | `:2110` | also `.c` when `== .export` | +| (iii) no symbol-name override | `emit_llvm.zig:1226` raw name | parse optional `export "csym"`; map in the name map | +| (iv) ctx param not suppressed | `:387` `funcWantsImplicitCtx` | also suppress when `== .export` | + +| Step | Commit | What | Files | +|---|---|---|---| +| 2.0 | xfail | multi-file test: an `export fn` called from a companion `.c` caller (same `XXXX-` prefix) — red (still internal) | `examples/12xx-ffi-export-fn.{sx,c}` + `expected/` | +| 2.1 | green | gaps (i),(ii),(iv): `export` ⇒ external + C-ABI + no-ctx on a **defined** fn (uses `beginFunction`, not `declareExtern`) | `src/ir/lower/decl.zig` | +| 2.2 | green | gap (iii): `export "csym"` symbol-name override | `src/parser.zig`, `src/ir/lower/decl.zig` | + +## Phase 3 — aggregates (objc / jni runtime classes) +| Step | Commit | What | Files | +|---|---|---|---| +| 3.0 | xfail | `#objc_class("X") extern { … }` (import) + `… export { … }` (define) parse alongside legacy `#foreign #objc_class` | `src/parser.zig` (`tryParseForeignClassPrefix` :1305, `parseForeignClassDecl` :1369) | +| 3.1 | green | map postfix `extern`→reference, `export`→define+register; per-runtime tests (objc, jni) | `src/parser.zig`, `src/ir/lower/decl.zig`, `src/ir/lower/objc_class.zig` | + +## Phase 4 — interplay, diagnostics, docs +`extern`+`callconv` stacking/redundancy; reject `extern`+`export` together; +`specs.md` documents `extern`/`export` (the three axes); `#foreign` still 
documented +until Part B cutover. + +> **GATE A→B.** `extern`/`export` are a behavior-equivalent **superset** of +> `#foreign`. Lock with a unit test asserting `#foreign` and `extern` lower to +> identical IR for a sample fn / global / class. Do not start Part B before this. + +--- + +# PART B — migrate `#foreign` → `extern`/`export`, then purge `foreign` + +**Inventory (drives the batches):** `#foreign` = 466 uses. ~391 sx-code (308 fns +[207 lib / 196 rename], 75 classes [39 objc / 31 jni], 8 globals) + ~145 example +snapshots. 6 libs (`sqlib`98 `libc`61 `objc`22 `tlib`12 `raylib`7 `clib/pcaplib`3). +Hotspots: `vendors/sqlite`(98), `platform/{android,uikit,android_jni,sdl3}`, +`std/{socket,thread,fs,time}`, `ffi/{objc,raylib}`. + +## Phase 5 — `#foreign` becomes an alias for `extern` +| Step | Commit | What | Files | +|---|---|---|---| +| 5.0 | lock | route the `#foreign` parser paths (`parser.zig:316,425,1305,1970`) to build the *same extern-named* AST as `extern`/`export`. Suite green, snapshots unchanged | `src/parser.zig` | +| 5.1 | lock | unit test: `#foreign` and `extern` produce identical IR (fn/global/class) | `src/ir/lower/decl.test.zig` | + +## Phase 6 — migrate stdlib (behavior-preserving; snapshot diff must be EMPTY) +One commit per batch; rewrite `#foreign`→`extern` (fns/globals), +`#foreign #objc_class`→`#objc_class … extern`, defined classes → `… export`. 
+| Step | Batch | ~sites | +|---|---|---| +| 6.1 | `library/vendors/sqlite/` | 98 | +| 6.2 | `library/modules/platform/` (uikit/android/android_jni/sdl3) | ~95 | +| 6.3 | `library/modules/std/` (socket/thread/fs/time/process/…) | ~60 | +| 6.4 | `library/modules/ffi/` (objc/raylib/objc_block/…) | ~50 | +| 6.5 | remaining `library/` + `vendors/` | remainder | + +## Phase 7 — migrate examples + issues (empty snapshot diff; review every diff) +| Step | Batch | +|---|---| +| 7.1 | `examples/12xx-ffi-*` (plain C) | +| 7.2 | `examples/13xx-ffi-objc-*` | +| 7.3 | `examples/14xx-ffi-jni-*` | +| 7.4 | `issues/*` repros + stragglers | +A non-empty diff ⇒ the alias wasn't behavior-equivalent — stop, fix Phase 5. + +## Phase 8 — cutover +| Step | Commit | What | +|---|---|---| +| 8.0 | xfail | `examples/11xx-diagnostics-foreign-removed.sx` expects a "`#foreign` removed; use `extern`/`export`" diagnostic — still accepted (red) | +| 8.1 | green | parser hard-rejects `#foreign` (mirrors the variadic `name: ..T` cutover); `specs.md` drops `#foreign`, documents `extern`/`export` | + +## Phase 9 — total `foreign` purge (the invariant) +`foreign` must not appear anywhere in the live tree, surface *or* internal. Each step +a mechanical, behavior-preserving rename commit (snapshots unchanged), small +per-file/subsystem commits — not one sweep. 
+| Step | What | Identifiers (count → new) | +|---|---|---| +| 9.0 | delete the surface | `hash_foreign`(11) + lexer entry + the 4 parse paths + the alias. NOTE(review): the Phase 8 `examples/11xx-diagnostics-foreign-removed.sx` test and its "`#foreign` removed" diagnostic also contain `foreign`; retire or rename them in this step, otherwise the 9.4 gate cannot reach zero — confirm | +| 9.1 | rename **linkage** → `extern*` | `foreign_expr`(25) **eliminated** (folds into modifier) · `is_foreign`(39)→`is_extern` · `foreign_lib`/`foreign_name`→`extern_*` · `foreign_name_map`→`extern_name_map` · `callForeign`(8)→`callExtern` · `marshalForeignArg`→`marshalExternArg` · `is_foreign_c_api`(5)→`is_extern_c_api` · `dedupeForeignSymbol`→`dedupeExternSymbol` | +| 9.2 | rename **runtime-class** machinery → `runtime*` (decision 5) | `ForeignClassDecl`(65) · `ForeignMethodDecl`(31) · `ForeignClassMember`(20) · `ForeignFieldDecl`(15) · `foreign_class_map`(44) · `current_foreign_class`(34)/`_method` · `foreign_path`(62) · `ForeignRuntime` · `parse/tryParseForeignClass*` · `lowerForeign{Method,Static}Call` · `findForeign{Method,Property}InChain` · `resolveForeign*` · `register*ForeignClass*` · `foreignClass*Type` · `*ForeignRefs` | +| 9.3 | purge **live docs** (28 lines) | `specs.md`/`readme.md`/`CLAUDE.md`: drop `#foreign`, document `extern`/`export`; fix file-roles + FFI/bundling notes | +| 9.4 | **acceptance gate** | `grep -rniE 'foreign' src/ library/ examples/ specs.md readme.md CLAUDE.md` → **0 matches** (empty output) | + +--- + +## Open decisions +*Part A (ratified — recommendations stand):* 1. bare keywords (not `#extern`). +2. aggregate position postfix (`#objc_class(…) extern`, like `struct #compiler`). +3. `extern ⇒ callconv(.c)`. 4. library separate. +*Part B (confirm before Phase 9):* 5. runtime-class rename target — **`Runtime*Class*`** +(recommended; it's the object-model axis, not linkage) vs `Extern*Class*`. +6. historical carve-out — keep `issues/*.md` (+ design-doc prose) as provenance, +gate only the live tree (recommended) vs purge everything. 
+ +## Relationship to ASM +`PLAN-ASM.md` Phase F (global asm) consumes `extern` (import the asm symbol) and +`export` (let asm call back into sx) — do it after **Part A Phase 2**. + +--- + +## Kickoff prompt (paste into a fresh session to start Part A) + +> Work the FFI-linkage stream per `current/PLAN-EXTERN-EXPORT.md` (+ checkpoint +> `current/CHECKPOINT-EXTERN-EXPORT.md`). First read the plan's header (Decided +> syntax, Naming constraint, Key finding) and Part A; rationale is in +> `docs/inline-asm-design.md` §II.2 (Deviation 6) + §II.10 #4. +> +> **This session = Part A, Phases 0 and 1 only** (`extern` works as a bare postfix +> keyword equivalent to a lib-less `#foreign` fn/global binding; `#foreign` stays +> untouched). Do NOT start Phase 2 (`export`) or Part B (migration). +> +> **Cadence (IMPASSIBLE):** no commit may both add a test and make it pass — lock +> behavior with a passing test, or land an xfail the next commit turns green. +> `zig build && zig build test` after every step. +> +> **Naming constraint (hard):** introduce only `extern`-named AST — do NOT reuse or +> extend `ForeignExpr`/`foreign_expr`/`VarDecl.is_foreign`. Use a new +> `FnDecl.extern_export` modifier (body `;` or `{…}`) and `VarDecl.is_extern`/ +> `extern_name`. IR is already extern-named (`Function.is_extern`, `declareExtern`). +> +> Steps (commit after each; update the checkpoint each time): +> - 0.0 lock: `kw_extern`/`kw_export` tokens + map entries beside `kw_callconv` +> (`src/token.zig:45,282`) + unit lex test. +> - 0.1 lock: `parseOptionalExternExport()` (mirror `parseOptionalCallConv`, +> `parser.zig:3669`) + `ast.ExternExportModifier` + `FnDecl.extern_export` + +> `VarDecl.is_extern`/`extern_name` (parsed, unconsumed) + unit AST test. +> - 1.0 xfail: accept postfix `extern` after the callconv slot (`parser.zig:1950`); +> add `examples/12xx-ffi-extern-fn.sx` that extern-binds a libc symbol (red). 
+> - 1.1 green: in `src/ir/lower/decl.zig`, lower `extern` like a lib-less `#foreign` +> import — `is_extern`, `.external`, `callconv(.c)`, no ctx, via `declareExtern` +> (anchors :1123, :387, :2110, :2113). Example goes green. +> - 1.2 green: optional `extern "csym"` rename + extern-global `g : T extern;` +> (`parser.zig:425`). +> +> Stop at end of Phase 1. Verify: suite green; the `extern` libc binding runs; +> `#foreign` still works with no snapshot diffs. If you hit an unrelated compiler +> bug, follow the CLAUDE.md IMPASSIBLE RULE (file an issue, stop). diff --git a/docs/inline-asm-design.md b/docs/inline-asm-design.md new file mode 100644 index 0000000..4811ee8 --- /dev/null +++ b/docs/inline-asm-design.md @@ -0,0 +1,1005 @@ +# Inline Assembly for sx — Design Doc & Proposal + +**Status:** proposal / not yet scheduled into a workstream +**Author:** research pass over the Zig compiler (`~/projects/zig`, 0.16-dev) + the sx compiler +**Scope:** how Zig implements inline assembly end-to-end, and a minimal-deviation proposal to bring the same model to sx. + +> Guiding constraint for this doc: **mirror Zig's design; deviate only where sx's +> grammar or stdlib makes a 1:1 copy impossible, and call every deviation out +> explicitly with its justification.** Every deviation below is tagged +> **[DEVIATION]** with a reason. + +--- + +## 0. TL;DR + feasibility + +* **Feasible today, no new infrastructure.** sx already links LLVM (`build.zig:10` + → `/opt/homebrew/opt/llvm@19`) and `@cImport`s `llvm-c/Core.h` + (`src/llvm_api.zig:1-17`). That header exposes everything inline asm needs, + reachable right now through `llvm_api.c.*`: + * `LLVMGetInlineAsm(Ty, AsmString, AsmStringSize, Constraints, ConstraintsSize, HasSideEffects, IsAlignStack, Dialect, CanThrow)` — builds the asm callee (LLVM 19/21 share this 9-arg signature). + * `LLVMInlineAsmDialectATT` / `LLVMInlineAsmDialectIntel`. + * `LLVMBuildCall2(...)` — already used pervasively in `src/ir/emit_llvm.zig` (e.g. 
the Obj-C msgSend path) — calls the asm value like a function. + * `LLVMAppendModuleInlineAsm(M, Asm, Len)` — module-level (global) asm. +* **The hard part is not codegen.** Codegen is ~80 lines of well-trodden LLVM-C. + The real work is (a) the parser grammar, (b) a faithful port of Zig's + *LLVM constraint-string assembly* and *`%[name]`→`${N}` template rewrite*, and + (c) Sema validation rules. All three are fully specified below. +* **Surface form (decided, §II.2):** `asm volatile { "tmpl", "=r" -> T, "r" = x, clobbers(.cc, .memory) }` + — a brace block; `->` marks outputs / `=` marks inputs (no positional `:` + sections); enum-literal `clobbers(.…)`; and N `-> Type` outputs return a + **tuple** (sx has tuples — Zig caps at one output). +* **Inline asm is never comptime-evaluable.** The interpreter must bail loudly + (`bailDetail`), per CLAUDE.md's "no silent unimplemented arms" rule. +* **One naming note:** sx already has a `sx asm <file>` *CLI subcommand* + (`src/main.zig:203,386`) that emits a `.s` file. That is a compiler output + mode, a different namespace from a language token. No conflict, but worth + knowing so nobody confuses the two. + +--- + +# PART I — How Zig implements inline assembly + +All file references in Part I are under `~/projects/zig` (0.16-dev, +commit `3deb86bafd`). Parser/AST/AstGen live in `lib/std/zig/`; Sema/AIR/codegen +in `src/`. + +## I.1 Surface syntax + +The canonical example (`doc/langref/inline_assembly.zig`), a Linux x86_64 syscall: + +```zig +pub fn syscall3(number: usize, arg1: usize, arg2: usize, arg3: usize) usize { + return asm volatile ("syscall" + : [ret] "={rax}" (-> usize), + : [number] "{rax}" (number), + [arg1] "{rdi}" (arg1), + [arg2] "{rsi}" (arg2), + [arg3] "{rdx}" (arg3), + : .{ .rcx = true, .r11 = true }); +} +``` + +Grammar shape: + +``` +asm volatile? (<template> + : <output-item>, <output-item>, ... # outputs (optional section) + : <input-item>, <input-item>, ... 
# inputs (optional section) + : <clobbers>) # clobbers (optional section) + +output-item : [name] "constraint" (-> Type) # asm result becomes the value + | [name] "constraint" (lvalue) # asm writes through the pointer +input-item : [name] "constraint" (expr) +clobbers : .{ .reg0 = true, .reg1 = true } # struct literal (0.16-dev) +``` + +Key semantics (from `doc/langref.html.in:4217-4300`): + +* **`volatile`** marks side effects. Without it, an asm expression whose result + is unused may be deleted. An asm expression with **no outputs must be + `volatile`** (else compile error). +* **x86/x86_64 use AT&T syntax** (LLVM provides the parser; Intel support is + "buggy and not well tested"). +* **`%[name]`** in the template refers to a named operand's register; **`%%`** is + a literal `%`. +* **Clobbers** are registers the asm trashes that are *not* inputs/outputs. + `"memory"` (the `.memory = true` field) means "writes to arbitrary memory." + Failing to declare a clobber is unchecked illegal behavior. +* **Global assembly** = an `asm(...)` in a namespace-level `comptime` block. It + has *different rules*: `volatile` is forbidden, there are **no inputs/outputs/ + clobbers**, no `%` substitution, and all global asm is concatenated verbatim: + + ```zig + // doc/langref/test_global_assembly.zig + comptime { + asm ( + \\.global my_func; + \\.type my_func, @function; + \\my_func: + \\ lea (%rdi,%rsi,1),%eax + \\ retq + ); + } + extern fn my_func(a: i32, b: i32) i32; // call into the global-asm symbol + ``` + +## I.2 Pipeline, stage by stage + +### Tokenizer — `lib/std/zig/tokenizer.zig` + +Two keywords in the `StaticStringMap`: `.{ "asm", .keyword_asm }` and +`.{ "volatile", .keyword_volatile }`. + +### AST — `lib/std/zig/Ast.zig` + +Four node tags (`Ast.zig:3789-3817`): + +* `asm_simple` — `asm(template)` only, no operands. +* `@"asm"` — full form; `data` is `node_and_extra` → (template node, `ExtraIndex` to an `Asm`). +* `asm_output` — `[a] "b" (-> Type)` or `[a] "b" (ident)`. 
+* `asm_input` — `[a] "b" (expr)`. + +The "full" view the rest of the compiler consumes (`Ast.zig:2797-2809`): + +```zig +pub const Asm = struct { + ast: Components, + volatile_token: ?TokenIndex, + outputs: []const Node.Index, + inputs: []const Node.Index, + pub const Components = struct { + asm_token: TokenIndex, + template: Node.Index, + items: []const Node.Index, // outputs ++ inputs, interleaved order preserved + clobbers: Node.OptionalIndex, // a comptime expression (the struct literal) + rparen: TokenIndex, + }; +}; +``` + +The on-disk extra record (`Ast.zig:3969-3975`) stores `items_start/items_end` +(a span into the node list), `clobbers` (optional node), and `rparen`. + +### Parser — `lib/std/zig/Parse.zig` + +`expectAsmExpr` (`Parse.zig:2771-2838`) implements the grammar: + +```zig +fn expectAsmExpr(p: *Parse) !Node.Index { + const asm_token = p.assertToken(.keyword_asm); + _ = p.eatToken(.keyword_volatile); + _ = try p.expectToken(.l_paren); + const template = try p.expectExpr(); + if (p.eatToken(.r_paren)) |rparen| { /* asm_simple */ } + _ = try p.expectToken(.colon); + // ... parse output items until a `:`/`)` ... + const clobbers: Node.OptionalIndex = if (p.eatToken(.colon)) |_| clobbers: { + // ... parse input items until a `:`/`)` ... + _ = p.eatToken(.colon) orelse break :clobbers .none; + break :clobbers (try p.expectExpr()).toOptional(); // clobbers = an expression + } else .none; + // ... +} +``` + +* `parseAsmOutputItem` (`Parse.zig:2840-2864`): + `LBRACKET IDENT RBRACKET STRINGLITERAL LPAREN (MINUSRARROW TypeExpr | IDENT) RPAREN`. +* `parseAsmInputItem` (`Parse.zig:2866-2883`): + `LBRACKET IDENT RBRACKET STRINGLITERAL LPAREN Expr RPAREN`. +* **Clobbers parse as a generic expression** (`(try p.expectExpr())`), not a + string list — this is the 0.16-dev change. It is later coerced to a + `std.lang.assembly.Clobbers` struct at Sema time. + +### AST → ZIR — `lib/std/zig/AstGen.zig` + +`asmExpr` (`AstGen.zig:8553-8669`) + `addAsm` (`12257-12310`). 
The ZIR payload +(`lib/std/zig/Zir.zig:2531-2564`): + +```zig +pub const Asm = struct { + src_node: Ast.Node.Offset, + asm_source: NullTerminatedString, // template (string-literal case) + output_type_bits: u32, // bit i = output i uses `-> T` (vs ptr) + clobbers: Ref, // comptime ref → assembly.Clobbers value + pub const Small = packed struct(u16) { is_volatile: bool, outputs_len: u7, inputs_len: u8 }; + pub const Output = struct { name: NullTerminatedString, constraint: NullTerminatedString, operand: Ref }; + pub const Input = struct { name: NullTerminatedString, constraint: NullTerminatedString, operand: Ref }; +}; +``` + +AstGen already enforces the structural rules: + +* Global (container-level) asm: rejects `volatile`, rejects any + outputs/inputs/clobbers (`AstGen.zig:8583-8587`). +* Local asm: **"assembly expression with no output must be marked volatile."** +* `outputs.len < 16`, `inputs.len < 32` (fit `Small.outputs_len`/`inputs_len`). +* At most one output may use the `-> T` form ("inline assembly allows up to one + output value"); `output_type_bits` records which. +* Two ZIR tags: `.@"asm"` (string-literal template) vs `.asm_expr` (comptime + expression template). + +### ZIR → AIR (Sema) — `src/Sema.zig` + +`zirAsm` (`Sema.zig:15044-15231`, dispatched at `1396-1397`). This is where all +*semantic* validation happens. It: + +* Resolves the template to a comptime string (`resolveConstString`). +* **Global asm** (`func_index == .none`): asserts no operands, then + `zcu.addGlobalAssembly(owner, asm_source)` and returns `.void_value`. +* `requireRuntimeBlock` — local asm can't run at comptime. +* Per output: if `-> T`, resolve the type, `ensureLayoutResolved`, set the + expression's result type; else resolve the operand pointer. Validates: + * **output type has a well-defined in-memory layout** (else error); + * **cannot output to a `const` pointer** (`"asm cannot output to const '{s}'"`); + * output must be a runtime value (no reference to a comptime var). 
+* Per input: resolve operand, reject comptime-only refs, **coerce + `comptime_int`→`usize`, `comptime_float`→`f64`**. +* Clobbers: coerce the expression to `std.lang.assembly.Clobbers`, resolve to a + comptime value. + +The AIR payload (`src/Air.zig:1485-1497`): + +```zig +pub const Asm = struct { + source_len: u32, + inputs_len: u32, + clobbers: InternPool.Index, // comptime assembly.Clobbers value + flags: packed struct(u32) { outputs_len: u31, is_volatile: bool }, +}; +// trailing: out operand refs, in operand refs, then the template bytes and +// (constraint\0 name\0) pairs packed into air_extra. +``` + +### AIR → LLVM — `src/codegen/llvm/FuncGen.zig` + +`airAssembly` (`FuncGen.zig:2473-2852`) is the crux. **This is the algorithm sx +must port.** Three sub-tasks: + +**(a) Assemble the LLVM constraint string.** Comma-separated. For each output: +emit `=` (write-only) or `+` (read-write, recorded in `llvm_rw_vals`); a `*` +prefix marks an *indirect* (memory) output passed as a pointer parameter; a +non-indirect output contributes to the return type. The user's leading `=`/`+` +in `constraint[0]` is consumed and re-emitted; the rest is copied with Zig +commas rewritten to LLVM `|` (alternative constraints). Inputs are copied +similarly (no `=`). Clobbers: iterate the `Clobbers` struct's bool fields as a +bigint; for each `true` field emit `~{fieldname}` (via `appendConstraints`, +which also expands target-specific aliases). + +**(b) Rewrite the template** `%[name]` → LLVM positional `${N}` (state machine, +`FuncGen.zig:2735-2802`): + +| input | output | note | +|---|---|---| +| `$` | `$$` | escape LLVM's `$` | +| `%%` | `%` | literal percent | +| `%=` | `${:uid}` | unique id | +| `%[name]` | `${N}` | `N` = position in `name_map` | +| `%[name:mod]` | `${N:mod}` | with modifier | + +`name_map` maps each operand's `[name]` to its positional index across all +outputs+inputs. 
+ +**(c) Build & call.** Pick the LLVM function type: +`return_count == 0` → `void`; `== 1` → the single return type; `> 1` → an +anonymous struct of the return types. Then: + +```zig +const call = try self.wip.callAsm( + attributes, llvm_fn_ty, + .{ .sideeffect = is_volatile }, // Assembly.Info: sideeffect/alignstack/inteldialect/unwind + rendered_template, llvm_constraints, llvm_param_values, ""); +``` + +`callAsm` (`lib/std/zig/llvm/Builder.zig:6131-6143`) is a thin wrapper that +builds the asm constant (`asmValue`) and emits a normal `call`. In LLVM-C terms +this is exactly `LLVMGetInlineAsm(...)` + `LLVMBuildCall2(...)`. Finally, +non-indirect outputs are read back: with one return it's the call result; with +several it's `extractvalue i` per output; indirect outputs were already written +by the asm via their pointer parameter. + +### C backend — `src/codegen/c.zig` + +No `airAssembly` for *inline* asm in the C backend in this tree; only global asm +flows out (as `module asm`). For sx this is irrelevant — sx only has an LLVM +backend. + +### Global asm & naked functions + +* **Global asm** bypasses everything above: `Sema.addGlobalAssembly` accumulates + the verbatim source; the LLVM object emits it via the module-level asm string + (LLVM-C: `LLVMAppendModuleInlineAsm`). Symbols it defines are reached with + `extern fn`. +* **Naked functions** (`callconv(.naked)`) drop the prologue/epilogue; the body + is entirely inline asm. This is an orthogonal calling-convention feature, not + part of the asm expression itself. + +--- + +# PART II — Proposal for sx + +## II.1 Design principles + +1. **Copy Zig's *semantic* model exactly**: a template + register/memory operands + + clobbers + a `volatile` flag; AT&T syntax via LLVM; "no-output asm must be + volatile"; `%[name]` substitution; AT&T-by-default. +2. 
**Copy the LLVM lowering exactly** (the constraint-string assembler + template + rewriter from `FuncGen.zig` are reproduced verbatim in §II.6 — these are the + parts where "inventing our own" would silently miscompile). +3. **Diverge from Zig's *surface* syntax where sx has a better-fitting idiom**, and + only there. The deviations (§II.2) are deliberate: a brace block instead of + `( … )`; `->`/`=` operand markers instead of positional `:` sections; an + enum-literal `clobbers(.…)` list; and — because sx has tuples and Zig does not — + **true multiple return values** instead of Zig's one-output cap. + +## II.2 sx surface syntax + +`asm` is an **expression** (it yields the output value/tuple), introduced by a new +`asm` keyword. The body is a **brace block** of comma-separated parts: a template +string first, then operands, then an optional `clobbers(.…)` clause. Each operand +is `[name]? "constraint" <role>`, where the role marker is: + +* **`-> Type`** — an **output** that produces a value (joins the result). +* **`-> @place`** — an output that writes through to existing storage (Phase 2). +* **`= expr`** — an **input** (the value fed in). + +`->` reuses sx's "produces" arrow (as in `(a: i32) -> i32`); `=` reuses sx's +"is set to" binding. There are no positional `:` sections. 
+ +```sx +// x86_64-linux — write(2) via syscall +sys_write :: (fd: i64, buf: [*]u8, len: u64) -> i64 { + return asm volatile { + "syscall", + "={rax}" -> i64, // output → the expression's value + "{rax}" = 1, // SYS_write + "{rdi}" = fd, + "{rsi}" = buf, + "{rdx}" = len, + clobbers(.rcx, .r11, .memory), + }; +} + +// read a register, no inputs, named operand for %[out] +sp :: () -> u64 { + return asm { "mov %%rsp, %[out]", [out] "=r" -> u64 }; +} +``` + +Multi-instruction templates use sx's existing **`#string` heredoc** +(`src/lexer.zig:402`) or a multi-line `"..."` literal — no new lexer feature: + +```sx +serialize :: () { + asm volatile { + #string ATT + mfence + lfence +ATT, + }; +} +``` + +**Outputs and the result type.** A `-> Type` output contributes one value to the +asm expression's result; the count decides the shape: + +| `-> Type` outputs | result | spelling | +|---|---|---| +| 0 | `void` (must be `volatile`) | `asm volatile { … }` | +| 1 | that type `T` | `x := asm { …, "=r" -> T };` | +| N | a **tuple** `(T1,…,Tn)` (declaration order) | `a, b := asm { … };` | + +A `[name]` on an output becomes a **named tuple field** — the same name you'd use +for `%[name]` does double duty: + +```sx +// sx has tuples, so asm gets real multiple return values (Zig caps you at one). +divmod :: (n: u64, d: u64) -> (quot: u64, rem: u64) { + return asm { + "divq %[d]", + [quot] "={rax}" -> u64, // → .quot (operand 0) + [rem] "={rdx}" -> u64, // → .rem (operand 1) + "{rax}" = n, + "{rdx}" = 0, + [d] "r" = d, + clobbers(.cc), + }; +} +q, r := divmod(17, 5); // q = 3, r = 2 +``` + +### Deviations from Zig (each deliberate; semantics unchanged) + +* **[DEVIATION 1 — brace block, not `( … )`.]** The asm body is `asm { … }`, a + comma-separated brace block (trailing comma allowed, per `specs.md:226,501`), + not Zig's parenthesised form. Braces read as "a block of code," which is what an + asm template is; `#string` heredoc templates especially benefit. 
`asm` is a + keyword, so `asm {` / `asm volatile {` is unambiguous. + +* **[DEVIATION 2 — `->`/`=` operand markers, not `:` sections.]** Zig groups + operands into positional `: outputs : inputs : clobbers` sections (count the + colons; `: :` for an empty one). sx tags each operand by role instead — `-> Type` + / `-> @place` (output) and `= expr` (input) — so the list is flat, + order-independent, with no positional colons. *(`<-` for inputs was considered + and rejected: it can't be a global token without mis-lexing the unspaced `a<-b` + (which today means `a < -b`); `=` reuses + an existing token and the existing "binding" meaning.)* + +* **[DEVIATION 3 — clobbers are an enum-literal list `clobbers(.cc, .memory)`.]** + Zig 0.16 uses a struct literal `: .{ .rcx = true }` coerced to a per-arch + `std.lang.assembly.Clobbers`; older Zig used a string list. sx uses a dot-literal + list, cleaner than both. **v1:** each `.name` is a dot-name lowered straight to + `~{name}` (`.memory`/`.cc` are recognized specials; register names pass through + verbatim; LLVM validates). **Phase 4:** upgrade `.name` to members of a + compile-time-checked per-arch `Clobber` enum — *same syntax*, gains typo-checking. + Note the call-looking `clobbers(…)` is a declarative clause, **not** a call — + nothing executes; it only feeds the register allocator. + +* **[DEVIATION 4 — `volatile` is a *contextual* keyword.]** sx's keyword set + (`specs.md:168`) has neither `asm` nor `volatile`. `asm` becomes a real keyword; + `volatile` appears *only* right after `asm`, so it can be recognized contextually + (a plain identifier everywhere else), avoiding reserving it globally. The surface + is byte-identical to Zig. (Alternative: reserve globally — simpler lexer, small + source-compat risk. Recommend contextual.) + +* **[DEVIATION 5 — multiple value-outputs return a tuple (sx ⊃ Zig).]** Zig allows + at most one `-> T` output; the rest must be pointer/lvalue outputs. 
sx has + tuples, so N `-> Type` outputs return `(T1,…,Tn)` (named when operands are + named), destructured with `a, b := …`. A deliberate *improvement* over Zig, + enabled by a feature Zig lacks, and maps onto LLVM's existing multi-output + struct return (§II.6). The other output flavor — `-> @place` write-through, plus + read-write (`"+r" -> @place`) and indirect-memory (`"=*m"`) outputs — is + **Phase 2** (needs indirect-constraint handling); the value-tuple form does not. + +* **[DEVIATION 6 — global asm is a top-level `asm { … }` declaration.]** sx has no + namespace-level `comptime {}` block (it has `#run`, `specs.md:2598`), so global + asm is a top-level statement: + + ```sx + asm { + #string ATT + .global my_func + .type my_func, @function + my_func: + lea (%rdi,%rsi,1), %eax + retq +ATT, + }; + + my_func :: (a: i32, b: i32) -> i32 #foreign; // extern, no library — valid sx today + ``` + + Only the `comptime {}` wrapper is dropped; lowers to `LLVMAppendModuleInlineAsm`. + + **Calling the asm symbol reuses the C-FFI *import* path** (no new mechanism for + v1) — but note `#foreign` is **not** a general `extern`. A lib-less `#foreign` + (its library is optional: `src/parser.zig:319-325`; used in 50+ stdlib sites, + e.g. `chdir :: (path: [*]u8) -> i32 #foreign;`) emits exactly the artifact needed + to *call into* the asm symbol — an external-linkage, **C-calling-convention**, + raw-named (`emit_llvm.zig:1279`), link-time-resolved declaration — the same thing + Zig's `extern fn` produces (Zig's `extern fn` is also C-callconv). At the IR level + `is_extern` is set straight from `is_foreign` (`decl.zig:1123`) and `#foreign` + forces the C ABI (`decl.zig:2110`). 
The two real differences from a general + `extern`: (1) `#foreign` is **import-only** — sx has no `#export`/linkname, so the + reverse direction (asm calling *back into* an sx function) is unsupported; (2) it + carries C-ABI marshaling and reads as "a foreign C function," a category-borrow + for a symbol your own module defines. It is the right *mechanism* but an imperfect + *spelling*; a dedicated `#extern`/linkname is an open question (§II.10). + (`specs.md:1209` was corrected to drop the false "library mandatory" claim.) + +Everything *semantic* — comptime-known template, register/memory constraints +verbatim to LLVM, clobber meaning, "no-output ⇒ must be volatile," AT&T default, +`%[name]`/`%%` substitution — is **identical to Zig**. Only the surface (block, +`->`/`=`, `clobbers(.…)`, tuple returns) differs. + +## II.3 sx AST + +sx's AST is a pointer-based tagged union (`Data = union(enum)` at +`src/ast.zig:13`, nodes built via `Parser.createNode`), much simpler than Zig's +SoA `extra_data` scheme — so we can store slices directly. Add one arm next to +`foreign_expr` (`src/ast.zig:85`): + +```zig +// in Node.Data union(enum): +asm_expr: AsmExpr, + +// new, near ForeignExpr (src/ast.zig:721): +pub const AsmExpr = struct { + template: *Node, // string-literal / #string node (comptime string) + is_volatile: bool = false, + operands: []const AsmOperand, // declaration order preserved (= %N indexing) + clobbers: []const []const u8, // dot-names from clobbers(.…): "rcx","cc","memory" +}; + +pub const AsmOperand = struct { + name: ?[]const u8 = null, // optional [name]; only needed for %[name] + constraint: []const u8, // verbatim, e.g. 
"={rax}", "=r", "+r", "{rdi}", "r" + role: Role, + payload: *Node, // out_value → Type node; out_place/input → expr node + + pub const Role = enum { + out_value, // `-> Type` value output; N of these → a tuple result + out_place, // `-> @place` write-through to existing storage (Phase 2) + input, // `= expr` + }; +}; +``` + +A single flat `operands` list (not split into outputs/inputs) preserves source +order. The `%0`/`%[name]` indices and the LLVM constraint order are derived from it +by a stable partition — all outputs first, then all inputs, each group in source +order (§II.6). The +result type is derived in Sema from the `out_value` operands (§II.5). + +## II.4 sx parser + +`asm` is parsed in expression position. sx dispatches primary expressions in +`Parser.parsePrimary` (`src/parser.zig`); add a `.kw_asm` case (mirroring how +existing keyword/`#`-directive expressions like `#run` are handled): + +1. consume `asm`; contextually consume `volatile` if the next token is the word + `volatile` (Deviation 4). +2. `expect(.l_brace)`; parse the first element as the **template** expression. +3. then a comma-separated list until `}`. Each element is either: + * an **operand** — `[name]?` (a bracketed identifier), a string-literal + constraint, then a role: `->` `Type` (out_value) · `->` `@`-place + (out_place, Phase 2) · `=` `expr` (input); or + * the **clobbers clause** — `clobbers` `(` `.`ident (`,` `.`ident)* `)`. +4. allow a trailing comma; `expect(.r_brace)`; + `createNode(start, .{ .asm_expr = … })`. + +The first element is unambiguously the template (a string not followed by a role +marker). `->` vs `=` after the constraint disambiguates output vs input; inside a +`->` target, a leading `@` marks a write-through place vs a type. + +Top-level/global asm (Deviation 6): recognize `asm {` at declaration scope and +build a dedicated `asm_global` decl (template only — reject operands/`volatile`). + +Lexer/token: add `kw_asm` to the `Token.Tag` enum + keyword `StaticStringMap` in +`src/token.zig`; `volatile` and `clobbers` stay out of the global table +(contextual). 
**No new operator tokens** — `->` (`arrow`), `=` (`equal`), `.` +(`dot`) and `{}` already exist. + +## II.5 sx Sema / typing + +* **Result type** from the `out_value` operands (`-> Type`), in declaration order: + 0 → `void` (and the asm **must** be `volatile`); 1 → that operand's type `T`; + N → a tuple `(T1,…,Tn)`, **named** when the operands carry `[name]`s + (`(name1: T1, …)`), positional otherwise. Implement in the expression typer + (`src/ir/expr_typer.zig` / wherever `inferExprType` lives), returning the resolved + `TypeId` (a tuple `TypeId` for N>1). **Do not** fall back to a silent default — an + unresolvable output type is a real error (CLAUDE.md silent-default rule): emit a + diagnostic and return the project's `.unresolved` sentinel. +* Port Zig's validation checklist (these are the user-facing error messages): + 1. no output operand ⇒ the asm **must** be `volatile`; + 2. each `out_value` result type must have a well-defined in-memory layout; + 3. inputs must be runtime values; coerce comptime int→`i64`, float→`f64`; + 4. template must be a comptime-known string; + 5. (Phase 2) `out_place` cannot write a `const`; indirect-memory rules. +* Every `%[name]` referenced in the template must name an operand (best surfaced as + a Sema diagnostic; also caught at codegen during the rewrite — §II.6). + +Note: there is **no** "≤1 output" rule (that was Zig's limit; sx's tuples lift it). + +## II.6 sx IR + LLVM codegen (the part that must match Zig bit-for-bit) + +### IR op — `src/ir/inst.zig` + +Add to `Op = union(enum)` (`src/ir/inst.zig:80`), next to `objc_msg_send` +(`:219`). 
Strings are interned (`StringId`, as `const_string` at `:85`); operands +are SSA `Ref`s: + +```zig +inline_asm: InlineAsm, + +pub const InlineAsm = struct { + template: StringId, // interned, RAW (rewritten at emit) + operands: []const AsmOperand, // declaration order (= %N indexing) + clobbers: []const StringId, // interned dot-names: "rcx","cc","memory" + has_side_effects: bool, + // result rides on Inst.ty: void / a scalar TypeId / a tuple TypeId (N outputs) +}; + +pub const AsmOperand = struct { + role: enum { out_value, out_place, input }, + name: StringId, // .none when unnamed + constraint: StringId, // verbatim "={rax}" / "=r" / "+r" / "{rdi}" + operand: Ref, // out_value → .none; out_place/input → the Ref +}; +``` + +### Lowering — `src/ir/lower/expr.zig` + +Add `.asm_expr => self.lowerAsmExpr(...)` to the `lowerExpr` dispatch. It interns +the template + constraint strings + clobber names, lowers each input operand to a +`Ref`, computes the result `TypeId` (§II.5), and emits the `inline_asm` op. (Same +shape as the existing `objc_msg_send` lowering.) + +### Emit — `src/ir/emit_llvm.zig` + +Add `.inline_asm => self.emitInlineAsm(...)` to the `emitInst` dispatch. This is a +**direct port of `FuncGen.airAssembly`**. 
Using the already-imported +`llvm_api.c`: + +```zig +fn emitInlineAsm(self: *Emitter, inst: *const Inst, a: InlineAsm) void { + // 1) result LLVM type + param types/values from constraints + const ret_ty = self.lowerType(inst.ty); // void if no typed output + var param_tys: ...; var args: ...; // one per `input` constraint + // 2) assemble the LLVM constraint string (see algorithm below) + // outputs first ("=..."/"+..."), then inputs, then "~{reg}" clobbers, comma-joined + // 3) rewrite the template %[name]->${N}, %%->%, %=->${:uid}, $->$$ (state machine below) + const fn_ty = c.LLVMFunctionType(ret_ty, param_tys.ptr, n_params, 0); + const asm_val = c.LLVMGetInlineAsm( + fn_ty, + rendered_template.ptr, rendered_template.len, + constraint_str.ptr, constraint_str.len, + @intFromBool(a.has_side_effects), // HasSideEffects (volatile) + 0, // IsAlignStack + c.LLVMInlineAsmDialectATT, // AT&T (Deviation: none — matches Zig default) + 0, // CanThrow + ); + const result = c.LLVMBuildCall2(self.builder, fn_ty, asm_val, args.ptr, n_params, ""); + self.mapRef(inst, result); // 1 output: the value; N: extractvalue i per out_value → tuple +} +``` + +(Optionally cache the asm value keyed by `(template, constraints, fn_ty)` the way +`emit_llvm.zig:167` caches `objc_msg_send_value` — but per-site construction is +fine; LLVM uniques inline-asm constants internally.) 
+ +**Constraint-string assembler (port of `FuncGen.airAssembly`):** + +``` +parts = [] +for op in operands where role == out_value or out_place: # outputs first + parts.append( op.constraint with ',' replaced by '|' ) # "={rax}", "=r", "+r" … +for op in operands where role == input: + parts.append( op.constraint with ',' replaced by '|' ) # "{rdi}", "r" … +for name in clobbers: # from clobbers(.name,…) + parts.append( "~{" + name + "}" ) # "~{rcx}", "~{cc}", "~{memory}" +constraint_str = ",".join(parts) +``` + +LLVM return type follows the `out_value` count: **0** → `void`; **1** → that type; +**N** → an anonymous struct `{T1,…,Tn}` — after the call, `extractvalue i` per +`out_value` builds the sx tuple (the multi-return path, §II.2 Dev 5). `out_place` +outputs are `store`d through their `Ref` afterward instead. + +For `sys_write` (one output): constraint +`={rax},{rax},{rdi},{rsi},{rdx},~{rcx},~{r11},~{memory}`, `fn_ty = i64 (i64,i64,ptr,i64)`, +`args = [1, fd, buf, len]`, `sideeffect = true`. For `divmod` (two outputs): +`={rax},={rdx},{rax},{rdx},r,~{cc}`, `fn_ty = {i64,i64} (i64,i64,i64)`, and the two +`extractvalue`s become the `(quot, rem)` tuple. + +**Template rewriter (port verbatim from `FuncGen.zig:2735-2802`):** state machine +over the template bytes with a `name_map: [name] -> positional index` built from +`outputs ++ inputs`: + +``` +state start: '%' -> percent ; '$' -> emit "$$" ; else emit byte +state percent: '%' -> emit '%', start + '[' -> emit "${", state input + '=' -> emit "${:uid}", start + else -> emit '%', emit byte, start +state input: ']' -> emit name_map[name], emit '}', start + ':' -> emit name_map[name], emit ':', state modifier + else accumulate name +state modifier:']' -> emit accumulated modifier, emit '}', start + else accumulate +``` + +An unknown `%[name]` is a hard error (mirror Zig's `todo`/diagnostic — **not** a +silent pass-through; CLAUDE.md no-silent-arms rule). 
+ +### Interpreter — `src/ir/interp.zig` + +Inline asm cannot be comptime-evaluated. In the interpreter's op switch: + +```zig +.inline_asm => return bailDetail("inline asm requires native execution; not available at comptime"), +``` + +(Same `bailDetail` pattern as the Obj-C/JNI ops — surfaces `op=inline_asm: ...` +rather than a silent default.) + +### Global asm (Deviation 6) + +Lower the top-level `asm_global` decl to a one-shot emit: +`c.LLVMAppendModuleInlineAsm(module, src.ptr, src.len)` (present in the linked +LLVM — `@19/include/llvm-c/Core.h:971`). No operands, no rewrite, no volatile; +multiple blocks concatenate in source order (as Zig does). + +**Calling into an asm-defined symbol needs no new machinery** — declare it with a +lib-less `#foreign` (Deviation 6, §II.2): `my_func :: (sig) -> R #foreign;` emits +an external-linkage, raw-named, C-ABI extern that the linker resolves against the +`.global` the asm block defines. + +**Guard (CLAUDE.md no-silent-arms):** a global-asm symbol exists only in the final +linked binary, not in the `#run`/JIT host process. The interpreter resolves +externs via `dlsym(RTLD_DEFAULT)` (`host_ffi.zig`), which won't find it — calling +such a symbol at comptime must fail **loudly** (it should already, via the +dlsym-miss diagnostic; pin it with a test). Edge case: a symbol referenced *only* +by other asm/external code may need `llvm.used` / `.no_dead_strip` to survive +dead-stripping; the common "sx references it" case is safe. 
+ +## II.7 Stage-to-file map (implementation checklist) + +| Stage | Zig reference | sx file + insertion point | New code | +|---|---|---|---| +| Keyword | `tokenizer.zig` keywords | `src/token.zig` — `Token.Tag` + keyword `StaticStringMap` | `kw_asm` (+ contextual `volatile`) | +| AST node | `Ast.zig:2797,3789` | `src/ast.zig:13,85,721` — `Node.Data` + new `AsmExpr`/`AsmOperand` | ~25 lines | +| Parser | `Parse.zig:2771-2883` | `src/parser.zig` — `parsePrimary` `.kw_asm` case + global-asm at decl scope | ~120 lines | +| Sema/typing | `Sema.zig:15044` | `src/ir/expr_typer.zig` (`inferExprType`) + validation | ~80 lines | +| IR op | `Air.zig:1485`, `Zir.zig:2531` | `src/ir/inst.zig:80` — `inline_asm: InlineAsm` | ~25 lines | +| Lowering | `AstGen.zig:8553` | `src/ir/lower/expr.zig` — `lowerExpr` `.asm_expr` case | ~60 lines | +| LLVM emit | `FuncGen.zig:2473-2852` | `src/ir/emit_llvm.zig` — `emitInst` `.inline_asm` case | ~120 lines (constraint asm + template rewrite + `LLVMGetInlineAsm`/`BuildCall2`) | +| Global asm | `Sema.addGlobalAssembly` + `module asm` | decl lowering → `c.LLVMAppendModuleInlineAsm` | ~15 lines | +| Interp bail | n/a | `src/ir/interp.zig` op switch | 1 line | + +No change to `src/codegen.zig` is needed (the IR/LLVM path owns this). + +## II.8 Phasing + +* **Phase 1 (MVP).** `asm { … }` block; `asm volatile`; string-literal/`#string` + template; `= expr` inputs; `-> Type` outputs **including N→tuple multi-return**; + `clobbers(.…)` dot-name list; `%[name]`/`%%` substitution; "no-output ⇒ volatile" + check; AT&T. Target: Linux/macOS `x86_64` + `aarch64` syscalls, intrinsics, and + multi-value ops (`divmod`, `cpuid`, `add_carry`). +* **Phase 2.** `-> @place` write-through outputs, read-write (`"+r" -> @place`) and + indirect-memory (`"=*m"`) constraints, `%=` unique-id, output-to-const rejection. +* **Phase 3.** Global/module asm decl (`LLVMAppendModuleInlineAsm`) + the + comptime-call guard, plus Intel-dialect opt-in. 
Small: the extern-call path + already exists (lib-less `#foreign`). +* **Phase 4 (optional).** Upgrade `clobbers(.name)` from dot-name sugar to a + compile-time-checked per-architecture `Clobber` enum (typo-checking; same syntax). +* **Phase 5 (optional).** Naked functions (`callconv`-equivalent) for full + freestanding entry points. + +## II.9 Testing + +asm output is target-specific, so tests must pin a target and assert on +emitted IR/exit, not run host-natively unless the host matches. Use the existing +corpus harness and the **`16xx` platform block** (the closest fit in the +`XXXX-category` scheme; `specs.md`/CLAUDE.md test-layout). Mirror Zig's own +matrix: + +* `examples/16xx-platform-asm-syscall-write.sx` — x86_64-linux write(2), assert exit/stdout. +* `examples/16xx-platform-asm-register-read.sx` — `mov %%rsp,%[out]`, no-input output. +* `examples/16xx-platform-asm-no-output-volatile.sx` — bare `asm volatile { "nop" }`. +* `examples/16xx-platform-asm-missing-volatile.sx` — **expected compile error** + (no output, no volatile) — pins the diagnostic. +* `examples/16xx-platform-asm-template-subst.sx` — `%[a]`/`%%` rewriting, assert + on the `sx ir`/`.s` snapshot. +* `examples/16xx-platform-asm-multi-return.sx` — `divmod` → `(quot, rem)` tuple, destructured. +* `examples/16xx-platform-asm-global.sx` (Phase 3) — global asm + extern call. + +Add an IR/`.s` snapshot (`expected/*.ir`) for the substitution test so the +constraint-string + template-rewrite output is locked. Seed markers and +regenerate with `zig build test -Dupdate-goldens`, then review the diff +(CLAUDE.md snapshot-integrity rule). + +## II.10 Open decisions for the user + +Largely settled through design review; what remains: + +1. **Dialect:** AT&T only (Zig's default) for v1, or expose an Intel opt-in + (`LLVMInlineAsmDialectIntel`) from the start? **Recommend AT&T-only v1.** +2. 
**`volatile` keyword (Deviation 4):** contextual *(recommended, no + source-compat risk)* vs globally reserved *(simpler lexer)*. +3. **Brace separator:** comma *(recommended — trailing-comma-friendly, + literal-style)* vs `;` *(matches sx statement blocks)*. +4. **Asm-symbol extern spelling (Deviation 6):** reuse lib-less `#foreign` for v1 + *(works, zero new surface — but it is a C-FFI **import** binding: import-only, + C-ABI, spelled "foreign")* vs a dedicated `#extern`/linkname *(cleaner spelling, + and the only path that could also **export** an sx symbol so asm can call back + in — which `#foreign` cannot)*. **Recommend `#foreign` for v1**; revisit + `#extern` if/when asm-calls-into-sx or non-C-ABI symbols are needed. + +*Decided:* brace block `{ … }` (Dev 1) · `->`/`=` markers, `:` sections dropped, +`<-` rejected (Dev 2) · `clobbers(.…)` enum-literal list, dot-name sugar now → +checked enum later (Dev 3) · multiple value-outputs return a tuple (Dev 5). For +global asm (Dev 6) the call-*into*-asm direction reuses lib-less `#foreign` +(`specs.md:1209` updated); the extern *spelling* is open decision 4 above. + +## II.11 Risks + +* **Constraint/template correctness is silent if wrong** — a bad constraint + string miscompiles with no diagnostic. Mitigation: port Zig's assembler/rewrite + verbatim (don't paraphrase) and lock IR snapshots in tests. +* **Register-name validity is unchecked** in v1's `clobbers(.name)` dot-name form — + a typo'd register (`.raxx`) surfaces only as an LLVM error. This is exactly the + gap the Phase-4 checked `Clobber` enum closes; acceptable for v1 (LLVM validates + the emitted `~{…}`). +* **`#string` heredoc + AT&T `%`/`$`** interplay: ensure the heredoc delivers the + template bytes literally (no sx-level escape processing of `%`/`$`) before the + rewrite stage. +* **Target gating:** asm examples must declare their target or they break the + corpus on other hosts; the test plan pins targets. 
+ +--- + +## Appendix A — exact LLVM-C calls (already reachable via `llvm_api.c`) + +```c +// src/llvm_api.zig @cInclude("llvm-c/Core.h") exposes all of these: +LLVMValueRef LLVMGetInlineAsm(LLVMTypeRef Ty, + const char *AsmString, size_t AsmStringSize, + const char *Constraints, size_t ConstraintsSize, + LLVMBool HasSideEffects, LLVMBool IsAlignStack, + LLVMInlineAsmDialect Dialect, LLVMBool CanThrow); // LLVM 19 & 21: identical +LLVMValueRef LLVMBuildCall2(LLVMBuilderRef, LLVMTypeRef, LLVMValueRef Fn, + LLVMValueRef *Args, unsigned NumArgs, const char *Name); +void LLVMAppendModuleInlineAsm(LLVMModuleRef M, const char *Asm, size_t Len); // global asm +// enum: LLVMInlineAsmDialectATT, LLVMInlineAsmDialectIntel +``` + +## Appendix B — file index + +**Zig (reference, `~/projects/zig`):** `lib/std/zig/tokenizer.zig` (keywords) · +`lib/std/zig/Ast.zig:2797,3789,3969` (nodes) · `lib/std/zig/Parse.zig:2771-2883` +(grammar) · `lib/std/zig/AstGen.zig:8553-8669,12257` + `lib/std/zig/Zir.zig:2531` +(ZIR) · `src/Sema.zig:15044-15231` (validation) · `src/Air.zig:1485` (AIR) · +`src/codegen/llvm/FuncGen.zig:2473-2852` + `lib/std/zig/llvm/Builder.zig:6131` +(LLVM) · `doc/langref/inline_assembly.zig`, `doc/langref/test_global_assembly.zig` +(syntax) · `doc/langref.html.in:4217-4300` (spec). + +**sx (target, `~/projects/sx`):** `src/token.zig` · `src/lexer.zig:402` (#string) · +`src/ast.zig:13,85,721` · `src/parser.zig` (`parsePrimary`), `:319` (optional +`#foreign` library) · `src/ir/expr_typer.zig` · `src/ir/inst.zig:80,219,260` · +`src/ir/lower/expr.zig` · `src/ir/module.zig:300` (`declareExtern`) · +`src/ir/emit_llvm.zig:167` (msgSend cache), `:1244` (extern⇒C-ABI), `:1279` +(raw symbol name) · `src/ir/interp.zig` (`bailDetail`) · `src/llvm_api.zig:1-17` · +`build.zig:10` (LLVM@19). 
+ +## Appendix C — Cookbook (final form: `asm { … }`, `->`/`=`, `clobbers(.…)`, pure AT&T) + +```sx +// ── v1 ──────────────────────────────────────────────────────────────────── + +asm volatile { "nop" }; // bare side-effecting + +// write(2) syscall — register-pinned inputs, one value-output +sys_write :: (fd: i64, buf: [*]u8, len: u64) -> i64 { + return asm volatile { + "syscall", + "={rax}" -> i64, + "{rax}" = 1, "{rdi}" = fd, "{rsi}" = buf, "{rdx}" = len, + clobbers(.rcx, .r11, .memory), + }; +} + +// mmap — full 6-arg syscall ABI (arg4 in r10, not rcx) +mmap :: (addr: *void, len: u64, prot: i32, flags: i32, fd: i32, off: i64) -> *void { + return asm volatile { + "syscall", + "={rax}" -> *void, + "{rax}" = 9, "{rdi}" = addr, "{rsi}" = len, "{rdx}" = prot, + "{r10}" = flags, "{r8}" = fd, "{r9}" = off, + clobbers(.rcx, .r11, .memory), + }; +} + +// AT&T scaled-index addressing — arr[i] +load_idx :: (arr: *i64, i: u64) -> i64 { + return asm { + "movq (%[arr],%[i],8), %[out]", + [out] "=r" -> i64, [arr] "r" = arr, [i] "r" = i, + }; +} + +// CPUID AVX probe — immediates, heavy clobber set, single value-result +has_avx :: () -> bool { + return asm volatile { + #string ATT + movl $1, %%eax + cpuid + andl $0x10000000, %%ecx + setne %[ok] +ATT, + [ok] "=r" -> bool, + clobbers(.rax, .rbx, .rcx, .rdx, .cc), + }; +} + +// SSE packed add — xmm regs, no outputs ⇒ volatile +vadd4 :: (a: *f32, b: *f32, out: *f32) { + asm volatile { + #string ATT + movups (%[a]), %%xmm0 + movups (%[b]), %%xmm1 + addps %%xmm1, %%xmm0 + movups %%xmm0, (%[out]) +ATT, + [a] "r" = a, [b] "r" = b, [out] "r" = out, + clobbers(.xmm0, .xmm1, .memory), + }; +} + +// ── multi-return (v1; sx has tuples, Zig caps at one output) ──────────────── + +// 64-bit divide → (quotient, remainder) +divmod :: (n: u64, d: u64) -> (quot: u64, rem: u64) { + return asm { + "divq %[d]", + [quot] "={rax}" -> u64, + [rem] "={rdx}" -> u64, + "{rax}" = n, "{rdx}" = 0, [d] "r" = d, + clobbers(.cc), + }; +} + +// rdtsc → two 
32-bit halves, destructured straight out of the asm +rdtsc :: () -> u64 { + lo, hi := asm volatile { + "rdtsc", + [lo] "={eax}" -> u32, + [hi] "={edx}" -> u32, + }; + return (xx hi << 32) | xx lo; +} + +// cpuid → a clean 4-tuple +cpuid :: (leaf: u32, subleaf: u32) -> (eax: u32, ebx: u32, ecx: u32, edx: u32) { + return asm volatile { + "cpuid", + [eax] "={eax}" -> u32, [ebx] "={ebx}" -> u32, + [ecx] "={ecx}" -> u32, [edx] "={edx}" -> u32, + "{eax}" = leaf, "{ecx}" = subleaf, + }; +} + +// add-with-carry → (sum, carry): value-output + tied input + flag capture +add_carry :: (a: u64, b: u64) -> (sum: u64, carry: u8) { + return asm { + #string ATT + addq %[b], %[sum] + setc %[carry] +ATT, + [sum] "=r" -> u64, + [carry] "=r" -> u8, + [a] "0" = a, [b] "r" = b, + clobbers(.cc), + }; +} + +// ── Phase 2 (write-through / read-write / indirect) ───────────────────────── + +// byte memcpy — labels, loop, read-write operands +memcpy_bytes :: (dst: [*]u8, src: [*]u8, n: u64) { + d := dst; s := src; c := n; + asm volatile { + #string ATT + testq %[c], %[c] + jz 2f + 1: movb (%[s]), %%al + movb %%al, (%[d]) + incq %[s] + incq %[d] + decq %[c] + jnz 1b + 2: +ATT, + [d] "+r" -> @d, [s] "+r" -> @s, [c] "+r" -> @c, + clobbers(.rax, .cc, .memory), + }; +} + +// lock cmpxchg CAS — lock prefix, pinned read-write rax, two outputs +cas :: (ptr: *i64, expected: i64, desired: i64) -> bool { + old := expected; ok: bool = ---; + asm volatile { + #string ATT + lock cmpxchgq %[desired], (%[ptr]) + sete %[ok] +ATT, + [ok] "=r" -> @ok, + [old] "+{rax}" -> @old, + [ptr] "r" = ptr, + [desired] "r" = desired, + clobbers(.cc, .memory), + }; + return ok; +} + +// fill an existing struct (write-through, no tuple) +cpuid_into :: (out: *CpuId, leaf: u32) { + asm volatile { + "cpuid", + "={eax}" -> @out.eax, "={ebx}" -> @out.ebx, + "={ecx}" -> @out.ecx, "={edx}" -> @out.edx, + "{eax}" = leaf, + }; +} +``` + +Global asm + extern (Phase 3): + +```sx +asm { + #string ATT + .global my_add + my_add: + lea 
(%rdi,%rsi,1), %eax + retq +ATT, +}; +my_add :: (a: i32, b: i32) -> i32 #foreign; // lib-less extern = Zig's `extern fn` +```