Compare commits
40 Commits
d6a9c4f0c4
...
ded106333b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ded106333b | ||
|
|
b6a7378af4 | ||
|
|
0e0ee40528 | ||
|
|
066ba54346 | ||
|
|
79042ab9ab | ||
|
|
17e3b91eb9 | ||
|
|
a0face7571 | ||
|
|
10f4137cbd | ||
|
|
c187122531 | ||
|
|
1346a2d020 | ||
|
|
e7eeecc0f3 | ||
|
|
b4d1ce78c3 | ||
|
|
73f5f0ed11 | ||
|
|
ab7fc393b6 | ||
|
|
66e1e39418 | ||
|
|
e954f044d8 | ||
|
|
d5aee7a222 | ||
|
|
cb6c032c58 | ||
|
|
2a43713d7f | ||
|
|
59469f2b2f | ||
|
|
cdd920b692 | ||
|
|
9e7661b915 | ||
|
|
2a954ceeb6 | ||
|
|
c760b92548 | ||
|
|
97a4050462 | ||
|
|
4128416d48 | ||
|
|
335ac52374 | ||
|
|
967005621a | ||
|
|
b8800a234c | ||
|
|
4d75b9323c | ||
|
|
d3c6ffed5a | ||
|
|
5a5e04c6d5 | ||
|
|
6c08de8ec1 | ||
|
|
5f444aae26 | ||
|
|
1040b8c776 | ||
|
|
f8e029d719 | ||
|
|
3c9ecd0b42 | ||
|
|
c92d11e748 | ||
|
|
0095584105 | ||
|
|
c88f4fbcef |
25
CLAUDE.md
25
CLAUDE.md
@@ -431,10 +431,12 @@ After any compiler change:
|
||||
- A test is still keyed off its `expected/<name>.exit` marker, so seed an
|
||||
empty marker first for a brand-new example (see "Adding a feature").
|
||||
`zig build test` is the only way to run the corpus — there is no standalone
|
||||
shell runner (the legacy `tests/run_examples.sh` was removed). An
|
||||
`expected/<name>.aot` marker switches an example from JIT `sx run` to a
|
||||
`sx build` + execute flow (needed to exercise a C-ABI symbol exported FROM sx
|
||||
— a JIT-resident symbol is invisible to a dlopen'd C dylib).
|
||||
shell runner (the legacy `tests/run_examples.sh` was removed). Per-example
|
||||
build/run directives live in an optional `expected/<name>.build` **JSON** sidecar
|
||||
(see "Test layout" below): `{ "aot": true }` switches an example from JIT `sx run`
|
||||
to a `sx build` + execute flow (needed to exercise a C-ABI symbol exported FROM sx
|
||||
— a JIT-resident symbol is invisible to a dlopen'd C dylib); `{ "target":
|
||||
"x86_64-linux" }` threads `--target` and arch-gates the example.
|
||||
|
||||
### Test layout
|
||||
|
||||
@@ -453,6 +455,7 @@ split into three streams (no more merged `2>&1`) plus an optional IR snapshot:
|
||||
<root>/expected/XXXX-category-name.stdout # normalized stdout
|
||||
<root>/expected/XXXX-category-name.stderr # normalized stderr
|
||||
<root>/expected/XXXX-category-name.ir # optional `sx ir` snapshot
|
||||
<root>/expected/XXXX-category-name.build # optional JSON build/run directives
|
||||
```
|
||||
|
||||
A test is any `<name>.sx` with an `expected/<name>.exit` marker. The runner
|
||||
@@ -460,6 +463,20 @@ scans two roots: `examples/` (the feature suite) and `issues/` (pinned bug
|
||||
repros). Multi-file tests keep companions (`.c`/`.h`, imported `.sx`, fixture
|
||||
dirs) under the same `XXXX-` prefix.
|
||||
|
||||
The optional `<name>.build` JSON sidecar carries per-example directives
|
||||
(unknown keys are a hard error — never silently ignored):
|
||||
|
||||
- `"aot": true` — build a native binary and execute it instead of JIT `sx run`.
|
||||
- `"target": "<triple|shorthand>"` — thread `--target` into every `sx`
|
||||
invocation and gate on the host. If the target's arch+os **match** the host,
|
||||
the example runs normally; if they **mismatch** (e.g. `x86_64-linux` on an
|
||||
aarch64 host), the runner switches to **ir-only** mode — it skips
|
||||
run/build/exec and asserts only `.exit` + `.ir` + `.stderr` from
|
||||
`sx ir --target` (`.stdout` is not asserted). An `.ir` snapshot is **required**
|
||||
in ir-only mode (its absence is a loud failure). This is how arch-pinned
|
||||
examples (e.g. x86_64 inline-asm) are tested on a non-matching dev host while
|
||||
still running end-to-end on a matching CI runner.
|
||||
|
||||
### Snapshot integrity
|
||||
|
||||
**Never regenerate snapshots while tests are failing.** `-Dupdate-goldens` (and the legacy `--update`) blindly overwrite expected output with whatever the compiler produces — including error messages. If you regenerate during a broken state, the test suite will "pass" against garbage output and real regressions become invisible.
|
||||
|
||||
@@ -1,24 +1,394 @@
|
||||
# sx Inline Assembly — Checkpoint (ASM stream)
|
||||
|
||||
Companion to `current/PLAN-ASM.md`; design in
|
||||
[docs/inline-asm-design.md](../docs/inline-asm-design.md). Update after every
|
||||
[design/inline-asm-design.md](../design/inline-asm-design.md). Update after every
|
||||
commit, one step at a time per the cadence rule (no commit may both add a test
|
||||
and make it pass).
|
||||
|
||||
## Last completed step
|
||||
None — plan authored, not yet started.
|
||||
**G (indirect-memory `=*m` place outputs)** — the LAST substantive asm feature.
|
||||
Unlike a write-through `=` output (which returns a value that is then stored), an
|
||||
indirect output passes the place ADDRESS to the asm and the asm writes through
|
||||
it — no return slot. `emitInlineAsm` (`src/backend/llvm/ops.zig`): indirect
|
||||
outputs are excluded from the LLVM return type; their pointer is an opaque `ptr`
|
||||
call arg placed **first** (arg-consuming constraint order = output-section
|
||||
indirect pointers → inputs → read-write tied seeds); each gets an
|
||||
`elementtype(T)` call-site attribute (required in the opaque-pointer era) via
|
||||
`LLVMCreateTypeAttribute`/`LLVMAddCallSiteAttribute`; the store-back loop skips
|
||||
them. New `asmIsIndirect(e, op)` helper. Lowering (`lowerAsmExpr`) stops
|
||||
rejecting `*` (constraint kept verbatim, `=*m` reaches the constraint string
|
||||
as-is). `asmOperandIndex` unchanged — indirect outputs still count as operands,
|
||||
so `%[name]`→`${N}` holds. Verified by **running** on aarch64: store-through-
|
||||
pointer (`str x9, %[out]` → 42, IR `"=*m,~{x9}"(ptr elementtype(i64) …)`) and a
|
||||
mixed case (indirect + value output + input → `"=*m,=r,r"`, indirect ptr arg
|
||||
first, `${0}/${1}/${2}` correct). Two commits per cadence: (1)
|
||||
`examples/1652-platform-asm-indirect-mem.sx` locked the rejection; (2) implemented
|
||||
+ flipped 1652 to a runnable aarch64-pinned example (`{ "target": "macos" }`,
|
||||
ir-only elsewhere). `zig build test` green (661 corpus, 446 unit). Files:
|
||||
`src/ir/lower/expr.zig`, `src/backend/llvm/ops.zig`, `examples/1652-*`.
|
||||
|
||||
Prior: **G (read-write `+` place outputs)** — a `+r` / `+{reg}` `-> @place` output is now
|
||||
implemented. LLVM has no `+` constraint, so a
|
||||
read-write place lowers to: an output **`=`** constraint (return slot, stored back
|
||||
through the place after the call; the leading `+` rewritten to `=` in
|
||||
`appendAsmConstraints`), **plus** a **tied input** (the decimal index of that
|
||||
output) appended **after** the regular inputs, seeded with the place's loaded
|
||||
value passed as a call arg. Tied inputs come **last** so existing operand indices
|
||||
(`%[name]`→`${N}`) are undisturbed — `asmOperandIndex` unchanged. Lowering
|
||||
(`lowerAsmExpr`) no longer rejects `+` (indirect `*` still rejected loudly).
|
||||
`emitInlineAsm` (`src/backend/llvm/ops.zig`): grows arg/param arrays by the rw
|
||||
count (`n_args = n_inputs + n_rw`), loads each seed (`asm.rw.seed`), emits the
|
||||
tied constraint, and the existing store-back path writes the modified output back.
|
||||
New `asmIsReadWrite(e, op)` helper. Verified by **running**: increment-in-place
|
||||
(41→42, IR `"=r,0"`) and a mixed case (rw place + regular input + value output) →
|
||||
textbook `"=r,=r,r,0"` with correct `${N}` indices and args `(input, seed)`. Two
|
||||
commits per cadence: (1) `examples/1650-platform-asm-rw-place.sx` locked the
|
||||
rejection; (2) implemented + flipped 1650 to a runnable aarch64-pinned example
|
||||
(`{ "target": "macos" }`, ir-only elsewhere). `zig build test` green (658 corpus,
|
||||
446 unit). Files: `src/ir/lower/expr.zig`, `src/backend/llvm/ops.zig`,
|
||||
`examples/1650-*`.
|
||||
|
||||
Prior: **2** — `-> @place` write-through outputs. An asm result can be **stored through
|
||||
a place** (local / struct field) instead of returned; the place output does NOT
|
||||
join the result tuple. Parser: `-> @place` parses the `@place` as an ordinary
|
||||
address-of expression → an `out_place` operand (`src/parser.zig`). Lowering
|
||||
(`lowerAsmExpr`): out_place operand = the lowered `@place` address, `out_ty` =
|
||||
the pointee; read-write (`+`) and indirect-memory (`*`) constraints rejected
|
||||
loudly (not yet implemented). Added `out_ty: TypeId` to the IR `AsmOperand`
|
||||
(`src/ir/inst.zig`) so emit builds the **combined** return struct (ALL outputs).
|
||||
`emitInlineAsm` rewrite (`src/backend/llvm/ops.zig`): the LLVM return type is now
|
||||
built from every output's `out_ty`; after the call, out_place slots are
|
||||
`store`d through their address and out_value slots rebuild the sx result — with a
|
||||
**fast path** (no place outputs → the asm's struct return IS the result, so
|
||||
pure-value asm IR is unchanged). Verified: write-to-local (`get42`→42), struct
|
||||
field (`@p.b`), mixed value+place (`v=10 b=20`), `+` rejected. Locked with
|
||||
`examples/1649-platform-asm-place-output.sx` (mixed, runs on aarch64). `zig build
|
||||
test` green (657 corpus, 446 unit). Files: `src/parser.zig`, `src/ir/inst.zig`,
|
||||
`src/ir/lower/expr.zig`, `src/backend/llvm/ops.zig`, `examples/1649-*`.
|
||||
|
||||
Prior: **F** — global (module-scope) asm. A top-level `asm { "tmpl", };` block (template
|
||||
only) lowers to LLVM `module asm`, and a lib-less `extern` calls into the symbols
|
||||
it defines. New `asm_global` AST node (`src/ast.zig`) + `parseAsmGlobal`
|
||||
(`src/parser.zig`, dispatched from `parseTopLevel` on `kw_asm`) — rejects
|
||||
`volatile` and any operands/clobbers. The node forced (and got) arms in the same
|
||||
three `Node.Data` switches as `asm_expr` (`sema.zig` ×2, `semantic_diagnostics.zig`).
|
||||
`Module` gains a `global_asm: ArrayList([]const u8)` (`src/ir/module.zig`);
|
||||
`lowerMainAndComptime` captures each template (the dead `lowerDecls` is NOT the
|
||||
top-level pass — `lowerRoot` Pass 2 uses `lowerMainAndComptime`); `emit_llvm.zig`'s
|
||||
`emit()` appends each via `LLVMAppendModuleInlineAsm` (source order). Verified
|
||||
end-to-end: an aarch64 `_my_add` global routine called via `extern` returns 42.
|
||||
Locked with `examples/1648-platform-asm-global.sx`
|
||||
(`.build { "aot": true, "target": "macos" }` → AOT build+run on aarch64, ir-only
|
||||
elsewhere). `zig build test` green (656 corpus, 446 unit). **(Correction, later:
|
||||
module asm ALSO runs under the JIT — `sx run` compiles to an in-memory object,
|
||||
the integrated assembler assembles the `module asm` into it, ORC relocates and
|
||||
runs it, so the symbol is resolvable at JIT main execution. The original "AOT
|
||||
only" note was wrong; see 1653 for the JIT sibling. The genuine boundary is a
|
||||
COMPILE-TIME `#run` call into a module-asm symbol, which fails loud via host
|
||||
dlsym-miss — see 1654.)** Files: `src/ast.zig`, `src/parser.zig`, `src/sema.zig`,
|
||||
`src/ir/semantic_diagnostics.zig`, `src/ir/module.zig`, `src/ir/lower/decl.zig`,
|
||||
`src/ir/emit_llvm.zig`, `examples/1648-*`.
|
||||
|
||||
Prior: **E** — multi-output tuples. **Inline asm now returns tuples.** Replaced the
|
||||
N>1 bail with a shared `asmResultType` helper (`src/ir/lower/expr.zig`, mixed
|
||||
into `Lowering`) that derives the result type from the `out_value` operands
|
||||
(0→void, 1→T, N→named tuple, named via the §II.5 effective-name rule). The key
|
||||
realization: `toLLVMType(tuple)` already produces a literal struct `{T1,…,Tn}` —
|
||||
exactly LLVM's multi-output asm return — so **emit needed NO change**; building
|
||||
the op with a tuple result type makes the asm call return the struct, which IS
|
||||
sx's tuple value (destructured by the normal `tuple_get` path). `inferType`'s
|
||||
`.asm_expr` arm now also delegates to `asmResultType` (single owner), so
|
||||
`return asm`, `x := asm`, and `q, r := asm` all agree on the type. Verified
|
||||
end-to-end on aarch64: `split(0x1234)`→`(lo=52, hi=18)`, a udiv/msub divmod→
|
||||
`(3, 2)`. IR is textbook: `call { i64, i64 } asm "divq ${4}",
|
||||
"={rax},={rdx},{rax},{rdx},r,~{cc}"(…)` → extractvalue → tuple. Converted 1640 to
|
||||
the x86_64 multi-output IR lock (ir-only) + added `1647-platform-asm-aarch64-multi`
|
||||
(runs on aarch64). `zig build test` green (655 corpus, 446 unit). Files:
|
||||
`src/ir/lower/expr.zig`, `src/ir/lower.zig`, `src/ir/expr_typer.zig`,
|
||||
`examples/164{0,7}-*`.
|
||||
|
||||
Prior: **C.1 + D** — inline asm CODEGEN (lowering builds the op + LLVM emit). **Inline
|
||||
assembly now runs end-to-end.** `lowerAsmExpr` (`src/ir/lower/expr.zig`) stops
|
||||
bailing: it resolves each operand's effective name (§II.5 auto-naming), interns
|
||||
template/constraints/clobbers, lowers input `Ref`s, derives the result `TypeId`
|
||||
(0→void, 1→T), and builds the `inline_asm` op. Added a `%[name]`-references-a-
|
||||
real-operand check (the last deferred validation). Multi-output (N>1) still bails
|
||||
loudly ("Phase E"). `emitInlineAsm` (`src/backend/llvm/ops.zig`, port of Zig's
|
||||
`airAssembly`): assembles the LLVM constraint string (outputs→inputs→`~{clobber}`,
|
||||
`,`→`|`), rewrites the template (`%[name]`→`${N}`, `%%`→`%`, `$`→`$$`, `%=`→
|
||||
`${:uid}`), then `LLVMGetInlineAsm` + `LLVMBuildCall2` (AT&T). Dispatch wired
|
||||
(`emit_llvm.zig`, replacing the C.0 `@panic`). **`llvm_shim.c`**: added
|
||||
`LLVMInitializeNativeAsmParser()` — the JIT must assemble inline asm at run time.
|
||||
Verified end-to-end: aarch64 `add`/`mov` run on the host (exit 42), `nop volatile`
|
||||
runs (1642 now exit 0), IR is textbook (`call i64 asm "add ${0},${1},${2}",
|
||||
"=r,r,r"(…)`). Locked with `examples/1645-platform-asm-aarch64-add.sx` (runs on
|
||||
aarch64, ir-only elsewhere via `.build` + `.ir`). Also added the `inferType`
|
||||
`.asm_expr` arm (`src/ir/expr_typer.zig`, 0→void / 1→T) — without it a bare
|
||||
`x := asm {…-> T}` binding inferred `.unresolved` and silently produced 0;
|
||||
regression-locked with `examples/1646-platform-asm-value-binding.sx`. Updated
|
||||
1640 (now Phase-E bail) + 1642 (now runs). `zig build test` green (654 corpus,
|
||||
446 unit). Files: `src/ir/lower/expr.zig`, `src/backend/llvm/ops.zig`,
|
||||
`src/ir/emit_llvm.zig`, `src/ir/expr_typer.zig`, `llvm_shim.c`,
|
||||
`examples/164{0,2,5,6}-*`.
|
||||
|
||||
Prior: **C.0** — IR op `inline_asm` (lock; no behavior change). Added `inline_asm:
|
||||
InlineAsm` to the IR `Op` union + the `InlineAsm` struct (`template: StringId`,
|
||||
`operands: []const AsmOperand` {role/name/constraint/operand}, `clobbers:
|
||||
[]const StringId`, `has_side_effects`) in `src/ir/inst.zig` — all strings
|
||||
interned, operands in source order, result on `Inst.ty`. The new variant forced
|
||||
(and got) arms in two exhaustive `Op` switches: `src/ir/interp.zig` (loud
|
||||
`bailDetail` — inline asm is never comptime-evaluable) and `src/ir/print.zig`
|
||||
(IR dump). `src/ir/emit_llvm.zig` gets a `@panic` **tripwire** — emit lands in
|
||||
Phase D, and until then `lowerAsmExpr` still bails so no `inline_asm` op is ever
|
||||
created (reaching emit would be a lowering-switched-over-too-early bug). Unit
|
||||
test `inline_asm op shape` in `src/ir/inst.test.zig`. `zig build test` green
|
||||
(652 corpus, 446 unit). Files: `src/ir/inst.zig`, `src/ir/interp.zig`,
|
||||
`src/ir/print.zig`, `src/ir/emit_llvm.zig`, `src/ir/inst.test.zig`.
|
||||
|
||||
Prior: **B.1** — operand-name validation (design §II.5 auto-naming rule). Extended
|
||||
`lowerAsmExpr` with a `pinnedRegister(constraint)` helper (`"={eax}"`→`eax`,
|
||||
`"+{rax}"`→`rax`, `"=r"`→null) and two checks: (1) **reject the echo form**
|
||||
`[eax] "={eax}"` — a label identical to its own pinned register is redundant
|
||||
(the operand is already auto-named after the register); (2) **reject duplicate
|
||||
operand names** (ambiguous `%[name]` / result field). Locked with
|
||||
`examples/1643-platform-asm-echo-name.sx` + `1644-platform-asm-duplicate-name.sx`.
|
||||
`zig build test` green (652 corpus, 0 failed; 445 unit). Files:
|
||||
`src/ir/lower/expr.zig`.
|
||||
|
||||
Prior: **B.0** — asm shape validation (compile-path diagnostics). Restructured the
|
||||
`.asm_expr` lowering arm into `lowerAsmExpr` (`src/ir/lower/expr.zig`, mixed into
|
||||
`Lowering` in `src/ir/lower.zig`): it validates BEFORE the not-yet-implemented
|
||||
codegen bail, so the user sees the real problem first. Two checklist items now
|
||||
enforced with named diagnostics: (1) **template must be a compile-time-known
|
||||
string** (`"..."` / `#string`); (2) **no value outputs ⇒ must be `volatile`**
|
||||
(mirrors Zig — a result-less asm could be deleted). Valid shapes still bail with
|
||||
the "codegen not yet implemented" message. Result-type derivation + auto-naming
|
||||
stay deferred to a later step (observable only once Phase C produces a real IR
|
||||
op). Locked with `examples/1641-platform-asm-missing-volatile.sx` (volatile
|
||||
error) + `1642-platform-asm-nop-volatile.sx` (volatile no-output accepted →
|
||||
codegen bail). `zig build test` green (650 corpus, 0 failed; 445 unit). Files:
|
||||
`src/ir/lower/expr.zig`, `src/ir/lower.zig`, `examples/164{1,2}-*`.
|
||||
|
||||
Prior: **A.1** — parse `asm { … }` + loud lowering bail (folded A.1+A.2 into one honest
|
||||
lock commit, since the loud bail IS current correct behavior — cadence option
|
||||
(a)). Added `AsmExpr`/`AsmOperand` to `src/ast.zig` + the `asm_expr` `Node.Data`
|
||||
arm; `parseAsmExpr` in `src/parser.zig` (`parsePrimary` `.kw_asm` dispatch) —
|
||||
parses the template, flat operand list (`[name]? "constraint" -> Type` value
|
||||
output / `= expr` input), and `clobbers(.…)`; `volatile`/`clobbers` recognized
|
||||
contextually via `isContextualWord`. The new `asm_expr` tag forced (and got)
|
||||
arms in three exhaustive `Node.Data` switches: `src/sema.zig` `analyzeNode` +
|
||||
`findNodeAtOffset`, `src/ir/semantic_diagnostics.zig` `checkBindingNames` (all
|
||||
recurse into template + operand payloads). Lowering bails LOUD + named in
|
||||
`src/ir/lower/expr.zig` ("inline assembly codegen is not yet implemented…") via
|
||||
an explicit `.asm_expr` arm (not the generic `unknown_expr` else) returning
|
||||
`emitPlaceholder`. `-> @place` write-through is rejected with a clear "Phase 2"
|
||||
parse error. Locked with `examples/1640-platform-asm-parse.sx` (multi-output
|
||||
`divmod`, named operands, register pins, clobbers — parses then bails; called
|
||||
from `main`). `zig build test` green (648 corpus, 0 failed; 445 unit). Files:
|
||||
`src/ast.zig`, `src/parser.zig`, `src/sema.zig`, `src/ir/semantic_diagnostics.zig`,
|
||||
`src/ir/lower/expr.zig`, `examples/1640-*`.
|
||||
|
||||
Prior: **A.0** — `kw_asm` keyword (first compiler code). Added the `kw_asm` `Token.Tag`
|
||||
variant + `.{ "asm", .kw_asm }` keyword-map entry in `src/token.zig`; `volatile` /
|
||||
`clobbers` deliberately stay OUT of the global table (contextual). New exhaustive
|
||||
`Tag` switch in `src/lsp/server.zig` `classifyToken` flagged the missing arm (the
|
||||
intended coverage tripwire) — added `.kw_asm` to the keyword group. Lock test in
|
||||
new `src/lexer.test.zig` (`asm`→`kw_asm`, `volatile`/`clobbers`→`identifier`),
|
||||
wired into the `src/root.zig` barrel as `lexer_tests`. `zig build test` green (648
|
||||
corpus, 0 failed; 445 unit, 0 failed — +1). Files: `src/token.zig`,
|
||||
`src/lexer.test.zig`, `src/root.zig`, `src/lsp/server.zig`.
|
||||
|
||||
Prior: **0.2** — CLAUDE.md docs for `<name>.build`; **Phase 0 COMPLETE**.
|
||||
Prior: **0.1** — corpus runner **ir-only branch** for cross-target examples. Replaced
|
||||
0.0's loud placeholder bail: when `cfg.target` doesn't match the host (`ir_only`),
|
||||
`sweepRoot` skips run/build/exec and verifies via `sx ir --target` only —
|
||||
asserting `.exit` (ir cmd) + `.ir` (normalized stdout) + `.stderr`, never
|
||||
`.stdout` (write skipped in update mode, assertion skipped in verify mode). An
|
||||
`.ir` snapshot is **required** in ir-only mode — its absence is a loud failure
|
||||
("needs an .ir snapshot for ir-only mode"). Locked with
|
||||
`examples/1639-platform-target-cross.sx` (asm-free `main :: () -> i64 { return 0;
|
||||
}`), `.build` `{ "target": "x86_64-linux" }`, + checked-in `.ir`. Verified both
|
||||
guards fire: corrupting the `.ir` → IR mismatch; deleting it → the require-failure.
|
||||
`zig build test` green (647 corpus, 0 failed; 444 unit). Files:
|
||||
`src/corpus_run.test.zig`, `examples/1639-*`.
|
||||
|
||||
## Current state
|
||||
Design fully converged (`docs/inline-asm-design.md`). Feasibility confirmed:
|
||||
`llvm_api.c.*` exposes `LLVMGetInlineAsm` / `LLVMBuildCall2` /
|
||||
`LLVMAppendModuleInlineAsm` (LLVM@19). No code written.
|
||||
**Inline assembly works end-to-end: 0, 1, and N value outputs (tuples).** Full
|
||||
pipeline: lex (A.0) → parse (A.1) → validate (B.0/B.1 + `%[name]` check) → IR op
|
||||
(C.0) → lower-builds-op + LLVM emit + JIT asm-parser init (C.1/D) → multi-output
|
||||
tuples (E). Register-class + register-pinned operands, inputs, **symbol operands
|
||||
(`"s"` → direct `bl`/`call` to a function/global by mangled name)**, clobbers,
|
||||
`#string` multi-instruction templates, `%[name]`/`%%` rewriting, and the §II.5
|
||||
auto-naming rule all work and execute on the host JIT. Global `asm { … }` (Phase F) works via
|
||||
lib-less `extern` under BOTH the JIT (`sx run` → 1653) and AOT (1648) — `sx run`
|
||||
compiles to an object, so the integrated assembler bakes the `module asm` symbol
|
||||
in and ORC resolves it. All three `-> @place` output forms now work and execute
|
||||
on aarch64: **write-through** `=` (Phase 2), **read-write** `+` (tied input), and
|
||||
**indirect-memory** `=*m` (pointer arg + `elementtype`, asm writes through it).
|
||||
**Inline assembly is now feature-complete — no substantive features remain.** The
|
||||
x86_64 syscall-write ir-only example is DONE (1651). Global asm runs under both
|
||||
JIT (1653) and AOT (1648). `readme.md` now has an "Inline Assembly" section.
|
||||
|
||||
Known orthogonal bug: **issue 0137** — `sx run` on a program with no `main`
|
||||
segfaults (`src/target.zig:256-273`, unguarded JIT entry lookup). Pre-existing,
|
||||
asm-independent; does NOT block the ASM stream (every example has a `main`).
|
||||
|
||||
Phase E–F feasibility already confirmed against the live tree
|
||||
(`LLVMGetInlineAsm` / `LLVMBuildCall2` / `LLVMAppendModuleInlineAsm` in LLVM@19
|
||||
`Core.h`; ERR-stream `extractvalue`→tuple in `emit_llvm.zig:726-927`; lib-less
|
||||
`extern`, 60 sites; `--target` a global CLI flag).
|
||||
|
||||
## Next step
|
||||
**A.0** — add the `kw_asm` keyword (`src/token.zig` Tag + `StaticStringMap`) and a
|
||||
unit lex test. Then A.1 (parse `asm { … }` → `AsmExpr`, lowering bails loudly).
|
||||
**Inline assembly is feature-complete.** All substantive features are done:
|
||||
0/1/N value outputs (tuples), register-class + pinned operands, inputs, clobbers,
|
||||
`#string` templates, `%[name]`/`%%`/`$`/`%=` rewriting, §II.5 auto-naming, global
|
||||
`asm { … }` (JIT and AOT), and all three `-> @place` output forms — write-through (`=`),
|
||||
read-write (`+`), and indirect-memory (`=*m`). The x86_64 syscall-write ir-only
|
||||
example (1651) and the output-to-`const` rejection (issue 0138) are also done.
|
||||
|
||||
Global asm runs under BOTH the JIT (`sx run` → object → ORC; 1653) and AOT (1648)
|
||||
— the earlier "AOT only / `sx run` mishandles module-asm" note was stale and has
|
||||
been corrected. The one genuine boundary is a COMPILE-TIME `#run` into a
|
||||
module-asm symbol: the interpreter resolves externs via host dlsym, the symbol
|
||||
isn't linked yet, so it already fails loud (`comptime extern call: symbol not
|
||||
found via dlsym`) — pinned by 1654.
|
||||
|
||||
Remaining work, all **polish** (optional):
|
||||
- None substantive. Possible niceties: tighten the `#run`-into-module-asm error
|
||||
text to name module-asm specifically; broaden clobber validation to a checked
|
||||
per-arch enum (design doc Phase 4).
|
||||
|
||||
Orthogonal: **issue 0137** (no-`main` JIT segfault).
|
||||
|
||||
Done since last: output-to-`const` rejection (issue 0138), x86_64 syscall-write
|
||||
ir-only example (1651).
|
||||
|
||||
Orthogonal: **issue 0137** (no-`main` segfault).
|
||||
|
||||
## Log
|
||||
- (init) Plan + design doc written; ASM stream opened.
|
||||
- (0.0) Corpus runner target-gating: `<name>.build` JSON config (replaces `.aot`
|
||||
marker), `--target` threading, `hostMatchesTarget` execute-gate, loud
|
||||
cross-target placeholder bail. Migrated 1226/1227 `.aot`→`.build`; locked with
|
||||
1638 fixture + unit tests. `zig build test` green.
|
||||
- (0.1) ir-only branch: cross-target examples verify via `sx ir --target` only
|
||||
(exit+ir+stderr, no stdout; `.ir` required). Locked with 1639 fixture; verified
|
||||
corrupt-.ir → mismatch and missing-.ir → loud failure. `zig build test` green.
|
||||
- (0.2) docs: CLAUDE.md documents `<name>.build` JSON sidecar (aot + target +
|
||||
ir-only gating), replacing stale `.aot` marker prose. **Phase 0 COMPLETE.**
|
||||
- (A.0) `kw_asm` keyword in token.zig (+ map entry); LSP `classifyToken` switch
|
||||
coverage; lock test in new `lexer.test.zig` (wired via root.zig). `volatile` /
|
||||
`clobbers` stay contextual identifiers. `zig build test` green (445 unit, +1).
|
||||
- (A.1) parse `asm { … }` → `AsmExpr` + loud lowering bail; `asm_expr` arms in 3
|
||||
exhaustive `Node.Data` switches; `-> @place` rejected (Phase 2). Adopted operand
|
||||
auto-naming rule (design §II.5). Locked with 1640 fixture. Filed orthogonal
|
||||
issue 0137 (no-`main` JIT segfault). `zig build test` green (648 corpus, 445 unit).
|
||||
- (B.0) asm shape validation in `lowerAsmExpr`: comptime-string template +
|
||||
no-output⇒volatile, with named diagnostics before the codegen bail. Locked with
|
||||
1641 (volatile error) + 1642 (volatile accepted). `zig build test` green (650
|
||||
corpus, 445 unit).
|
||||
- (B.1) operand-name validation: `pinnedRegister` helper + reject echo form
|
||||
(`[eax] "={eax}"`) and duplicate names. Locked with 1643 + 1644. `zig build
|
||||
test` green (652 corpus, 445 unit).
|
||||
- (C.0) IR op `inline_asm: InlineAsm` + interp `bailDetail` + print arm + emit
|
||||
`@panic` tripwire (Phase D). No behavior change (lowering still bails). Unit
|
||||
test `inline_asm op shape`. `zig build test` green (652 corpus, 446 unit).
|
||||
- (C.1+D) CODEGEN — `lowerAsmExpr` builds the op (effective names, interned
|
||||
strings, input Refs, 0/1 result type) + `%[name]` validation; `emitInlineAsm`
|
||||
(constraint string + template rewrite + `LLVMGetInlineAsm`/`BuildCall2`, AT&T);
|
||||
`inferType` arm; `LLVMInitializeNativeAsmParser` for the JIT. **Inline asm runs
|
||||
end-to-end.** N>1 bails (Phase E). Locked with 1645 (aarch64 add, runs) + 1646
|
||||
(`:=` binding); updated 1640/1642. `zig build test` green (654 corpus, 446 unit).
|
||||
- (E) multi-output tuples — `asmResultType` helper (0→void/1→T/N→named tuple),
|
||||
shared by lowering + `inferType`. `toLLVMType(tuple)` == LLVM multi-output
|
||||
struct, so emit unchanged; the asm struct return IS the sx tuple. Runs on
|
||||
aarch64 (1647: `split`→`(lo,hi)`); 1640 → x86 multi-output IR lock (ir-only).
|
||||
`zig build test` green (655 corpus, 446 unit).
|
||||
- (F) global asm — `asm_global` AST node + `parseAsmGlobal` (top-level, rejects
|
||||
volatile/operands); `Module.global_asm` captured in `lowerMainAndComptime`;
|
||||
`emit()` appends via `LLVMAppendModuleInlineAsm`; call-into via lib-less
|
||||
`extern`. AOT-verified (1648, `_my_add`→42). `zig build test` green (656 corpus, 446 unit).
|
||||
- (docs) readme.md "Inline Assembly" section (b8800a2).
|
||||
- (2) `-> @place` write-through — `out_place` operand; `out_ty` on the IR
|
||||
AsmOperand; `emitInlineAsm` builds the combined output struct + splits
|
||||
(out_place → store-through, out_value → result), fast-path when no places.
|
||||
`+`/`*` rejected. Locked with 1649 (mixed, runs). `zig build test` green (657
|
||||
corpus, 446 unit).
|
||||
- (G) read-write `+` place outputs — `+` lowers to an output `=` + a tied input
|
||||
(output-index constraint) seeded with the place's loaded value, tied inputs
|
||||
appended last (operand indices undisturbed). `appendAsmConstraints` rewrites
|
||||
`+`→`=`; `emitInlineAsm` grows args by the rw count + loads seeds;
|
||||
`asmIsReadWrite` helper. Lowering stops rejecting `+` (`*` still rejected). Two
|
||||
commits (cadence): 1650 locked the rejection, then flipped to a runnable
|
||||
aarch64 example (`"=r,0"` IR). `zig build test` green (658 corpus, 446 unit).
|
||||
- (0138) output-to-`const` rejection — fixed the underlying general bug: scalar
|
||||
`@const` (address-of a folded `::` constant) reinterpreted the value as a
|
||||
pointer (`inttoptr`). `src/ir/lower/expr.zig` `.address_of` now diagnoses a
|
||||
scalar const (local + module) instead of falling through; array/struct consts
|
||||
keep storage. asm `-> @const` gets the clean diagnostic for free (same path).
|
||||
Regression `examples/1177-diagnostics-addr-of-const-rejected.sx`. Issue 0138
|
||||
RESOLVED. `zig build test` green (659 corpus, 446 unit).
|
||||
- (x86 syscall) x86_64 Linux `write(2)` via raw `syscall` — locks the constraint
|
||||
string `={rax},{rax},{rdi},{rsi},{rdx},~{rcx},~{r11},~{memory}` (register-pinned
|
||||
inputs + pinned value output + pointer input + clobbers). ir-only on aarch64
|
||||
(`.ir` asserted), runs on x86_64-linux (hand-authored `"ok\n"` stdout).
|
||||
`examples/1651-platform-asm-x86-syscall-write.sx`. Pure additive lock, no
|
||||
compiler change. `zig build test` green (660 corpus, 446 unit).
|
||||
- (G indirect) indirect-memory `=*m` place outputs — the place address is passed
|
||||
as an opaque `ptr` arg (with an `elementtype(T)` call-site attr), placed before
|
||||
inputs; asm writes through it; no return slot; store-back skips it.
|
||||
`asmIsIndirect` helper; lowering stops rejecting `*`. Verified by running on
|
||||
aarch64 (store-through → 42; mixed indirect+value+input → `"=*m,=r,r"`). Two
|
||||
commits (cadence): 1652 locked the rejection, then flipped to a runnable aarch64
|
||||
example. **Inline asm now feature-complete.** `zig build test` green (661 corpus,
|
||||
446 unit).
|
||||
- (jit) explored "asm in JIT": found it ALREADY works — `sx run` emits an
|
||||
in-memory object (integrated assembler bakes in both in-function inline asm and
|
||||
`module asm`), then ORC relocates+runs it. The stale "AOT only / `sx run`
|
||||
mishandles module-asm" checkpoint prose was corrected. Locked global-asm-under-
|
||||
JIT with `examples/1653-platform-asm-global-jit.sx` (`{ "target": "macos" }`, no
|
||||
aot, → 42). `zig build test` green (662 corpus, 446 unit).
|
||||
- (comptime guard) pinned the one genuine module-asm boundary:
|
||||
`examples/1654-platform-asm-global-comptime-call.sx` — `#run` into a module-asm
|
||||
symbol fails loud (`comptime extern call: symbol not found via dlsym`) because
|
||||
the interpreter resolves externs via host dlsym before link. Arch-independent
|
||||
(no `.build`). `zig build test` green (663 corpus, 446 unit).
|
||||
- (round trip) `examples/1655-platform-asm-callback-into-sx.sx` — global-asm
|
||||
trampoline that `bl _cb` back into an `export`ed sx function (sx→asm→sx, → 42).
|
||||
Documented that `export` (external linkage + C symbol + C ABI) is what makes
|
||||
the callback resolvable; `callconv(.c)` alone leaves it `internal` (DCE'd).
|
||||
`zig build test` green (664 corpus, 446 unit).
|
||||
- (symbol ops) symbol operands (`"s"`) — feed a function/global symbol; the
|
||||
template emits its platform-mangled name so `bl %[fn]` is a DIRECT branch (one
|
||||
fewer indirection than register-indirect `blr`, portable — no hardcoded `_`).
|
||||
Emit passes the operand with its own llvm type (LLVMTypeOf), no coercion
|
||||
(`asmIsSymbol` helper); lowering lowers the function RHS to `ptr @fn`. Decided
|
||||
AGAINST mirroring Zig (which has no symbol operand — 483 std asm sites, none
|
||||
call a function) because the direct `bl` matters. Two commits (cadence): 1656
|
||||
locked the rejection (replacing an LLVM-verifier crash), then implemented +
|
||||
flipped to a runnable aarch64 example (objdump-confirmed direct `bl <_cb>`).
|
||||
`zig build test` green (665 corpus, 446 unit).
|
||||
- (x86 cross-arch) ir-only x86_64 siblings so each emit path is locked on BOTH
|
||||
arches: 1657 read-write (`"incq ${0}","=r,0"`), 1658 indirect (`"movq $$42,
|
||||
${0}","=*m"` (ptr elementtype)), 1659 symbol (`"call ${2:P}"`, direct call). x86
|
||||
templates validated by cross-emitting an object (integrated assembler accepts;
|
||||
objdump confirms 1659's direct `call` reloc). Pure additive locks. `zig build
|
||||
test` green (668 corpus, 446 unit).
|
||||
- (symbol portability) made `%[fn]` portable across arches — `renderAsmTemplate`
|
||||
auto-injects LLVM's `:c` modifier (`${N}`→`${N:c}`) for symbol (`"s"`) operands
|
||||
lacking an explicit modifier (`asmNamedIsSymbol` helper). Without it x86 renders
|
||||
`$cb` (a bad `call` target needing a hand-written `:P`); aarch64 unaffected.
|
||||
Verified `:c` ≡ `:P` for x86-64 calls (both → `R_X86_64_PLT32`). Explicit
|
||||
`%[fn:X]` still wins (escape hatch). 1659 dropped its `:P` → same plain `%[fn]`
|
||||
as aarch64 1656; both IRs regen to `${N:c}`. `zig build test` green (668 corpus,
|
||||
446 unit).
|
||||
|
||||
## Known issues
|
||||
None yet.
|
||||
- **0138** — RESOLVED. `@const` (address-of a `::` comptime constant) yielded a
|
||||
wild pointer (`inttoptr (i64 <value> to ptr)`). Fixed by diagnosing scalar
|
||||
`@const` in `src/ir/lower/expr.zig` `.address_of` (no storage; array/struct
|
||||
consts unaffected). Delivered the ASM "output-to-`const` rejection" for free.
|
||||
Regression `examples/1177-diagnostics-addr-of-const-rejected.sx`.
|
||||
- **0137** — `sx run` on a program with no `main` segfaults (unguarded JIT entry
|
||||
lookup, `src/target.zig:256-273`). Pre-existing, asm-independent. Filed
|
||||
`issues/0137-jit-run-no-main-segfault.md`. Does not block A.1.
|
||||
|
||||
32
current/CHECKPOINT-REIFY.md
Normal file
32
current/CHECKPOINT-REIFY.md
Normal file
@@ -0,0 +1,32 @@
|
||||
# CHECKPOINT-REIFY — comptime `type_info` / `reify` (async-first foundation, step 3)
|
||||
|
||||
Companion to [PLAN-REIFY.md](PLAN-REIFY.md). Update after every step (one step at a
|
||||
time, per the cadence rule).
|
||||
|
||||
## Last completed step
|
||||
**None — stream just carved.** Design validated (3 codebase reviewers; all five reify
|
||||
contracts confirmed feasible). No code written yet.
|
||||
|
||||
## Current state
|
||||
- The plan + the five locked contracts exist in `PLAN-REIFY.md`; design-of-record is
|
||||
`design/execution-evolution-roadmap.md` §7 step 3 + §8.1.
|
||||
- **Nothing built.** `reify`/`type_info`/`field_type` do not exist in the compiler.
|
||||
- Confirmed against the source (anchors in the plan): type minting via
|
||||
`intern`/`internNominal` is programmatic and AST-free; type-fns memoize by mangled
|
||||
name; enum codegen is fully type-table-driven (zero AST coupling); recursive
|
||||
forward-declaration (reserve→complete) already exists for source types.
|
||||
|
||||
## Next step
|
||||
**Phase 0.0 (lock):** add `TypeInfo`/`EnumInfo`/`EnumVariant` data types + bodyless
|
||||
`#builtin` decls for `reify`/`type_info`/`field_type` to `library/modules/std/core.sx`
|
||||
(parsed, unimplemented → loud bail), with a unit test that the decls parse. Then 0.1
|
||||
(xfail: `examples/06xx-comptime-reify-enum.sx`) → 0.2 (green: implement `reify(.enum_)`).
|
||||
|
||||
## Known issues
|
||||
None yet.
|
||||
|
||||
## Log
|
||||
- **Stream carved.** Selected as the first async-first foundation: `reify` gates both
|
||||
channel result types (`RecvResult($T)`) and `race`'s synthesized union, is fully
|
||||
validated (3 reviewers), and is a self-contained compiler/type-system feature
|
||||
testable in isolation (`06xx` comptime). Generic-enum syntax dropped in its favor.
|
||||
@@ -1,6 +1,6 @@
|
||||
# sx Inline Assembly — Implementation Plan (ASM stream)
|
||||
|
||||
**Design source of truth:** [docs/inline-asm-design.md](../docs/inline-asm-design.md).
|
||||
**Design source of truth:** [design/inline-asm-design.md](../design/inline-asm-design.md).
|
||||
This plan turns that doc's §II.7 stage-map + §II.8 phasing into ordered,
|
||||
commit-sized, testable steps. Read the design doc first — this file is the
|
||||
*how/when*, not the *what/why*.
|
||||
@@ -22,8 +22,93 @@ outputs return a tuple; templates are pure AT&T (via LLVM).
|
||||
## Cadence (IMPASSIBLE)
|
||||
No commit may both add a test AND make it pass. Each feature step is either a
|
||||
behavior-locking PASSING test, or an xfail test the *next* commit turns green.
|
||||
Arch-pinned tests live in `examples/16xx-platform-asm-*` (must declare `target=`).
|
||||
Never regenerate snapshots while red.
|
||||
Arch-pinned tests live in `examples/16xx-platform-asm-*` and declare their target
|
||||
via the `target` key of the `expected/<name>.build` sidecar (Phase 0). Never regenerate
|
||||
snapshots while red.
|
||||
|
||||
## Phase 0 — corpus target-gating (test-infra prerequisite; no compiler code)
|
||||
**Why first.** The flagship v1 examples are `x86_64` (syscall-write, divmod,
|
||||
cpuid) but the dev host is `aarch64`-Darwin, and the corpus runner
|
||||
([src/corpus_run.test.zig](../src/corpus_run.test.zig)) currently (a) never threads
|
||||
a per-example `--target` and (b) has no host-arch gate — its only skip is "marker
|
||||
has no `.sx`". So D.0's `…-syscall-write` markers asserting exit/stdout describe
|
||||
output the harness *cannot* produce on this host, which would violate the cadence
|
||||
rule (the "next commit turns it green" can never happen). Phase 0 closes that gap.
|
||||
It touches **only the runner + two fixtures** — zero compiler code, zero risk to
|
||||
A–E, and unblocks every arch-pinned asm example.
|
||||
|
||||
**Marker taxonomy (the cleanup).** The runner currently spreads per-example
|
||||
*directives* across standalone boolean/value sidecars (`.aot` now, `.target`
|
||||
proposed, more later). Replace that sprawl with **one optional config file,
|
||||
`expected/<name>.build`**, holding all build/run directives; the output snapshots
|
||||
(`.exit` / `.stdout` / `.stderr` / `.ir`) stay separate — they are
|
||||
machine-regenerated data, not config. `.exit` remains the **test-discovery key**
|
||||
(every test has one; `.build` is optional).
|
||||
|
||||
**`.build` format** — JSON, parsed with `std.json`:
|
||||
```json
|
||||
{ "aot": true, "target": "x86_64-linux" }
|
||||
```
|
||||
Parse via `std.json.parseFromSlice(BuildConfig, …)` into
|
||||
`struct { aot: bool = false, target: ?[]const u8 = null }`. Field defaults cover
|
||||
omitted keys; `std.json`'s default `ignore_unknown_fields = false` makes an
|
||||
**unknown key a loud `error.UnknownField`** (surfaced as a runner failure, never a
|
||||
silent ignore — CLAUDE.md no-silent-default rule). Extensible: future `"cpu"`,
|
||||
`"link"`, `"cwd"` are just new optional struct fields, no new sidecar file and no
|
||||
custom parser.
|
||||
|
||||
**What the directives do:**
|
||||
|
||||
1. **`"target": "<triple|shorthand>"`** threads `--target <value>` into every `sx`
|
||||
invocation for that example (`run` / `build` / `ir` — `--target` is a global
|
||||
flag, confirmed [main.zig:39](../src/main.zig#L39)), AND **host-match selects
|
||||
the mode.** The runner parses the leading `arch` + `os` tokens of the resolved
|
||||
triple and compares them to `@import("builtin").target` (normalizing
|
||||
`arm64`→`aarch64`):
|
||||
- **match** → *execute* exactly as today (`sx run`, or `aot` build+exec) with
|
||||
the target threaded, plus the `.ir` diff if an `.ir` snapshot exists. ⇒ an
|
||||
x86_64 example gives **real end-to-end coverage on an x86_64 CI runner**.
|
||||
- **mismatch** → **ir-only**: run *only* `sx ir <file> --target <t>`; assert
|
||||
`.exit` (the ir command's exit), `.ir` (normalized stdout), and `.stderr`
|
||||
(diagnostics, normally empty). Do **not** run/build/exec; do **not** assert
|
||||
`.stdout`. An `.ir` snapshot is **required** in ir-only mode — its absence is
|
||||
a loud runner failure ("arch-pinned <name>: ir-only mode requires an .ir
|
||||
snapshot"), never a silent pass. Robust even if `sx ir` treats `--target` as
|
||||
a partial no-op: the `inline_asm` op carries the template + constraint string
|
||||
verbatim, so the IR snapshot still locks the exact thing §II.11 flags as
|
||||
silently-miscompiling (the constraint assembler + template rewrite).
|
||||
2. **`aot`** is the existing JIT-vs-build+exec switch, just relocated from the
|
||||
standalone `.aot` marker into `.build`.
|
||||
|
||||
**Negative compile-error examples need NO `.build`.** `…-missing-volatile`
|
||||
(no-output-without-`volatile`) is a Sema diagnostic raised before codegen/JIT, so
|
||||
plain `sx run` reports it identically on any host — it stays a normal example with
|
||||
no config file.
|
||||
|
||||
**update-goldens interaction:** in ir-only mode, `-Dupdate-goldens` writes `.exit`
|
||||
(ir exit) + `.ir` (+ `.stderr` if non-empty) and skips `.stdout`. Execute mode
|
||||
(incl. `aot`) is unchanged. `.build` is hand-authored — update-goldens never
|
||||
writes it.
|
||||
|
||||
| Step | Commit | What | Files |
|
||||
|---|---|---|---|
|
||||
| 0.0 | lock | Add `BuildConfig` + `std.json` parse of `expected/<name>.build` (unknown-key ⇒ `error.UnknownField`); **migrate** the 2 existing `.aot` markers → `.build` (content `{ "aot": true }`) and delete them; thread `target`'s `--target` into the spawned argv; add `hostMatchesTarget(value) bool` (arch+os token parse, `arm64`→`aarch64`) gating the **execute** path. Lock with `examples/16xx-platform-target-host.sx` (trivial `main`) + a `.build` `{ "target": "<host arch triple>" }` (still runs+passes) and unit `test`s for the JSON parse + `hostMatchesTarget`. | `src/corpus_run.test.zig`, `examples/expected/1226-*.{aot→build}`, `…/1227-*`, + fixture |
|
||||
| 0.1 | lock | Implement the **mismatch ⇒ ir-only** branch (skip run/build/exec; assert `.exit`+`.ir`+`.stderr` from `sx ir --target`; require `.ir`). Lock with `examples/16xx-platform-target-cross.sx` (asm-free `() -> i64 { return 0; }`) + `.build` `{ "target": "x86_64-linux" }` + a checked-in `.ir` snapshot — exercises ir-only on the arm64 host. | `src/corpus_run.test.zig` + fixture |
|
||||
| 0.2 | docs | Update CLAUDE.md §"Test layout"/§"Testing" to document `.build` (format + `aot`/`target` keys) replacing the standalone `.aot` marker prose (lines ~435, ~492). | `CLAUDE.md` |
|
||||
|
||||
Both 0.0 and 0.1 are **lock** commits: the runner change and the fixture that
|
||||
exercises it land together and pass the moment they land (the mechanism works
|
||||
immediately — nothing is left red), which is the cadence rule's "lock in current
|
||||
behavior" flavor, not a feature red→green. No asm lowering is gated on either.
|
||||
|
||||
**Phase 0 verification:** `zig build test` green; deliberately corrupt the
|
||||
cross-target `.ir` fixture and confirm the runner reports an IR mismatch (proves
|
||||
ir-only actually asserts, isn't a no-op); delete it and confirm the
|
||||
"requires an .ir snapshot" failure fires.
|
||||
|
||||
**Estimated runner delta:** ~70–90 lines (sidecar read + `--target` argv threading
|
||||
+ `hostMatchesTarget` + the ir-only branch + update-mode tweak). Within the
|
||||
"no step > ~500 new lines" rule; well under the read budget.
|
||||
|
||||
## Phase A — keyword + AST + parser (parses; no codegen)
|
||||
| Step | Commit | What | Files |
|
||||
|
||||
124
current/PLAN-DIST.md
Normal file
124
current/PLAN-DIST.md
Normal file
@@ -0,0 +1,124 @@
|
||||
# PLAN-DIST — bundle `zig` as sx's hermetic link/libc backend
|
||||
|
||||
## Goal
|
||||
|
||||
`sx build` produces a native binary by driving a **bundled `zig`**
|
||||
(`zig cc`) as the linker, so a distributed sx on Linux needs no system
|
||||
`cc`/lld/libc/CRT. `sx run` (JIT) is unaffected — it never links.
|
||||
|
||||
This is the "be like Zig" move: reuse Zig's hermetic toolchain (lld +
|
||||
crt objects + musl/glibc, all bundled in the `zig` distribution) instead
|
||||
of building our own lld-in-process + libc-from-source pipeline.
|
||||
|
||||
> **Configuration surface** (env vars, flags, resolution order,
|
||||
> activation truth table, target→ABI map, distribution layout) is
|
||||
> specified in [../design/bundled-zig-link-backend-design.md](../design/bundled-zig-link-backend-design.md) — the design-of-record
|
||||
> for how the backend is configured. Keep the two files in sync.
|
||||
|
||||
## Locked decisions
|
||||
|
||||
1. **Default Linux output ABI = static musl** (`x86_64-linux-musl`,
|
||||
`-static`). Output runs on ANY Linux with zero deps — the property
|
||||
that makes Zig binaries portable. glibc/dynamic only via explicit
|
||||
`--target x86_64-linux-gnu`.
|
||||
2. **Activation = auto** when a bundled/resolvable `zig` exists AND the
|
||||
user passed no `--linker`. Falls back to system `cc` otherwise.
|
||||
3. **Dev uses PATH `zig`** (0.16.0 already installed). Defer copying a
|
||||
vendored toolchain into `libexec/` until Phase 3 packaging.
|
||||
|
||||
## Why `zig cc`, not raw `ld.lld`
|
||||
|
||||
`zig cc` is a clang-compatible driver, so it slots into the **existing**
|
||||
cc-style argv branch in `src/target.zig` almost unchanged, and supplies
|
||||
lld + crt objects + musl/glibc automatically per `-target`. Driving
|
||||
`ld.lld` directly would force us to locate/pass crt1.o/crti.o/libc
|
||||
ourselves — exactly the work we're avoiding.
|
||||
|
||||
## Key code anchors (verified)
|
||||
|
||||
- Linker selection hook: `TargetConfig.getLinker()` — `src/target.zig:194-196`
|
||||
(`self.linker orelse "cc"`).
|
||||
- Unix `cc`-style link branch: `src/target.zig:524-564` (this is where
|
||||
the zig backend hooks in; `-o`/`-L`/`-l`/extra objects already pass
|
||||
through clang-compatibly).
|
||||
- Exe-relative resolution pattern to mirror for finding zig:
|
||||
`src/imports.zig:204-227` (`discoverStdlibPaths`, `$SX_STDLIB_PATH`
|
||||
override + `<exe>/..` candidates).
|
||||
- `--linker` CLI flag parsing: `src/main.zig:87-90`.
|
||||
- Emit triple (must agree with link target): `src/ir/emit_llvm.zig`
|
||||
(`LLVMSetTarget`, ~L246-284).
|
||||
|
||||
## Phases
|
||||
|
||||
### Phase 0 — Resolve a bundled/host zig
|
||||
- New `src/zig_backend.zig`: `discoverZig(alloc) -> ?[]const u8`.
|
||||
Resolution order:
|
||||
1. `$SX_ZIG` env override.
|
||||
2. `<exe>/../libexec/zig/zig` (install layout, Phase 3).
|
||||
3. `<exe>/../../zig-bundle/zig` (dev vendored layout, Phase 3).
|
||||
4. `zig` on `PATH` (dev fallback — active now).
|
||||
- Add `SX_DEBUG_ZIG` trace, matching existing `SX_DEBUG_*` hooks.
|
||||
- No behavior change yet; just resolution + a debug/print hook to confirm.
|
||||
|
||||
### Phase 1 — `zig cc` link backend (core change)
|
||||
- `src/target.zig`: generalize the linker from a single token to a
|
||||
**driver argv**. Today `getLinker()` returns one string at `argv[0]`;
|
||||
introduce a `LinkBackend` so the internal backend contributes
|
||||
`{zigPath, "cc"}` as leading entries.
|
||||
- In the Unix branch (L524-564), when backend = zig:
|
||||
- prepend `zig cc`,
|
||||
- append `-target <mapped triple>`,
|
||||
- add `-static` for musl,
|
||||
- everything else (`-o`, `-L`, `-l`, extra objects, extra link flags)
|
||||
passes through unchanged.
|
||||
- Add `sxTripleToZig()` mapping (sx shorthand/triple → zig `-target`);
|
||||
unspecified-on-Linux → `x86_64-linux-musl`.
|
||||
- Align emit triple: when the zig backend is selected, set the LLVM
|
||||
module triple in `emit_llvm.zig` to match the link target
|
||||
(x86_64-linux), so the `.o` links cleanly against musl crt.
|
||||
|
||||
### Phase 2 — Activation
|
||||
- Auto-enable: if `discoverZig()` succeeds and no `--linker` override,
|
||||
use the zig backend for `sx build`. System `cc` remains the fallback.
|
||||
- Optional explicit `--self-contained` / `--no-self-contained` to force.
|
||||
- Confirm `sx run`/JIT path is untouched (no link step).
|
||||
|
||||
### Phase 3 — Distribution packaging
|
||||
- `build.zig`: a `dist` step assembling
|
||||
- `bin/sx` (built with `-Dstatic-llvm`),
|
||||
- `libexec/zig/` (vendored zig binary **and its `lib/`**, copied from a
|
||||
pinned ziglang.org release per host arch),
|
||||
- `library/` (stdlib),
|
||||
into a relocatable tarball.
|
||||
- Pin the zig version (currently 0.16.0).
|
||||
|
||||
### Phase 4 — Verify & lock
|
||||
- Manual first: `sx build hello.sx` (auto zig backend) then `file`/`ldd`
|
||||
the output → expect "statically linked".
|
||||
- Honor snapshot-integrity + FFI-cadence rules before adding a corpus
|
||||
test (host/arch-gated, likely a `.build` sidecar).
|
||||
|
||||
## Risks / watch
|
||||
|
||||
- **Bundle size**: zig + its `lib/` ≈ 50–60 MB.
|
||||
- **gnu vs musl ABI**: pure codegen objects link fine against musl;
|
||||
TLS/stack-protector are the only realistic friction. Aligning the emit
|
||||
triple (Phase 1) covers the common path.
|
||||
- **macOS/Windows cross** via the same `zig cc -target` is nearly free
|
||||
after Phase 1, but Apple-SDK linking has caveats — scope to Linux
|
||||
target first; treat the rest as follow-up.
|
||||
- **c_import.zig** also shells out to `cc` for C imports (JIT). Out of scope
|
||||
here; same backend can absorb it later.
|
||||
|
||||
## Status
|
||||
|
||||
- [x] Phase 0 — resolve zig (`src/zig_backend.zig`)
|
||||
- [x] Phase 1 — zig cc link backend (`target.zig` + `emit_llvm` triple normalize)
|
||||
- [x] Phase 2 — activation (`--self-contained`/`--no-self-contained`; auto on bundled zig)
|
||||
- [ ] Phase 3 — dist packaging (vendor `zig` into `libexec/`)
|
||||
- [ ] Phase 4 — verify & lock (manual ✓ macOS/Linux/Windows; corpus test pending runner `--self-contained` support)
|
||||
|
||||
Scope landed as **macOS + Linux + Windows** (not Linux-first). See the
|
||||
"Implementation status" section in
|
||||
[../design/bundled-zig-link-backend-design.md](../design/bundled-zig-link-backend-design.md)
|
||||
for what refined the original locked decisions.
|
||||
@@ -5,7 +5,7 @@
|
||||
They are *one* plan: Part B can't start until Part A is a behavior-equivalent
|
||||
superset of `#foreign`, and Part A isn't "done" until Part B reaches the invariant.
|
||||
|
||||
**Design rationale:** [docs/inline-asm-design.md](../docs/inline-asm-design.md) §II.2
|
||||
**Design rationale:** [design/inline-asm-design.md](../design/inline-asm-design.md) §II.2
|
||||
(Deviation 6) + §II.10 #4 + the syntax evaluation.
|
||||
|
||||
**Decided syntax**
|
||||
@@ -173,7 +173,7 @@ gate only the live tree (recommended) vs purge everything. Confirm 6 before Phas
|
||||
> Work the FFI-linkage stream per `current/PLAN-EXTERN-EXPORT.md` (+ checkpoint
|
||||
> `current/CHECKPOINT-EXTERN-EXPORT.md`). First read the plan's header (Decided
|
||||
> syntax, Naming constraint, Key finding) and Part A; rationale is in
|
||||
> `docs/inline-asm-design.md` §II.2 (Deviation 6) + §II.10 #4.
|
||||
> `design/inline-asm-design.md` §II.2 (Deviation 6) + §II.10 #4.
|
||||
>
|
||||
> **This session = Part A, Phases 0 and 1 only** (`extern` works as a bare postfix
|
||||
> keyword equivalent to a lib-less `#foreign` fn/global binding; `#foreign` stays
|
||||
|
||||
125
current/PLAN-REIFY.md
Normal file
125
current/PLAN-REIFY.md
Normal file
@@ -0,0 +1,125 @@
|
||||
# PLAN-REIFY — comptime type reflection + construction (`type_info` / `reify`)
|
||||
|
||||
## Goal
|
||||
|
||||
Add the two comptime metaprogramming builtins — **`type_info($T) -> TypeInfo`**
|
||||
(reflect a type → data) and **`reify(info: TypeInfo) -> Type`** (construct a *new
|
||||
nominal type* from data) — plus the sx-lib helpers (`make_enum`, `field_type`,
|
||||
`RecvResult`/`TryResult`) built over them. This is **step 3 of the async-first
|
||||
sequence** ([../design/execution-evolution-roadmap.md](../design/execution-evolution-roadmap.md)
|
||||
§7); it gates channel result types (`RecvResult($T)`) and `race`'s synthesized
|
||||
tagged-union, and **replaces** a would-be `enum($T)` generic-enum language feature.
|
||||
|
||||
> Rationale + the five validated contracts: design doc §7 step 3 + §8.1. The approach
|
||||
> was grounded by three codebase reviewers — it is a **small extension reusing existing
|
||||
> machinery**, not net-new architecture.
|
||||
|
||||
## Locked design (the five reify contracts — all codebase-validated)
|
||||
|
||||
1. **Nominal identity via type-fn memoization.** `RecvResult(i64)` is one `TypeId`
|
||||
because type-fns dedup by mangled `(fn,args)` name (`generic.zig:1620-1629`) +
|
||||
reify `findByName`. NOT structural dedup — enums are nominal (`types.zig:1110`).
|
||||
2. **Functional through codegen.** A reify'd enum has **no backing AST decl**, and
|
||||
every enum stage is type-table-driven (layout, construct, match+exhaustiveness,
|
||||
`toLLVMType`, `type_name`/format) — so it flows through **unmodified**.
|
||||
3. **Validate loudly** at the `intern`/`internNominal` choke point (`types.zig:411-439`).
|
||||
4. **Comptime-only, JIT-free** — a type-table op in the interpreter; no S1 dependency.
|
||||
5. **Reference-based self-reference** (`*Self`/`[]Self`) via reserve-placeholder→
|
||||
complete (`nominal.zig:86/108/120`, `types.zig:442`); **by-value recursion rejected**.
|
||||
|
||||
Surface follows the **`#builtin`** pattern of the existing reflection builtins
|
||||
(`type_of`/`field_count`/`field_name` in `library/modules/std/core.sx`,
|
||||
`specs.md:2594-2600`) — NOT the BuildOptions compiler-hook registry.
|
||||
|
||||
## Key code anchors (verified by review)
|
||||
|
||||
- Type minting: `TypeTable.intern` / `internNominal` — `src/ir/types.zig:411-439`.
|
||||
- Type-fn instantiation + mangled-name cache — `src/ir/lower/generic.zig:1575-1689`
|
||||
(cache check `:1620-1629`; register inline-struct result `:1663-1689`).
|
||||
- Forward-declare reserve (recursive types) — `src/ir/lower/nominal.zig:86/108/120`;
|
||||
complete a forward-declared type — `src/ir/types.zig:442`.
|
||||
- Enum codegen (all type-table-driven, the reify target shape): size `types.zig:633-636`;
|
||||
`resolveVariantIndex` `lower/expr.zig:1159-1177`; match `lower/control_flow.zig:748-945`;
|
||||
`toLLVMType` `backend/llvm/types.zig:111-154`; `type_name` `types.zig:846-882`.
|
||||
- Existing reflection builtins to mirror — `core.sx` (`#builtin`) + their interp/lower
|
||||
handlers (`src/ir/interp.zig` `type_name`/reflection at ~`:1911`).
|
||||
- Match form — `specs.md:408-424`.
|
||||
|
||||
## Cadence (IMPASSIBLE)
|
||||
|
||||
No commit may both add a test AND make it pass (xfail-then-green, or a behavior-lock).
|
||||
`zig build && zig build test` after every step. Never regenerate snapshots while red.
|
||||
Examples: `06xx` (comptime, deterministic), `11xx` (diagnostics for loud failures).
|
||||
|
||||
---
|
||||
|
||||
## Phases
|
||||
|
||||
### Phase 0 — `reify` of a flat enum (the core)
|
||||
| Step | Commit | What | Files |
|
||||
|---|---|---|---|
|
||||
| 0.0 | lock | `TypeInfo`/`EnumInfo`/`EnumVariant` lib types in `core.sx` (data only); `reify`/`type_info`/`field_type` as bodyless `#builtin` decls (parsed, unimplemented → loud bail). Unit: decls parse. | `library/modules/std/core.sx`, `src/ir/interp.zig` |
|
||||
| 0.1 | xfail | `examples/06xx-comptime-reify-enum.sx` — `reify(.enum_(.{variants=[.{name="value",payload=i64},.{name="closed",payload=void}]}))`, construct `.value(3)`, match it. Red (reify unimplemented). | `examples/06xx-*` |
|
||||
| 0.2 | green | implement `reify(.enum_)` → build `EnumInfo`/`TaggedUnionInfo` `TypeInfo`, `internNominal(info, fresh_nominal_id)`, return `TypeId`. Example green; construct + match work unmodified (Contract 2). | `src/ir/interp.zig`, (`src/ir/types.zig` if a helper is wanted) |
|
||||
|
||||
### Phase 1 — type-fn → reify identity
|
||||
| Step | Commit | What | Files |
|
||||
|---|---|---|---|
|
||||
| 1.0 | xfail | `examples/06xx-comptime-reify-typefn-identity.sx` — `R :: ($T)->Type { reify(...) }`; assert `R(i64)` from two sites is ONE type (assignable/matchable across sites). Red if reify-result not registered by mangled name. | `examples/06xx-*` |
|
||||
| 1.1 | green | register a reify-returning type-fn's result under the instantiation mangled name (mirror the inline-struct path `generic.zig:1663-1689`). Identity holds (Contract 1). | `src/ir/lower/generic.zig` |
|
||||
|
||||
### Phase 2 — `type_info` (reflect) + `field_type`
|
||||
| Step | Commit | What | Files |
|
||||
|---|---|---|---|
|
||||
| 2.0 | xfail | reflect a struct/tuple → read variant/field names + **types** (`field_type($T,i)`). Red. | `examples/06xx-*` |
|
||||
| 2.1 | green | implement `type_info`/`field_type` over the type table (reuse the `field_count`/`field_name` reflection path). | `src/ir/interp.zig` |
|
||||
|
||||
### Phase 3 — `make_enum` + `RecvResult`/`TryResult` (sx lib)
|
||||
| Step | Commit | What | Files |
|
||||
|---|---|---|---|
|
||||
| 3.0 | lock | `make_enum(variants) -> Type` (sx lib over `reify`); `RecvResult($T)`/`TryResult($T)` as type-fns. Behavior-lock: `RecvResult(i64)` constructs + matches. | `library/modules/std/*` |
|
||||
|
||||
### Phase 4 — reference-based self-reference
|
||||
| Step | Commit | What | Files |
|
||||
|---|---|---|---|
|
||||
| 4.0 | xfail | recursive enum via `*Self` (tree/list): `reify_rec((self)=> .enum_(... payload = ptr_to(self) ...))`. Red. | `examples/06xx-*` |
|
||||
| 4.1 | green | `reify_rec` reserve-placeholder→complete (reuse `nominal.zig:86`/`types.zig:442` recursive path). | `src/ir/interp.zig`, `src/ir/types.zig` |
|
||||
|
||||
### Phase 5 — validation + loud diagnostics
|
||||
| Step | Commit | What | Files |
|
||||
|---|---|---|---|
|
||||
| 5.0 | xfail | `examples/11xx-diagnostics-reify-*` — dup variant names, non-integer backing, **by-value self-reference** ("infinite size; use `*Self`"). Pin the messages. | `examples/11xx-*` |
|
||||
| 5.1 | green | validate `TypeInfo` at the `intern`/`internNominal` choke point; emit diagnostics, never a broken type (Contract 3). | `src/ir/interp.zig` / `src/ir/types.zig` |
|
||||
|
||||
> `RaceResult` (tuple→tagged-union synthesis) is **not** in this stream — it lands with
|
||||
> `race` (async cluster), but it consumes exactly the `type_info`+`field_type`+`reify`
|
||||
> primitives built here.
|
||||
|
||||
## Risks / watch
|
||||
|
||||
- **Mangled-name plumbing (Phase 1)** is the one real unknown — confirm the type-fn
|
||||
path registers a *reify-returned* result (not just inline `struct {…}` literals).
|
||||
Fallback: have `reify` itself name the type by the instantiation key + `findByName`.
|
||||
- **Self-ref completion (Phase 4)** must reuse the existing recursive-type
|
||||
reserve→complete path; do not invent a new mutate-after-intern mechanism.
|
||||
- Keep `reify` **comptime-only**: a `reify` reached at runtime is a hard error.
|
||||
|
||||
## Status
|
||||
|
||||
- [ ] Phase 0 — `reify` flat enum
|
||||
- [ ] Phase 1 — type-fn identity
|
||||
- [ ] Phase 2 — `type_info` + `field_type`
|
||||
- [ ] Phase 3 — `make_enum` + `RecvResult`/`TryResult`
|
||||
- [ ] Phase 4 — reference-based self-reference
|
||||
- [ ] Phase 5 — validation + diagnostics
|
||||
|
||||
## Kickoff prompt (paste into a fresh session)
|
||||
|
||||
> Work the REIFY stream per `current/PLAN-REIFY.md` (+ checkpoint
|
||||
> `current/CHECKPOINT-REIFY.md`). Read the plan header (goal, five locked contracts,
|
||||
> key anchors) first; rationale is in `design/execution-evolution-roadmap.md` §7 step 3
|
||||
> + §8.1. **This session = Phase 0 only** (`TypeInfo` lib types + `reify` of a flat
|
||||
> enum: construct + match). Cadence (IMPASSIBLE): no commit both adds a test and makes
|
||||
> it pass — lock, then xfail→green. `zig build && zig build test` after every step. If
|
||||
> you hit an unrelated compiler bug, follow the CLAUDE.md IMPASSIBLE RULE (file an
|
||||
> issue, stop). Stop at the end of Phase 0; update the checkpoint.
|
||||
384
design/bundled-zig-link-backend-design.md
Normal file
384
design/bundled-zig-link-backend-design.md
Normal file
@@ -0,0 +1,384 @@
|
||||
# Bundled `zig` Link Backend for sx — Design Doc & Proposal
|
||||
|
||||
> Status: **core landed (macOS / Linux / Windows).** This is the
|
||||
> design-of-record for how a distributed sx links native binaries
|
||||
> hermetically. The phased plan lives in
|
||||
> [../current/PLAN-DIST.md](../current/PLAN-DIST.md); keep the two in sync.
|
||||
> User-facing surface is documented in `readme.md` (Cross-Compilation §).
|
||||
|
||||
---
|
||||
|
||||
## Implementation status (landed)
|
||||
|
||||
The core backend is implemented and verified on a macOS host:
|
||||
|
||||
| Target | Result | Notes |
|
||||
|--------|--------|-------|
|
||||
| `--target linux-musl` | static ELF | `zig cc -target x86_64-linux-musl -static` |
|
||||
| `--target windows-gnu` | PE32+ | `zig cc -target x86_64-windows-gnu` |
|
||||
| `--target macos` | Mach-O (runs) | `zig cc -target <arch>-macos`, no `-static` |
|
||||
|
||||
What shipped, and where it **refined** the original locked decisions:
|
||||
|
||||
- **Scope = macOS + Linux + Windows** (not Linux-first). iOS/Android/wasm keep
|
||||
their specialized toolchains. (`TargetConfig.zigBackendInScope`.)
|
||||
- **Auto-activation = a *bundled* zig is found** (a real distribution, or a
|
||||
pinned `$SX_ZIG`). A `PATH`-only zig is the dev fallback and engages **only**
|
||||
under `--self-contained` — so native dev/CI builds are never silently
|
||||
rerouted, across all three OSes. This is the precise meaning of the §5.5
|
||||
"zig found (B)" column: **B = bundled**. *(Refinement of "auto when zig
|
||||
found": PATH-zig does not auto-engage; the musl-only auto gating considered
|
||||
mid-design was dropped in favor of bundled-vs-PATH, which is OS-agnostic.)*
|
||||
- **No translation table** (per the triple-scheme decision): sx triples are
|
||||
passed straight to `zig cc`, and `emit_llvm` runs them through
|
||||
`LLVMNormalizeTargetTriple` so vendor-less zig triples (e.g.
|
||||
`x86_64-windows-gnu`) land their OS/env in LLVM's canonical positions —
|
||||
otherwise "windows" sits in the vendor slot and the object silently falls
|
||||
back to ELF. The one unavoidable exception is **macOS**: the object must be
|
||||
emitted from Apple's `apple-darwin` triple (LLVM needs it for Mach-O), but
|
||||
zig's `-target` parser rejects that scheme, so the *linker* triple alone is
|
||||
the vendor-less `<arch>-macos`. One OS-specific line, not a table.
|
||||
- **New shorthands:** `linux-musl`, `linux-musl-arm`, `windows-gnu` (zig
|
||||
scheme). The existing `linux`/`linux-arm` shorthands were also de-vendored
|
||||
(`x86_64-linux-gnu`, matching the corpus runner's own expander).
|
||||
|
||||
Files: `src/zig_backend.zig` (discovery), `src/target.zig`
|
||||
(`selectZigLinker` / `emitZigLinkArgv` / `zigTargetTriple` / dispatch in
|
||||
`link`), `src/ir/emit_llvm.zig` (triple normalization), `src/main.zig`
|
||||
(`--self-contained` / `--no-self-contained` + shorthands).
|
||||
|
||||
Not yet done: distribution packaging (Phase 3 — vendoring `zig` into
|
||||
`libexec/`), and a corpus regression test (needs the runner to thread
|
||||
`--self-contained`; manual verification only so far).
|
||||
|
||||
The sections below are the original proposal; where they say "Linux-first" or
|
||||
"follow-up" for macOS/Windows, the table above supersedes them.
|
||||
|
||||
---
|
||||
|
||||
## 0. TL;DR + feasibility
|
||||
|
||||
**Problem.** A distributed `sx` compiler can run on a Linux box (static-LLVM
|
||||
binary + relocatable `library/`), but it cannot *finish a build*: the final
|
||||
link step shells out to the host's `cc`, and relies on the host's libc + CRT
|
||||
objects. No `cc`/glibc/SDK on the box → no binary. That is the gap between
|
||||
"sx runs here" and "sx is a toolchain here."
|
||||
|
||||
**Proposal.** Bundle a pinned `zig` binary inside the sx distribution and use
|
||||
`zig cc` as the link backend for `sx build`. `zig cc` brings its own lld,
|
||||
CRT objects, and libc (musl or glibc) for the chosen target. Default Linux
|
||||
output is **statically-linked musl**, which runs on any Linux with zero
|
||||
dependencies — the property that makes Zig's own output portable.
|
||||
|
||||
**Feasibility: high.** The change is contained:
|
||||
- The linker is selected through a single hook —
|
||||
`TargetConfig.getLinker()` at `src/target.zig:194-196` — and the final
|
||||
link argv is built in one place, the Unix `cc`-style branch at
|
||||
`src/target.zig:524-564`.
|
||||
- `zig cc` is a clang-compatible driver, so `-o` / `-L` / `-l` / extra
|
||||
objects pass through that branch unchanged. The backend only has to
|
||||
prepend `zig cc` and add `-target …` / `-static`.
|
||||
- Exe-relative resolution (for finding the bundled zig) is already solved
|
||||
for the stdlib in `src/imports.zig:204-227` and can be mirrored.
|
||||
- `sx run` is JIT and never links, so it is wholly unaffected.
|
||||
|
||||
The cost is a ~50–60 MB vendored `zig` (binary + its `lib/`) in the
|
||||
distribution, and version-pinning discipline.
|
||||
|
||||
---
|
||||
|
||||
## 1. Motivation & background
|
||||
|
||||
### 1.1 Current state
|
||||
|
||||
| Concern | Today | File |
|
||||
|---------|-------|------|
|
||||
| Compiler binary | Self-containable via `-Dstatic-llvm` (no system LLVM) | `build.zig:9-10,156-162` |
|
||||
| Stdlib | Relocatable, found relative to the exe | `src/imports.zig:204-227` |
|
||||
| **Linking** | **Shells to system `cc`** | `src/target.zig:524-564` |
|
||||
| **libc / CRT** | **Provided by the host `cc` driver implicitly** | (no `-lc`/crt passed) |
|
||||
|
||||
So two of three legs of a portable toolchain already stand. The third — the
|
||||
linker and the libc/CRT it pulls in — is the host dependency this design
|
||||
removes.
|
||||
|
||||
### 1.2 Why this matters for distribution
|
||||
|
||||
The goal is to hand someone a tarball and have `sx build app.sx` produce a
|
||||
working binary on a stock Linux machine — a fresh container, a minimal CI
|
||||
image, a box without `build-essential`. Today that fails at the link step.
|
||||
Zig solved exactly this problem for its own users; since sx is *built with*
|
||||
Zig, the cleanest fix is to stand on Zig's hermetic toolchain rather than
|
||||
re-implement it.
|
||||
|
||||
---
|
||||
|
||||
## 2. Goals & non-goals
|
||||
|
||||
### Goals
|
||||
- `sx build` produces a native Linux binary with **no host `cc`/ld/libc/SDK**.
|
||||
- Default Linux output is **portable** (static musl): runs on any Linux.
|
||||
- **Zero-config in the common case**: a bundled or PATH `zig` is detected and
|
||||
used automatically; the operator sets nothing.
|
||||
- A fully-specified, documented configuration surface (this document) for the
|
||||
cases that *do* need tuning.
|
||||
- No regression for existing users: system `cc` remains a fallback, and any
|
||||
explicit `--linker` still wins.
|
||||
|
||||
### Non-goals (this iteration)
|
||||
- Reimplementing lld in-process or building libc from source (see §7 —
|
||||
Zig already does both; we reuse it).
|
||||
- First-class Windows/macOS cross-compilation (nearly free as a follow-up,
|
||||
but unverified — §11).
|
||||
- Routing C-import compilation (`src/c_import.zig`, which also shells `cc`)
|
||||
through the backend.
|
||||
- Glibc-floor version pinning (`…-gnu.2.28`); exposed only if needed.
|
||||
|
||||
---
|
||||
|
||||
## 3. How Zig achieves hermetic builds (the model we're borrowing)
|
||||
|
||||
Zig's turnkey cross-compilation rests on bundling the two things sx borrows
|
||||
from the host:
|
||||
|
||||
1. **In-process lld.** Zig embeds LLVM's lld (ELF/COFF/Mach-O/wasm) and links
|
||||
without spawning an external linker.
|
||||
2. **libc as data.** Zig ships musl *source* (builds `libc.a` + `crt*.o` on
|
||||
demand, cached → static, no dynamic linker → portable output) and glibc
|
||||
stubs generated from `.abilist` per version. For Windows it ships mingw
|
||||
`.def` files and synthesizes import libraries.
|
||||
|
||||
`zig cc` exposes all of this behind a clang-compatible driver: `zig cc
|
||||
-target x86_64-linux-musl -static foo.o -o foo` yields a portable binary on
|
||||
any host, with nothing installed. **This design consumes that driver rather
|
||||
than rebuilding its internals** — everything in the list above arrives for
|
||||
free by vendoring the `zig` binary.
|
||||
|
||||
---
|
||||
|
||||
## 4. Design overview
|
||||
|
||||
`sx build` gains a **link backend** abstraction with two implementations:
|
||||
|
||||
- `system_cc` — today's behavior (shell `cc`, host libc).
|
||||
- `bundled_zig` — shell `<zig> cc -target <triple> [-static] …`.
|
||||
|
||||
Selection is automatic (§5.5): if a usable `zig` is discovered and the user
|
||||
gave no explicit `--linker`, `bundled_zig` is used; otherwise `system_cc`.
|
||||
The backend plugs into the existing Unix link branch — it contributes the
|
||||
leading `zig cc` tokens and the `-target`/`-static` flags; the rest of the
|
||||
argv assembly is unchanged because `zig cc` is clang-compatible.
|
||||
|
||||
One supporting change: when `bundled_zig` is active, the triple handed to
|
||||
LLVM in `src/ir/emit_llvm.zig` is aligned to the link target (`x86_64-linux`)
|
||||
so the emitted object links cleanly against the selected musl CRT.
|
||||
|
||||
---
|
||||
|
||||
## 5. Detailed design (the configuration surface)
|
||||
|
||||
### 5.1 zig discovery — resolution order
|
||||
|
||||
`discoverZig()` (new `src/zig_backend.zig`) returns the first hit:
|
||||
|
||||
1. `$SX_ZIG` — explicit override.
|
||||
2. `<exe_dir>/../libexec/zig/zig` — **install layout** (§6).
|
||||
3. `<exe_dir>/../../zig-bundle/zig` — **dev vendored layout** (§6).
|
||||
4. `zig` on `PATH` — **dev fallback** (the only one active today).
|
||||
|
||||
`<exe_dir>` is resolved exactly as `src/imports.zig` resolves the stdlib.
|
||||
If none resolve, behavior depends on activation (§5.5): auto-mode silently
|
||||
falls back to `system_cc`; `--self-contained` errors.
|
||||
|
||||
### 5.2 Environment variables
|
||||
|
||||
| Var | Effect | Default |
|
||||
|-----|--------|---------|
|
||||
| `SX_ZIG` | Absolute path to the `zig` used as the link backend. Highest-priority discovery source. | unset |
|
||||
| `ZIG_LIB_DIR` | Path to the bundled zig's `lib/`. Needed **only** if `zig` was relocated away from its `lib/`. In the supported layout (§6) they ship together and zig self-locates — leave unset. | unset |
|
||||
| `SX_DEBUG_ZIG` | Trace discovery: each candidate path and the chosen one (or "none → cc"). Mirrors `SX_DEBUG_STDLIB`. | unset |
|
||||
| `SX_DEBUG_LINK` | **Existing.** Prints the full link argv — shows the exact `zig cc …` invocation. | unset |
|
||||
| `SX_STDLIB_PATH` | **Existing.** Stdlib override; unrelated to linking but noted because a full distribution sets neither and relies on exe-relative discovery for both. | unset |
|
||||
|
||||
### 5.3 CLI flags (`sx build`)
|
||||
|
||||
| Flag | Effect |
|
||||
|------|--------|
|
||||
| `--self-contained` | Force `bundled_zig` ON. If no usable zig is found, **error** — do not silently fall back. |
|
||||
| `--no-self-contained` | Force `system_cc`. |
|
||||
| `--linker <cmd>` | **Existing.** Explicit linker; supplying it **disables** auto-activation (user's choice wins). To pin a specific zig, prefer `SX_ZIG` + `--self-contained`. |
|
||||
| `--target <triple\|shorthand>` | **Existing.** Selects target + ABI (§5.4). With `bundled_zig` active and target unspecified on a Linux host → `x86_64-linux-musl` static. |
|
||||
| `--sysroot <path>` | **Existing.** Forwarded to the linker; rarely needed with `bundled_zig` (zig brings its own sysroot). |
|
||||
|
||||
### 5.4 Target → ABI mapping
|
||||
|
||||
The default (no `--target`) deliberately differs from the legacy `linux`
|
||||
shorthand, because portable static output is the entire point.
|
||||
|
||||
| `sx` invocation | zig `-target` | Link mode | Portable? |
|
||||
|-----------------|---------------|-----------|-----------|
|
||||
| *(no `--target`, Linux host)* | `x86_64-linux-musl` | `-static` | ✅ any Linux |
|
||||
| `--target linux-musl` *(new)* | `x86_64-linux-musl` | `-static` | ✅ |
|
||||
| `--target linux` / `linux-x86` | `x86_64-linux-gnu` | dynamic | ❌ host glibc, versioned |
|
||||
| `--target linux-arm` | `aarch64-linux-musl` | `-static` | ✅ |
|
||||
| `--target windows` | `x86_64-windows-gnu` | per zig | follow-up (§11) |
|
||||
| `--target macos` / `macos-arm` | `aarch64-macos` | per zig | follow-up (§11) |
|
||||
|
||||
- A **new** `linux-musl` shorthand is added; the existing `linux` shorthand
|
||||
keeps its current gnu/dynamic meaning for back-compat.
|
||||
- The LLVM emit triple is aligned to the link target so the `.o` links
|
||||
cleanly against the selected libc/CRT (§4).
|
||||
|
||||
### 5.5 Activation truth table
|
||||
|
||||
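`B` = a usable zig was discovered (§5.1). Subcommand = `sx build`. (As landed, `B` means a *bundled* zig — a PATH-only zig engages only under `--self-contained`; see "Implementation status".)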
|
||||
|
||||
| `--self-contained` | `--no-self-contained` | `--linker` | zig found (B) | Result |
|
||||
|:---:|:---:|:---:|:---:|--------|
|
||||
| — | — | no | yes | **bundled_zig** (auto) |
|
||||
| — | — | no | no | system `cc` (silent fallback) |
|
||||
| — | — | yes | * | user's `--linker` |
|
||||
| yes | — | * | yes | **bundled_zig** (forced) |
|
||||
| yes | — | * | no | **error**: `--self-contained` but no zig |
|
||||
| — | yes | * | * | system `cc` (forced off) |
|
||||
|
||||
- `--self-contained` + `--linker` together: backend choice goes to
|
||||
`--self-contained`; treat the literal combination as a usage error
|
||||
(document, don't guess).
|
||||
- `sx run` / `sx ir` / `sx asm` never link → backend not consulted.
|
||||
|
||||
### 5.6 Emit-triple alignment
|
||||
|
||||
`src/ir/emit_llvm.zig` (`LLVMSetTarget`, ~L246-284) currently uses the host
|
||||
default triple when `--target` is unspecified (on Linux,
|
||||
`x86_64-unknown-linux-gnu`). When `bundled_zig` is active, set the module
|
||||
triple to match the link target (`x86_64-linux`) so codegen and the musl CRT
|
||||
agree. Pure codegen objects are ABI-compatible across gnu/musl; aligning the
|
||||
triple removes the edge-case risk (TLS model, stack protector) up front.
|
||||
|
||||
---
|
||||
|
||||
## 6. Distribution layout (packaging)
|
||||
|
||||
A relocatable tree; everything resolves relative to `bin/sx`, so the whole
|
||||
directory moves/untars anywhere with no env vars set:
|
||||
|
||||
```
|
||||
sx-<os>-<arch>/
|
||||
├── bin/
|
||||
│ └── sx # built -Dstatic-llvm (no system LLVM dep)
|
||||
├── libexec/
|
||||
│ └── zig/
|
||||
│ ├── zig # pinned zig binary
|
||||
│ └── lib/ # zig's lib/ (musl/glibc sources, lld data, …)
|
||||
└── library/ # sx stdlib (existing discovery)
|
||||
└── modules/…
|
||||
```
|
||||
|
||||
Rules:
|
||||
- `zig` and its `lib/` **must** ship together under `libexec/zig/` so zig
|
||||
self-locates `lib/`; splitting them forces `ZIG_LIB_DIR`.
|
||||
- Pinned zig version: **0.16.0** (matches the build toolchain). Record the
|
||||
exact version in the release manifest — a mismatched `zig cc` CLI is the
|
||||
likeliest future breakage.
|
||||
- Vendor the matching zig release per host os/arch from ziglang.org at
|
||||
package time.
|
||||
|
||||
---
|
||||
|
||||
## 7. Alternatives considered
|
||||
|
||||
| Alternative | Why not (now) |
|
||||
|-------------|---------------|
|
||||
| **In-process lld + bundled musl sysroot** (sx owns the pipeline; no zig) | Requires a custom LLVM build *with* lld — the Homebrew `llvm@19` here ships none (`liblld*.a`, headers, `ld.lld` all absent) — plus a C++ lld shim and per-arch prebuilt musl. Strictly more work for the same user-visible result. The right *eventual* target if we want zero foreign binaries; tracked as a follow-up. |
|
||||
| **Full Zig-style: build libc from source on demand** | Most flexible (any arch/libc version, no prebuilt blobs) but the most work; only worth it after the in-process-lld path exists. |
|
||||
| **Document a hard dependency on system `cc`** | Zero engineering, but defeats the goal — the box still needs `build-essential`. Acceptable only as the current fallback, not the distribution story. |
|
||||
| **Bundle just `ld.lld` + a musl sysroot (no full zig)** | Smaller than a whole zig, but we'd hand-manage crt object selection, dynamic-linker paths, and import libs — i.e. re-derive what `zig cc` already encapsulates. Bundle-size saving doesn't justify the fragility. |
|
||||
|
||||
Vendoring `zig` wins on effort-to-result because sx already builds with Zig:
|
||||
it's a first-party dependency, not a foreign toolchain, and it unlocks
|
||||
Windows/macOS targets later for nearly free.
|
||||
|
||||
---
|
||||
|
||||
## 8. Phasing
|
||||
|
||||
Detail in [../current/PLAN-DIST.md](../current/PLAN-DIST.md). Summary:
|
||||
|
||||
0. **Resolve zig** — `discoverZig()` + `SX_DEBUG_ZIG`; PATH fallback only.
|
||||
1. **Link backend** — generalize the linker to a driver argv; emit
|
||||
`zig cc -target … -static`; align the emit triple.
|
||||
2. **Auto activation** — wire the §5.5 truth table; `cc` fallback intact.
|
||||
3. **Packaging** — `build.zig` `dist` step assembling the §6 tree.
|
||||
4. **Verify & lock** — `file`/`ldd` show "statically linked"; host/arch-gated
|
||||
corpus test honoring the snapshot-integrity + FFI-cadence rules.
|
||||
|
||||
The minimum end-to-end proof is Phases 0+1 against PATH zig.
|
||||
|
||||
---
|
||||
|
||||
## 9. Open decisions
|
||||
|
||||
**Locked:**
|
||||
- Default Linux ABI = **static musl** (portable output).
|
||||
- Activation = **auto** when a usable zig is found and no `--linker`.
|
||||
- Dev uses **PATH zig**; vendoring deferred to Phase 3.
|
||||
|
||||
**Still open:**
|
||||
- Exact spelling of the force flags (`--self-contained` vs e.g.
|
||||
`--bundled-linker`); name chosen here pending review.
|
||||
- Whether auto-mode should *warn* on silent `cc` fallback or stay quiet
|
||||
(leaning quiet, with `SX_DEBUG_ZIG` for diagnosis).
|
||||
- Whether to gate the Phase-4 corpus test behind a `.build` `target`
|
||||
sidecar or keep it manual until a Linux CI runner exists.
|
||||
|
||||
---
|
||||
|
||||
## 10. Risks
|
||||
|
||||
- **Bundle size** ≈ 50–60 MB (zig + `lib/`). Acceptable for a toolchain;
|
||||
call it out in release notes.
|
||||
- **zig CLI drift** across versions — pin hard, record in the manifest;
|
||||
the most likely future breakage.
|
||||
- **gnu vs musl ABI** for the emitted object — covered by the emit-triple
|
||||
alignment (§5.6); TLS/stack-protector are the only realistic friction.
|
||||
- **Operator confusion**: default-no-target (musl) diverging from the
|
||||
`linux` shorthand (gnu). Mitigated by the new `linux-musl` shorthand and
|
||||
explicit documentation (§5.4).
|
||||
|
||||
---
|
||||
|
||||
## 11. Out of scope / follow-ups
|
||||
|
||||
- **Windows / macOS targets** via the same `zig cc -target`: nearly free
|
||||
after the Linux path, but Apple-SDK and Windows specifics need their own
|
||||
verification — not documented as supported until tested.
|
||||
- **`src/c_import.zig`** still shells system `cc` for C imports in JIT mode;
|
||||
route through the backend later.
|
||||
- **In-process lld** (alternative in §7) as the eventual zero-foreign-binary
|
||||
endgame.
|
||||
|
||||
---
|
||||
|
||||
## Appendix — quick recipes (once implemented)
|
||||
|
||||
```sh
|
||||
# Portable static Linux binary (default when a bundled zig is present):
|
||||
sx build app.sx -o app
|
||||
file app # → "ELF 64-bit … statically linked"
|
||||
|
||||
# Force the backend; fail loudly if no zig is bundled:
|
||||
sx build app.sx --self-contained
|
||||
|
||||
# Use a specific zig:
|
||||
SX_ZIG=/opt/zig-0.16.0/zig sx build app.sx --self-contained
|
||||
|
||||
# Opt out, use the system toolchain:
|
||||
sx build app.sx --no-self-contained
|
||||
|
||||
# Dynamic glibc instead of static musl:
|
||||
sx build app.sx --target linux
|
||||
|
||||
# Debug discovery + the exact link invocation:
|
||||
SX_DEBUG_ZIG=1 SX_DEBUG_LINK=1 sx build app.sx
|
||||
```
|
||||
625
design/execution-evolution-roadmap.md
Normal file
625
design/execution-evolution-roadmap.md
Normal file
@@ -0,0 +1,625 @@
|
||||
# Execution-Model Evolution — Roadmap (comptime JIT · async · concurrency · hot-reload)
|
||||
|
||||
> Status: **exploratory design-of-record.** Captures the forward plan for sx's
|
||||
> execution model across five interlocking threads. Not yet an active
|
||||
> `PLAN-*`/`CHECKPOINT-*` stream — this is the shared design the streams would be
|
||||
> carved from. Cross-platform shipping (the bundled-zig backend + the sx bundler)
|
||||
> is **already landed**; see [bundled-zig-link-backend-design.md](bundled-zig-link-backend-design.md)
|
||||
> and [../current/PLAN-DIST.md](../current/PLAN-DIST.md).
|
||||
|
||||
---
|
||||
|
||||
## 0. The thesis
|
||||
|
||||
sx's compiler stays small by pushing capability into **library sx + three general
|
||||
primitives** (`inline asm`, `extern`/`export`, `atomics`) rather than baking
|
||||
features into codegen. Concretely:
|
||||
|
||||
- **Async is a library, not a language feature** — colorblind, stackful fibers
|
||||
behind an `Io` interface (Zig-inspired). No function coloring, no
|
||||
async→state-machine transform. The implementation is pure sx down to a per-arch
|
||||
inline-asm context switch.
|
||||
- **Comptime gains a JIT escape hatch** — the interpreter stays the default
|
||||
(debuggable, portable), but drops to a host-JIT for the one thing it can't
|
||||
walk (inline asm) and, later, for whole fragments (the bundler).
|
||||
- **One shared substrate** — a persistent ORC LLJIT + host-target emitter — serves
|
||||
comptime-asm, the bundler, and JIT-resident hot-reload.
|
||||
|
||||
The honest trade is **small *surface*, but each primitive is *deep*** — not "small
|
||||
compiler." The net-new **compiler** obligations this plan adds (all verified absent
|
||||
today): **atomics lowering** (N1), **generic enums** `enum($T)`, **`type_info` +
|
||||
`reify` + `field_type`** (comptime type construction), **`callconv(.naked)`**,
|
||||
**repointable-`context` codegen** (+ per-fiber stack-limit), the **S1 persistent JIT
|
||||
spine**, **C1 thunk synthesis**, **comptime-asm lifting** (C3), and (later) the **S2
|
||||
ORC C++ shim**. Async itself is genuinely a library; the *enabling primitives* are a
|
||||
major codegen/runtime investment. Already landed (or, where noted, in flight): `inline asm` (in flight),
|
||||
`extern`/`export`, the `!`/`try`/`catch`/`onfail`/`raise` ERR stream, value-level
|
||||
reflection, the `sx run` ORC LLJIT, and the host-FFI trampolines.
|
||||
|
||||
---
|
||||
|
||||
## 1. The spine (shared substrate)
|
||||
|
||||
| ID | Piece | What | Size |
|
||||
|----|-------|------|------|
|
||||
| **S1** | Persistent JIT executor | A long-lived ORC LLJIT + a host-triple `LLVMEmitter` + a compiled-fragment cache, plumbed into the interpreter. Today the LLJIT exists only for `sx run`'s `main` ([target.zig:319](../src/target.zig#L319)); the emitter carries one target machine ([emit_llvm.zig:274](../src/ir/emit_llvm.zig#L274)). | L |
|
||||
| **S2** | ORC C++ shim | `MachOPlatform::Create` + redirectable/lazy-reexport symbols. The bare `LLVMOrcCreateLLJIT` can't do thread-locals, C constructors, or symbol redefinition — the wall the C-with-sx JIT spike hit (`_Thread_local` SIGABRT; `errors-*` examples crashed). Required by any non-trivial JIT or symbol repoint. | M |
|
||||
|
||||
S1/S2 are the spine: built once, consumed by **C1** (the FFI thunks — the main
|
||||
near-term consumer), **C3**, and (later) **R2**. S1 alone suffices for C1/C3 (bare
|
||||
calling/asm thunks — no TLS/ctors); S2 is only needed for R2 and JIT-ing C-with-sx.
|
||||
|
||||
---
|
||||
|
||||
## 2. Comptime / build layer
|
||||
|
||||
| ID | Piece | Unblocks | Depends | Size |
|
||||
|----|-------|----------|---------|------|
|
||||
| **C1** | **Real comptime FFI — JIT calling-thunks (LLVM = single ABI authority).** Trivial calls (scalar/ptr/string args, single-reg return) keep the existing `host_ffi.zig` trampoline fast-path; everything else (floats, structs-by-value, aggregate returns, >8 args, varargs) synthesizes a per-signature thunk, JIT-compiles it via **S1**, and calls it with an args buffer the interpreter fills by known layout (`type_info`). **LLVM emits the ABI-correct call — the same lowering as runtime codegen — so comptime and runtime FFI share ONE ABI implementation.** Rejected: libffi (foreign 2nd ABI impl), hand-rolled sx+asm (3rd impl + drift risk + needs C3 to run its own asm leaf anyway). | struct/string/slice/float signatures at comptime; full C interop in `#run`; lifts the bundler's API straightjacket; unifies comptime+runtime FFI | S1 (fast-path: none) | L |
|
||||
| **C2** | **`#compiler` → `extern` collapse** — BuildOptions hooks become real exported C symbols resolved through C1; `*BuildConfig` threaded via global/handle; delete `.compiler_expr`/`compiler_call`/Registry. | one FFI mechanism, not two | C1 (`extern`/`export` already shipped) | M |
|
||||
| **C3** | **Comptime asm via host-JIT** — stop bailing on `inline_asm` ([interp.zig:1019](../src/ir/interp.zig#L1019)); lift the block (operand model at [inst.zig:354](../src/ir/inst.zig#L354): inputs/`out_value`/`out_place`/`out_ty`/clobbers) to a host-arch thunk via `LLVMGetInlineAsm`, JIT, call through C1, cache by template+sig. | running asm-containing code at comptime | S1, C1 (+S2 non-trivial) | M |
|
||||
| **C4** *(DROPPED)* | **JIT-the-bundler** — **not built** (Decision 6). Interp+C1 is the shipping bundler (I/O-bound, so native speed is moot; C1 closes the only capability gap). Remains an always-available S1 optimization if profiling ever shows the bundler's *own logic* is a hotspot. | — | — | — |
|
||||
|
||||
**Residue:** cross-arch comptime asm (C3) can't run on the host — narrows the bail
|
||||
to the cross-compile case; needs a sharp diagnostic ("asm targets `<arch>`, host
|
||||
is `<host>`").
|
||||
|
||||
---
|
||||
|
||||
## 3. Concurrency primitives (atomics + threads)
|
||||
|
||||
> **Why this is its own section:** we are doing **multiple OS threads**, so the
|
||||
> async runtime and any lock-free structure need real atomics. OS threads already
|
||||
> exist; atomics do not.
|
||||
|
||||
| ID | Piece | State | Size |
|
||||
|----|-------|-------|------|
|
||||
| **N1** | **Atomics — NET-NEW compiler feature.** Atomic load/store/RMW (`add/sub/and/or/xor/swap` + `fetch_min`/`fetch_max`; no `nand`), `compare_exchange`/`_weak` (→ `?T`, **null = success**), and fences, with orderings (relaxed/acquire/release/acq_rel/seq_cst). LLVM provides all — an **emit** feature, not a runtime library. **Surface LOCKED = `Atomic($T)` wrapper + `Ordering` enum** (not `@atomic_*` — `@` is address-of in sx). | **lowering absent** — zero LLVM `atomicrmw`/`cmpxchg`/`fence` emission today; some IR/inference scaffolding exists | M |
|
||||
| **N2** | **OS threads + pthread Mutex/Cond + worker Pool** | **landed** — [std/thread.sx](../library/modules/std/thread.sx) (`pthread_create`/`join`/`detach`, in-place `Mutex`/`Cond`, bounded `Pool`). NOTE: pthread mutex **blocks the OS thread** — it is *not* fiber-aware (it would park every fiber on that thread); fiber-aware sync is N3, built on N1. | — |
|
||||
| **N3** | **Fiber-aware sync** — mutex / channel / waitgroup that **suspend the fiber**, not the OS thread. Hybrid: atomic fast-path (N1) + fiber-suspend slow-path (A2/A5). Distinct from the pthread primitives in N2. | new library | M |
|
||||
|
||||
**Compiler obligation for N1:** the emit must map sx orderings to LLVM's and **not
|
||||
reorder across atomics/fences**. Comptime is single-threaded, so the interpreter
|
||||
can treat atomic ops as ordinary ops (seq_cst is trivially satisfied with one
|
||||
thread) — no interp atomics machinery needed.
|
||||
|
||||
**N1 is a prerequisite for M:N scheduling (A5) and N3, and is broadly useful**
|
||||
(lock-free queues, refcounts, the allocator). It is the load-bearing new primitive
|
||||
this revision adds.
|
||||
|
||||
---
|
||||
|
||||
## 4. Async — colorblind, stackful, pure-sx
|
||||
|
||||
**Commitment:** no function coloring, no async→state-machine transform. Async is a
|
||||
capability carried in `context` (like `context.allocator`), not a property of a
|
||||
function's signature. A function does I/O through `context.io`; whether the call
|
||||
suspends is decided by the `Io` *implementation*, transparently.
|
||||
|
||||
| ID | Piece | Notes | Size |
|
||||
|----|-------|-------|------|
|
||||
| **A1** | **`Io` interface + `context.io`** — a protocol/vtable threaded like `Allocator`. `io.async(fn,args) → Future`, `future.await`, cancellation. | leverages protocols + context | M |
|
||||
| **A2** | **Stackful coroutine runtime — in sx lib, NOT a compiler builtin.** The context-switch is a `callconv(.naked)` sx fn with an inline-asm body (save callee-saved + SP/LR into `*from`, load from `*to`, `ret`); fiber bootstrap + stack alloc (`mmap`+guard via `extern`) also sx. The **compiler's** job is only (a) the general primitives — inline asm, `callconv(.naked)`, atomics — and (b) **fiber-safe codegen**: `context` lowered as a *repointable indirection* (never raw TLS) so the switch can repoint it, and stack-limit guards (if emitted) read from a swappable per-fiber location. Most arch-delicate sx in the tree (must match the platform callee-saved set + the compiler ABI), but it's inspectable sx, not a black box. | per-arch, arch-gated; co-validate vs codegen | M |
|
||||
| **A3** | **Event-loop `Io` impls** — kqueue / epoll / io_uring drive readiness, then the (now-ready) syscall via C1. Plus a trivial **blocking `Io`**. | pure sx around syscall `extern`s | L |
|
||||
| **A4** | **Stdlib I/O rework** — fs/socket/process take/use `context.io` instead of raw blocking syscalls, so existing calls participate in async. | mirrors the allocator-threading rule | M |
|
||||
| **A5** | **Schedulers — M:1 → N×(M:1) → M:N, all sx std-lib `Io` vtables (committed; M:N last, not deferred).** M:1 first (minimal vehicle to validate the colorblind stack; covers I/O-bound). N×(M:1) = first parallel step (per-thread M:1 loops + `std/thread.sx` spawn; shared state uses N1 atomics — expected under parallelism, not a wart). M:N work-stealing last (most machinery: thread-safe steal queues + migration + errno/TLS discipline). All over N1 atomics + the A2 asm context-switch + `extern` syscalls. **pinning** API for thread-affine work (UI main thread, GL context). | see §4.3 | M (M:1) / M (N×(M:1)) / L (M:N) |
|
||||
|
||||
### 4.1 How control enters sx (the colorblind model)
|
||||
|
||||
- **sx→sx is ordinary.** The whole call chain lives on the fiber stack; a suspend
|
||||
at a leaf `io.*` freezes the native stack verbatim. No frame knows it suspended.
|
||||
**Zero special handling at call boundaries** — that's the point.
|
||||
- **Three inbound boundaries** where the runtime enters sx:
|
||||
1. **Task entry** (`io.async(fn)`) — a trampoline starts `fn` on a fresh fiber
|
||||
stack via the normal calling convention.
|
||||
2. **Resumption** — a context-switch (asm), *not* a call; sx continues mid-stack.
|
||||
3. **C callback → sx** — must be `export`/`callconv(.c)`; runs on the event-loop
|
||||
stack (not a fiber) so it **cannot itself suspend** — it may resume/enqueue a
|
||||
fiber or run a non-suspending sx fn to completion (leaf-only).
|
||||
|
||||
### 4.2 `context` is fiber-local (the key obligation)
|
||||
|
||||
`context.io`/`context.allocator`/the `push Context` stack are dynamically scoped.
|
||||
Fibers time-share OS threads (and **migrate** under M:N), so `context` must travel
|
||||
**with the fiber** — saved/restored on every context-switch — **never a raw TLS
|
||||
read.** A spawned task snapshots the spawner's context, then evolves its own
|
||||
`push Context` stack. This is the CLAUDE.md "capture your owning allocator" rule one
|
||||
level up: ambient state that outlives a suspension point must be carried by the
|
||||
fiber.
|
||||
|
||||
### 4.3 Threads & the two hazard classes (why atomics)
|
||||
|
||||
| Model | Parallelism | Migration | Hazards |
|
||||
|-------|-------------|-----------|---------|
|
||||
| **M:1** (1 OS thread) | none | none | cooperative, race-free — simplest |
|
||||
| **N×(M:1)** (per-thread schedulers, no migration) | yes | none | **data races** on shared state → atomics/locks |
|
||||
| **M:N** (work-stealing) | yes | yes | data races **+** TLS-migration hazards |
|
||||
|
||||
- **Parallelism hazard** (any N>1): shared mutable state races → needs **N1
|
||||
atomics** + N3 fiber-aware sync. The M:1 "no locks" simplicity is gone.
|
||||
- **Migration hazard** (M:N only): a fiber that moves threads across a suspend
|
||||
reads the *wrong* thread's TLS. **`errno` must be captured immediately** after
|
||||
each syscall; **`context` must be fiber-local** (§4.2) — non-negotiable under M:N.
|
||||
- **Pinning** (spawn attribute `pin = .main` / `pin = .on(thread)`, §4.6): some work must stay put — the **UI main
|
||||
thread** (UIKit/macOS/Android — directly the app targets in §6), OpenGL
|
||||
current-context, TLS-using FFI. M:N needs a "don't migrate / main-thread-only"
|
||||
fiber attribute (Go's `LockOSThread`).
|
||||
|
||||
### 4.4 Pure-sx boundary
|
||||
|
||||
Everything is sx except the irreducible FFI floor: the **asm context-switch**
|
||||
(per-arch, in `.sx`), **syscall `extern`s** (kernel-implemented, like any libc
|
||||
binding), and **raw stack memory** (`mmap`). The schedulers, event loops, futures,
|
||||
cancellation, and sync primitives are ordinary sx. Payoff: **swappable `Io`
|
||||
vtables** — blocking, io_uring, kqueue, a **mock `Io`** for tests, a
|
||||
**deterministic-simulation `Io`** (fake clock, scripted readiness) for reproducible
|
||||
concurrency tests — all libraries.
|
||||
|
||||
### 4.5 Comptime async = blocking `Io`
|
||||
|
||||
At comptime, install the **blocking `Io`**: `io.*` just blocks; no fibers, no
|
||||
scheduler, no suspend. Same source, different vtable. The interpreter never needs
|
||||
suspend/resume, and the FFI (C1) needs no async awareness. This is *why* the
|
||||
colorblind model resolves comptime async for free.
|
||||
|
||||
### 4.6 Syntax surface (grounded against the grammar)
|
||||
|
||||
All of the concurrency/atomics surface lands on **existing** sx grammar — `enum`
|
||||
tagged unions + `if x == { case … }` match ([specs.md:364,408](../specs.md#L408)),
|
||||
first-class **tuples** with named fields ([specs.md:815-852](../specs.md#L815)),
|
||||
`=>` closures, `struct($T)` generics, `callconv(...)`, and the ERR keywords
|
||||
(`try`/`catch`/`onfail`/`raise`/`error`). `race`/`async`/`await`/`atomic` are **not
|
||||
reserved words** ([specs.md:168](../specs.md#L168)), so they stay library
|
||||
types/methods — no keyword additions. One genuinely-new compiler capability is
|
||||
required (see end).
|
||||
|
||||
**Atomics (N1) — generic wrapper type.**
|
||||
```sx
|
||||
Ordering :: enum { relaxed; acquire; release; acq_rel; seq_cst; }
|
||||
Atomic :: ($T: Type) -> Type #builtin; // atomicity carried by the type
|
||||
|
||||
counter : Atomic(i64) = .init(0);
|
||||
counter.store(0, .relaxed);
|
||||
n := counter.load(.acquire);
|
||||
prev := counter.fetch_add(1, .seq_cst); // + fetch_sub/and/or/xor, fetch_min/fetch_max (no nand)
|
||||
old := counter.swap(42, .acq_rel);
|
||||
got := counter.compare_exchange(old, new, .acq_rel, .acquire); // strong → ?T (null = success)
|
||||
got2 := counter.compare_exchange_weak(old, new, .acq_rel, .acquire); // may fail spuriously; for retry loops
|
||||
fence(.seq_cst);
|
||||
```
|
||||
- CAS takes **two orderings** (success, failure); failure ordering may not be
|
||||
`release`/`acq_rel` nor stronger than success — enforce in the compiler.
|
||||
- Weak vs strong matters on **aarch64** (LL/SC) — weak in a loop is the idiom;
|
||||
both compile identically on x86.
|
||||
|
||||
**Channels (N3) — methods only (no `<-`); `recv` returns a tagged union (not `(v, ok)`).**
|
||||
```sx
|
||||
RecvResult :: enum($T: Type) { value: T; closed; } // shape sketch — realized as a type-fn over `reify` (§7 steps 2–3), not the race-synthesized union
|
||||
TryResult :: enum($T: Type) { value: T; empty; closed; } // non-blocking: 3 states a bool can't express
|
||||
|
||||
ch := Channel(i64).make(16); // capacity; .make() unbuffered
|
||||
ch.send(v);
|
||||
if ch.recv() == { case .value: (v) { use(v); } case .closed: { /* drained */ } }
|
||||
ch.close();
|
||||
// ergonomic layer: `for ch (v) { … }` consumes until closed, hiding RecvResult
|
||||
```
|
||||
|
||||
**Fiber-aware locks (N3) — explicit lock + `defer` (no guard sugar).**
|
||||
```sx
|
||||
m : Mutex;
|
||||
m.lock(); defer m.unlock();
|
||||
```
|
||||
|
||||
**Futures & spawn (A1).**
|
||||
```sx
|
||||
f := context.io.async(worker, arg); // Future(R)
|
||||
r := f.await(); // suspends this fiber
|
||||
f.cancel();
|
||||
d := context.io.timeout(5000); // a Future too — raceable like any other
|
||||
```
|
||||
|
||||
**Pinning (A5) — spawn attribute, accepts a thread handle.**
|
||||
```sx
|
||||
PinTarget :: enum { any; main; on: Thread; } // default = .any (may migrate)
|
||||
f := context.io.async(render, pin = .main);
|
||||
f := context.io.async(worker, pin = .on(some_thread));
|
||||
```
|
||||
|
||||
**`race` (Zig model — over futures, named tuple in → synthesized tagged-union out).**
|
||||
The input is a **named tuple** (positional also allowed → `.0`/`.1` tags); the
|
||||
result is an anonymous tagged union whose variants mirror the tuple's labels, each
|
||||
payload = that field's `Future(T)` projected to `T`. Losers are **cancelled and
|
||||
joined** before `race` returns (structured).
|
||||
```sx
|
||||
fa := context.io.async(read_a, conn); // Future(A)
|
||||
fb := context.io.async(read_b, conn); // Future(B)
|
||||
|
||||
winner := context.io.race((a: fa, b: fb)); // RaceResult = enum { a: A; b: B }
|
||||
if winner == {
|
||||
case .a: (v) { handle_a(v); } // v : A
|
||||
case .b: (v) { handle_b(v); } // v : B
|
||||
}
|
||||
// positional form: race((fa, fb)) → tags .0 / .1
|
||||
```
|
||||
The Go-style handler-map and the map literal that propped it up are **dropped** —
|
||||
`race` over futures subsumes select, and cancellation handles the losers.
|
||||
|
||||
**Cancellation rides ERR.** A cancelled `io.*` **raises**; the fiber unwinds
|
||||
through `defer`/`onfail` (`try`/`catch`/`raise` are real keywords). Cancellation is
|
||||
**cooperative** (observed only at suspend points — every `io.*` is a cancellation
|
||||
point) and **structured** (`race` joins losers' teardown before returning). No
|
||||
parallel unwind path — it reuses the error channel.
|
||||
|
||||
**Context switch (A2).**
|
||||
```sx
|
||||
swap_context :: (from: *Fiber, to: *Fiber) callconv(.naked) {
|
||||
asm { /* save callee-saved + SP into *from; load from *to; ret */ };
|
||||
}
|
||||
```
|
||||
`callconv(.naked)` ≠ `callconv(.c)`: **no prologue/epilogue/frame** — required
|
||||
because a context switch deliberately makes SP-in ≠ SP-out (a `.c` epilogue would
|
||||
restore from the wrong stack). Body is a single `asm` block; you emit your own
|
||||
`ret`. Args arrive in ABI registers, read directly from asm.
|
||||
|
||||
**One new compiler capability (gates `race`):** *comptime tuple→tagged-union
|
||||
synthesis.* Reflection today only **reads** types (`field_count`/`field_name`/
|
||||
`type_of`); `RaceResult(T)` must **construct** an anonymous `enum` from a tuple's
|
||||
`(label, payload-type)` pairs. Supporting pieces: a `field_type($T, i) -> Type`
|
||||
reflection accessor (we have value-level `field_value` + `type_of`, but type-only
|
||||
field projection is missing) and `Future(T) → T` projection (falls out of
|
||||
generics). This is the generic "derive a sum from a product" — useful beyond
|
||||
`race`.
|
||||
|
||||
---
|
||||
|
||||
## 5. Dev loop / hot-reload
|
||||
|
||||
| ID | Piece | Notes | Depends | Size |
|
||||
|----|-------|-------|---------|------|
|
||||
| **R1** | **Hot-reload (dylib swap)** — host owns `State`+allocator; reloadable module is a `.dylib` with a fixed `export` interface; watch→rebuild→`dlopen`→rebind→`dlclose`. State survives (host-owned). | leans on `export` (shipped); sidesteps S2; native | — | M |
|
||||
| **R2** | **Hot-reload (JIT-resident)** — program runs under S1's LLJIT; reloadable calls route through ORC indirection stubs, repointed on change. Finer granularity; same spine. | | S1, S2 | L |
|
||||
| **R3** | **Incremental compilation** — dependency tracking + recompile-only-changed. Perf enabler; coarse per-file v1 suffices first. | | — | L |
|
||||
|
||||
**Core rule:** the data that must survive a reload cannot be owned by the code that
|
||||
reloads. Code/state separation — the CLAUDE.md owning-allocator discipline, one
|
||||
level up.
|
||||
|
||||
**Residue — state migration on layout change:** body-only changes hot-swap;
|
||||
layout/signature/global-type changes are **detected** (compare new vs running
|
||||
`State` layout via `types.zig`) and trigger **rebuild+restart**. Migration hooks
|
||||
(`on_reload(old)→new`) are a hard later item. Design against *silent* corruption.
|
||||
|
||||
---
|
||||
|
||||
## 6. Cross-platform (mostly landed) — from a macOS laptop
|
||||
|
||||
### 6.1 Landed
|
||||
|
||||
| Capability | State | Reach from a mac |
|
||||
|---|---|---|
|
||||
| `extern`/`export` C linkage | done (replaced `#foreign`) | all targets |
|
||||
| Bundled-`zig cc` cross-link backend | Phases 0–2 done; packaging pending | **macOS, Linux(-musl/static), Windows(-gnu)** verified |
|
||||
| sx-side bundler (`.app`/`.apk`) | done | macOS, iOS sim/device, Android |
|
||||
| JIT `sx run` (ORC LLJIT) | done | host |
|
||||
| Target shorthands | done | `macos[-arm]`, `linux[-musl[-arm]]`, `windows[-gnu]`, `ios[-arm]`, `ios-sim[-arm/-x86]`, `android[-arm64/-x86_64]`, `wasm` |
|
||||
|
||||
### 6.2 Workflows
|
||||
|
||||
```sh
|
||||
# macOS (native): inner loop is JIT; ship is Mach-O / .app
|
||||
sx run app.sx
|
||||
sx build app.sx -o app
|
||||
sx build app.sx --bundle MyApp.app
|
||||
|
||||
# Linux (cross, landed killer feature): static, zero-dep ELF
|
||||
sx build app.sx --target linux-musl -o app # scp anywhere, runs
|
||||
|
||||
# Windows (cross, landed, MinGW path): PE32+
|
||||
sx build app.sx --target windows-gnu -o app.exe # cf. example 1660 (win32)
|
||||
|
||||
# iOS simulator (mac-only host)
|
||||
sx build app.sx --target ios-sim --bundle App.app
|
||||
|
||||
# iOS device — signing threaded via the build program (BuildOptions setters)
|
||||
# #run { o := build_options(); o.set_bundle_id(...); o.set_codesign_identity(...);
|
||||
# o.set_provisioning_profile(...); }
|
||||
sx build build.sx --target ios --bundle App.app
|
||||
|
||||
# Android (cross + bundle): javac → d8 → aapt2 → zipalign → apksigner, then adb
|
||||
sx build app.sx --target android --apk app.apk
|
||||
```
|
||||
|
||||
### 6.3 Where the roadmap lights up cross-platform
|
||||
|
||||
- **C1 + C4** → the iOS/Android **bundlers** (orchestrate ~a dozen host tools at
|
||||
comptime; biggest win; always host-arch so no cross-arch risk).
|
||||
- **R1/R2 + A1–A5** → the **inner dev loop for non-host targets**: push-a-dylib +
|
||||
remote-trigger-reload over an async laptop↔device channel — a capability that
|
||||
*doesn't exist today* short of full rebuild+reinstall.
|
||||
- **A1/A2 colorblind `Io`** → the dev tooling is itself async, and the **same
|
||||
networking code runs blocking inside the bundler** (`adb push`) and async in the
|
||||
live session — no coloring.
|
||||
- **Pinning (A5)** → the UI render fiber pins to the main OS thread on every app
|
||||
target.
|
||||
|
||||
**The single hard constraint the matrix exposes:** cross builds mean target arch ≠
|
||||
host arch, so **C3's residue bites** — comptime/`#run` code reaching *target-arch*
|
||||
inline asm can't execute on the mac. Native macOS dev never hits it; every cross
|
||||
target must gate comptime asm to host-arch (`when host_arch == …`) or get a loud
|
||||
diagnostic.
|
||||
|
||||
---
|
||||
|
||||
## 7. Linear build sequence (async-first — no parallel streams)
|
||||
|
||||
Single ordered list; deps satisfied at every step. **Async-first** (user-chosen): the
|
||||
async story needs no JIT spine (syscalls use the existing trampoline FFI; comptime
|
||||
async = blocking `Io`), so the FFI/JIT cluster comes *after*. C4 is omitted (deferred —
|
||||
an S1 optimization if ever profiled). Net-new compiler prereqs (per the codebase
|
||||
grounding) are explicit steps, not buried.
|
||||
|
||||
**Foundations — compiler primitives the async story needs (all net-new):**
|
||||
1. **N1 — Atomics lowering.** IR/inference scaffolding exists; add LLVM
|
||||
`atomicrmw`/`cmpxchg`/`fence` emission + orderings. Surface = `Atomic($T)` wrapper.
|
||||
Gates channels/N3 + parallel schedulers.
|
||||
2. ~~**Generic enums** `enum($T)`~~ **DROPPED.** `RecvResult($T)`/`TryResult($T)` are
|
||||
**type-fns over `reify`** (step 3), not a new `enum($T)` language feature — and
|
||||
type-fns (user `($T)->Type` in type position) **already work** (e.g.
|
||||
[`Make`](../examples/0208-generics-value-param-type-function.sx),
|
||||
[`Complex`](../examples/0201-generics-generic-struct.sx)). A declarative `enum($T)`
|
||||
surface, if ever wanted, is later *sugar* desugaring to a type-fn-over-`reify`.
|
||||
3. **`type_info` + `reify` + `field_type`** — comptime metaprogramming floor. Gates
|
||||
`race` synthesis **and** channel `RecvResult`/`TryResult` (all type-fns over
|
||||
`reify`; **generic-enum syntax dropped**). **Validated against the codebase (3
|
||||
reviewers): a small extension reusing existing machinery throughout — not net-new
|
||||
architecture.** Five contracts:
|
||||
1. **Nominal identity via type-fn memoization** — type-fns dedup by mangled
|
||||
`(fn,args)` name (generic.zig:1620-1629) + reify `findByName`, so `RecvResult(i64)`
|
||||
is one `TypeId` and the body runs once. (NOT structural dedup — enums are
|
||||
nominal via `nominal_id`, types.zig:1110.)
|
||||
2. **Functional through codegen** — layout / construct / match+exhaustiveness /
|
||||
`toLLVMType` / `type_name`+format are **all type-table-driven, zero AST
|
||||
coupling**, so a backing-decl-less reify'd enum flows through unmodified.
|
||||
3. **Validate loudly** at the single `intern`/`internNominal` choke point
|
||||
(types.zig:411-439): reject dup variants / bad backing / unresolved payloads.
|
||||
4. **Comptime-only, JIT-free** — a type-table op in the interp; no S1 dependency
|
||||
(keeps reify, hence channels + `race`, off the JIT critical path).
|
||||
5. **Reference-based self-reference (v1)** — `*Self`/`[]Self` payloads via the
|
||||
reserve-placeholder→complete path recursive *source* types already use
|
||||
(nominal.zig:86/108/120, types.zig:442); **by-value recursion rejected** (loud,
|
||||
infinite size). reify gains a `reify_rec((self) => …)` builder form.
|
||||
- **Type-minting precedents (7):** monomorphization, protocol vtables, tuples,
|
||||
vector/array, ptr/slice ctors, FFI stubs, **type-fn instantiation** — all
|
||||
construct `TypeInfo` programmatically + `intern()`. **Residual = plumbing, not
|
||||
capability:** name reify-results by the instantiation's mangled name (done for
|
||||
inline-struct bodies — extend to reify-results) + reify input validation.
|
||||
4. **`callconv(.naked)`** — extend `CallConv {default, c}` (types.zig:169) + skip
|
||||
prologue/epilogue lowering. Gates A2.
|
||||
5. **Repointable-`context` codegen** — lower `context` as a swappable indirection
|
||||
(never raw TLS) + per-fiber stack-limit. Compiler obligation; gates A2 *and*
|
||||
cross-fiber `context.io` correctness. (Reviewer note: this is a **prerequisite**
|
||||
of A2, not a successor.)
|
||||
|
||||
**Async runtime — sx lib over the primitives:**
|
||||
6. **A1 — `Io` interface + `context.io` + `Future` + `cancel()` API.**
|
||||
7. **A2 — fiber runtime** (naked context-switch asm, bootstrap, `mmap` stacks).
|
||||
8. **A3 — blocking `Io` → deterministic-sim `Io` (keystone, calibrated) → event-loop `Io`.**
|
||||
9. **A5·M:1 — single-thread scheduler.**
|
||||
10. **N3 — fiber-aware sync** (channels/mutex/waitgroup; `recv → RecvResult`).
|
||||
11. **A6 — Cancellation.** `.canceled` in the `!` channel (model a); per-fiber atomic
|
||||
flag (N1); every `io.*` a cancellation point; structured cancel-and-join; **masked
|
||||
during cleanup**.
|
||||
12. **A4 — stdlib I/O rework** (fs/socket/process onto `context.io`).
|
||||
13. **A5·N×(M:1)** — first parallel (errno-capture + `context`-fiber-local discipline).
|
||||
14. **A5·M:N** — work-stealing (steal queues + migration + pinning).
|
||||
|
||||
**Then comptime / FFI / JIT cluster:**
|
||||
15. **S1 — persistent JIT spine** → 16. **C1 — real FFI (LLVM = ABI authority, on S1)**
|
||||
→ 17. **C2 — `#compiler`→`extern`** → 18. **C3 — comptime asm** (S1 + C1; +S2 if
|
||||
TLS/ctors).
|
||||
|
||||
**Deferred tail:**
|
||||
19. **S2 — ORC C++ shim** (highest-risk — see §8; macOS `MachOPlatform`; ELF/COFF
|
||||
unplanned) → 20. **R1 — dylib reload** (shipped `export`) → 21. **R2 —
|
||||
JIT-resident reload** (S1 + S2; **↔ async live-fiber coupling**, §8) → 22. **R3 —
|
||||
incremental compilation**.
|
||||
|
||||
Hard edges to remember: **C1 depends on S1** (the non-trivial FFI cases); **C3 depends
|
||||
on C1** (calls through its thunk path); **R1/R2 couple to the async runtime** (can't
|
||||
hot-swap code with live suspended fibers — runtime + long-lived fibers stay
|
||||
persistent, only leaf logic reloads).
|
||||
|
||||
---
|
||||
|
||||
## 8. Irreducible hard problems (detect-and-degrade, don't pretend)
|
||||
|
||||
1. **State migration across layout change** (R1/R2) → v1 detects + rebuild/restart;
|
||||
migration hooks later.
|
||||
2. **Cross-arch comptime asm** (C3) → can't run on host; narrows the bail + loud
|
||||
diagnostic; gate to host-arch.
|
||||
3. **M:N migration hazards** (A5) → errno-capture discipline + fiber-local context
|
||||
(mandatory), pinning for thread-affine work.
|
||||
|
||||
### 8.1 Highest technical risks (from review — ranked, async-first lens)
|
||||
|
||||
1. **A2 context-switch correctness** (in the async critical path). Silent stack
|
||||
corruption, per-arch, **untestable by the deterministic-`Io` harness** (it tests
|
||||
*scheduling*, not the *switch*); a one-register slip is invisible until it crashes
|
||||
on the right arch. Couples *library asm* to the *compiler ABI* — ABI drift breaks
|
||||
it silently later. → needs a dedicated **switch-stress test** (§10).
|
||||
2. **`reify` → anonymous-tagged-union → match-codegen** (gates `race` + channels).
|
||||
**DE-RISKED by review** (§7 step 3): all enum stages are type-table-driven with
|
||||
zero AST coupling, identity is handled by existing type-fn mangled-name memoization,
|
||||
and forward-declaration for self-ref already exists. Residual is *plumbing*
|
||||
(name reify-results by mangled name + input validation), not new architecture.
|
||||
3. **Deterministic-`Io` is the test keystone yet itself uncalibrated** — a buggy
|
||||
deterministic scheduler yields deterministic-*wrong* stdout that snapshots lock in.
|
||||
→ calibrate against the blocking `Io` / property-test fixed order (§10).
|
||||
4. **`context`-fiber-local + errno discipline** (A5 M:N). "Non-negotiable" but
|
||||
enforced by manual rule, not the compiler; M:1 can't even exercise migration.
|
||||
5. **S2 ORC shim** (deferred, but highest-risk when reached): only C++ in the tree,
|
||||
**already failed a spike** (`_Thread_local` SIGABRT), `MachOPlatform` is
|
||||
macOS-specific — **Linux/Windows JIT-resident reload + non-Mac TLS/ctor JIT have no
|
||||
named plan**. One "M" box hides a per-OS effort.
|
||||
6. **C1 args-buffer layout-vs-ABI** — "LLVM emits the call" covers the *call*, not the
|
||||
interpreter's *buffer pack* from `type_info`. Disagreement on edge layouts
|
||||
(over-aligned/empty structs, aarch64 small-struct register splitting, `bool`) =
|
||||
silent comptime corruption. → adversarial layout cases (§10).
|
||||
|
||||
---
|
||||
|
||||
## 9. Decisions log (all resolved)
|
||||
|
||||
**Sequencing — locked:** **async-first** (§7). The async cluster (steps 1–14)
|
||||
precedes the FFI/JIT cluster (15–18) because async needs no JIT spine. **Cancellation
|
||||
(A6) = model (a)** — a `.canceled` variant in the **existing `!` error channel** that
|
||||
`io.*` already returns (I/O is inherently fallible, so `io.*` is already `!`-typed —
|
||||
the "keep calls clean" argument for the non-local-`raise` model is moot). Reuses
|
||||
`!`/`try`/`catch`/`onfail`; no new unwind primitive. **Net-new prereq surfaced by
|
||||
grounding:** `callconv(.naked)` (only `.default`/`.c` today). **Generic enums dropped**
|
||||
— `RecvResult($T)`/`TryResult($T)` are **type-fns over `reify`** (type-fns already work
|
||||
in type position, e.g. `Make`/`Complex`), so no `enum($T)` feature is needed; `reify`
|
||||
gains five contracts (chiefly deterministic identity + functional-enum output; full list in §7 step 3).
|
||||
|
||||
**Locked (see §4.6 for the grounded surface):**
|
||||
- **N1 atomics surface = generic wrapper `Atomic($T)`** + `Ordering` enum, `.init`,
|
||||
`compare_exchange`/`_weak` returning `?T` (**null = success** — pinned, opposite of
|
||||
most priors). (Not `@atomic_*` builtins — `@` is address-of in sx.) **RMW set** =
|
||||
`add/sub/and/or/xor/swap` + `fetch_min`/`fetch_max` (free from LLVM); **no `nand`**.
|
||||
- **`race` = over futures** (Zig model), **single named-tuple in** (`race((a: fa, b:
|
||||
fb))`) → synthesized tagged-union out; Go-style handler-map + map literal
|
||||
**dropped**. **No `async` spawn-sugar** — always `context.io.async(...)`.
|
||||
- **Channels** = `send`/`recv` methods (no `<-`); **`recv` returns a tagged union**
|
||||
`RecvResult($T){ value; closed }` (not `(v, ok)`), `try_recv` → `{ value; empty;
|
||||
closed }`; optional `for ch (v) {…}` iteration sugar. **locks** = `lock()` + `defer
|
||||
unlock()` (no guard sugar). `race`/`async`/`await` stay library, not keywords.
|
||||
- **Comptime type metaprogramming = `type_info` + `reify` builtins only** (Zig
|
||||
`@typeInfo`/`@Type` model). **Everything else is sx lib** — `make_enum`,
|
||||
`field_type`, `RaceResult`. `reify` coverage starts at **enum/struct/tuple**, grows
|
||||
later. `Future($T)` exposes `Value :: T` so `Future(X)→X` is plain member access
|
||||
(no `type_arg` builtin).
|
||||
- **C1 FFI engine = LLVM as single ABI authority** — per-signature JIT calling-thunks
|
||||
via S1 (LLVM emits the ABI-correct call, same as runtime codegen); trampoline
|
||||
fast-path for trivial calls. **libffi/dyncall + hand-rolled-sx rejected** (2nd/3rd
|
||||
ABI impl; hand-rolled needs C3 for its own asm leaf anyway). Promotes **S1 to
|
||||
foundational** (shared by C1, C3).
|
||||
|
||||
**Scheduler (Decision 5) — locked:** **M:1 → N×(M:1) → M:N**, all **sx std-lib `Io`
|
||||
vtables** (compiler only provides N1 atomics + the A2 asm context-switch + `extern`
|
||||
syscalls). M:1 ships first (validates the colorblind stack, covers I/O-bound);
|
||||
N×(M:1) is the first parallel step; **M:N is last in sequence but committed — not
|
||||
deferred.** Data races under parallelism are expected and handled with atomics +
|
||||
fiber-aware sync — that *is* parallelism, not a wart; M:1's lock-freedom is just a
|
||||
property of the single-threaded case.
|
||||
|
||||
**Deferred, orthogonal additions (Decisions 6–7) — both addable later without
|
||||
revisiting anything locked:**
|
||||
- **C4 (Decision 6) — fully orthogonal; not built now.** Pure deferred optimization
|
||||
riding S1 (already present for C1/C3): JIT the bundler subgraph instead of
|
||||
interpreting it. Zero coupling — same bundler sx, same C1 FFI. Apply only if
|
||||
profiling ever shows the bundler's *own logic* is a hotspot (it's I/O-bound, so
|
||||
unlikely). Interp+C1 is the shipping bundler.
|
||||
- **Hot-reload (Decision 7) — deferred; mechanism additive.** Substrate ready: R1
|
||||
(dylib-swap) needs only shipped `export`; R2 (JIT-resident) needs S1 + the S2 ORC
|
||||
shim. **R1-vs-R2 chosen at pickup.** One coupling (a design constraint, not a
|
||||
decision change): you can't hot-swap code with **live suspended fibers** pointing
|
||||
into the old module — so the async runtime + long-lived fibers stay on the
|
||||
*persistent* side, only transient **leaf logic** is reloadable (or quiesce fibers
|
||||
before swap).
|
||||
|
||||
---
|
||||
|
||||
## 10. Testing & gates
|
||||
|
||||
Inherits the project cadence (CLAUDE.md): `zig build && zig build test` after every
|
||||
step; **xfail-then-green or behavior-lock — no commit both adds a test AND makes it
|
||||
pass**; never regenerate snapshots while red; corpus = `examples/` + `issues/` with
|
||||
`.exit`/`.stdout`/`.stderr`/`.ir` snapshots. Per-*step* gates live in the eventual
|
||||
`PLAN-*` streams; this section is the design-level verification strategy that those
|
||||
streams must implement.
|
||||
|
||||
### 10.1 The async test harness = the deterministic-simulation `Io` (the keystone)
|
||||
|
||||
Concurrency is nondeterministic (scheduling/readiness order), which **breaks snapshot
|
||||
testing** outright. So the **deterministic-sim `Io`** (fixed clock, scripted
|
||||
readiness, deterministic single-stepping scheduler) is not merely a feature — it is
|
||||
**the test harness for everything async**. Every concurrency example runs under it →
|
||||
reproducible stdout → snapshottable. Consequence for sequencing: **build the
|
||||
deterministic `Io` right after the blocking `Io`** (it's the simplest scheduler after
|
||||
blocking and it *gates the ability to test* fibers/channels/race/schedulers at all).
|
||||
The 10 patterns in §4.6-adjacent examples become corpus tests only because they run
|
||||
under it.
|
||||
|
||||
### 10.2 What is NOT snapshot-testable
|
||||
|
||||
True parallel **data races** (N×(M:1) / M:N) are nondeterministic by construction. They
|
||||
run under the deterministic `Io` for *correctness* repro, but race-detection needs a
|
||||
separate **stress harness** (run-N-times / TSan-style), **not** the corpus. Any such
|
||||
coverage bound must be stated loudly (a `log()`-style note in the harness), never
|
||||
silently skipped — per the REJECTED-PATTERNS rule against silent gaps.
|
||||
|
||||
### 10.3 Arch-sensitive lowering — atomics + context-switch
|
||||
|
||||
Atomic orderings lower differently per arch (x86 `lock`-prefix / plain MOV vs aarch64
|
||||
LL/SC / `ldar`/`stlr`), and the A2 context-switch is per-arch asm. Lock both with the
|
||||
**existing inline-asm cross-arch sibling pattern**: a `.build` `{"target": "…"}`
|
||||
sidecar runs **ir-only** on a non-matching host (asserts `.ir` + `.exit` + `.stderr`
|
||||
from `sx ir --target`) and **end-to-end** on a matching CI runner. So `Atomic`
|
||||
lowering carries **x86_64 + aarch64 `.ir`** snapshots; the context-switch gets
|
||||
per-arch run tests on matching runners.
|
||||
|
||||
### 10.4 New corpus categories
|
||||
|
||||
`17xx` atomics · `18xx` concurrency (fibers/channels/race/async, all under the
|
||||
deterministic `Io`). Comptime metaprogramming (`type_info`/`reify`) + comptime-asm
|
||||
extend `06xx`; C1 FFI extends `12xx`; the cross-arch comptime-asm **loud bail** and
|
||||
the cancellation diagnostics are `11xx`.
|
||||
|
||||
### 10.5 Per-piece gates (design level)
|
||||
|
||||
| Piece | Locks via |
|
||||
|---|---|
|
||||
| **N1 atomics** | unit `emit_llvm.test.zig` (LLVM `atomicrmw`/`cmpxchg`/`fence` + ordering emission); corpus `17xx` single-thread (deterministic); arch-gated `.ir` (x86_64 + aarch64) |
|
||||
| **type_info / reify** | unit (reflect round-trips; reify'd enum has correct layout/match codegen); corpus `06xx` comptime (deterministic) |
|
||||
| **C1 FFI** | **behavior-lock** existing trampoline cases first; then xfail→green `12xx` comptime extern with floats / structs-by-value / aggregate (`{ptr,len}`) returns; unit for thunk-synth + args-buffer marshal |
|
||||
| **S1 spine** | infra — exercised transitively via C1/C3 examples; unit for LLJIT lifecycle + thunk cache |
|
||||
| **C3 comptime asm** | corpus `06xx` host-arch `#run` asm computes a value; `11xx` diagnostic asserts the cross-arch loud bail |
|
||||
| **A1/A2 fibers** | unit (scheduler step, fiber bootstrap); context-switch arch-gated run tests; corpus `18xx` under deterministic `Io` |
|
||||
| **A3/A5 schedulers, channels, race, cancel** | corpus `18xx` (the 10 patterns) under deterministic `Io` → deterministic snapshots; cancellation cleanup (`onfail`/`defer`) asserted via stdout ordering |
|
||||
|
||||
### 10.6 Cadence example (atomics, N1)
|
||||
|
||||
1. **xfail** — add `examples/17xx-atomics-fetch-add.sx` using `Atomic(i64).fetch_add`; seed the `.exit` marker → **red** (codegen missing). *(test added, not yet passing)*
|
||||
2. **green** — emit LLVM `atomicrmw add` + ordering; example passes; capture `.stdout` + x86_64/aarch64 `.ir` snapshots; review the diff. *(makes it pass, no new test)*
|
||||
|
||||
This satisfies "no commit both adds a test and makes it pass," and every other piece
|
||||
follows the same xfail→green (or behavior-lock→extend) shape.
|
||||
|
||||
### 10.7 Review-surfaced gaps (the high-corruption-risk pieces need *correctness*, not existence, tests)
|
||||
|
||||
The §10.5 gates prove things *run*; the §8.1 risks are silent-corruption modes a
|
||||
run/snapshot test won't catch. Each needs an explicit adversarial gate:
|
||||
|
||||
- **A2 context-switch — switch-stress test.** Scribble *every* callee-saved register
|
||||
+ a stack-canary before suspend; deep/recursive fiber chains; verify all survive
|
||||
post-resume. Run/snapshot tests don't prove register preservation. (The single
|
||||
highest-corruption-risk piece, §8.1.1.)
|
||||
- **Deterministic-`Io` — calibrate the oracle.** Cross-check a handful of cases
|
||||
against the blocking `Io` and property-test that scheduling order is actually fixed,
|
||||
*before* trusting it to gate everything async (a deterministic-but-wrong scheduler
|
||||
snapshots garbage).
|
||||
- **`context`-fiber-local invariant — named test at the N×(M:1)/M:N step.** M:1 can't
|
||||
exercise migration; add a test that forces a fiber to migrate and asserts it reads
|
||||
*its* `context`/`errno`, not the new thread's.
|
||||
- **N1 ordering *semantics* are out of snapshot scope — state it loudly.** `.ir`
|
||||
snapshots prove the *keyword emitted*, not weak-memory correctness (e.g. `relaxed`
|
||||
where `acquire` was needed ships green). Declare this out-of-scope parallel to
|
||||
§10.2's race carve-out; lock-free structures need the stress harness.
|
||||
- **C1 args-buffer — adversarial layout cases.** Over-aligned structs, empty structs,
|
||||
aarch64 small-struct register splitting, `bool` — a wrong layout that happens to
|
||||
print right passes a stdout test. Call these out explicitly, not just
|
||||
"structs-by-value."
|
||||
- **S2 — has no gate today despite a prior spike failure.** When reached, add a TLS +
|
||||
C-constructor JIT test (the exact `_Thread_local` SIGABRT case), per host OS.
|
||||
- **Hot-reload — no row today.** When picked up: state-survival test + the
|
||||
live-suspended-fiber-into-stale-module hazard (R1/R2).
|
||||
@@ -549,6 +549,40 @@ Lexer/token: add `kw_asm` to the `Token.Tag` enum + keyword `StaticStringMap` in
|
||||
* Every `%[name]` referenced in the template must name an operand (best surfaced as
|
||||
a Sema diagnostic; also caught at codegen during the rewrite — §II.6).
|
||||
|
||||
### Operand naming rule (auto-name from a `{reg}` pin) — DECIDED
|
||||
|
||||
The `[name]` label on an operand is purely an sx-surface convenience: it provides
|
||||
the `%[name]` template alias and (for `out_value`) the result tuple's field name.
|
||||
LLVM never sees it (it sees positional `${N}` + the constraint). To kill the
|
||||
common redundancy where a label just echoes its pinned register
|
||||
(`[eax] "={eax}"`), the **operand name is derived as follows**, uniformly across
|
||||
every operand kind (`out_value` / `out_place` / read-write / `input`):
|
||||
|
||||
1. **Explicit `[name]` wins** — use it verbatim (the `%[name]` alias / field name).
|
||||
2. **Else, if the constraint pins a single register** — `"={eax}"`, `"{rdi}"`,
|
||||
`"+{rax}"`, i.e. a `{reg}` body (optionally with a `=`/`+` prefix) — the operand
|
||||
is **auto-named after that register** (`eax`, `rdi`, `rax`). Usable as
|
||||
`%[eax]` and as the tuple field name.
|
||||
3. **Else (register-class `=r`/`+r`/`r`, or memory `=m`, …)** — the operand has
|
||||
**no implicit name**. A `[name]` is then **required** if the template
|
||||
references it (`%[name]`) or, for `out_value`, if a named result field is
|
||||
wanted; otherwise it is anonymous (positional tuple field).
|
||||
|
||||
Corollaries:
|
||||
|
||||
* **Reject the echo form.** An explicit `[name]` that is identical to the
|
||||
register its own constraint pins (`[eax] "={eax}"`) carries no information —
|
||||
emit a diagnostic ("redundant operand name `eax` — it already names the pinned
|
||||
register; drop the `[eax]`"). The useful form is a label that *differs* from the
|
||||
register (`[quot] "={rax}"` → field `quot` over register `rax`).
|
||||
* **Result field names** (the §II.5 result-type rule above) come from each
|
||||
`out_value`'s *effective* name — explicit `[name]`, else the auto-derived
|
||||
register name; positional only when neither exists (a class-constrained output
|
||||
with no `[name]`).
|
||||
* This is a **typing-stage** rule: the parser still stores `name: ?[]const u8`
|
||||
(null when no `[name]` was written); Sema computes the effective name. No
|
||||
parser change.
|
||||
|
||||
Note: there is **no** "≤1 output" rule (that was Zig's limit; sx's tuples lift it).
|
||||
|
||||
## II.6 sx IR + LLVM codegen (the part that must match Zig bit-for-bit)
|
||||
437
docs/inline-assembly.md
Normal file
437
docs/inline-assembly.md
Normal file
@@ -0,0 +1,437 @@
|
||||
# Inline Assembly in sx
|
||||
|
||||
A guide to writing inline assembly in sx — emitting raw target
|
||||
instructions, wiring values in and out, writing through memory, and
|
||||
defining whole routines in assembly.
|
||||
|
||||
> Looking for the *why* behind the design (how it maps to LLVM, the
|
||||
> Zig comparison, the emit algorithm)? That lives in
|
||||
> [inline-asm-design.md](../design/inline-asm-design.md). This page is the
|
||||
> user-facing how-to.
|
||||
|
||||
---
|
||||
|
||||
## The mental model
|
||||
|
||||
`asm` is an **expression**. It drops to the machine: you write a
|
||||
template of real instructions, declare which sx values feed registers
|
||||
going in and which come back out, and the block evaluates to the
|
||||
output value (or a tuple of them).
|
||||
|
||||
```sx
|
||||
add :: (a: i64, b: i64) -> i64 {
|
||||
return asm { "add %[out], %[a], %[b]", [out] "=r" -> i64, [a] "r" = a, [b] "r" = b };
|
||||
}
|
||||
```
|
||||
|
||||
Three things to know up front:
|
||||
|
||||
1. **The body is a brace block of comma-separated parts:** the template
|
||||
string first, then operands, then an optional `clobbers(.…)` clause.
|
||||
2. **Each operand is tagged by role**, not by position: `-> Type` is a
|
||||
value output, `= expr` is an input, `-> @place` writes through to
|
||||
existing storage. The list is flat and order-independent — there are
|
||||
no positional `:` sections.
|
||||
3. **The value outputs (`-> Type`) decide the result.** Zero → `void` (and the
|
||||
block must be `volatile`); one → that type; many → a tuple.
|
||||
|
||||
Templates are **AT&T syntax** (lowered through LLVM), **target-specific**,
|
||||
and **never run at compile time** — see [When it runs](#when-it-runs).
|
||||
|
||||
---
|
||||
|
||||
## Operands
|
||||
|
||||
An operand is `[name]? "constraint" <role>`. The constraint string is
|
||||
the LLVM/GCC-style constraint; the role marker says what the operand
|
||||
does.
|
||||
|
||||
### Inputs — `= expr`
|
||||
|
||||
`= expr` feeds a value in. The constraint picks where it lands:
|
||||
|
||||
```sx
|
||||
[a] "r" = a // any general register
|
||||
"{rdi}" = fd // pinned to a specific register (x86_64 rdi)
|
||||
```
|
||||
|
||||
### Symbol inputs — `"s" = fn`
|
||||
|
||||
A `"s"` input feeds a **function or global symbol** (not a runtime value).
|
||||
In the template, `%[name]` expands to the symbol's **platform-mangled
|
||||
name**, so you can branch or call straight to it:
|
||||
|
||||
```sx
|
||||
cb :: (n: i64) -> i64 export "cb" { return n + 1; }
|
||||
|
||||
trampoline :: (n: i64) -> i64 {
|
||||
return asm volatile {
|
||||
#string ASM
|
||||
mov x0, %[arg]
|
||||
bl %[fn] // DIRECT call — `bl _cb` on macOS, `bl cb` on Linux
|
||||
mov %[res], x0
|
||||
ASM,
|
||||
[res] "=r" -> i64,
|
||||
[arg] "r" = n,
|
||||
[fn] "s" = cb, // symbol operand
|
||||
clobbers(.x0, .x30, .memory),
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
The same `%[fn]` works on **x86_64** — just the branch mnemonic differs:
|
||||
|
||||
```sx
|
||||
return asm volatile {
|
||||
"call %[fn]", // x86_64 — same portable %[fn]
|
||||
[ret] "={rax}" -> i64,
|
||||
"{rdi}" = n,
|
||||
[fn] "s" = cb,
|
||||
clobbers(.rcx, .rdx, .rsi, .r8, .r9, .r10, .r11, .memory),
|
||||
};
|
||||
```
|
||||
|
||||
Two reasons to prefer this over passing a function *pointer* in a plain
|
||||
`"r"` register and using an indirect `blr`/`call *`:
|
||||
|
||||
- **One fewer indirection** — a direct PC-relative branch, no pointer
|
||||
load into a register, and a predictable (non-indirect) branch.
|
||||
- **Portable** — `%[fn]` is the same on every target; the backend emits
|
||||
the correctly-mangled name, so you never hardcode the macOS leading
|
||||
underscore *or* a per-arch operand modifier.
|
||||
|
||||
**How the portability works.** A bare `%[fn]` would render differently
|
||||
per target — on x86 the symbol prints as `$cb` (an immediate `$`-prefix
|
||||
that `call` rejects), while aarch64 prints it bare. So for a symbol (`"s"`)
|
||||
operand the compiler **auto-injects LLVM's `:c` operand modifier** (`%[fn]`
|
||||
→ `${N:c}`, "print the constant with no punctuation"). `:c` prints the
|
||||
plain symbol on every target — equivalent to the GCC `:P`/`%P0` call-target
|
||||
idiom on x86 (both emit the same `R_X86_64_PLT32` relocation) and a no-op
|
||||
on aarch64. You can still override it with an explicit `%[fn:X]` if you
|
||||
ever need a different rendering, but for a call/branch you never should.
|
||||
|
||||
The callee needs a stable, externally-linked symbol — i.e. `export`
|
||||
(which also gives it the C ABI). A plain or `callconv(.c)`-only function
|
||||
is `internal` and gets dead-code-eliminated, so the symbol won't link.
|
||||
(A global-scope `asm { … }` routine has no operand list, so it can't use
|
||||
a symbol operand — it references the literal symbol in its text.)
|
||||
|
||||
### Value outputs — `-> Type`
|
||||
|
||||
`-> Type` produces a value that becomes (part of) the block's result:
|
||||
|
||||
```sx
|
||||
[out] "=r" -> i64 // result in any register
|
||||
"={rax}" -> i64 // result pinned to rax
|
||||
```
|
||||
|
||||
### Naming and `%[name]`
|
||||
|
||||
Inside the template, `%[name]` refers to an operand by its **effective
|
||||
name**. An operand pinned to a register is **auto-named after that
|
||||
register** — `"{rdi}"` is reachable as `%[rdi]`, `"={rax}"` as `%[rax]`
|
||||
— so an explicit `[name]` is only needed:
|
||||
|
||||
- for a register-**class** operand (`"=r"`, `"r"`), which has no register
|
||||
to name it; or
|
||||
- to give a pinned operand a name *different* from its register.
|
||||
|
||||
Two labels are rejected so names stay unambiguous:
|
||||
|
||||
- the **echo form** `[rax] "={rax}"` — the label just repeats the pin, so
|
||||
drop it (the operand is already `%[rax]`); and
|
||||
- **duplicate** operand names.
|
||||
|
||||
In the template, `%%` is a literal `%`, and `%=` expands to a unique id
|
||||
(handy for a local label that must differ across inlinings).
|
||||
|
||||
### The result type
|
||||
|
||||
The number of **value** outputs (`-> Type`) decides the block's type:
|
||||
|
||||
| `-> Type` outputs | result | example |
|
||||
|---|---|---|
|
||||
| 0 | `void` — must be `volatile` | `asm volatile { "dmb ish" }` |
|
||||
| 1 | that type `T` | `x := asm { …, "=r" -> i64 }` |
|
||||
| N | a **tuple**, fields named by each operand's name | `lo, hi := asm { … }` |
|
||||
|
||||
With multiple outputs you get real multiple return values — a named
|
||||
operand becomes a named tuple field:
|
||||
|
||||
```sx
|
||||
// aarch64 — split a value into low/high bytes
|
||||
split :: (x: u64) -> (lo: u64, hi: u64) {
|
||||
return asm {
|
||||
#string ASM
|
||||
and %[l], %[x], #0xff
|
||||
lsr %[h], %[x], #8
|
||||
ASM,
|
||||
[l] "=r" -> u64, // → .lo (operand 0)
|
||||
[h] "=r" -> u64, // → .hi (operand 1)
|
||||
[x] "r" = x,
|
||||
};
|
||||
}
|
||||
lo, hi := split(0x1234); // (0x34, 0x12) = (52, 18)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `volatile`
|
||||
|
||||
`asm volatile { … }` marks the block as having side effects, so the
|
||||
optimizer won't move or delete it. It is **required whenever there are
|
||||
no value outputs** — a result-less, non-volatile asm would be dead code.
|
||||
|
||||
```sx
|
||||
barrier :: () { asm volatile { "dmb ish" }; } // aarch64 full barrier
|
||||
```
|
||||
|
||||
A block with outputs may still be `volatile` when its effects matter
|
||||
beyond the returned value (e.g. a syscall).
|
||||
|
||||
---
|
||||
|
||||
## `clobbers(.…)`
|
||||
|
||||
`clobbers(.…)` is a dot-name list of registers and flags the asm trashes
|
||||
that aren't already operands — so the register allocator keeps clear of
|
||||
them:
|
||||
|
||||
```sx
|
||||
clobbers(.rcx, .r11, .memory) // x86_64 syscall trashes rcx, r11, and memory
|
||||
clobbers(.cc) // condition flags
|
||||
```
|
||||
|
||||
`.memory` means "this asm reads or writes memory the compiler can't see,"
|
||||
and `.cc` means "the condition flags are modified."
|
||||
|
||||
---
|
||||
|
||||
## Writing through memory — `-> @place`
|
||||
|
||||
Sometimes the asm should write into existing storage (a local, a struct
|
||||
field) rather than *return* a value. `-> @place` does that: the place
|
||||
output does **not** join the result tuple. There are three forms,
|
||||
distinguished by the constraint.
|
||||
|
||||
### Write-through — `= …` constraint
|
||||
|
||||
The asm computes a value into a register; sx stores it through the
|
||||
place's address afterward.
|
||||
|
||||
```sx
|
||||
compute :: () -> i64 {
|
||||
other : i64 = 0;
|
||||
main_val := asm volatile {
|
||||
#string ASM
|
||||
mov %[m], #5
|
||||
mov %[o], #37
|
||||
ASM,
|
||||
[m] "=r" -> i64, // value output → returned into main_val
|
||||
[o] "=r" -> @other, // place output → stored through @other
|
||||
};
|
||||
return main_val + other; // 5 + 37 = 42
|
||||
}
|
||||
```
|
||||
|
||||
A value output and one or more place outputs can mix freely; only the
|
||||
value outputs build the returned tuple.
|
||||
|
||||
### Read-write — `+` constraint
|
||||
|
||||
A `+` operand is read **and** written: the place's current value is fed
|
||||
in, the asm updates it in place, and the result is stored back.
|
||||
|
||||
```sx
|
||||
// increment-in-place: x is loaded, the asm adds 1, the result is stored back
|
||||
bump :: () -> i64 {
|
||||
x : i64 = 41;
|
||||
asm volatile { "add %[v], %[v], #1", [v] "+r" -> @x };
|
||||
return x; // 42
|
||||
}
|
||||
```
|
||||
|
||||
### Indirect memory — `=*m` constraint
|
||||
|
||||
An `=*m` operand passes the place's **address** to the asm, which writes
|
||||
through it directly (no register round-trip, no return slot):
|
||||
|
||||
```sx
|
||||
// store 42 straight into x's storage
|
||||
poke :: () -> i64 {
|
||||
x : i64 = 0;
|
||||
asm volatile {
|
||||
#string ASM
|
||||
mov x9, #42
|
||||
str x9, %[out]
|
||||
ASM,
|
||||
[out] "=*m" -> @x,
|
||||
clobbers(.x9),
|
||||
};
|
||||
return x; // 42
|
||||
}
|
||||
```
|
||||
|
||||
**The place must be mutable storage.** Taking the address of a scalar
|
||||
`::` constant has no meaning — a scalar constant folds to its value and
|
||||
has no storage — so `-> @SOME_CONST` is a compile error:
|
||||
|
||||
```
|
||||
cannot take the address of constant 'SOME_CONST' — a scalar '::'
|
||||
constant has no storage (use a '=' variable or a local copy)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Multi-instruction templates
|
||||
|
||||
A single `"…"` string is one fragment. For several instructions, use a
|
||||
multi-line string literal or sx's **`#string` heredoc**, which is
|
||||
delivered **verbatim** — no escape processing — so you write assembly
|
||||
exactly as it should appear:
|
||||
|
||||
```sx
|
||||
serialize :: () {
|
||||
asm volatile {
|
||||
#string ASM
|
||||
mfence
|
||||
lfence
|
||||
ASM,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Global (module-scope) assembly
|
||||
|
||||
A top-level `asm { … }` block is **global assembly** — template only
|
||||
(no operands, no `volatile`), emitted as module-level assembly. It is
|
||||
the place to define a whole routine in assembly. Symbols it defines are
|
||||
reached from sx with a **lib-less `extern`** declaration:
|
||||
|
||||
```sx
|
||||
asm {
|
||||
#string ASM
|
||||
.global _my_add
|
||||
_my_add:
|
||||
add x0, x0, x1
|
||||
ret
|
||||
ASM,
|
||||
};
|
||||
|
||||
my_add :: (a: i64, b: i64) -> i64 extern;
|
||||
|
||||
main :: () -> i64 {
|
||||
return my_add(40, 2); // 42 — computed by the global-asm routine
|
||||
}
|
||||
```
|
||||
|
||||
Multiple global blocks concatenate in source order. (Symbol naming
|
||||
follows the platform convention — a leading underscore on macOS, none
|
||||
on Linux.)
|
||||
|
||||
---
|
||||
|
||||
## When it runs
|
||||
|
||||
Inline assembly is emitted into the program and runs at **runtime**,
|
||||
under both execution paths:
|
||||
|
||||
- **`sx run` (JIT)** — the module is compiled to an in-memory object
|
||||
(the integrated assembler assembles your asm, including global blocks),
|
||||
then run. Both inline and global asm work.
|
||||
- **`sx build` (AOT)** — same, into a native binary.
|
||||
|
||||
It does **not** run at **compile time**. A `#run` (comptime) call into a
|
||||
global-asm symbol fails loudly:
|
||||
|
||||
```sx
|
||||
COMPUTED :: #run my_add(40, 2); // error: the symbol isn't linked yet at comptime
|
||||
```
|
||||
|
||||
```
|
||||
comptime extern call: symbol not found via dlsym
|
||||
```
|
||||
|
||||
The comptime interpreter resolves `extern` calls against the host
|
||||
process; a module-asm symbol only exists once the program is
|
||||
assembled and linked, so call it at runtime, not in a `#run`.
|
||||
|
||||
---
|
||||
|
||||
## Cookbook
|
||||
|
||||
**Read a register** (no inputs):
|
||||
|
||||
```sx
|
||||
stack_ptr :: () -> u64 {
|
||||
return asm { "mov %[out], sp", [out] "=r" -> u64 }; // aarch64
|
||||
}
|
||||
```
|
||||
|
||||
**x86_64 syscall** — `write(2)`, with pinned registers and clobbers:
|
||||
|
||||
```sx
|
||||
sys_write :: (fd: i64, buf: *u8, count: i64) -> i64 {
|
||||
return asm volatile {
|
||||
"syscall",
|
||||
[ret] "={rax}" -> i64, // bytes written, in rax
|
||||
"{rax}" = 1, // SYS_write
|
||||
"{rdi}" = fd,
|
||||
"{rsi}" = buf,
|
||||
"{rdx}" = count,
|
||||
clobbers(.rcx, .r11, .memory),
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
**x86_64 divmod** — one instruction, two outputs, returned as a tuple:
|
||||
|
||||
```sx
|
||||
divmod :: (n: u64, d: u64) -> (quot: u64, rem: u64) {
|
||||
return asm {
|
||||
"divq %[d]",
|
||||
[quot] "={rax}" -> u64,
|
||||
[rem] "={rdx}" -> u64,
|
||||
"{rax}" = n, "{rdx}" = 0, [d] "r" = d,
|
||||
clobbers(.cc),
|
||||
};
|
||||
}
|
||||
q, r := divmod(17, 5); // (3, 2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rules of thumb
|
||||
|
||||
- **`asm` yields a value.** Bind it (`x := asm { … }`), `return` it, or
|
||||
destructure a multi-output tuple (`a, b := asm { … }`). A block with no
|
||||
value outputs must be `volatile`.
|
||||
- **Pinned operands name themselves.** `"{rdi}"` is `%[rdi]`; only add
|
||||
`[name]` for register-class operands or to rename. Don't echo a pin
|
||||
(`[rax] "={rax}"`).
|
||||
- **`%%` for a literal percent; `%[name]` for an operand.** Templates are
|
||||
AT&T.
|
||||
- **List everything you trash** in `clobbers(.…)` — scratch registers,
|
||||
`.cc`, and `.memory` if the asm touches memory the compiler can't see.
|
||||
- **`-> @place` writes storage; pick the form:** `=` (compute then
|
||||
store), `+` (read-modify-write), `=*m` (write through the address).
|
||||
The place must be mutable — not a scalar `::` constant.
|
||||
- **Global `asm { … }`** defines symbols; import them with a lib-less
|
||||
`extern`. They run under JIT and AOT, but **not** in a `#run`.
|
||||
- **It's target-specific.** Gate or pick instructions per architecture;
|
||||
there is no portable instruction set.
|
||||
|
||||
---
|
||||
|
||||
## See also
|
||||
|
||||
- [inline-asm-design.md](../design/inline-asm-design.md) — the design rationale and
|
||||
LLVM mapping.
|
||||
- `examples/16xx-platform-asm-*` — the full, runnable example matrix
|
||||
(basic in/out, tuples, the three `-> @place` forms, global asm, the
|
||||
x86_64 syscall, and the comptime-boundary guard).
|
||||
- The "Inline Assembly" section of [readme.md](../readme.md) for a
|
||||
one-screen overview.
|
||||
18
examples/1177-diagnostics-addr-of-const-rejected.sx
Normal file
18
examples/1177-diagnostics-addr-of-const-rejected.sx
Normal file
@@ -0,0 +1,18 @@
|
||||
// Taking the address of a scalar `::` constant is a compile error: a scalar
|
||||
// constant folds to its value and has NO storage (only array/struct constants
|
||||
// are immutable globals with a real address — see 0177). Covers a module-scope
|
||||
// const, a local const, and an inline-asm `-> @const` write-through (the path
|
||||
// that surfaced the bug). Before the fix, `@N` lowered to `inttoptr (i64 40 to
|
||||
// ptr)` — a wild pointer that segfaulted on deref and emitted invalid stores
|
||||
// for asm `-> @const`. Regression (issue 0138).
|
||||
|
||||
takes :: (p: *i64) {}
|
||||
|
||||
N :: 40;
|
||||
|
||||
main :: () {
|
||||
takes(@N); // module scalar const — no storage
|
||||
x :: 7;
|
||||
takes(@x); // local scalar const — no storage
|
||||
asm volatile { "mov %[c], #99", [c] "=r" -> @N }; // write-through to a const
|
||||
}
|
||||
9
examples/1638-platform-target-host.sx
Normal file
9
examples/1638-platform-target-host.sx
Normal file
@@ -0,0 +1,9 @@
|
||||
// Phase 0 (ASM stream) test-infra lock: exercises the `<name>.build` JSON
|
||||
// config + `--target` threading + the host-match EXECUTE path of the corpus
|
||||
// runner. The companion `.build` pins the HOST target (`{ "target": "macos" }`
|
||||
// resolves to the host arch+os), so the runner threads `--target` and still
|
||||
// runs the example natively — its stdout is asserted as usual.
|
||||
#import "modules/std.sx";
|
||||
main :: () {
|
||||
print("target-host ok\n");
|
||||
}
|
||||
7
examples/1639-platform-target-cross.sx
Normal file
7
examples/1639-platform-target-cross.sx
Normal file
@@ -0,0 +1,7 @@
|
||||
// Phase 0 (ASM stream) test-infra lock: exercises the corpus runner's
|
||||
// CROSS-TARGET ir-only path. The `.build` pins `x86_64-linux`, which does NOT
|
||||
// match this aarch64 host, so the runner skips run/build/exec and verifies via
|
||||
// `sx ir --target x86_64-linux` only — asserting exit + the `.ir` snapshot +
|
||||
// stderr (no `.stdout`). Asm-free on purpose: it locks the harness gating, not
|
||||
// any inline-asm lowering (that arrives in Phase A+).
|
||||
main :: () -> i64 { return 0; }
|
||||
20
examples/1640-platform-asm-parse.sx
Normal file
20
examples/1640-platform-asm-parse.sx
Normal file
@@ -0,0 +1,20 @@
|
||||
// ASM stream Phase E — x86_64 multi-output asm: `divq` produces quotient in rax
|
||||
// and remainder in rdx, returned as a `(quot, rem)` tuple. Two `={rax}`/`={rdx}`
|
||||
// value outputs ⇒ LLVM returns a `{ i64, i64 }` struct, which IS sx's tuple
|
||||
// representation (so `q, r := …` destructures it directly). x86-pinned via
|
||||
// `.build`: ir-only on a non-x86 host (the `.ir` snapshot locks the struct
|
||||
// return + `%[name]` rewrite); runs natively on x86_64-linux. See 1647 for a
|
||||
// multi-output example that executes on aarch64.
|
||||
divmod :: (n: u64, d: u64) -> (quot: u64, rem: u64) {
|
||||
return asm {
|
||||
"divq %[d]",
|
||||
[quot] "={rax}" -> u64,
|
||||
[rem] "={rdx}" -> u64,
|
||||
"{rax}" = n, "{rdx}" = 0, [d] "r" = d,
|
||||
clobbers(.cc),
|
||||
};
|
||||
}
|
||||
|
||||
main :: () {
|
||||
q, r := divmod(17, 5);
|
||||
}
|
||||
6
examples/1641-platform-asm-missing-volatile.sx
Normal file
6
examples/1641-platform-asm-missing-volatile.sx
Normal file
@@ -0,0 +1,6 @@
|
||||
// ASM stream Phase B — an asm with no value outputs yields no result, so its
|
||||
// effects could be deleted unless it is marked `volatile`. This omits
|
||||
// `volatile` ⇒ a compile error. Pins that diagnostic (mirrors Zig's rule).
|
||||
// Called from `main` so lowering reaches the asm body.
|
||||
nope :: () { asm { "nop" }; }
|
||||
main :: () { nope(); }
|
||||
5
examples/1642-platform-asm-nop-volatile.sx
Normal file
5
examples/1642-platform-asm-nop-volatile.sx
Normal file
@@ -0,0 +1,5 @@
|
||||
// ASM stream — the no-output `volatile` form runs end-to-end: a bare `nop`
|
||||
// (no operands, no result) assembles and executes cleanly (exit 0). Confirms
|
||||
// the no-output⇒volatile rule's positive side AND the zero-operand emit path.
|
||||
nop :: () { asm volatile { "nop" }; }
|
||||
main :: () { nop(); }
|
||||
6
examples/1643-platform-asm-echo-name.sx
Normal file
6
examples/1643-platform-asm-echo-name.sx
Normal file
@@ -0,0 +1,6 @@
|
||||
// ASM stream Phase B — operand naming rule (§II.5): an explicit `[name]` that
|
||||
// just echoes the register its own constraint pins (`[eax] "={eax}"`) carries
|
||||
// no information — the operand is already auto-named after the register. Reject
|
||||
// it. The useful form is a label that DIFFERS (e.g. `[quot] "={rax}"`).
|
||||
f :: () -> u32 { return asm volatile { "cpuid", [eax] "={eax}" -> u32, "{eax}" = 1 }; }
|
||||
main :: () { x := f(); }
|
||||
4
examples/1644-platform-asm-duplicate-name.sx
Normal file
4
examples/1644-platform-asm-duplicate-name.sx
Normal file
@@ -0,0 +1,4 @@
|
||||
// ASM stream Phase B — two asm operands may not share a `[name]`: the `%[name]`
|
||||
// template reference (and the result tuple field) would be ambiguous.
|
||||
f :: () -> u64 { return asm volatile { "nop", [x] "=r" -> u64, [x] "r" = 5 }; }
|
||||
main :: () { v := f(); }
|
||||
10
examples/1645-platform-asm-aarch64-add.sx
Normal file
10
examples/1645-platform-asm-aarch64-add.sx
Normal file
@@ -0,0 +1,10 @@
|
||||
// ASM stream Phase D — inline assembly that RUNS end-to-end. An aarch64 `add`
|
||||
// with two register-class inputs (`%[a]`, `%[b]`) and a value output (`%[out]`)
|
||||
// returned from the function. The `.build` pins aarch64-macOS: on a matching
|
||||
// host the runner executes it (exit 42); elsewhere it falls to ir-only mode and
|
||||
// asserts the `.ir` snapshot (the inline_asm op + LLVM `call asm` are target-
|
||||
// independent in the IR text). Regression for the full lower→emit→JIT path.
|
||||
add_asm :: (a: i64, b: i64) -> i64 {
|
||||
return asm { "add %[out], %[a], %[b]", [out] "=r" -> i64, [a] "r" = a, [b] "r" = b };
|
||||
}
|
||||
main :: () -> i64 { return add_asm(40, 2); }
|
||||
9
examples/1646-platform-asm-value-binding.sx
Normal file
9
examples/1646-platform-asm-value-binding.sx
Normal file
@@ -0,0 +1,9 @@
|
||||
// ASM stream Phase D — a bare `x := asm { … -> T }` binding (not a direct
|
||||
// `return asm`) types correctly: the value output flows through the local and
|
||||
// out as the exit code. Regression for the `inferType` `.asm_expr` arm (without
|
||||
// it the binding inferred `.unresolved` and silently produced 0). aarch64-pinned
|
||||
// via `.build` → runs on a matching host, ir-only elsewhere.
|
||||
main :: () -> i64 {
|
||||
x := asm { "mov %[out], #99", [out] "=r" -> i64 };
|
||||
return x;
|
||||
}
|
||||
20
examples/1647-platform-asm-aarch64-multi.sx
Normal file
20
examples/1647-platform-asm-aarch64-multi.sx
Normal file
@@ -0,0 +1,20 @@
|
||||
// ASM stream Phase E — multi-output asm that RUNS end-to-end on aarch64. Splits
|
||||
// a value into low/high bytes via two value outputs, returned + destructured as
|
||||
// a `(lo, hi)` tuple. The two outputs become an LLVM `{ i64, i64 }` struct =
|
||||
// sx's tuple. aarch64-pinned via `.build`: executes on a matching host (exit
|
||||
// reflects lo+hi), ir-only elsewhere.
|
||||
split :: (x: u64) -> (lo: u64, hi: u64) {
|
||||
return asm {
|
||||
#string ASM
|
||||
and %[l], %[x], #0xff
|
||||
lsr %[h], %[x], #8
|
||||
ASM,
|
||||
[l] "=r" -> u64,
|
||||
[h] "=r" -> u64,
|
||||
[x] "r" = x,
|
||||
};
|
||||
}
|
||||
main :: () -> i64 {
|
||||
lo, hi := split(0x1234);
|
||||
return xx (lo + hi); // 0x34 + 0x12 = 52 + 18 = 70
|
||||
}
|
||||
20
examples/1648-platform-asm-global.sx
Normal file
20
examples/1648-platform-asm-global.sx
Normal file
@@ -0,0 +1,20 @@
|
||||
// ASM stream Phase F — top-level (global) `asm { … }`: a template-only block at
|
||||
// module scope, lowered to LLVM `module asm` (LLVMAppendModuleInlineAsm). It
|
||||
// defines a symbol that a lib-less `extern` declaration calls into — the
|
||||
// import direction reuses the existing C-FFI extern path, no new surface.
|
||||
// Built+run via `aot` here (the same feature under the JIT is covered by 1653);
|
||||
// aarch64-macos-pinned, so ir-only on a non-matching host.
|
||||
asm {
|
||||
#string ASM
|
||||
.global _my_add
|
||||
_my_add:
|
||||
add x0, x0, x1
|
||||
ret
|
||||
ASM,
|
||||
};
|
||||
|
||||
my_add :: (a: i64, b: i64) -> i64 extern;
|
||||
|
||||
main :: () -> i64 {
|
||||
return my_add(40, 2); // 42, computed by the global-asm routine
|
||||
}
|
||||
19
examples/1649-platform-asm-place-output.sx
Normal file
19
examples/1649-platform-asm-place-output.sx
Normal file
@@ -0,0 +1,19 @@
|
||||
// ASM stream Phase 2 — `-> @place` write-through output. An asm result can be
|
||||
// STORED through a place (a local / struct field) instead of returned: the
|
||||
// place output does NOT join the result tuple. Here one value output is
|
||||
// returned (into `main_val`) while a second is written through `@other`. The
|
||||
// two are combined to 42. Read-write (`+`) and indirect (`=*m`) place outputs
|
||||
// are covered by 1650 and 1652 respectively. aarch64-pinned; ir-only elsewhere.
|
||||
compute :: () -> i64 {
|
||||
other : i64 = 0;
|
||||
main_val := asm volatile {
|
||||
#string ASM
|
||||
mov %[m], #5
|
||||
mov %[o], #37
|
||||
ASM,
|
||||
[m] "=r" -> i64, // value output → returned
|
||||
[o] "=r" -> @other, // place output → stored through @other
|
||||
};
|
||||
return main_val + other; // 5 + 37 = 42
|
||||
}
|
||||
main :: () -> i64 { return compute(); }
|
||||
11
examples/1650-platform-asm-rw-place.sx
Normal file
11
examples/1650-platform-asm-rw-place.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
// ASM stream Phase 2 — read-write (`+`) place output. The place is LOADED as a
|
||||
// seed, the asm both reads and writes the operand register (tied input ↔ output),
|
||||
// and the (modified) result is STORED back through the place. Increment-in-place:
|
||||
// the register holds 41 on entry, the asm adds 1, 42 is written back to `x`.
|
||||
// aarch64-pinned; ir-only elsewhere.
|
||||
compute :: () -> i64 {
|
||||
x : i64 = 41;
|
||||
asm volatile { "add %[v], %[v], #1", [v] "+r" -> @x };
|
||||
return x; // 42
|
||||
}
|
||||
main :: () -> i64 { return compute(); }
|
||||
27
examples/1651-platform-asm-x86-syscall-write.sx
Normal file
27
examples/1651-platform-asm-x86-syscall-write.sx
Normal file
@@ -0,0 +1,27 @@
|
||||
// ASM stream — x86_64 Linux `write(2)` via a raw `syscall`. The canonical inline-
|
||||
// asm use case: SYS_write (rax=1) with fd/buf/count pinned to rdi/rsi/rdx, the
|
||||
// `syscall` instruction clobbering rcx + r11 (+ memory), and the byte count
|
||||
// returned in rax. Demonstrates register-pinned inputs, a pinned value output,
|
||||
// a pointer input (`*u8` → rsi), and `clobbers(.…)` lowering all at once.
|
||||
//
|
||||
// x86-pinned via `.build`: ir-only on a non-x86 host — the `.ir` snapshot locks
|
||||
// the exact constraint string (`={rax},{rax},{rdi},{rsi},{rdx},~{rcx},~{r11},
|
||||
// ~{memory}`), which is the §II.11 silent-miscompile risk zone — and runs
|
||||
// natively on x86_64-linux (printing "ok\n"). See 1640 for an x86 multi-output
|
||||
// example, 1645/1647/1649/1650 for aarch64 examples that execute on this host.
|
||||
sys_write :: (fd: i64, buf: *u8, count: i64) -> i64 {
|
||||
return asm volatile {
|
||||
"syscall",
|
||||
[ret] "={rax}" -> i64, // return: bytes written, in rax
|
||||
"{rax}" = 1, // SYS_write (x86_64 Linux)
|
||||
"{rdi}" = fd, // fd
|
||||
"{rsi}" = buf, // buf
|
||||
"{rdx}" = count, // count
|
||||
clobbers(.rcx, .r11, .memory),
|
||||
};
|
||||
}
|
||||
|
||||
main :: () {
|
||||
msg : [3]u8 = .[111, 107, 10]; // "ok\n"
|
||||
n := sys_write(1, @msg[0], 3);
|
||||
}
|
||||
19
examples/1652-platform-asm-indirect-mem.sx
Normal file
19
examples/1652-platform-asm-indirect-mem.sx
Normal file
@@ -0,0 +1,19 @@
|
||||
// ASM stream — indirect-memory (`=*m`) place output. The place address is passed
|
||||
// to the asm as a pointer and the asm writes THROUGH it (no return slot): here
|
||||
// `str x9, %[out]` stores 42 into `x`'s storage directly. Distinct from a
|
||||
// write-through `=` output (which returns a value that is then stored). The place
|
||||
// is the only operand; `x9` is scratch, declared via `clobbers`. aarch64-pinned;
|
||||
// ir-only elsewhere (the `.ir` locks the `=*m` constraint + `elementtype` attr).
|
||||
poke :: () -> i64 {
|
||||
x : i64 = 0;
|
||||
asm volatile {
|
||||
#string ASM
|
||||
mov x9, #42
|
||||
str x9, %[out]
|
||||
ASM,
|
||||
[out] "=*m" -> @x,
|
||||
clobbers(.x9),
|
||||
};
|
||||
return x; // 42 — written through the pointer
|
||||
}
|
||||
main :: () -> i64 { return poke(); }
|
||||
22
examples/1653-platform-asm-global-jit.sx
Normal file
22
examples/1653-platform-asm-global-jit.sx
Normal file
@@ -0,0 +1,22 @@
|
||||
// ASM stream — global (module-scope) `asm { … }` executed via the JIT (`sx run`),
|
||||
// NOT AOT. `sx run` compiles the whole module to an in-memory object (the
|
||||
// integrated assembler assembles the `module asm` block into it), then ORC
|
||||
// relocates and runs it — so a module-asm symbol IS resolvable at JIT main
|
||||
// execution, just like a normal symbol. The only path that can't see it is a
|
||||
// COMPILE-TIME `#run` call (the interpreter resolves externs via host dlsym; the
|
||||
// symbol isn't linked yet — see 1654). Sibling of 1648 (which exercises the same
|
||||
// feature via AOT). aarch64-macos-pinned; ir-only elsewhere.
|
||||
asm {
|
||||
#string ASM
|
||||
.global _my_sub
|
||||
_my_sub:
|
||||
sub x0, x0, x1
|
||||
ret
|
||||
ASM,
|
||||
};
|
||||
|
||||
my_sub :: (a: i64, b: i64) -> i64 extern;
|
||||
|
||||
main :: () -> i64 {
|
||||
return my_sub(44, 2); // 42, computed by the global-asm routine, under JIT
|
||||
}
|
||||
22
examples/1654-platform-asm-global-comptime-call.sx
Normal file
22
examples/1654-platform-asm-global-comptime-call.sx
Normal file
@@ -0,0 +1,22 @@
|
||||
// ASM stream — calling a global-asm symbol at COMPILE TIME (`#run`) fails loud.
|
||||
// A module-asm symbol only exists once the module is assembled+linked; the
|
||||
// comptime interpreter resolves `extern` calls via host `dlsym` (RTLD_DEFAULT),
|
||||
// where the symbol is absent — so `#run my_add(…)` cannot evaluate and reports a
|
||||
// clear diagnostic instead of silently misfiring. (Calling the same symbol at
|
||||
// RUNTIME works under both JIT and AOT — see 1648/1653.) The failure is at
|
||||
// dlsym resolution, before any asm is assembled, so it is arch-independent —
|
||||
// no `.build` target needed. Regression guard for the comptime boundary.
|
||||
asm {
|
||||
#string ASM
|
||||
.global _my_add
|
||||
_my_add:
|
||||
add x0, x0, x1
|
||||
ret
|
||||
ASM,
|
||||
};
|
||||
|
||||
my_add :: (a: i64, b: i64) -> i64 extern;
|
||||
|
||||
COMPUTED :: #run my_add(40, 2); // compile-time call — module-asm symbol not yet linked
|
||||
|
||||
main :: () -> i64 { return COMPUTED; }
|
||||
35
examples/1655-platform-asm-callback-into-sx.sx
Normal file
35
examples/1655-platform-asm-callback-into-sx.sx
Normal file
@@ -0,0 +1,35 @@
|
||||
// ASM stream — the round trip: sx → asm → sx. A global-asm routine (`_caller`)
|
||||
// CALLS BACK into an sx function (`cb`) by its symbol, then returns. For the asm
|
||||
// `bl _cb` to resolve, the sx callback needs EXTERNAL LINKAGE and a stable C
|
||||
// symbol — that is exactly what `export` provides (it also implies the C ABI, so
|
||||
// no hidden context parameter). `callconv(.c)` alone is NOT enough: it sets the
|
||||
// ABI but leaves the function `internal`, so it is dead-code-eliminated (nothing
|
||||
// in the IR references it — the `bl` is opaque to the optimizer) and `_cb` is
|
||||
// undefined at link. macOS gives `export "cb"` the symbol `_cb` (leading
|
||||
// underscore), which the template references. aarch64-macos-pinned; runs under
|
||||
// the JIT here (sx run), ir-only elsewhere.
|
||||
|
||||
// The sx callback — `export` gives it external linkage + the `_cb` symbol + C ABI.
|
||||
cb :: (n: i64) -> i64 export "cb" {
|
||||
return n + 1;
|
||||
}
|
||||
|
||||
// A global-asm trampoline that calls back into `cb`. It saves/restores the link
|
||||
// register (x30) around the `bl` — it was itself reached via `bl`, so the return
|
||||
// address must survive the nested call.
|
||||
asm {
|
||||
#string ASM
|
||||
.global _caller
|
||||
_caller:
|
||||
stp x29, x30, [sp, #-16]!
|
||||
bl _cb // x0 = cb(x0) — back into sx
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
ASM,
|
||||
};
|
||||
|
||||
caller :: (n: i64) -> i64 extern;
|
||||
|
||||
main :: () -> i64 {
|
||||
return caller(41); // sx main → asm caller → bl _cb → sx cb → 42
|
||||
}
|
||||
25
examples/1656-platform-asm-symbol-operand.sx
Normal file
25
examples/1656-platform-asm-symbol-operand.sx
Normal file
@@ -0,0 +1,25 @@
|
||||
// ASM stream — symbol operand (`"s"`): feed a function/global SYMBOL into the
|
||||
// template so a DIRECT `bl %[fn]` (PC-relative — one fewer indirection than a
|
||||
// register-indirect `blr`: no pointer load, a relative reloc, a predictable
|
||||
// branch) goes straight to it. The backend emits the platform-mangled name
|
||||
// (`_cb` on macOS, `cb` on Linux), so the template stays portable — no hardcoded
|
||||
// underscore. Round trip: sx → asm → `bl _cb` → sx → 42. aarch64-macos-pinned;
|
||||
// runs under the JIT here, ir-only elsewhere (the `.ir` locks `"s"`/`ptr @cb`).
|
||||
cb :: (n: i64) -> i64 export "cb" { return n + 1; }
|
||||
|
||||
tramp :: (n: i64) -> i64 { // returns whatever `cb` leaves in x0 after being called with `n` via a direct `bl`
|
||||
return asm volatile {
|
||||
#string ASM
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x0, %[arg]
|
||||
bl %[fn]
|
||||
mov %[res], x0
|
||||
ldp x29, x30, [sp], #16
|
||||
ASM,
|
||||
[res] "=r" -> i64, // value output: read back from x0 after the call
|
||||
[arg] "r" = n, // copied into x0 by the template before the `bl`
|
||||
[fn] "s" = cb, // symbol operand → direct `bl _cb`
|
||||
clobbers(.x0, .x30, .memory), // NOTE(review): the `bl` runs a C-ABI callee, which AAPCS64 also lets overwrite x1–x17 and the flags — confirm whether those belong in this list
|
||||
};
|
||||
}
|
||||
main :: () -> i64 { return tramp(41); }
|
||||
12
examples/1657-platform-asm-x86-rw-place.sx
Normal file
12
examples/1657-platform-asm-x86-rw-place.sx
Normal file
@@ -0,0 +1,12 @@
|
||||
// ASM stream — read-write (`+`) place output on x86_64 (cross-arch sibling of
|
||||
// the aarch64 1650). `incq %[v]` reads the operand register, increments it, and
|
||||
// the result is stored back through the place. Locks the x86 lowering of `+`:
|
||||
// an output `=r` plus a tied input (`=r,0`) seeded with the place's value.
|
||||
// x86-pinned via `.build`: ir-only here (the `.ir` is the assertion), runs
|
||||
// natively on x86_64-linux (main returns 0 on success, 1 if the asm misbehaved).
|
||||
bump :: () -> i64 { // increments a local through a read-write (`+r`) asm place operand and returns it
|
||||
x : i64 = 41; // seed value the `+` operand reads before the asm runs
|
||||
asm volatile { "incq %[v]", [v] "+r" -> @x }; // NOTE(review): `incq` writes the flags and no flags clobber is listed — confirm the backend treats them as clobbered
|
||||
return x; // 42
|
||||
}
|
||||
main :: () -> i64 { if bump() != 42 { return 1; } return 0; }
|
||||
12
examples/1658-platform-asm-x86-indirect-mem.sx
Normal file
12
examples/1658-platform-asm-x86-indirect-mem.sx
Normal file
@@ -0,0 +1,12 @@
|
||||
// ASM stream — indirect-memory (`=*m`) place output on x86_64 (cross-arch sibling
|
||||
// of the aarch64 1652). `movq $42, %[out]` stores straight through the place's
|
||||
// address — the address is passed as an opaque `ptr` with an `elementtype(i64)`
|
||||
// attribute, no return slot. Note `$42`: a literal `$` in the template is escaped
|
||||
// to LLVM's `$$` and emitted back as `$42` (an x86 immediate). x86-pinned;
|
||||
// ir-only here, runs on x86_64-linux.
|
||||
poke :: () -> i64 {
|
||||
x : i64 = 0;
|
||||
asm volatile { "movq $42, %[out]", [out] "=*m" -> @x };
|
||||
return x; // 42
|
||||
}
|
||||
main :: () -> i64 { if poke() != 42 { return 1; } return 0; }
|
||||
19
examples/1659-platform-asm-x86-symbol-operand.sx
Normal file
19
examples/1659-platform-asm-x86-symbol-operand.sx
Normal file
@@ -0,0 +1,19 @@
|
||||
// ASM stream — symbol operand (`"s"`) on x86_64 (cross-arch sibling of the
|
||||
// aarch64 1656). A DIRECT `call` to an `export`ed sx function by symbol, written
|
||||
// with the SAME portable `%[fn]` as the aarch64 example — the compiler injects
|
||||
// the `:c` operand modifier for symbol operands, so the symbol prints bare on
|
||||
// every target (x86 would otherwise render `$cb`, a bad call target). The
|
||||
// backend emits the platform-mangled name (`call cb` on Linux). x86-pinned;
|
||||
// ir-only here, runs on x86_64-linux. Round trip: sx → asm → call cb → sx → 42.
|
||||
cb :: (n: i64) -> i64 export "cb" { return n + 1; }
|
||||
|
||||
tramp :: (n: i64) -> i64 { // returns whatever `cb` leaves in rax after a direct `call` with `n` in rdi
|
||||
return asm volatile {
|
||||
"call %[fn]",
|
||||
[ret] "={rax}" -> i64, // value output pinned to rax
|
||||
"{rdi}" = n, // arg in rdi (SysV)
|
||||
[fn] "s" = cb, // symbol operand → direct `call cb`
|
||||
clobbers(.rcx, .rdx, .rsi, .r8, .r9, .r10, .r11, .memory), // NOTE(review): the callee may also overwrite rdi and the flags, and `call` pushes a return address below rsp (the SysV red zone) — confirm this list and the frame setup are sufficient
|
||||
};
|
||||
}
|
||||
main :: () -> i64 { if tramp(41) != 42 { return 1; } return 0; }
|
||||
29
examples/1660-platform-windows-win32-print.sx
Normal file
29
examples/1660-platform-windows-win32-print.sx
Normal file
@@ -0,0 +1,29 @@
|
||||
// Windows x86_64 — print "42" and exit(0) through the Win32 system-call
|
||||
// boundary. The Windows analog of the Linux raw-`syscall` write (see
|
||||
// 1651): Windows has no stable raw syscall ABI (NtWriteFile's system-call number
|
||||
// shifts between OS builds), so the documented boundary IS kernel32 —
|
||||
// `GetStdHandle` + `WriteFile` to print, `ExitProcess` to terminate.
|
||||
//
|
||||
// Exercises the bundled-`zig` link backend end to end: built with
|
||||
// `--target windows-gnu --self-contained`, zig cc (mingw) auto-resolves
|
||||
// kernel32, producing a PE32+ that prints "42\n" and exits 0.
|
||||
//
|
||||
// Pinned `x86_64-windows-gnu` via `.build`: ir-only on this non-Windows
|
||||
// host (the `.ir` snapshot locks the Win64-ABI lowering of the three
|
||||
// extern calls); runs end-to-end on a Windows x86_64 runner.
|
||||
|
||||
kernel32 :: #library "kernel32";
|
||||
|
||||
// DWORD = u32, HANDLE/LPVOID = *void, BOOL = i32.
|
||||
GetStdHandle :: (n_std_handle: u32) -> *void extern;
|
||||
WriteFile :: (file: *void, buf: *u8, n: u32, written: *u32, overlapped: *void) -> i32 extern;
|
||||
ExitProcess :: (code: u32) -> void extern;
|
||||
|
||||
main :: () { // writes the three bytes "42\n" to the standard-output handle, then exits with status 0
|
||||
// STD_OUTPUT_HANDLE = (DWORD)-11 = 0xFFFFFFF5.
|
||||
out := GetStdHandle(0xFFFFFFF5);
|
||||
msg : [3]u8 = .[52, 50, 10]; // "42\n"
|
||||
written : u32 = 0; // passed by address as WriteFile's `written` argument; never read afterwards
|
||||
WriteFile(out, @msg[0], 3, @written, null); // the i32 result is discarded, so a failed write is not reported
|
||||
ExitProcess(0); // always exits with status 0, regardless of the write's outcome
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
1
|
||||
@@ -0,0 +1,17 @@
|
||||
error: cannot take the address of constant 'N' — a scalar '::' constant has no storage (use a '=' variable or a local copy for mutable data)
|
||||
--> examples/1177-diagnostics-addr-of-const-rejected.sx:14:11
|
||||
|
|
||||
14 | takes(@N); // module scalar const — no storage
|
||||
| ^^
|
||||
|
||||
error: cannot take the address of constant 'x' — a scalar '::' constant has no storage (use a '=' variable or a local copy for mutable data)
|
||||
--> examples/1177-diagnostics-addr-of-const-rejected.sx:16:11
|
||||
|
|
||||
16 | takes(@x); // local scalar const — no storage
|
||||
| ^^
|
||||
|
||||
error: cannot take the address of constant 'N' — a scalar '::' constant has no storage (use a '=' variable or a local copy for mutable data)
|
||||
--> examples/1177-diagnostics-addr-of-const-rejected.sx:17:49
|
||||
|
|
||||
17 | asm volatile { "mov %[c], #99", [c] "=r" -> @N }; // write-through to a const
|
||||
| ^^
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1226-ffi-export-fn.build
Normal file
1
examples/expected/1226-ffi-export-fn.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "aot": true }
|
||||
1
examples/expected/1227-ffi-export-fn-rename.build
Normal file
1
examples/expected/1227-ffi-export-fn-rename.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "aot": true }
|
||||
1
examples/expected/1638-platform-target-host.build
Normal file
1
examples/expected/1638-platform-target-host.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "macos" }
|
||||
1
examples/expected/1638-platform-target-host.exit
Normal file
1
examples/expected/1638-platform-target-host.exit
Normal file
@@ -0,0 +1 @@
|
||||
0
|
||||
1
examples/expected/1638-platform-target-host.stderr
Normal file
1
examples/expected/1638-platform-target-host.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1638-platform-target-host.stdout
Normal file
1
examples/expected/1638-platform-target-host.stdout
Normal file
@@ -0,0 +1 @@
|
||||
target-host ok
|
||||
1
examples/expected/1639-platform-target-cross.build
Normal file
1
examples/expected/1639-platform-target-cross.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "x86_64-linux" }
|
||||
1
examples/expected/1639-platform-target-cross.exit
Normal file
1
examples/expected/1639-platform-target-cross.exit
Normal file
@@ -0,0 +1 @@
|
||||
0
|
||||
6
examples/expected/1639-platform-target-cross.ir
Normal file
6
examples/expected/1639-platform-target-cross.ir
Normal file
@@ -0,0 +1,6 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
ret i32 0
|
||||
}
|
||||
1
examples/expected/1639-platform-target-cross.stderr
Normal file
1
examples/expected/1639-platform-target-cross.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1640-platform-asm-parse.build
Normal file
1
examples/expected/1640-platform-asm-parse.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "x86_64-linux" }
|
||||
1
examples/expected/1640-platform-asm-parse.exit
Normal file
1
examples/expected/1640-platform-asm-parse.exit
Normal file
@@ -0,0 +1 @@
|
||||
0
|
||||
26
examples/expected/1640-platform-asm-parse.ir
Normal file
26
examples/expected/1640-platform-asm-parse.ir
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define internal { i64, i64 } @divmod(i64 %0, i64 %1) #0 {
|
||||
entry:
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 %0, ptr %alloca, align 8
|
||||
%allocaN = alloca i64, align 8
|
||||
store i64 %1, ptr %allocaN, align 8
|
||||
%load = load i64, ptr %alloca, align 8
|
||||
%loadN = load i64, ptr %allocaN, align 8
|
||||
%asm = call { i64, i64 } asm "divq ${4}", "={rax},={rdx},{rax},{rdx},r,~{cc}"(i64 %load, i64 0, i64 %loadN)
|
||||
ret { i64, i64 } %asm
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%call = call { i64, i64 } @divmod(i64 17, i64 5)
|
||||
%tg = extractvalue { i64, i64 } %call, 0
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 %tg, ptr %alloca, align 8
|
||||
%tgN = extractvalue { i64, i64 } %call, 1
|
||||
%allocaN = alloca i64, align 8
|
||||
store i64 %tgN, ptr %allocaN, align 8
|
||||
ret i32 0
|
||||
}
|
||||
1
examples/expected/1640-platform-asm-parse.stderr
Normal file
1
examples/expected/1640-platform-asm-parse.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1640-platform-asm-parse.stdout
Normal file
1
examples/expected/1640-platform-asm-parse.stdout
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
1
|
||||
@@ -0,0 +1,5 @@
|
||||
error: asm expression with no outputs must be marked `volatile`
|
||||
--> examples/1641-platform-asm-missing-volatile.sx:5:14
|
||||
|
|
||||
5 | nope :: () { asm { "nop" }; }
|
||||
| ^^^^^^^^^^^^^
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1642-platform-asm-nop-volatile.exit
Normal file
1
examples/expected/1642-platform-asm-nop-volatile.exit
Normal file
@@ -0,0 +1 @@
|
||||
0
|
||||
1
examples/expected/1642-platform-asm-nop-volatile.stderr
Normal file
1
examples/expected/1642-platform-asm-nop-volatile.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1642-platform-asm-nop-volatile.stdout
Normal file
1
examples/expected/1642-platform-asm-nop-volatile.stdout
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1643-platform-asm-echo-name.exit
Normal file
1
examples/expected/1643-platform-asm-echo-name.exit
Normal file
@@ -0,0 +1 @@
|
||||
1
|
||||
5
examples/expected/1643-platform-asm-echo-name.stderr
Normal file
5
examples/expected/1643-platform-asm-echo-name.stderr
Normal file
@@ -0,0 +1,5 @@
|
||||
error: redundant asm operand name `eax` — it already names the pinned register; drop the `[eax]`
|
||||
--> examples/1643-platform-asm-echo-name.sx:5:25
|
||||
|
|
||||
5 | f :: () -> u32 { return asm volatile { "cpuid", [eax] "={eax}" -> u32, "{eax}" = 1 }; }
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
1
examples/expected/1643-platform-asm-echo-name.stdout
Normal file
1
examples/expected/1643-platform-asm-echo-name.stdout
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1644-platform-asm-duplicate-name.exit
Normal file
1
examples/expected/1644-platform-asm-duplicate-name.exit
Normal file
@@ -0,0 +1 @@
|
||||
1
|
||||
@@ -0,0 +1,5 @@
|
||||
error: duplicate asm operand name `x`
|
||||
--> examples/1644-platform-asm-duplicate-name.sx:3:25
|
||||
|
|
||||
3 | f :: () -> u64 { return asm volatile { "nop", [x] "=r" -> u64, [x] "r" = 5 }; }
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1645-platform-asm-aarch64-add.build
Normal file
1
examples/expected/1645-platform-asm-aarch64-add.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "macos" }
|
||||
1
examples/expected/1645-platform-asm-aarch64-add.exit
Normal file
1
examples/expected/1645-platform-asm-aarch64-add.exit
Normal file
@@ -0,0 +1 @@
|
||||
42
|
||||
21
examples/expected/1645-platform-asm-aarch64-add.ir
Normal file
21
examples/expected/1645-platform-asm-aarch64-add.ir
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define internal i64 @add_asm(i64 %0, i64 %1) #0 {
|
||||
entry:
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 %0, ptr %alloca, align 8
|
||||
%allocaN = alloca i64, align 8
|
||||
store i64 %1, ptr %allocaN, align 8
|
||||
%load = load i64, ptr %alloca, align 8
|
||||
%loadN = load i64, ptr %allocaN, align 8
|
||||
%asm = call i64 asm "add ${0}, ${1}, ${2}", "=r,r,r"(i64 %load, i64 %loadN)
|
||||
ret i64 %asm
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%call = call i64 @add_asm(i64 40, i64 2)
|
||||
%ca.tr = trunc i64 %call to i32
|
||||
ret i32 %ca.tr
|
||||
}
|
||||
1
examples/expected/1645-platform-asm-aarch64-add.stderr
Normal file
1
examples/expected/1645-platform-asm-aarch64-add.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1645-platform-asm-aarch64-add.stdout
Normal file
1
examples/expected/1645-platform-asm-aarch64-add.stdout
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1646-platform-asm-value-binding.build
Normal file
1
examples/expected/1646-platform-asm-value-binding.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "macos" }
|
||||
1
examples/expected/1646-platform-asm-value-binding.exit
Normal file
1
examples/expected/1646-platform-asm-value-binding.exit
Normal file
@@ -0,0 +1 @@
|
||||
99
|
||||
11
examples/expected/1646-platform-asm-value-binding.ir
Normal file
11
examples/expected/1646-platform-asm-value-binding.ir
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%asm = call i64 asm "mov ${0}, #99", "=r"()
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 %asm, ptr %alloca, align 8
|
||||
%load = load i64, ptr %alloca, align 8
|
||||
%ca.tr = trunc i64 %load to i32
|
||||
ret i32 %ca.tr
|
||||
}
|
||||
1
examples/expected/1646-platform-asm-value-binding.stderr
Normal file
1
examples/expected/1646-platform-asm-value-binding.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1646-platform-asm-value-binding.stdout
Normal file
1
examples/expected/1646-platform-asm-value-binding.stdout
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1647-platform-asm-aarch64-multi.build
Normal file
1
examples/expected/1647-platform-asm-aarch64-multi.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "macos" }
|
||||
1
examples/expected/1647-platform-asm-aarch64-multi.exit
Normal file
1
examples/expected/1647-platform-asm-aarch64-multi.exit
Normal file
@@ -0,0 +1 @@
|
||||
70
|
||||
31
examples/expected/1647-platform-asm-aarch64-multi.ir
Normal file
31
examples/expected/1647-platform-asm-aarch64-multi.ir
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define internal { i64, i64 } @split(i64 %0) #0 {
|
||||
entry:
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 %0, ptr %alloca, align 8
|
||||
%load = load i64, ptr %alloca, align 8
|
||||
%asm = call { i64, i64 } asm " and ${0}, ${2}, #0xff\0A lsr ${1}, ${2}, #8\0A", "=r,=r,r"(i64 %load)
|
||||
%tg = extractvalue { i64, i64 } %asm, 0
|
||||
%tgN = extractvalue { i64, i64 } %asm, 1
|
||||
%ti = insertvalue { i64, i64 } undef, i64 %tg, 0
|
||||
%tiN = insertvalue { i64, i64 } %ti, i64 %tgN, 1
|
||||
ret { i64, i64 } %tiN
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%call = call { i64, i64 } @split(i64 4660)
|
||||
%tg = extractvalue { i64, i64 } %call, 0
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 %tg, ptr %alloca, align 8
|
||||
%tgN = extractvalue { i64, i64 } %call, 1
|
||||
%allocaN = alloca i64, align 8
|
||||
store i64 %tgN, ptr %allocaN, align 8
|
||||
%load = load i64, ptr %alloca, align 8
|
||||
%loadN = load i64, ptr %allocaN, align 8
|
||||
%add = add i64 %load, %loadN
|
||||
%ca.tr = trunc i64 %add to i32
|
||||
ret i32 %ca.tr
|
||||
}
|
||||
1
examples/expected/1647-platform-asm-aarch64-multi.stderr
Normal file
1
examples/expected/1647-platform-asm-aarch64-multi.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1647-platform-asm-aarch64-multi.stdout
Normal file
1
examples/expected/1647-platform-asm-aarch64-multi.stdout
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1648-platform-asm-global.build
Normal file
1
examples/expected/1648-platform-asm-global.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "aot": true, "target": "macos" }
|
||||
1
examples/expected/1648-platform-asm-global.exit
Normal file
1
examples/expected/1648-platform-asm-global.exit
Normal file
@@ -0,0 +1 @@
|
||||
42
|
||||
16
examples/expected/1648-platform-asm-global.ir
Normal file
16
examples/expected/1648-platform-asm-global.ir
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
module asm ".global _my_add"
|
||||
module asm "_my_add:"
|
||||
module asm " add x0, x0, x1"
|
||||
module asm " ret"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare i64 @my_add(i64, i64) #0
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%call = call i64 @my_add(i64 40, i64 2)
|
||||
%ca.tr = trunc i64 %call to i32
|
||||
ret i32 %ca.tr
|
||||
}
|
||||
1
examples/expected/1648-platform-asm-global.stderr
Normal file
1
examples/expected/1648-platform-asm-global.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1648-platform-asm-global.stdout
Normal file
1
examples/expected/1648-platform-asm-global.stdout
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1649-platform-asm-place-output.build
Normal file
1
examples/expected/1649-platform-asm-place-output.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "macos" }
|
||||
1
examples/expected/1649-platform-asm-place-output.exit
Normal file
1
examples/expected/1649-platform-asm-place-output.exit
Normal file
@@ -0,0 +1 @@
|
||||
42
|
||||
25
examples/expected/1649-platform-asm-place-output.ir
Normal file
25
examples/expected/1649-platform-asm-place-output.ir
Normal file
@@ -0,0 +1,25 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define internal i64 @compute() #0 {
|
||||
entry:
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 0, ptr %alloca, align 8
|
||||
%asm = call { i64, i64 } asm sideeffect " mov ${0}, #5\0A mov ${1}, #37\0A", "=r,=r"()
|
||||
%asm.out = extractvalue { i64, i64 } %asm, 0
|
||||
%asm.out1 = extractvalue { i64, i64 } %asm, 1
|
||||
store i64 %asm.out1, ptr %alloca, align 8
|
||||
%allocaN = alloca i64, align 8
|
||||
store i64 %asm.out, ptr %allocaN, align 8
|
||||
%load = load i64, ptr %allocaN, align 8
|
||||
%loadN = load i64, ptr %alloca, align 8
|
||||
%add = add i64 %load, %loadN
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%call = call i64 @compute()
|
||||
%ca.tr = trunc i64 %call to i32
|
||||
ret i32 %ca.tr
|
||||
}
|
||||
1
examples/expected/1649-platform-asm-place-output.stderr
Normal file
1
examples/expected/1649-platform-asm-place-output.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1649-platform-asm-place-output.stdout
Normal file
1
examples/expected/1649-platform-asm-place-output.stdout
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1650-platform-asm-rw-place.build
Normal file
1
examples/expected/1650-platform-asm-rw-place.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "macos" }
|
||||
1
examples/expected/1650-platform-asm-rw-place.exit
Normal file
1
examples/expected/1650-platform-asm-rw-place.exit
Normal file
@@ -0,0 +1 @@
|
||||
42
|
||||
20
examples/expected/1650-platform-asm-rw-place.ir
Normal file
20
examples/expected/1650-platform-asm-rw-place.ir
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define internal i64 @compute() #0 {
|
||||
entry:
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 41, ptr %alloca, align 8
|
||||
%asm.rw.seed = load i64, ptr %alloca, align 8
|
||||
%asm = call i64 asm sideeffect "add ${0}, ${0}, #1", "=r,0"(i64 %asm.rw.seed)
|
||||
store i64 %asm, ptr %alloca, align 8
|
||||
%load = load i64, ptr %alloca, align 8
|
||||
ret i64 %load
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%call = call i64 @compute()
|
||||
%ca.tr = trunc i64 %call to i32
|
||||
ret i32 %ca.tr
|
||||
}
|
||||
1
examples/expected/1650-platform-asm-rw-place.stderr
Normal file
1
examples/expected/1650-platform-asm-rw-place.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
examples/expected/1650-platform-asm-rw-place.stdout
Normal file
1
examples/expected/1650-platform-asm-rw-place.stdout
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
{ "target": "x86_64-linux" }
|
||||
@@ -0,0 +1 @@
|
||||
0
|
||||
28
examples/expected/1651-platform-asm-x86-syscall-write.ir
Normal file
28
examples/expected/1651-platform-asm-x86-syscall-write.ir
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define internal i64 @sys_write(i64 %0, ptr %1, i64 %2) #0 {
|
||||
entry:
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 %0, ptr %alloca, align 8
|
||||
%allocaN = alloca ptr, align 8
|
||||
store ptr %1, ptr %allocaN, align 8
|
||||
%allocaN = alloca i64, align 8
|
||||
store i64 %2, ptr %allocaN, align 8
|
||||
%load = load i64, ptr %alloca, align 8
|
||||
%loadN = load ptr, ptr %allocaN, align 8
|
||||
%loadN = load i64, ptr %allocaN, align 8
|
||||
%asm = call i64 asm sideeffect "syscall", "={rax},{rax},{rdi},{rsi},{rdx},~{rcx},~{r11},~{memory}"(i64 1, i64 %load, ptr %loadN, i64 %loadN)
|
||||
ret i64 %asm
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%alloca = alloca [3 x i8], align 1
|
||||
store [3 x i8] c"ok\0A", ptr %alloca, align 1
|
||||
%igp.ptr = getelementptr i8, ptr %alloca, i64 0
|
||||
%call = call i64 @sys_write(i64 1, ptr %igp.ptr, i64 3)
|
||||
%allocaN = alloca i64, align 8
|
||||
store i64 %call, ptr %allocaN, align 8
|
||||
ret i32 0
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user