diff --git a/current/CHECKPOINT-ASM.md b/current/CHECKPOINT-ASM.md index 3241b71..1e00626 100644 --- a/current/CHECKPOINT-ASM.md +++ b/current/CHECKPOINT-ASM.md @@ -6,7 +6,26 @@ commit, one step at a time per the cadence rule (no commit may both add a test and make it pass). ## Last completed step -**A.0** — `kw_asm` keyword (first compiler code). Added the `kw_asm` `Token.Tag` +**A.1** — parse `asm { … }` + loud lowering bail (folded A.1+A.2 into one honest +lock commit, since the loud bail IS current correct behavior — cadence option +(a)). Added `AsmExpr`/`AsmOperand` to `src/ast.zig` + the `asm_expr` `Node.Data` +arm; `parseAsmExpr` in `src/parser.zig` (`parsePrimary` `.kw_asm` dispatch) — +parses the template, flat operand list (`[name]? "constraint" -> Type` value +output / `= expr` input), and `clobbers(.…)`; `volatile`/`clobbers` recognized +contextually via `isContextualWord`. The new `asm_expr` tag forced (and got) +arms in three exhaustive `Node.Data` switches: `src/sema.zig` `analyzeNode` + +`findNodeAtOffset`, `src/ir/semantic_diagnostics.zig` `checkBindingNames` (all +recurse into template + operand payloads). Lowering bails LOUD + named in +`src/ir/lower/expr.zig` ("inline assembly codegen is not yet implemented…") via +an explicit `.asm_expr` arm (not the generic `unknown_expr` else) returning +`emitPlaceholder`. `-> @place` write-through is rejected with a clear "Phase 2" +parse error. Locked with `examples/1640-platform-asm-parse.sx` (multi-output +`divmod`, named operands, register pins, clobbers — parses then bails; called +from `main`). `zig build test` green (648 corpus, 0 failed; 445 unit). Files: +`src/ast.zig`, `src/parser.zig`, `src/sema.zig`, `src/ir/semantic_diagnostics.zig`, +`src/ir/lower/expr.zig`, `examples/1640-*`. + +Prior: **A.0** — `kw_asm` keyword (first compiler code). 
Added the `kw_asm` `Token.Tag` variant + `.{ "asm", .kw_asm }` keyword-map entry in `src/token.zig`; `volatile` / `clobbers` deliberately stay OUT of the global table (contextual). New exhaustive `Tag` switch in `src/lsp/server.zig` `classifyToken` flagged the missing arm (the @@ -31,21 +50,33 @@ guards fire: corrupting the `.ir` → IR mismatch; deleting it → the require-f `src/corpus_run.test.zig`, `examples/1639-*`. ## Current state -Phase 0 complete (corpus target-gating + `.build` JSON). Phase A underway: `asm` -now lexes as `kw_asm` (A.0). No parsing/AST yet — `asm` in source would reach -`parsePrimary` and fall through to the existing "unexpected token" error until -A.1. Phase B–E feasibility already confirmed against the live tree +Phase A underway: `asm { … }` lexes (A.0) and **parses** into `AsmExpr` (A.1); +lowering bails LOUD + named (no IR op / emit yet). Result-type derivation, the +operand auto-naming rule, and the validation checklist are **Phase B** (not yet +implemented — any asm reaching lowering errors out). The adopted **operand +auto-naming rule** (design §II.5, decided this session): name auto-derived from a +`{reg}` pin; explicit `[name]` only when it differs or for register-class (`=r`) +operands; echo form `[eax] "={eax}"` rejected. Parser stores `name: ?[]const u8`; +the rule is a Phase-B (typing) concern, so the parser needs no change for it. + +Known orthogonal bug: **issue 0137** — `sx run` on a program with no `main` +segfaults (`src/target.zig:256-273`, unguarded JIT entry lookup). Pre-existing, +asm-independent; does NOT block the ASM stream (every example has a `main`). + +Phase B–E feasibility already confirmed against the live tree (`LLVMGetInlineAsm` / `LLVMBuildCall2` / `LLVMAppendModuleInlineAsm` in LLVM@19 `Core.h`; ERR-stream `extractvalue`→tuple in `emit_llvm.zig:726-927`; lib-less `extern`, 60 sites; `--target` a global CLI flag). 
## Next step -**A.1** (xfail) — parse `asm { … }` → `AsmExpr` / `AsmOperand` in `parsePrimary` -(`src/parser.zig`); add the `asm_expr` arm to `Node.Data` + the `AsmExpr` / -`AsmOperand` structs in `src/ast.zig` (per design §II.3); lowering still -`bailDetail("inline asm codegen unimplemented")` in `src/ir/interp.zig` (or the -lower dispatch). Pin a parse-shape snapshot (`sx ir` or AST). The unimplemented -bail must be loud + named. See `PLAN-ASM.md` Phase A (A.1) + design §II.3–II.4. +**B.0/B.1** (Phase B — sema/typing) — derive the asm result type from the +`out_value` operands (0→`void` + require `volatile`; 1→`T`; N→tuple, named via the +§II.5 auto-naming rule), in the expression typer (`src/ir/expr_typer.zig` / +`inferExprType`). Implement the validation checklist (no-output⇒volatile; layout; +comptime-string template; coerce comptime int→i64/float→f64) + the auto-naming / +echo-rejection diagnostics. On failure return the `.unresolved` sentinel, never a +silent default. Pin error-message examples. See `PLAN-ASM.md` Phase B + design +§II.5. (Lowering keeps bailing until Phase C adds the IR op.) ## Log - (init) Plan + design doc written; ASM stream opened. @@ -61,6 +92,12 @@ bail must be loud + named. See `PLAN-ASM.md` Phase A (A.1) + design §II.3–II. - (A.0) `kw_asm` keyword in token.zig (+ map entry); LSP `classifyToken` switch coverage; lock test in new `lexer.test.zig` (wired via root.zig). `volatile` / `clobbers` stay contextual identifiers. `zig build test` green (445 unit, +1). +- (A.1) parse `asm { … }` → `AsmExpr` + loud lowering bail; `asm_expr` arms in 3 + exhaustive `Node.Data` switches; `-> @place` rejected (Phase 2). Adopted operand + auto-naming rule (design §II.5). Locked with 1640 fixture. Filed orthogonal + issue 0137 (no-`main` JIT segfault). `zig build test` green (648 corpus, 445 unit). ## Known issues -None yet. +- **0137** — `sx run` on a program with no `main` segfaults (unguarded JIT entry + lookup, `src/target.zig:256-273`). 
Pre-existing, asm-independent. Filed + `issues/0137-jit-run-no-main-segfault.md`. Does not block A.1. diff --git a/docs/inline-asm-design.md b/docs/inline-asm-design.md index 8c3e7f1..47f40e4 100644 --- a/docs/inline-asm-design.md +++ b/docs/inline-asm-design.md @@ -549,6 +549,40 @@ Lexer/token: add `kw_asm` to the `Token.Tag` enum + keyword `StaticStringMap` in * Every `%[name]` referenced in the template must name an operand (best surfaced as a Sema diagnostic; also caught at codegen during the rewrite — §II.6). +### Operand naming rule (auto-name from a `{reg}` pin) — DECIDED + +The `[name]` label on an operand is purely an sx-surface convenience: it provides +the `%[name]` template alias and (for `out_value`) the result tuple's field name. +LLVM never sees it (it sees positional `${N}` + the constraint). To kill the +common redundancy where a label just echoes its pinned register +(`[eax] "={eax}"`), the **operand name is derived as follows**, uniformly across +every operand kind (`out_value` / `out_place` / read-write / `input`): + +1. **Explicit `[name]` wins** — use it verbatim (the `%[name]` alias / field name). +2. **Else, if the constraint pins a single register** — `"={eax}"`, `"{rdi}"`, + `"+{rax}"`, i.e. a `{reg}` body (optionally with a `=`/`+` prefix) — the operand + is **auto-named after that register** (`eax`, `rdi`, `rax`). Usable as + `%[eax]` and as the tuple field name. +3. **Else (register-class `=r`/`+r`/`r`, or memory `=m`, …)** — the operand has + **no implicit name**. A `[name]` is then **required** if the template + references it (`%[name]`) or, for `out_value`, if a named result field is + wanted; otherwise it is anonymous (positional tuple field). + +Corollaries: + +* **Reject the echo form.** An explicit `[name]` that is identical to the + register its own constraint pins (`[eax] "={eax}"`) carries no information — + emit a diagnostic ("redundant operand name `eax` — it already names the pinned + register; drop the `[eax]`"). 
The useful form is a label that *differs* from the + register (`[quot] "={rax}"` → field `quot` over register `rax`). +* **Result field names** (the §II.5 result-type rule above) come from each + `out_value`'s *effective* name — explicit `[name]`, else the auto-derived + register name; positional only when neither exists (a class-constrained output + with no `[name]`). +* This is a **typing-stage** rule: the parser still stores `name: ?[]const u8` + (null when no `[name]` was written); Sema computes the effective name. No + parser change. + Note: there is **no** "≤1 output" rule (that was Zig's limit; sx's tuples lift it). ## II.6 sx IR + LLVM codegen (the part that must match Zig bit-for-bit) diff --git a/examples/1640-platform-asm-parse.sx b/examples/1640-platform-asm-parse.sx new file mode 100644 index 0000000..d2efaec --- /dev/null +++ b/examples/1640-platform-asm-parse.sx @@ -0,0 +1,20 @@ +// ASM stream Phase A.1 — `asm { … }` PARSES into an AsmExpr: template, named +// value outputs (`[quot] "={rax}" -> u64`), register-pinned inputs, and a +// `clobbers(.…)` clause are all accepted with no parse error. Codegen is not +// implemented yet (the IR op + LLVM emit land in Phases C–E), so lowering bails +// LOUD + named. This example pins that intermediate diagnostic; a later phase +// turns it into a running multi-return example. Called from `main` so lowering +// actually reaches the asm body (lazy lowering skips uncalled functions). 
+divmod :: (n: u64, d: u64) -> (quot: u64, rem: u64) { + return asm { + "divq %[d]", + [quot] "={rax}" -> u64, + [rem] "={rdx}" -> u64, + "{rax}" = n, "{rdx}" = 0, [d] "r" = d, + clobbers(.cc), + }; +} + +main :: () { + q, r := divmod(17, 5); +} diff --git a/examples/expected/1640-platform-asm-parse.exit b/examples/expected/1640-platform-asm-parse.exit new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/examples/expected/1640-platform-asm-parse.exit @@ -0,0 +1 @@ +1 diff --git a/examples/expected/1640-platform-asm-parse.stderr b/examples/expected/1640-platform-asm-parse.stderr new file mode 100644 index 0000000..a34f506 --- /dev/null +++ b/examples/expected/1640-platform-asm-parse.stderr @@ -0,0 +1,17 @@ +error: inline assembly codegen is not yet implemented (ASM stream: lowering + emit land in Phases C–E) + --> examples/1640-platform-asm-parse.sx:9:12 + | + 9 | return asm { + | ^^^^^ +10 | "divq %[d]", + | ^^^^^^^^^^^^^^^^^^^^ +11 | [quot] "={rax}" -> u64, + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +12 | [rem] "={rdx}" -> u64, + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +13 | "{rax}" = n, "{rdx}" = 0, [d] "r" = d, + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +14 | clobbers(.cc), + | ^^^^^^^^^^^^^^^^^^^^^^ +15 | }; + | ^^^^^ diff --git a/examples/expected/1640-platform-asm-parse.stdout b/examples/expected/1640-platform-asm-parse.stdout new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/1640-platform-asm-parse.stdout @@ -0,0 +1 @@ + diff --git a/issues/0137-jit-run-no-main-segfault.md b/issues/0137-jit-run-no-main-segfault.md new file mode 100644 index 0000000..f891fda --- /dev/null +++ b/issues/0137-jit-run-no-main-segfault.md @@ -0,0 +1,69 @@ +# 0137 — `sx run` on a program with no `main` segfaults (JIT entry lookup unguarded) + +## Symptom + +`sx run file.sx` on a program that defines no `main` function **crashes** +(SIGSEGV/abort, "Segmentation fault at address 0x60") instead of emitting a clean +diagnostic like `error: no 'main' 
function found`. + +- **Observed:** process crash, exit 134 (abort) / 139 (SIGSEGV); no diagnostic. +- **Expected:** a normal compile-style error ("no `main` entry point") and a + clean non-zero exit, the same way any other missing-entry condition reports. + +Independent of inline assembly — surfaced while writing an ASM-stream probe that +omitted `main`, but reproduces with an ordinary, asm-free program (see below). + +## Reproduction + +A file with only an (uncalled) function and no `main`: + +```sx +foo :: (n: u64) -> u64 { return n + 1; } +``` + +```sh +sx run that.sx +# => "Segmentation fault at address 0x60", exit 134 +# expected: "error: no 'main' function found" (or similar), clean non-zero exit +``` + +## Root cause (suspected) + +`src/target.zig` JIT-run path, ~lines 256–273. After the ORC lookup: + +```zig +var main_addr: c.LLVMOrcExecutorAddress = 0; +err = c.LLVMOrcLLJITLookup(jit, &main_addr, "main"); +if (err != null) { /* prints "JIT lookup error" and returns error.CompileError */ } + +// no guard for main_addr == 0 here: +const main_fn: *const fn () callconv(.c) i32 = @ptrFromInt(main_addr); +const result = main_fn(); // <- calls a null/garbage pointer when no main +``` + +When the module has no `main` symbol, the lookup leaves `main_addr` at `0` (or +ORC returns a degenerate success), so `@ptrFromInt(main_addr)` + `main_fn()` +calls into null → the crash. There is no `main_addr == 0` check. + +## Investigation prompt (paste into a fresh session) + +> `sx run` on a program with no `main` segfaults instead of diagnosing. The JIT +> run path in `src/target.zig` (~lines 256–273) looks up `"main"` via +> `LLVMOrcLLJITLookup`, then unconditionally casts `main_addr` to a function +> pointer and calls it. When the program defines no `main`, `main_addr` is `0` +> (or the lookup degenerately "succeeds"), so the call dereferences null and +> crashes. 
+> +> Fix: after the lookup's `err` check, add `if (main_addr == 0) { … }` that emits +> a clean user-facing error ("no `main` function found" / "program has no entry +> point") and returns `error.CompileError` (matching the existing +> `JIT lookup error` style), BEFORE the `@ptrFromInt` + call. Consider whether a +> pre-JIT check (the module/program already knows whether a `main` decl exists — +> e.g. emit_llvm.zig:631 already null-checks `LLVMGetNamedFunction(.., "main")`) +> is the better choke point so the diagnostic carries a source span rather than a +> bare message. Either is acceptable; the hard requirement is *no crash*. +> +> Verification: `printf 'foo :: (n: u64) -> u64 { return n + 1; }\n' > /tmp/x.sx +> && sx run /tmp/x.sx` — expect a clean error message + non-zero exit, NOT a +> segfault. Add a pinned repro under `issues/` (or an `examples/11xx-diagnostics-*` +> once the message is settled) asserting the diagnostic on stderr + the exit code. diff --git a/src/ast.zig b/src/ast.zig index 18fa70e..311befa 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -95,6 +95,7 @@ pub const Node = struct { ffi_intrinsic_call: FfiIntrinsicCall, runtime_class_decl: RuntimeClassDecl, jni_env_block: JniEnvBlock, + asm_expr: AsmExpr, pub fn declName(self: Data) ?[]const u8 { return switch (self) { @@ -222,6 +223,42 @@ pub const StringLiteral = struct { is_raw: bool = false, }; +/// Inline assembly expression: `asm volatile? { "tmpl", operand…, +/// clobbers(.…) }` (ASM stream, design §II.3). A flat `operands` list in source +/// order — that order keys the `%N`/`%[name]` indices and the LLVM constraint +/// string. The result type is derived in Sema from the `out_value` operands +/// (0→void, 1→T, N→tuple). Parsed in Phase A.1; lowering bails loudly until the +/// IR op + emit land (Phases C–E). +pub const AsmExpr = struct { + /// Template: a string-literal / `#string` heredoc node (a comptime string). 
+ template: *Node, + is_volatile: bool = false, + /// Declaration order preserved (= `%N` indexing). + operands: []const AsmOperand, + /// Dot-names from `clobbers(.…)`: e.g. "rcx", "cc", "memory". + clobbers: []const []const u8, +}; + +pub const AsmOperand = struct { + /// Optional `[name]`; null when not written. The *effective* name (for + /// `%[name]` and the result tuple field) is computed in Sema: explicit + /// `[name]`, else auto-derived from a `{reg}` pin in `constraint` (design + /// §II.5 naming rule). + name: ?[]const u8 = null, + /// Verbatim constraint, e.g. "={rax}", "=r", "+r", "{rdi}", "r". + constraint: []const u8, + role: Role, + /// `out_value` → a Type node; `input` → an expression node. (`out_place` + /// payload is a write-through place expr — Phase 2, not parsed in A.1.) + payload: *Node, + + pub const Role = enum { + out_value, // `-> Type` value output; N of these → a tuple result + out_place, // `-> @place` write-through to storage (Phase 2) + input, // `= expr` + }; +}; + pub const Identifier = struct { name: []const u8, /// True when written as a backtick raw identifier (`` `i2 ``). Carried so a diff --git a/src/ir/lower/expr.zig b/src/ir/lower/expr.zig index 76159ff..73f99e3 100644 --- a/src/ir/lower/expr.zig +++ b/src/ir/lower/expr.zig @@ -2189,6 +2189,16 @@ pub fn lowerExpr(self: *Lowering, node: *const Node) Ref { .try_expr => |te| self.lowerTry(te.operand, node.span), .catch_expr => |ce| self.lowerCatch(&ce, node.span), .caller_location => self.lowerCallerLocation(node), + // Inline assembly parses (Phase A.1) but has no IR op / emit yet + // (Phases C–E). Bail LOUDLY with a named diagnostic rather than falling + // into the generic `unknown_expr` arm — the placeholder Ref makes + // `hasErrors()` abort the build on this message (CLAUDE.md no-silent-arm). 
+ .asm_expr => blk: { + if (self.diagnostics) |diags| { + diags.addFmt(.err, node.span, "inline assembly codegen is not yet implemented (ASM stream: lowering + emit land in Phases C–E)", .{}); + } + break :blk self.emitPlaceholder("inline_asm"); + }, else => self.emitError("unknown_expr", node.span), }; } diff --git a/src/ir/semantic_diagnostics.zig b/src/ir/semantic_diagnostics.zig index de5accf..f592d80 100644 --- a/src/ir/semantic_diagnostics.zig +++ b/src/ir/semantic_diagnostics.zig @@ -312,6 +312,10 @@ pub const UnknownTypeChecker = struct { .comptime_expr => |ce| self.checkBindingNames(ce.expr), .insert_expr => |ins| self.checkBindingNames(ins.expr), .spread_expr => |se| self.checkBindingNames(se.operand), + .asm_expr => |ae| { + self.checkBindingNames(ae.template); + for (ae.operands) |op| self.checkBindingNames(op.payload); + }, // ── Named type / alias / import declarations: a bare reserved // spelling as the declared name is rejected. These // have no nested binding sites, so only the name is checked. A diff --git a/src/parser.zig b/src/parser.zig index 07529ad..421d772 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -2702,6 +2702,105 @@ pub const Parser = struct { return expr; } + /// True when the current token is a bare identifier with text `word` — used + /// for the contextual keywords `volatile` / `clobbers` that appear only + /// inside an `asm { … }` body and are NOT globally reserved. + fn isContextualWord(self: *const Parser, word: []const u8) bool { + return self.current.tag == .identifier and std.mem.eql(u8, self.tokenSlice(self.current), word); + } + + /// Inline assembly expression (ASM stream, design §II.2–II.4): + /// `asm volatile? { "tmpl", [name]? "constraint" (-> Type | = expr), …, + /// clobbers(.name, …) }` + /// A flat, comma-separated brace block: the template first, then operands + /// and an optional `clobbers(.…)` clause, source order preserved. 
+ fn parseAsmExpr(self: *Parser, start: u32) anyerror!*Node { + self.advance(); // consume `asm` + var is_volatile = false; + if (self.isContextualWord("volatile")) { + is_volatile = true; + self.advance(); + } + try self.expect(.l_brace); + + // First element: the template (a comptime string — `"..."` or `#string`). + const template = try self.parseExpr(); + + var operands = std.ArrayList(ast.AsmOperand).empty; + var clobbers = std.ArrayList([]const u8).empty; + + while (self.current.tag == .comma) { + self.advance(); // consume the separating comma + if (self.current.tag == .r_brace) break; // trailing comma + + // `clobbers(.name, .name, …)` clause. + if (self.isContextualWord("clobbers")) { + self.advance(); + try self.expect(.l_paren); + while (true) { + try self.expect(.dot); + if (self.current.tag != .identifier) + return self.fail("expected a clobber name after '.' in clobbers(...)"); + try clobbers.append(self.allocator, self.tokenSlice(self.current)); + self.advance(); + if (self.current.tag == .comma) { + self.advance(); + continue; + } + break; + } + try self.expect(.r_paren); + continue; + } + + // Operand: `[name]? "constraint" (-> Type | = expr)`. + var op_name: ?[]const u8 = null; + if (self.current.tag == .l_bracket) { + self.advance(); + if (self.current.tag != .identifier) + return self.fail("expected an operand name in '[...]'"); + op_name = self.tokenSlice(self.current); + self.advance(); + try self.expect(.r_bracket); + } + if (self.current.tag != .string_literal) + return self.fail("expected a \"constraint\" string in asm operand"); + const craw = self.tokenSlice(self.current); + const constraint = craw[1 .. 
craw.len - 1]; // strip quotes + self.advance(); + + var role: ast.AsmOperand.Role = undefined; + var payload: *Node = undefined; + if (self.current.tag == .arrow) { + self.advance(); + if (self.current.tag == .at) + return self.fail("`-> @place` write-through asm outputs are not supported yet (Phase 2); use a `-> Type` value output"); + role = .out_value; + payload = try self.parseTypeExpr(); + } else if (self.current.tag == .equal) { + self.advance(); + role = .input; + payload = try self.parseExpr(); + } else { + return self.fail("expected '->' (output) or '=' (input) after the asm constraint"); + } + try operands.append(self.allocator, .{ + .name = op_name, + .constraint = constraint, + .role = role, + .payload = payload, + }); + } + + try self.expect(.r_brace); + return try self.createNode(start, .{ .asm_expr = .{ + .template = template, + .is_volatile = is_volatile, + .operands = try operands.toOwnedSlice(self.allocator), + .clobbers = try clobbers.toOwnedSlice(self.allocator), + } }); + } + fn parsePrimary(self: *Parser) anyerror!*Node { const start = self.current.loc.start; // Pack references in expression position: @@ -2807,6 +2906,7 @@ pub const Parser = struct { self.advance(); return try self.createNode(start, .{ .identifier = .{ .name = name } }); }, + .kw_asm => return self.parseAsmExpr(start), .dot => { self.advance(); // Anonymous struct literal: .{ ... } diff --git a/src/sema.zig b/src/sema.zig index 15c3ee0..6adfb7f 100644 --- a/src/sema.zig +++ b/src/sema.zig @@ -1360,6 +1360,13 @@ pub const Analyzer = struct { try self.analyzeNode(eb.body); self.popScope(); }, + .asm_expr => |ae| { + // Walk the template and each operand payload (input exprs; + // out_value type exprs are leaves). Result-type derivation is + // Phase B; lowering keeps bailing until the IR op + emit land (Phases C–E). 
+ try self.analyzeNode(ae.template); + for (ae.operands) |op| try self.analyzeNode(op.payload); + }, .impl_block => |ib| { // Each impl block gets its own scope so methods don't conflict across impls try self.pushScope(); @@ -1830,6 +1837,12 @@ pub fn findNodeAtOffset(node: *Node, offset: u32) ?*Node { if (findNodeAtOffset(d, offset)) |found| return found; } }, + .asm_expr => |ae| { + if (findNodeAtOffset(ae.template, offset)) |found| return found; + for (ae.operands) |op| { + if (findNodeAtOffset(op.payload, offset)) |found| return found; + } + }, } return node;