feat(asm): Phase A.1 — parse asm { … } into AsmExpr; loud lowering bail

`asm volatile? { "tmpl", [name]? "constraint" (-> Type | = expr), …,
clobbers(.…) }` now parses into a flat-operand AsmExpr/AsmOperand (ast.zig +
parser.zig parseAsmExpr, dispatched from parsePrimary on .kw_asm). `volatile`
and `clobbers` are recognized contextually (not reserved). `-> @place`
write-through is rejected with a clear "Phase 2" parse error.

Codegen is not implemented yet (IR op + LLVM emit are Phases C–E), so lowering
bails LOUD + named via an explicit .asm_expr arm in lower/expr.zig (not the
generic unknown_expr else) — emitPlaceholder makes hasErrors() abort the build
on the message.

The new asm_expr tag forced (and got) arms in three exhaustive Node.Data
switches: sema.zig analyzeNode + findNodeAtOffset, semantic_diagnostics.zig
checkBindingNames — each recurses into template + operand payloads.

Design: adopted the operand auto-naming rule (design §II.5) — name auto-derived
from a {reg} pin, explicit [name] only when it differs or for register-class
operands, echo form rejected. Typing-stage rule; parser stores name: ?[]const u8.

Locked with examples/1640-platform-asm-parse.sx (multi-output divmod: named
operands, register pins, clobbers — parses then bails, called from main).

Also files issue 0137 (pre-existing, orthogonal: `sx run` with no `main`
segfaults via an unguarded JIT entry lookup in target.zig — not an asm bug).

zig build test green (648 corpus, 445 unit).
This commit is contained in:
agra
2026-06-15 20:21:25 +03:00
parent 3c9ecd0b42
commit f8e029d719
12 changed files with 355 additions and 12 deletions

View File

@@ -95,6 +95,7 @@ pub const Node = struct {
ffi_intrinsic_call: FfiIntrinsicCall,
runtime_class_decl: RuntimeClassDecl,
jni_env_block: JniEnvBlock,
asm_expr: AsmExpr,
pub fn declName(self: Data) ?[]const u8 {
return switch (self) {
@@ -222,6 +223,42 @@ pub const StringLiteral = struct {
is_raw: bool = false,
};
/// Inline assembly expression: `asm volatile? { "tmpl", <operands…>,
/// clobbers(.…) }` (ASM stream, design §II.3). A flat `operands` list in source
/// order — that order keys the `%N`/`%[name]` indices and the LLVM constraint
/// string. The result type is derived in Sema from the `out_value` operands
/// (0→void, 1→T, N→tuple). Parsed in Phase A.1; lowering bails loudly until the
/// IR op + emit land (Phases C–E).
pub const AsmExpr = struct {
/// Template: a string-literal / `#string` heredoc node (a comptime string).
/// The parser does not enforce that shape itself — it stores whatever
/// expression node it parsed in the first position of the brace block.
template: *Node,
/// True when the contextual word `volatile` was written directly after `asm`.
is_volatile: bool = false,
/// Declaration order preserved (= `%N` indexing).
operands: []const AsmOperand,
/// Dot-names from `clobbers(.…)`: e.g. "rcx", "cc", "memory". Stored without
/// the leading `.`; every `clobbers(...)` clause in the block appends here.
clobbers: []const []const u8,
};
/// One operand of an `asm { … }` block: `[name]? "constraint" (-> Type | = expr)`.
/// Operands are kept in the order they were written in `AsmExpr.operands`.
pub const AsmOperand = struct {
/// Optional `[name]`; null when not written. The *effective* name (for
/// `%[name]` and the result tuple field) is computed in Sema: explicit
/// `[name]`, else auto-derived from a `{reg}` pin in `constraint` (design
/// §II.5 naming rule).
name: ?[]const u8 = null,
/// Verbatim constraint, e.g. "={rax}", "=r", "+r", "{rdi}", "r". The parser
/// stores the token text with its first and last byte (the quotes) removed
/// and performs no other processing on it.
constraint: []const u8,
/// Which operand form was written; decides how `payload` is interpreted.
role: Role,
/// `out_value` → a Type node; `input` → an expression node. (`out_place`
/// payload is a write-through place expr — Phase 2, not parsed in A.1.)
payload: *Node,
/// Operand direction / kind.
pub const Role = enum {
out_value, // `-> Type` value output; N of these → a tuple result
out_place, // `-> @place` write-through to storage (Phase 2); the A.1 parser rejects this form, so it never produces this tag
input, // `= expr`
};
};
pub const Identifier = struct {
name: []const u8,
/// True when written as a backtick raw identifier (`` `i2 ``). Carried so a

View File

@@ -2189,6 +2189,16 @@ pub fn lowerExpr(self: *Lowering, node: *const Node) Ref {
.try_expr => |te| self.lowerTry(te.operand, node.span),
.catch_expr => |ce| self.lowerCatch(&ce, node.span),
.caller_location => self.lowerCallerLocation(node),
// Inline assembly parses (Phase A.1) but has no IR op / emit yet
// (Phases C–E). Bail LOUDLY with a named diagnostic rather than falling
// into the generic `unknown_expr` arm — the placeholder Ref makes
// `hasErrors()` abort the build on this message (CLAUDE.md no-silent-arm).
.asm_expr => blk: {
if (self.diagnostics) |diags| {
diags.addFmt(.err, node.span, "inline assembly codegen is not yet implemented (ASM stream: lowering + emit land in Phases CE)", .{});
}
break :blk self.emitPlaceholder("inline_asm");
},
else => self.emitError("unknown_expr", node.span),
};
}

View File

@@ -312,6 +312,10 @@ pub const UnknownTypeChecker = struct {
.comptime_expr => |ce| self.checkBindingNames(ce.expr),
.insert_expr => |ins| self.checkBindingNames(ins.expr),
.spread_expr => |se| self.checkBindingNames(se.operand),
.asm_expr => |ae| {
self.checkBindingNames(ae.template);
for (ae.operands) |op| self.checkBindingNames(op.payload);
},
// ── Named type / alias / import declarations: a bare reserved
// spelling as the declared name is rejected. These
// have no nested binding sites, so only the name is checked. A

View File

@@ -2702,6 +2702,105 @@ pub const Parser = struct {
return expr;
}
/// Reports whether the parser is positioned on a plain identifier token whose
/// source text equals `word`. This is how the contextual keywords `volatile`
/// and `clobbers` are recognized inside an `asm { … }` body without reserving
/// them globally.
fn isContextualWord(self: *const Parser, word: []const u8) bool {
    // Anything that is not an identifier token can never match.
    if (self.current.tag != .identifier) return false;
    const text = self.tokenSlice(self.current);
    return std.mem.eql(u8, text, word);
}
/// Inline assembly expression (ASM stream, design §II.2–II.4):
///   `asm volatile? { "tmpl", [name]? "constraint" (-> Type | = expr), …,
///   clobbers(.name, …) }`
/// A flat, comma-separated brace block: the template first, then operands
/// and an optional `clobbers(.…)` clause, source order preserved.
///
/// Must be called with the current token on `asm`; `start` is the byte offset
/// used as the start of the resulting node. Returns an `.asm_expr` node whose
/// `operands` / `clobbers` slices are allocated from `self.allocator`.
///
/// Fails with a parse error when:
///   - the opening `{` or closing `}` is missing,
///   - a `clobbers(...)` entry is not of the form `.name`,
///   - `[` is not followed by an identifier and `]`,
///   - an operand does not start with a string-literal constraint,
///   - the constraint is followed by neither `->` nor `=`,
///   - an output is written as `-> @place` (write-through; Phase 2).
fn parseAsmExpr(self: *Parser, start: u32) anyerror!*Node {
    self.advance(); // consume `asm`

    // `volatile` is contextual: only an identifier with that exact text,
    // directly after `asm`, is treated as the qualifier.
    const is_volatile = self.isContextualWord("volatile");
    if (is_volatile) self.advance();

    try self.expect(.l_brace);

    // First element: the template (a comptime string — `"..."` or `#string`).
    // Any expression is accepted here; its kind is not validated at parse time.
    const template = try self.parseExpr();

    var operands = std.ArrayList(ast.AsmOperand).empty;
    var clobbers = std.ArrayList([]const u8).empty;

    // Every further element is introduced by a comma; the loop ends at the
    // first token that is not one (normally the closing brace).
    while (self.current.tag == .comma) {
        self.advance(); // consume the separating comma
        if (self.current.tag == .r_brace) break; // trailing comma

        // `clobbers(.name, .name, …)` clause. May appear at any position and
        // more than once; all names accumulate into the same list.
        if (self.isContextualWord("clobbers")) {
            self.advance();
            try self.expect(.l_paren);
            while (true) {
                try self.expect(.dot);
                if (self.current.tag != .identifier)
                    return self.fail("expected a clobber name after '.' in clobbers(...)");
                try clobbers.append(self.allocator, self.tokenSlice(self.current));
                self.advance();
                if (self.current.tag != .comma) break;
                self.advance(); // consume the comma between clobber names
            }
            try self.expect(.r_paren);
            continue;
        }

        // Operand: `[name]? "constraint" (-> Type | = expr)`.
        var op_name: ?[]const u8 = null;
        if (self.current.tag == .l_bracket) {
            self.advance();
            if (self.current.tag != .identifier)
                return self.fail("expected an operand name in '[...]'");
            op_name = self.tokenSlice(self.current);
            self.advance();
            try self.expect(.r_bracket);
        }

        if (self.current.tag != .string_literal)
            return self.fail("expected a \"constraint\" string in asm operand");
        const craw = self.tokenSlice(self.current);
        // Defensive: stripping the quotes below needs at least two bytes. The
        // lexer presumably always yields both quotes (TODO confirm), but a
        // shorter token would otherwise underflow `craw.len - 1`.
        if (craw.len < 2)
            return self.fail("malformed \"constraint\" string in asm operand");
        const constraint = craw[1 .. craw.len - 1]; // strip quotes
        self.advance();

        // The token after the constraint selects the operand role. Nothing is
        // consumed when it is neither `->` nor `=`.
        const role: ast.AsmOperand.Role = switch (self.current.tag) {
            .arrow => .out_value,
            .equal => .input,
            else => return self.fail("expected '->' (output) or '=' (input) after the asm constraint"),
        };
        self.advance(); // consume `->` / `=`

        // `-> @place` would be a write-through output, which is a later phase.
        if (role == .out_value and self.current.tag == .at)
            return self.fail("`-> @place` write-through asm outputs are not supported yet (Phase 2); use a `-> Type` value output");

        // Outputs carry a type expression, inputs a value expression.
        const payload: *Node = if (role == .out_value)
            try self.parseTypeExpr()
        else
            try self.parseExpr();

        try operands.append(self.allocator, .{
            .name = op_name,
            .constraint = constraint,
            .role = role,
            .payload = payload,
        });
    }

    try self.expect(.r_brace);

    return try self.createNode(start, .{ .asm_expr = .{
        .template = template,
        .is_volatile = is_volatile,
        .operands = try operands.toOwnedSlice(self.allocator),
        .clobbers = try clobbers.toOwnedSlice(self.allocator),
    } });
}
fn parsePrimary(self: *Parser) anyerror!*Node {
const start = self.current.loc.start;
// Pack references in expression position:
@@ -2807,6 +2906,7 @@ pub const Parser = struct {
self.advance();
return try self.createNode(start, .{ .identifier = .{ .name = name } });
},
.kw_asm => return self.parseAsmExpr(start),
.dot => {
self.advance();
// Anonymous struct literal: .{ ... }

View File

@@ -1360,6 +1360,13 @@ pub const Analyzer = struct {
try self.analyzeNode(eb.body);
self.popScope();
},
.asm_expr => |ae| {
// Walk the template and each operand payload (input exprs;
// out_value type exprs are leaves). Result-type derivation is
// Phase B; lowering bails until then.
try self.analyzeNode(ae.template);
for (ae.operands) |op| try self.analyzeNode(op.payload);
},
.impl_block => |ib| {
// Each impl block gets its own scope so methods don't conflict across impls
try self.pushScope();
@@ -1830,6 +1837,12 @@ pub fn findNodeAtOffset(node: *Node, offset: u32) ?*Node {
if (findNodeAtOffset(d, offset)) |found| return found;
}
},
.asm_expr => |ae| {
if (findNodeAtOffset(ae.template, offset)) |found| return found;
for (ae.operands) |op| {
if (findNodeAtOffset(op.payload, offset)) |found| return found;
}
},
}
return node;