feat(asm): Phase C.1 + D — inline asm codegen (runs end-to-end)

lowerAsmExpr stops bailing and builds the inline_asm op: resolves each operand's
effective name (§II.5 — explicit [name] else the {reg} pin), interns
template/constraints/clobbers, lowers input Refs, derives the result TypeId
(0→void, 1→T). Adds the last deferred validation (every %[name] must name an
operand). Multi-output (N>1) bails with a named "Phase E" diagnostic.

emitInlineAsm (backend/llvm/ops.zig) ports Zig's airAssembly: assembles the LLVM
constraint string (outputs → inputs → ~{clobber}, ',' → '|'), rewrites the
template (%[name]→${N}, %%→%, $→$$, %=→${:uid}), then LLVMGetInlineAsm +
LLVMBuildCall2 (AT&T dialect). Dispatch wired in emit_llvm.zig (replacing the C.0
@panic tripwire).

inferType gains an .asm_expr arm (expr_typer.zig) so a bare `x := asm {…-> T}`
binding types correctly — without it the binding inferred .unresolved and
silently produced 0.

llvm_shim.c: LLVMInitializeNativeAsmParser() — the JIT must assemble inline asm
at run time.

Verified end-to-end on the aarch64 host: `mov`/`add` with register-class inputs
and a value output run (exit 42/99), `nop volatile` runs (exit 0). IR is
textbook: `call i64 asm "add ${0},${1},${2}", "=r,r,r"(…)`.

Locked with 1645 (aarch64 add, runs; ir-only on non-aarch64) + 1646 (:= binding).
Updated 1640 (now Phase-E bail) + 1642 (now runs).

zig build test green (654 corpus, 446 unit).
This commit is contained in:
agra
2026-06-15 21:39:54 +03:00
parent 6c08de8ec1
commit 5a5e04c6d5
23 changed files with 395 additions and 50 deletions

View File

@@ -24,6 +24,7 @@ const Call = ir_inst.Call;
const CallIndirect = ir_inst.CallIndirect;
const ObjcMsgSend = ir_inst.ObjcMsgSend;
const JniMsgSend = ir_inst.JniMsgSend;
const InlineAsm = ir_inst.InlineAsm;
const BuiltinCall = ir_inst.BuiltinCall;
const TriOp = ir_inst.TriOp;
const Branch = ir_inst.Branch;
@@ -774,6 +775,161 @@ pub const Ops = struct {
self.e.mapRef(result);
}
/// Emits one `inline_asm` instruction as an LLVM inline-assembly call (ASM
/// stream Phase D, ported from Zig's `airAssembly`).
///
/// The call returns void when `instruction.ty` is `.void`, otherwise the LLVM
/// type of `instruction.ty`. Tuple (multi-output) results are Phase E; lowering
/// bails before such an op can reach this function. Every `.input` operand
/// becomes exactly one call argument, in operand order. The constraint string
/// and the LLVM-form template are produced by `appendAsmConstraints` and
/// `renderAsmTemplate`, then handed to `LLVMGetInlineAsm` + `LLVMBuildCall2`.
pub fn emitInlineAsm(self: Ops, instruction: *const Inst, a: InlineAsm) void {
    const emitter = self.e;
    const gpa = emitter.alloc;
    const returns_void = instruction.ty == .void;

    // Only `.input` operands are passed to the call, so size the argument
    // arrays from their count.
    // NOTE(review): non-input operands contribute an output constraint below
    // but never a call argument or an extra result slot — confirm lowering
    // only lets through shapes for which that is correct.
    var input_count: usize = 0;
    for (a.operands) |operand| input_count += @intFromBool(operand.role == .input);

    const result_type = if (returns_void) emitter.cached_void else emitter.toLLVMType(instruction.ty);

    const arg_types = gpa.alloc(c.LLVMTypeRef, input_count) catch unreachable;
    defer gpa.free(arg_types);
    const arg_values = gpa.alloc(c.LLVMValueRef, input_count) catch unreachable;
    defer gpa.free(arg_values);

    // Fill both arrays in lock-step, visiting inputs in operand order.
    var slot: usize = 0;
    for (a.operands) |operand| {
        if (operand.role == .input) {
            const ir_type = emitter.argIRTypeOrFail(operand.operand);
            const arg_type = emitter.toLLVMType(ir_type);
            const raw_value = emitter.resolveRef(operand.operand);
            arg_types[slot] = arg_type;
            arg_values[slot] = emitter.coerceArg(raw_value, arg_type);
            slot += 1;
        }
    }

    // Constraint string layout: output constraints, input constraints, then
    // one `~{name}` entry per clobber — all comma-separated.
    var constraints: std.ArrayList(u8) = .empty;
    defer constraints.deinit(gpa);
    self.appendAsmConstraints(&constraints, a, false);
    self.appendAsmConstraints(&constraints, a, true);
    for (a.clobbers) |clobber| {
        const clobber_name = emitter.ir_mod.types.getString(clobber);
        if (constraints.items.len != 0) {
            constraints.append(gpa, ',') catch unreachable;
        }
        constraints.appendSlice(gpa, "~{") catch unreachable;
        constraints.appendSlice(gpa, clobber_name) catch unreachable;
        constraints.append(gpa, '}') catch unreachable;
    }

    // Template in LLVM form (`%[name]` -> `${N}`, `%%` -> `%`, `$` -> `$$`,
    // `%=` -> `${:uid}`).
    var template: std.ArrayList(u8) = .empty;
    defer template.deinit(gpa);
    self.renderAsmTemplate(&template, a);

    const callee_type = c.LLVMFunctionType(result_type, arg_types.ptr, @intCast(input_count), 0);
    const callee = c.LLVMGetInlineAsm(
        callee_type,
        template.items.ptr,
        template.items.len,
        constraints.items.ptr,
        constraints.items.len,
        @intFromBool(a.has_side_effects),
        0, // IsAlignStack
        c.LLVMInlineAsmDialectATT,
        0, // CanThrow
    );

    // The call result is only given a name when it is not void.
    const value_name: [*:0]const u8 = if (returns_void) "" else "asm";
    const call = c.LLVMBuildCall2(emitter.builder, callee_type, callee, arg_values.ptr, @intCast(input_count), value_name);

    // Map unconditionally: the IR Ref counter advances even for a void result.
    emitter.mapRef(call);
}
/// Appends the constraint text of every operand in one role group to `cons`:
/// the `.input` operands when `inputs` is true, all other operands when it is
/// false. Entries are comma-separated (a leading comma is added whenever
/// `cons` is already non-empty), and any `,` inside an operand's own
/// constraint is rewritten to `|`, LLVM's alternative-constraint separator.
/// Mirrors `FuncGen.airAssembly`.
fn appendAsmConstraints(self: Ops, cons: *std.ArrayList(u8), a: InlineAsm, inputs: bool) void {
    const gpa = self.e.alloc;
    for (a.operands) |operand| {
        // Skip operands that belong to the other role group.
        if ((operand.role == .input) != inputs) continue;

        if (cons.items.len != 0) {
            cons.append(gpa, ',') catch unreachable;
        }

        // Copy the constraint verbatim, then patch only the bytes just added
        // so separators written for earlier operands are left untouched.
        const text = self.e.ir_mod.types.getString(operand.constraint);
        const start = cons.items.len;
        cons.appendSlice(gpa, text) catch unreachable;
        std.mem.replaceScalar(u8, cons.items[start..], ',', '|');
    }
}
/// Looks up the LLVM operand position of the operand called `name` — the `N`
/// that `%[name]` is rewritten to in `${N}`. Positions are assigned by walking
/// the non-input operands first and the `.input` operands second, each group
/// in operand order; unnamed operands still occupy a position. Returns null
/// when no named operand matches (lowering rejects templates that reference
/// an unknown name, so callers expect a hit).
fn asmOperandIndex(self: Ops, a: InlineAsm, name: []const u8) ?usize {
    var position: usize = 0;
    const passes = [_]bool{ false, true };
    for (passes) |want_input| {
        for (a.operands) |operand| {
            if ((operand.role == .input) != want_input) continue;

            const matches = operand.name != .empty and
                std.mem.eql(u8, self.e.ir_mod.types.getString(operand.name), name);
            if (matches) return position;

            position += 1;
        }
    }
    return null;
}
/// Translates the asm template of `a` into LLVM's inline-asm syntax and
/// appends the result to `out`. Rewrites applied while scanning the bytes:
///   `$`          -> `$$`
///   `%%`         -> `%`
///   `%=`         -> `${:uid}`
///   `%[name]`    -> `${N}`      (N from `asmOperandIndex`)
///   `%[name:m]`  -> `${N:m}`
/// A `%` followed by anything else, or sitting at the very end of the
/// template, is copied through unchanged. Port of `FuncGen.zig`'s rewriter.
fn renderAsmTemplate(self: Ops, out: *std.ArrayList(u8), a: InlineAsm) void {
    const gpa = self.e.alloc;
    const template = self.e.ir_mod.types.getString(a.template);

    var pos: usize = 0;
    while (pos < template.len) {
        const byte = template[pos];

        if (byte == '$') {
            out.appendSlice(gpa, "$$") catch unreachable;
            pos += 1;
            continue;
        }

        // Anything that is not a `%` with a byte after it is plain text.
        const has_next = pos + 1 < template.len;
        if (byte != '%' or !has_next) {
            out.append(gpa, byte) catch unreachable;
            pos += 1;
            continue;
        }

        switch (template[pos + 1]) {
            '%' => {
                out.append(gpa, '%') catch unreachable;
                pos += 2;
            },
            '=' => {
                out.appendSlice(gpa, "${:uid}") catch unreachable;
                pos += 2;
            },
            '[' => {
                // Lowering has already rejected an unterminated `%[` and any
                // reference to an unknown operand, hence the two `.?` unwraps.
                const close = std.mem.indexOfScalarPos(u8, template, pos + 2, ']').?;
                const reference = template[pos + 2 .. close];
                const colon = std.mem.indexOfScalar(u8, reference, ':');
                const operand_name = if (colon) |at| reference[0..at] else reference;
                const operand_index = self.asmOperandIndex(a, operand_name).?;

                var digits: [16]u8 = undefined;
                const index_text = std.fmt.bufPrint(&digits, "{d}", .{operand_index}) catch unreachable;

                out.appendSlice(gpa, "${") catch unreachable;
                out.appendSlice(gpa, index_text) catch unreachable;
                if (colon) |at| {
                    // Carry the modifier after the first `:` over verbatim.
                    out.append(gpa, ':') catch unreachable;
                    out.appendSlice(gpa, reference[at + 1 ..]) catch unreachable;
                }
                out.append(gpa, '}') catch unreachable;
                pos = close + 1;
            },
            else => {
                // Not an escape: emit the `%` alone and rescan the next byte.
                out.append(gpa, '%') catch unreachable;
                pos += 1;
            },
        }
    }
}
pub fn emitCall(self: Ops, instruction: *const Inst, call_op: Call) void {
// Evaluate comptime functions at compile time
const callee_func = &self.e.ir_mod.functions.items[call_op.callee.index()];

View File

@@ -1563,11 +1563,7 @@ pub const LLVMEmitter = struct {
// ── Calls ─────────────────────────────────────────────
.objc_msg_send => |msg| self.ops().emitObjcMsgSend(instruction, msg),
.jni_msg_send => |msg| self.ops().emitJniMsgSend(instruction, msg),
// Tripwire (ASM stream): the IR op exists (Phase C.0) but emit lands
// in Phase D. Until then `lowerAsmExpr` still bails, so no inline_asm
// op is ever created — reaching here means lowering switched over
// before emit was ready. Crash loudly rather than miscompile.
.inline_asm => @panic("inline_asm reached LLVM emit before Phase D — lowering must still bail until emitInlineAsm lands"),
.inline_asm => |a| self.ops().emitInlineAsm(instruction, a),
.call => |call_op| self.ops().emitCall(instruction, call_op),
.call_indirect => |call_op| self.ops().emitCallIndirect(instruction, call_op),

View File

@@ -398,6 +398,22 @@ pub const ExprTyper = struct {
}
break :blk self.l.inferExprType(nc.rhs);
},
// Inline asm result type from the `out_value` operands: 0 → void,
// 1 → that operand's type. N>1 (tuple) is Phase E → `.unresolved`
// here (lowering bails on it anyway). Mirrors `lowerAsmExpr`, so a
// bare `x := asm {…-> T}` binding types correctly.
.asm_expr => |ae| blk: {
var n_out: usize = 0;
var first_out: ?*Node = null;
for (ae.operands) |op| {
if (op.role != .out_value) continue;
n_out += 1;
if (first_out == null) first_out = op.payload;
}
if (n_out == 0) break :blk .void;
if (n_out == 1) break :blk self.l.resolveTypeWithBindings(first_out.?);
break :blk .unresolved;
},
// Statements don't produce values (`.return_stmt` is handled above
// as `.noreturn` — it diverges rather than yielding `void`).
.assignment, .var_decl, .const_decl, .fn_decl,

View File

@@ -2261,9 +2261,98 @@ pub fn lowerAsmExpr(self: *Lowering, ae: *const ast.AsmExpr, span: ast.Span) Ref
return self.emitPlaceholder("inline_asm");
}
// Shape is valid — codegen just isn't implemented yet (Phases CE).
diags.addFmt(.err, span, "inline assembly codegen is not yet implemented (ASM stream: lowering + emit land in Phases CE)", .{});
return self.emitPlaceholder("inline_asm");
// (4) Every `%[name]` in the template must name an operand (effective name:
// explicit `[name]` or auto-derived register). Caught here so emit's
// template rewriter never sees an unknown reference. §II.6.
{
const tmpl = ae.template.data.string_literal.raw;
var i: usize = 0;
while (i < tmpl.len) : (i += 1) {
if (tmpl[i] != '%' or i + 1 >= tmpl.len) continue;
const nxt = tmpl[i + 1];
if (nxt == '%' or nxt == '=') {
i += 1;
continue;
}
if (nxt != '[') continue;
const close = std.mem.indexOfScalarPos(u8, tmpl, i + 2, ']') orelse {
diags.addFmt(.err, span, "unterminated `%[` in asm template", .{});
return self.emitPlaceholder("inline_asm");
};
var ref_name = tmpl[i + 2 .. close];
if (std.mem.indexOfScalar(u8, ref_name, ':')) |colon| ref_name = ref_name[0..colon];
var found = false;
for (ae.operands) |op| {
const eff = op.name orelse (pinnedRegister(op.constraint) orelse "");
if (eff.len != 0 and std.mem.eql(u8, eff, ref_name)) {
found = true;
break;
}
}
if (!found) {
diags.addFmt(.err, span, "asm template references `%[{s}]` but no operand is named `{s}`", .{ ref_name, ref_name });
return self.emitPlaceholder("inline_asm");
}
i = close;
}
}
// ── Build the IR op (C.1). D emits 0 or 1 value output; N>1 (tuple result)
// is Phase E — bail loudly until then. ──
var n_value_outputs: usize = 0;
for (ae.operands) |op| {
if (op.role == .out_value) n_value_outputs += 1;
}
if (n_value_outputs > 1) {
diags.addFmt(.err, span, "multi-output (tuple-returning) inline assembly is not yet implemented (ASM stream Phase E)", .{});
return self.emitPlaceholder("inline_asm");
}
// Result type: 0 outputs → void; 1 → that operand's resolved type. (The
// resolver diagnoses an unresolvable type and returns `.unresolved`.)
var result_ty: TypeId = .void;
for (ae.operands) |op| {
if (op.role == .out_value) {
result_ty = self.resolveTypeWithBindings(op.payload);
break;
}
}
if (result_ty == .unresolved) return self.emitPlaceholder("inline_asm");
// IR operands, in source order (= `%N` index space + LLVM operand order).
const ir_ops = self.alloc.alloc(inst_mod.InlineAsm.AsmOperand, ae.operands.len) catch unreachable;
for (ae.operands, 0..) |op, i| {
// Effective name (design §II.5): explicit `[name]`, else auto-derived
// from a `{reg}` pin, else anonymous (`.empty`).
const eff_name: []const u8 = op.name orelse (pinnedRegister(op.constraint) orelse "");
ir_ops[i] = .{
.role = switch (op.role) {
.out_value => .out_value,
.out_place => .out_place,
.input => .input,
},
.name = if (eff_name.len == 0) types.StringId.empty else self.module.types.internString(eff_name),
.constraint = self.module.types.internString(op.constraint),
// input → the lowered value Ref; an output yields its value (none).
.operand = if (op.role == .input) self.lowerExpr(op.payload) else Ref.none,
};
}
const ir_clobbers = self.alloc.alloc(types.StringId, ae.clobbers.len) catch unreachable;
for (ae.clobbers, 0..) |cl, i| {
ir_clobbers[i] = self.module.types.internString(cl);
}
// Template text RAW — no sx escape processing (matches `#string` literal
// bytes; the `%[name]`/`%%`/`$` rewrite happens at emit). §II.11.
const template_text = ae.template.data.string_literal.raw;
return self.builder.emit(.{ .inline_asm = .{
.template = self.module.types.internString(template_text),
.operands = ir_ops,
.clobbers = ir_clobbers,
.has_side_effects = ae.is_volatile,
} }, result_ty);
}
/// If `node` names a `for xs: (*x)` by-ref capture (an `*elem`), returns