feat(asm): Phase C.1 + D — inline asm codegen (runs end-to-end)

lowerAsmExpr stops bailing and builds the inline_asm op: resolves each operand's
effective name (§II.5 — explicit [name] else the {reg} pin), interns
template/constraints/clobbers, lowers input Refs, derives the result TypeId
(0→void, 1→T). Adds the last deferred validation (every %[name] must name an
operand). Multi-output (N>1) bails with a named "Phase E" diagnostic.

emitInlineAsm (backend/llvm/ops.zig) ports Zig's airAssembly: assembles the LLVM
constraint string (outputs → inputs → ~{clobber}, ',' → '|'), rewrites the
template (%[name]→${N}, %%→%, $→$$, %=→${:uid}), then LLVMGetInlineAsm +
LLVMBuildCall2 (AT&T dialect). Dispatch wired in emit_llvm.zig (replacing the C.0
@panic tripwire).

inferType gains an .asm_expr arm (expr_typer.zig) so a bare `x := asm {…-> T}`
binding types correctly — without it the binding inferred .unresolved and
silently produced 0.

llvm_shim.c: LLVMInitializeNativeAsmParser() — the JIT must assemble inline asm
at run time.

Verified end-to-end on the aarch64 host: `mov`/`add` with register-class inputs
and a value output run (exit 42/99), `nop volatile` runs (exit 0). IR is
textbook: `call i64 asm "add ${0},${1},${2}", "=r,r,r"(…)`.

Locked with 1645 (aarch64 add, runs; ir-only on non-aarch64) + 1646 (:= binding).
Updated 1640 (now Phase-E bail) + 1642 (now runs).

zig build test green (654 corpus, 446 unit).
This commit is contained in:
agra
2026-06-15 21:39:54 +03:00
parent 6c08de8ec1
commit 5a5e04c6d5
23 changed files with 395 additions and 50 deletions

View File

@@ -24,6 +24,7 @@ const Call = ir_inst.Call;
const CallIndirect = ir_inst.CallIndirect;
const ObjcMsgSend = ir_inst.ObjcMsgSend;
const JniMsgSend = ir_inst.JniMsgSend;
const InlineAsm = ir_inst.InlineAsm;
const BuiltinCall = ir_inst.BuiltinCall;
const TriOp = ir_inst.TriOp;
const Branch = ir_inst.Branch;
@@ -774,6 +775,161 @@ pub const Ops = struct {
self.e.mapRef(result);
}
/// Emit an inline-assembly call (ASM stream Phase D) — the port of Zig's
/// `airAssembly`.
///
/// Handles zero value outputs (`instruction.ty == .void`) and one scalar
/// output; multi-output tuples are Phase E, and lowering bails before reaching
/// here. Every operand whose role is `.input` becomes one call parameter, in
/// source order. The asm value is created with the AT&T dialect, and the
/// result of `LLVMBuildCall2` is always registered through `mapRef`, even for
/// void asm.
///
/// Allocation failure panics with a message instead of being declared
/// `unreachable`, so running out of memory is never undefined behaviour in
/// unchecked release builds.
pub fn emitInlineAsm(self: Ops, instruction: *const Inst, a: InlineAsm) void {
    const oom = "emitInlineAsm: out of memory";
    const e = self.e;
    const alloc = e.alloc;

    // Only `.input` operands become LLVM call parameters.
    var n_inputs: usize = 0;
    for (a.operands) |op| {
        if (op.role == .input) n_inputs += 1;
    }

    // Result LLVM type: void (no value output) or the single scalar.
    const ret_ty = if (instruction.ty == .void) e.cached_void else e.toLLVMType(instruction.ty);

    // One LLVM call param per input operand, in source order.
    const param_types = alloc.alloc(c.LLVMTypeRef, n_inputs) catch @panic(oom);
    defer alloc.free(param_types);
    const call_args = alloc.alloc(c.LLVMValueRef, n_inputs) catch @panic(oom);
    defer alloc.free(call_args);
    {
        var i: usize = 0;
        for (a.operands) |op| {
            if (op.role != .input) continue;
            const raw_ty = e.argIRTypeOrFail(op.operand);
            const llvm_ty = e.toLLVMType(raw_ty);
            param_types[i] = llvm_ty;
            call_args[i] = e.coerceArg(e.resolveRef(op.operand), llvm_ty);
            i += 1;
        }
    }

    // ── Constraint string: outputs first, then inputs, then ~{clobber}. ──
    var cons: std.ArrayList(u8) = .empty;
    defer cons.deinit(alloc);
    self.appendAsmConstraints(&cons, a, false); // outputs (out_value / out_place)
    self.appendAsmConstraints(&cons, a, true); // inputs
    for (a.clobbers) |cl| {
        if (cons.items.len != 0) cons.append(alloc, ',') catch @panic(oom);
        cons.appendSlice(alloc, "~{") catch @panic(oom);
        cons.appendSlice(alloc, e.ir_mod.types.getString(cl)) catch @panic(oom);
        cons.append(alloc, '}') catch @panic(oom);
    }

    // ── Template rewrite: %[name]->${N}, %%->%, $->$$, %=->${:uid}. ──
    var rendered: std.ArrayList(u8) = .empty;
    defer rendered.deinit(alloc);
    self.renderAsmTemplate(&rendered, a);

    const fn_ty = c.LLVMFunctionType(ret_ty, param_types.ptr, @intCast(n_inputs), 0);
    const asm_val = c.LLVMGetInlineAsm(
        fn_ty,
        rendered.items.ptr,
        rendered.items.len,
        cons.items.ptr,
        cons.items.len,
        @intFromBool(a.has_side_effects),
        0, // IsAlignStack
        c.LLVMInlineAsmDialectATT,
        0, // CanThrow
    );

    // A void call gets an empty label; a value-producing call is labelled "asm".
    const label: [*:0]const u8 = if (instruction.ty == .void) "" else "asm";
    const result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_inputs), label);
    // Always mapRef — the IR Ref counter advances regardless of result type.
    e.mapRef(result);
}
/// Append the constraint fragments for one role group to `cons`.
///
/// `inputs == true` selects operands whose role is `.input`; `inputs == false`
/// selects every other operand (the outputs). Fragments are comma-separated —
/// a separator is also emitted when `cons` already holds text — and any `,`
/// inside an operand's own constraint is rewritten to `|`, LLVM's
/// alternative-constraint separator. Mirrors `FuncGen.airAssembly`.
///
/// Allocation failure panics with a message instead of being declared
/// `unreachable`, so running out of memory is never undefined behaviour in
/// unchecked release builds.
fn appendAsmConstraints(self: Ops, cons: *std.ArrayList(u8), a: InlineAsm, inputs: bool) void {
    const oom = "appendAsmConstraints: out of memory";
    const e = self.e;
    const alloc = e.alloc;
    for (a.operands) |op| {
        // Skip operands that belong to the other role group.
        if ((op.role == .input) != inputs) continue;
        if (cons.items.len != 0) cons.append(alloc, ',') catch @panic(oom);
        const constraint = e.ir_mod.types.getString(op.constraint);
        for (constraint) |ch| {
            cons.append(alloc, if (ch == ',') '|' else ch) catch @panic(oom);
        }
    }
}
/// Find the position of the operand called `name` in the LLVM operand list —
/// the `N` that `%[name]` is rewritten to (`${N}`).
///
/// Positions are assigned in two passes over `a.operands`: first every
/// non-input operand (the outputs), then every `.input` operand. Operands
/// without a name still occupy a position. Returns `null` when no operand
/// carries `name`; lowering guarantees every `%[name]` names an operand, so
/// callers can assume a hit.
fn asmOperandIndex(self: Ops, a: InlineAsm, name: []const u8) ?usize {
    var position: usize = 0;
    const passes = [_]bool{ false, true }; // outputs first, then inputs
    for (passes) |want_input| {
        for (a.operands) |operand| {
            if ((operand.role == .input) != want_input) continue;
            if (operand.name != .empty) {
                const candidate = self.e.ir_mod.types.getString(operand.name);
                if (std.mem.eql(u8, candidate, name)) return position;
            }
            position += 1;
        }
    }
    return null;
}
/// Rewrite the asm template into LLVM form and append it to `out`.
///
/// Scans the template bytes left to right:
///   * `$`           → `$$`
///   * `%%`          → `%`
///   * `%=`          → `${:uid}`
///   * `%[name]`     → `${N}`
///   * `%[name:mod]` → `${N:mod}`
/// where `N` is the operand position reported by `asmOperandIndex`. Any other
/// byte — including a `%` that starts none of the sequences above, or a `%` at
/// the very end — is copied through unchanged. Port of `FuncGen.zig`'s
/// template rewriter.
///
/// Lowering is expected to have validated every `%[name]`. If an unterminated
/// `%[` or an unknown operand name reaches this point anyway, the function
/// panics with a message rather than unwrapping `null`, which would be
/// undefined behaviour in unchecked release builds. Allocation failure panics
/// the same way.
fn renderAsmTemplate(self: Ops, out: *std.ArrayList(u8), a: InlineAsm) void {
    const oom = "renderAsmTemplate: out of memory";
    const e = self.e;
    const alloc = e.alloc;
    const tmpl = e.ir_mod.types.getString(a.template);
    var i: usize = 0;
    while (i < tmpl.len) {
        const ch = tmpl[i];
        if (ch == '$') {
            // A literal `$` must be escaped for LLVM.
            out.appendSlice(alloc, "$$") catch @panic(oom);
            i += 1;
            continue;
        }
        if (ch == '%' and i + 1 < tmpl.len) {
            switch (tmpl[i + 1]) {
                '%' => {
                    out.append(alloc, '%') catch @panic(oom);
                    i += 2;
                    continue;
                },
                '=' => {
                    out.appendSlice(alloc, "${:uid}") catch @panic(oom);
                    i += 2;
                    continue;
                },
                '[' => {
                    const close = std.mem.indexOfScalarPos(u8, tmpl, i + 2, ']') orelse
                        @panic("renderAsmTemplate: unterminated %[ in asm template (lowering should have rejected it)");
                    var name = tmpl[i + 2 .. close];
                    var modifier: ?[]const u8 = null;
                    // Split an optional `:mod` suffix off the operand name.
                    if (std.mem.indexOfScalar(u8, name, ':')) |colon| {
                        modifier = name[colon + 1 ..];
                        name = name[0..colon];
                    }
                    const idx = self.asmOperandIndex(a, name) orelse
                        @panic("renderAsmTemplate: %[name] does not name an asm operand (lowering should have rejected it)");
                    // 20 bytes hold the decimal form of any 64-bit `usize`, so
                    // the formatting below cannot run out of space.
                    var buf: [20]u8 = undefined;
                    const ds = std.fmt.bufPrint(&buf, "{d}", .{idx}) catch unreachable;
                    out.appendSlice(alloc, "${") catch @panic(oom);
                    out.appendSlice(alloc, ds) catch @panic(oom);
                    if (modifier) |m| {
                        out.append(alloc, ':') catch @panic(oom);
                        out.appendSlice(alloc, m) catch @panic(oom);
                    }
                    out.append(alloc, '}') catch @panic(oom);
                    i = close + 1;
                    continue;
                },
                // Not an escape sequence: fall through and copy the `%` as-is.
                else => {},
            }
        }
        out.append(alloc, ch) catch @panic(oom);
        i += 1;
    }
}
pub fn emitCall(self: Ops, instruction: *const Inst, call_op: Call) void {
// Evaluate comptime functions at compile time
const callee_func = &self.e.ir_mod.functions.items[call_op.callee.index()];