feat(asm): Phase C.1 + D — inline asm codegen (runs end-to-end)
lowerAsmExpr stops bailing and builds the inline_asm op: resolves each operand's
effective name (§II.5 — explicit [name] else the {reg} pin), interns
template/constraints/clobbers, lowers input Refs, derives the result TypeId
(0→void, 1→T). Adds the last deferred validation (every %[name] must name an
operand). Multi-output (N>1) bails with a named "Phase E" diagnostic.
emitInlineAsm (backend/llvm/ops.zig) ports Zig's airAssembly: assembles the LLVM
constraint string (outputs → inputs → ~{clobber}, ',' → '|'), rewrites the
template (%[name]→${N}, %%→%, $→$$, %=→${:uid}), then LLVMGetInlineAsm +
LLVMBuildCall2 (AT&T dialect). Dispatch wired in emit_llvm.zig (replacing the C.0
@panic tripwire).
inferType gains an .asm_expr arm (expr_typer.zig) so a bare `x := asm {…-> T}`
binding types correctly — without it the binding inferred .unresolved and
silently produced 0.
llvm_shim.c: LLVMInitializeNativeAsmParser() — the JIT must assemble inline asm
at run time.
Verified end-to-end on the aarch64 host: `mov`/`add` with register-class inputs
and a value output run (exit 42/99), `nop volatile` runs (exit 0). IR is
textbook: `call i64 asm "add ${0},${1},${2}", "=r,r,r"(…)`.
Locked with 1645 (aarch64 add, runs; ir-only on non-aarch64) + 1646 (:= binding).
Updated 1640 (now Phase-E bail) + 1642 (now runs).
zig build test green (654 corpus, 446 unit).
This commit is contained in:
@@ -24,6 +24,7 @@ const Call = ir_inst.Call;
|
||||
const CallIndirect = ir_inst.CallIndirect;
|
||||
const ObjcMsgSend = ir_inst.ObjcMsgSend;
|
||||
const JniMsgSend = ir_inst.JniMsgSend;
|
||||
const InlineAsm = ir_inst.InlineAsm;
|
||||
const BuiltinCall = ir_inst.BuiltinCall;
|
||||
const TriOp = ir_inst.TriOp;
|
||||
const Branch = ir_inst.Branch;
|
||||
@@ -774,6 +775,161 @@ pub const Ops = struct {
|
||||
self.e.mapRef(result);
|
||||
}
|
||||
|
||||
/// Emit one `inline_asm` IR op as an LLVM inline-asm call (ASM stream Phase D,
/// modelled on Zig's `airAssembly`).
///
/// The call returns void when `instruction.ty` is `.void`, otherwise the LLVM
/// type of `instruction.ty`. Every `.input` operand becomes one call argument,
/// in operand order. The constraint string is assembled as outputs, then
/// inputs, then one `~{name}` entry per clobber; the template is rewritten by
/// `renderAsmTemplate`. The asm value is created in the AT&T dialect with
/// `IsAlignStack` and `CanThrow` both 0, and `a.has_side_effects` as the
/// side-effect flag.
///
/// The resulting call is always registered with `mapRef`, void or not.
pub fn emitInlineAsm(self: Ops, instruction: *const Inst, a: InlineAsm) void {
    const e = self.e;
    const alloc = e.alloc;
    const returns_void = instruction.ty == .void;

    var input_count: usize = 0;
    for (a.operands) |asm_op| input_count += @intFromBool(asm_op.role == .input);

    // LLVM return type: void, or the type of the single value output.
    const result_llvm_ty = if (returns_void) e.cached_void else e.toLLVMType(instruction.ty);

    // Parallel arrays: parameter type and coerced argument value per input.
    // NOTE(review): only `.input` operands get a call argument, yet
    // `appendAsmConstraints` emits a constraint for every non-input operand —
    // confirm that an `.out_place` operand cannot reach this point yet.
    const arg_types = alloc.alloc(c.LLVMTypeRef, input_count) catch unreachable;
    defer alloc.free(arg_types);
    const arg_values = alloc.alloc(c.LLVMValueRef, input_count) catch unreachable;
    defer alloc.free(arg_values);

    var slot: usize = 0;
    for (a.operands) |asm_op| {
        if (asm_op.role == .input) {
            const llvm_ty = e.toLLVMType(e.argIRTypeOrFail(asm_op.operand));
            arg_types[slot] = llvm_ty;
            arg_values[slot] = e.coerceArg(e.resolveRef(asm_op.operand), llvm_ty);
            slot += 1;
        }
    }

    // Constraint string: output group, input group, then the clobber list.
    var constraints: std.ArrayList(u8) = .empty;
    defer constraints.deinit(alloc);
    self.appendAsmConstraints(&constraints, a, false);
    self.appendAsmConstraints(&constraints, a, true);
    for (a.clobbers) |clobber| {
        if (constraints.items.len > 0) constraints.append(alloc, ',') catch unreachable;
        constraints.appendSlice(alloc, "~{") catch unreachable;
        constraints.appendSlice(alloc, e.ir_mod.types.getString(clobber)) catch unreachable;
        constraints.append(alloc, '}') catch unreachable;
    }

    // Template in LLVM syntax (`%[name]` -> `${N}`, `%%` -> `%`, `$` -> `$$`,
    // `%=` -> `${:uid}`).
    var asm_text: std.ArrayList(u8) = .empty;
    defer asm_text.deinit(alloc);
    self.renderAsmTemplate(&asm_text, a);

    const callee_ty = c.LLVMFunctionType(result_llvm_ty, arg_types.ptr, @intCast(input_count), 0);
    const callee = c.LLVMGetInlineAsm(
        callee_ty,
        asm_text.items.ptr,
        asm_text.items.len,
        constraints.items.ptr,
        constraints.items.len,
        @intFromBool(a.has_side_effects),
        0, // IsAlignStack
        c.LLVMInlineAsmDialectATT,
        0, // CanThrow
    );

    // A void call gets an empty value name; a value-producing call is named "asm".
    const value_name: [*:0]const u8 = if (returns_void) "" else "asm";
    const call = c.LLVMBuildCall2(e.builder, callee_ty, callee, arg_values.ptr, @intCast(input_count), value_name);

    // Registered unconditionally: the IR Ref counter advances regardless of
    // the result type.
    e.mapRef(call);
}
|
||||
|
||||
/// Append the constraints of one operand group to `cons`: the `.input`
/// operands when `inputs` is true, every other operand when it is false.
///
/// Fragments are joined with `,` (a separator is also written when `cons`
/// already holds text from an earlier group). Inside each fragment, `,` is
/// replaced by `|`, LLVM's alternative-constraint separator. Mirrors
/// `FuncGen.airAssembly`.
fn appendAsmConstraints(self: Ops, cons: *std.ArrayList(u8), a: InlineAsm, inputs: bool) void {
    const e = self.e;
    const alloc = e.alloc;
    for (a.operands) |asm_op| {
        const in_group = (asm_op.role == .input) == inputs;
        if (!in_group) continue;

        if (cons.items.len > 0) cons.append(alloc, ',') catch unreachable;

        // Copy the fragment, then rewrite its commas in place so the group
        // separators written above are left untouched.
        const fragment_start = cons.items.len;
        cons.appendSlice(alloc, e.ir_mod.types.getString(asm_op.constraint)) catch unreachable;
        std.mem.replaceScalar(u8, cons.items[fragment_start..], ',', '|');
    }
}
|
||||
|
||||
/// Find the LLVM operand position of the operand called `name` — the `N` that
/// replaces `%[name]` in `${N}`.
///
/// Positions number every non-input operand first (in operand order), then
/// every `.input` operand. Operands whose name is `.empty` never match but
/// still occupy a position. If both an output and an input carry `name`, the
/// output wins. Returns null when nothing matches; lowering is expected to
/// have rejected such templates already, so callers may unwrap the result.
fn asmOperandIndex(self: Ops, a: InlineAsm, name: []const u8) ?usize {
    const e = self.e;
    var outputs_seen: usize = 0;
    var inputs_seen: usize = 0;
    var first_input_match: ?usize = null;

    for (a.operands) |asm_op| {
        const matches = asm_op.name != .empty and
            std.mem.eql(u8, e.ir_mod.types.getString(asm_op.name), name);

        if (asm_op.role == .input) {
            // Remember only the earliest matching input; its final position
            // depends on the total output count, known after the loop.
            if (matches and first_input_match == null) first_input_match = inputs_seen;
            inputs_seen += 1;
        } else {
            if (matches) return outputs_seen;
            outputs_seen += 1;
        }
    }

    return if (first_input_match) |input_pos| outputs_seen + input_pos else null;
}
|
||||
|
||||
/// Translate the op's asm template into LLVM inline-asm syntax, appending the
/// result to `out`. Port of `FuncGen.zig`'s template rewriter.
///
/// Rewrite rules, applied left to right over the template bytes:
///   `$`           -> `$$`
///   `%%`          -> `%`
///   `%=`          -> `${:uid}`
///   `%[name]`     -> `${N}`
///   `%[name:mod]` -> `${N:mod}`
/// where `N` comes from `asmOperandIndex`. Any other byte — including a `%`
/// that ends the template or precedes a different character — is copied
/// verbatim.
///
/// Relies on lowering having validated the template: an unterminated `%[` or
/// an unknown operand name trips the `.?` unwraps below.
fn renderAsmTemplate(self: Ops, out: *std.ArrayList(u8), a: InlineAsm) void {
    const e = self.e;
    const alloc = e.alloc;
    const template = e.ir_mod.types.getString(a.template);

    var pos: usize = 0;
    while (pos < template.len) {
        const byte = template[pos];

        if (byte == '$') {
            out.appendSlice(alloc, "$$") catch unreachable;
            pos += 1;
            continue;
        }

        if (byte == '%' and pos + 1 < template.len) {
            switch (template[pos + 1]) {
                '%' => {
                    out.append(alloc, '%') catch unreachable;
                    pos += 2;
                    continue;
                },
                '=' => {
                    out.appendSlice(alloc, "${:uid}") catch unreachable;
                    pos += 2;
                    continue;
                },
                '[' => {
                    const body_start = pos + 2;
                    const close = std.mem.indexOfScalarPos(u8, template, body_start, ']').?; // lowering validated
                    const body = template[body_start..close];

                    // Optional `:modifier` suffix after the operand name.
                    const colon = std.mem.indexOfScalar(u8, body, ':');
                    const operand_name = if (colon) |at| body[0..at] else body;
                    const operand_index = self.asmOperandIndex(a, operand_name).?; // lowering validated

                    var digits: [16]u8 = undefined;
                    const index_text = std.fmt.bufPrint(&digits, "{d}", .{operand_index}) catch unreachable;

                    out.appendSlice(alloc, "${") catch unreachable;
                    out.appendSlice(alloc, index_text) catch unreachable;
                    if (colon) |at| {
                        out.append(alloc, ':') catch unreachable;
                        out.appendSlice(alloc, body[at + 1 ..]) catch unreachable;
                    }
                    out.append(alloc, '}') catch unreachable;

                    pos = close + 1;
                    continue;
                },
                else => {},
            }
        }

        // Ordinary byte, or a `%` that starts no recognised escape.
        out.append(alloc, byte) catch unreachable;
        pos += 1;
    }
}
|
||||
|
||||
pub fn emitCall(self: Ops, instruction: *const Inst, call_op: Call) void {
|
||||
// Evaluate comptime functions at compile time
|
||||
const callee_func = &self.e.ir_mod.functions.items[call_op.callee.index()];
|
||||
|
||||
@@ -1563,11 +1563,7 @@ pub const LLVMEmitter = struct {
|
||||
// ── Calls ─────────────────────────────────────────────
|
||||
.objc_msg_send => |msg| self.ops().emitObjcMsgSend(instruction, msg),
|
||||
.jni_msg_send => |msg| self.ops().emitJniMsgSend(instruction, msg),
|
||||
// Tripwire (ASM stream): the IR op exists (Phase C.0) but emit lands
|
||||
// in Phase D. Until then `lowerAsmExpr` still bails, so no inline_asm
|
||||
// op is ever created — reaching here means lowering switched over
|
||||
// before emit was ready. Crash loudly rather than miscompile.
|
||||
.inline_asm => @panic("inline_asm reached LLVM emit before Phase D — lowering must still bail until emitInlineAsm lands"),
|
||||
.inline_asm => |a| self.ops().emitInlineAsm(instruction, a),
|
||||
.call => |call_op| self.ops().emitCall(instruction, call_op),
|
||||
.call_indirect => |call_op| self.ops().emitCallIndirect(instruction, call_op),
|
||||
|
||||
|
||||
@@ -398,6 +398,22 @@ pub const ExprTyper = struct {
|
||||
}
|
||||
break :blk self.l.inferExprType(nc.rhs);
|
||||
},
|
||||
// Inline asm result type from the `out_value` operands: 0 → void,
|
||||
// 1 → that operand's type. N>1 (tuple) is Phase E → `.unresolved`
|
||||
// here (lowering bails on it anyway). Mirrors `lowerAsmExpr`, so a
|
||||
// bare `x := asm {…-> T}` binding types correctly.
|
||||
.asm_expr => |ae| blk: {
|
||||
var n_out: usize = 0;
|
||||
var first_out: ?*Node = null;
|
||||
for (ae.operands) |op| {
|
||||
if (op.role != .out_value) continue;
|
||||
n_out += 1;
|
||||
if (first_out == null) first_out = op.payload;
|
||||
}
|
||||
if (n_out == 0) break :blk .void;
|
||||
if (n_out == 1) break :blk self.l.resolveTypeWithBindings(first_out.?);
|
||||
break :blk .unresolved;
|
||||
},
|
||||
// Statements don't produce values (`.return_stmt` is handled above
|
||||
// as `.noreturn` — it diverges rather than yielding `void`).
|
||||
.assignment, .var_decl, .const_decl, .fn_decl,
|
||||
|
||||
@@ -2261,9 +2261,98 @@ pub fn lowerAsmExpr(self: *Lowering, ae: *const ast.AsmExpr, span: ast.Span) Ref
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
}
|
||||
|
||||
// Shape is valid — codegen just isn't implemented yet (Phases C–E).
|
||||
diags.addFmt(.err, span, "inline assembly codegen is not yet implemented (ASM stream: lowering + emit land in Phases C–E)", .{});
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
// (4) Every `%[name]` in the template must name an operand (effective name:
|
||||
// explicit `[name]` or auto-derived register). Caught here so emit's
|
||||
// template rewriter never sees an unknown reference. §II.6.
|
||||
{
|
||||
const tmpl = ae.template.data.string_literal.raw;
|
||||
var i: usize = 0;
|
||||
while (i < tmpl.len) : (i += 1) {
|
||||
if (tmpl[i] != '%' or i + 1 >= tmpl.len) continue;
|
||||
const nxt = tmpl[i + 1];
|
||||
if (nxt == '%' or nxt == '=') {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if (nxt != '[') continue;
|
||||
const close = std.mem.indexOfScalarPos(u8, tmpl, i + 2, ']') orelse {
|
||||
diags.addFmt(.err, span, "unterminated `%[` in asm template", .{});
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
};
|
||||
var ref_name = tmpl[i + 2 .. close];
|
||||
if (std.mem.indexOfScalar(u8, ref_name, ':')) |colon| ref_name = ref_name[0..colon];
|
||||
var found = false;
|
||||
for (ae.operands) |op| {
|
||||
const eff = op.name orelse (pinnedRegister(op.constraint) orelse "");
|
||||
if (eff.len != 0 and std.mem.eql(u8, eff, ref_name)) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
diags.addFmt(.err, span, "asm template references `%[{s}]` but no operand is named `{s}`", .{ ref_name, ref_name });
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
}
|
||||
i = close;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Build the IR op (C.1). D emits 0 or 1 value output; N>1 (tuple result)
|
||||
// is Phase E — bail loudly until then. ──
|
||||
var n_value_outputs: usize = 0;
|
||||
for (ae.operands) |op| {
|
||||
if (op.role == .out_value) n_value_outputs += 1;
|
||||
}
|
||||
if (n_value_outputs > 1) {
|
||||
diags.addFmt(.err, span, "multi-output (tuple-returning) inline assembly is not yet implemented (ASM stream Phase E)", .{});
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
}
|
||||
|
||||
// Result type: 0 outputs → void; 1 → that operand's resolved type. (The
|
||||
// resolver diagnoses an unresolvable type and returns `.unresolved`.)
|
||||
var result_ty: TypeId = .void;
|
||||
for (ae.operands) |op| {
|
||||
if (op.role == .out_value) {
|
||||
result_ty = self.resolveTypeWithBindings(op.payload);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (result_ty == .unresolved) return self.emitPlaceholder("inline_asm");
|
||||
|
||||
// IR operands, in source order (= `%N` index space + LLVM operand order).
|
||||
const ir_ops = self.alloc.alloc(inst_mod.InlineAsm.AsmOperand, ae.operands.len) catch unreachable;
|
||||
for (ae.operands, 0..) |op, i| {
|
||||
// Effective name (design §II.5): explicit `[name]`, else auto-derived
|
||||
// from a `{reg}` pin, else anonymous (`.empty`).
|
||||
const eff_name: []const u8 = op.name orelse (pinnedRegister(op.constraint) orelse "");
|
||||
ir_ops[i] = .{
|
||||
.role = switch (op.role) {
|
||||
.out_value => .out_value,
|
||||
.out_place => .out_place,
|
||||
.input => .input,
|
||||
},
|
||||
.name = if (eff_name.len == 0) types.StringId.empty else self.module.types.internString(eff_name),
|
||||
.constraint = self.module.types.internString(op.constraint),
|
||||
// input → the lowered value Ref; an output yields its value (none).
|
||||
.operand = if (op.role == .input) self.lowerExpr(op.payload) else Ref.none,
|
||||
};
|
||||
}
|
||||
|
||||
const ir_clobbers = self.alloc.alloc(types.StringId, ae.clobbers.len) catch unreachable;
|
||||
for (ae.clobbers, 0..) |cl, i| {
|
||||
ir_clobbers[i] = self.module.types.internString(cl);
|
||||
}
|
||||
|
||||
// Template text RAW — no sx escape processing (matches `#string` literal
|
||||
// bytes; the `%[name]`/`%%`/`$` rewrite happens at emit). §II.11.
|
||||
const template_text = ae.template.data.string_literal.raw;
|
||||
|
||||
return self.builder.emit(.{ .inline_asm = .{
|
||||
.template = self.module.types.internString(template_text),
|
||||
.operands = ir_ops,
|
||||
.clobbers = ir_clobbers,
|
||||
.has_side_effects = ae.is_volatile,
|
||||
} }, result_ty);
|
||||
}
|
||||
|
||||
/// If `node` names a `for xs: (*x)` by-ref capture (an `*elem`), returns
|
||||
|
||||
Reference in New Issue
Block a user