feat(asm): read-write + place outputs
Implements read-write (`+r` / `+{reg}`) `-> @place` outputs. LLVM has
no `+` constraint, so a read-write place lowers to:
- an output `=` constraint (return slot, stored back through the
place after the call), with the leading `+` rewritten to `=`; plus
- a TIED input constraint (the decimal index of that output) appended
after the regular inputs, seeded with the place's loaded value
passed as a call arg.
Tied inputs are appended last so existing operand indices (%[name] ->
${N}) are undisturbed; asmOperandIndex stays correct. Lowering no longer
rejects `+` (indirect `*` still rejected). emitInlineAsm grows the
arg/param arrays by the rw count, loads each seed, and emits the tied
constraint.
Verified by running: increment-in-place (41 -> 42) and a mixed case
(rw place + regular input + value output) producing the textbook
"=r,=r,r,0" constraint with correct ${N} indices. 1650 flipped from
the rejection lock to a runnable aarch64-pinned example (IR-only on
other targets). `zig build test` is green (658 corpus, 446 unit).
This commit is contained in:
1
examples/expected/1650-platform-asm-rw-place.build
Normal file
1
examples/expected/1650-platform-asm-rw-place.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "macos" }
|
||||
@@ -1 +1 @@
|
||||
1
|
||||
42
|
||||
|
||||
20
examples/expected/1650-platform-asm-rw-place.ir
Normal file
20
examples/expected/1650-platform-asm-rw-place.ir
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define internal i64 @compute() #0 {
|
||||
entry:
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 41, ptr %alloca, align 8
|
||||
%asm.rw.seed = load i64, ptr %alloca, align 8
|
||||
%asm = call i64 asm sideeffect "add ${0}, ${0}, #1", "=r,0"(i64 %asm.rw.seed)
|
||||
store i64 %asm, ptr %alloca, align 8
|
||||
%load = load i64, ptr %alloca, align 8
|
||||
ret i64 %load
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%call = call i64 @compute()
|
||||
%ca.tr = trunc i64 %call to i32
|
||||
ret i32 %ca.tr
|
||||
}
|
||||
@@ -1,5 +1 @@
|
||||
error: read-write (`+`) asm outputs are not yet implemented; use a write-only `=` output
|
||||
--> examples/1650-platform-asm-rw-place.sx:8:5
|
||||
|
|
||||
8 | asm volatile { "add %[v], %[v], #1", [v] "+r" -> @x };
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
||||
@@ -785,9 +785,15 @@ pub const Ops = struct {
|
||||
const alloc = e.alloc;
|
||||
|
||||
var n_inputs: usize = 0;
|
||||
var n_rw: usize = 0;
|
||||
for (a.operands) |op| {
|
||||
if (op.role == .input) n_inputs += 1;
|
||||
if (op.role == .out_place and asmIsReadWrite(e, op)) n_rw += 1;
|
||||
}
|
||||
// Read-write (`+`) place outputs become an LLVM output `=` PLUS a tied
|
||||
// input seeded with the place's loaded value — so they add one call arg
|
||||
// (the seed) each, appended after the regular inputs.
|
||||
const n_args = n_inputs + n_rw;
|
||||
|
||||
// Combined LLVM return type: ALL outputs (out_value + out_place) in
|
||||
// source order, each as its `out_ty`. out_place outputs come back in a
|
||||
@@ -806,10 +812,12 @@ pub const Ops = struct {
|
||||
else => c.LLVMStructTypeInContext(e.context, out_llvm.items.ptr, @intCast(n_out), 0),
|
||||
};
|
||||
|
||||
// One LLVM call param per input operand, in source order.
|
||||
const param_types = alloc.alloc(c.LLVMTypeRef, n_inputs) catch unreachable;
|
||||
// One LLVM call param per input operand (source order), then one per
|
||||
// read-write seed (source order) — the arg order MUST match the input
|
||||
// constraint order (regular inputs, then tied inputs; see below).
|
||||
const param_types = alloc.alloc(c.LLVMTypeRef, n_args) catch unreachable;
|
||||
defer alloc.free(param_types);
|
||||
const call_args = alloc.alloc(c.LLVMValueRef, n_inputs) catch unreachable;
|
||||
const call_args = alloc.alloc(c.LLVMValueRef, n_args) catch unreachable;
|
||||
defer alloc.free(call_args);
|
||||
{
|
||||
var i: usize = 0;
|
||||
@@ -821,6 +829,15 @@ pub const Ops = struct {
|
||||
call_args[i] = e.coerceArg(e.resolveRef(op.operand), llvm_ty);
|
||||
i += 1;
|
||||
}
|
||||
// Read-write seeds: load each `+` place's current value (op.operand
|
||||
// is its address) and pass it as the tied input's arg.
|
||||
for (a.operands) |op| {
|
||||
if (op.role != .out_place or !asmIsReadWrite(e, op)) continue;
|
||||
const llvm_ty = e.toLLVMType(op.out_ty);
|
||||
param_types[i] = llvm_ty;
|
||||
call_args[i] = c.LLVMBuildLoad2(e.builder, llvm_ty, e.resolveRef(op.operand), "asm.rw.seed");
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Constraint string: outputs first, then inputs, then ~{clobber}. ──
|
||||
@@ -828,6 +845,23 @@ pub const Ops = struct {
|
||||
defer cons.deinit(alloc);
|
||||
self.appendAsmConstraints(&cons, a, false); // outputs (out_value / out_place)
|
||||
self.appendAsmConstraints(&cons, a, true); // inputs
|
||||
// Tied inputs for read-write (`+`) place outputs: each references the
|
||||
// LLVM index of the output it ties to (outputs are numbered first, in
|
||||
// source order). Appended AFTER the regular inputs so existing operand
|
||||
// indices (`%[name]` → `${N}`) are undisturbed.
|
||||
{
|
||||
var out_idx: usize = 0;
|
||||
for (a.operands) |op| {
|
||||
if (op.role == .input) continue; // not an output — doesn't advance out_idx
|
||||
if (op.role == .out_place and asmIsReadWrite(e, op)) {
|
||||
if (cons.items.len != 0) cons.append(alloc, ',') catch unreachable;
|
||||
var buf: [16]u8 = undefined;
|
||||
const ds = std.fmt.bufPrint(&buf, "{d}", .{out_idx}) catch unreachable;
|
||||
cons.appendSlice(alloc, ds) catch unreachable;
|
||||
}
|
||||
out_idx += 1;
|
||||
}
|
||||
}
|
||||
for (a.clobbers) |cl| {
|
||||
if (cons.items.len != 0) cons.append(alloc, ',') catch unreachable;
|
||||
cons.appendSlice(alloc, "~{") catch unreachable;
|
||||
@@ -840,7 +874,7 @@ pub const Ops = struct {
|
||||
defer rendered.deinit(alloc);
|
||||
self.renderAsmTemplate(&rendered, a);
|
||||
|
||||
const fn_ty = c.LLVMFunctionType(ret_ty, param_types.ptr, @intCast(n_inputs), 0);
|
||||
const fn_ty = c.LLVMFunctionType(ret_ty, param_types.ptr, @intCast(n_args), 0);
|
||||
const asm_val = c.LLVMGetInlineAsm(
|
||||
fn_ty,
|
||||
rendered.items.ptr,
|
||||
@@ -853,7 +887,7 @@ pub const Ops = struct {
|
||||
0, // CanThrow
|
||||
);
|
||||
const label: [*:0]const u8 = if (n_out == 0) "" else "asm";
|
||||
const raw_result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_inputs), label);
|
||||
const raw_result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_args), label);
|
||||
|
||||
// Fast path — no write-through outputs: every output is a value output,
|
||||
// so the asm's return (void / scalar / `{T…}` struct) IS the sx result
|
||||
@@ -908,11 +942,26 @@ pub const Ops = struct {
|
||||
const is_input = op.role == .input;
|
||||
if (is_input != inputs) continue;
|
||||
if (cons.items.len != 0) cons.append(alloc, ',') catch unreachable;
|
||||
const s = e.ir_mod.types.getString(op.constraint);
|
||||
for (s) |ch| cons.append(alloc, if (ch == ',') '|' else ch) catch unreachable;
|
||||
var body = e.ir_mod.types.getString(op.constraint);
|
||||
// Read-write (`+`) place outputs lower to an LLVM output `=` plus a
|
||||
// tied input (appended separately). LLVM has no `+`, so emit `=` for
|
||||
// the output half here.
|
||||
if (!is_input and body.len > 0 and body[0] == '+') {
|
||||
cons.append(alloc, '=') catch unreachable;
|
||||
body = body[1..];
|
||||
}
|
||||
for (body) |ch| cons.append(alloc, if (ch == ',') '|' else ch) catch unreachable;
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether `op` carries a read-write constraint, i.e. whether its
/// constraint string starts with `+`. Only the constraint text is examined;
/// the operand's role is not checked here, so callers that care about place
/// outputs must test `op.role` themselves. Read-write place outputs are
/// emitted as an LLVM `=` output plus a tied input seeded with the place's
/// loaded value.
fn asmIsReadWrite(e: *LLVMEmitter, op: InlineAsm.AsmOperand) bool {
    const constraint = e.ir_mod.types.getString(op.constraint);
    // An empty constraint has no leading marker at all.
    if (constraint.len == 0) return false;
    return constraint[0] == '+';
}
|
||||
|
||||
/// The positional index of a named operand in the LLVM operand list
|
||||
/// (outputs first, then inputs) — the `N` in `%[name]` → `${N}`. Lowering
|
||||
/// guarantees every `%[name]` names an operand, so callers can assume a hit.
|
||||
|
||||
@@ -2345,12 +2345,10 @@ pub fn lowerAsmExpr(self: *Lowering, ae: *const ast.AsmExpr, span: ast.Span) Ref
|
||||
.input => operand_ref = self.lowerExpr(op.payload),
|
||||
.out_value => out_ty = self.resolveTypeWithBindings(op.payload),
|
||||
.out_place => {
|
||||
// Read-write (`+`) and indirect-memory (`*`) place outputs aren't
|
||||
// implemented yet — reject loudly rather than miscompile (§II.11).
|
||||
if (op.constraint.len > 0 and op.constraint[0] == '+') {
|
||||
diags.addFmt(.err, span, "read-write (`+`) asm outputs are not yet implemented; use a write-only `=` output", .{});
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
}
|
||||
// Indirect-memory (`*`) place outputs aren't implemented yet —
|
||||
// reject loudly rather than miscompile (§II.11). Read-write (`+`)
|
||||
// outputs ARE implemented (emit ties an input to the output and
|
||||
// seeds it with the place's loaded value; see `emitInlineAsm`).
|
||||
if (std.mem.indexOfScalar(u8, op.constraint, '*') != null) {
|
||||
diags.addFmt(.err, span, "indirect-memory (`*`) asm outputs are not yet implemented", .{});
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
|
||||
Reference in New Issue
Block a user