feat(asm): read-write + place outputs

Implements read-write (`+r` / `+{reg}`) `-> @place` outputs. LLVM has
no `+` constraint, so a read-write place lowers to:

  - an output `=` constraint (return slot, stored back through the
    place after the call), with the leading `+` rewritten to `=`; plus
  - a TIED input constraint (the decimal index of that output) appended
    after the regular inputs; its call arg is the seed, i.e. the value
    loaded from the place just before the call.

Tied inputs are appended last so existing operand indices (%[name] ->
${N}) are undisturbed; asmOperandIndex stays correct. Lowering no longer
rejects `+` (indirect `*` still rejected). emitInlineAsm grows the
arg/param arrays by the rw count, loads each seed, and emits the tied
constraint.

Verified by running: increment-in-place (41 -> 42) and a mixed case
(rw place + regular input + value output) producing the textbook
"=r,=r,r,0" constraint with correct ${N} indices. Example
1650-platform-asm-rw-place flipped from the rejection lock to a
runnable aarch64-pinned example (IR-only elsewhere). `zig build test`
is green (658 corpus, 446 unit).
This commit is contained in:
agra
2026-06-15 23:07:38 +03:00
parent 335ac52374
commit 4128416d48
6 changed files with 83 additions and 19 deletions

View File

@@ -0,0 +1 @@
{ "target": "macos" }

View File

@@ -0,0 +1,20 @@
; Function Attrs: nounwind
define internal i64 @compute() #0 {
entry:
%alloca = alloca i64, align 8
store i64 41, ptr %alloca, align 8
%asm.rw.seed = load i64, ptr %alloca, align 8
%asm = call i64 asm sideeffect "add ${0}, ${0}, #1", "=r,0"(i64 %asm.rw.seed)
store i64 %asm, ptr %alloca, align 8
%load = load i64, ptr %alloca, align 8
ret i64 %load
}
; Function Attrs: nounwind
define i32 @main() #0 {
entry:
%call = call i64 @compute()
%ca.tr = trunc i64 %call to i32
ret i32 %ca.tr
}

View File

@@ -1,5 +1 @@
error: read-write (`+`) asm outputs are not yet implemented; use a write-only `=` output
--> examples/1650-platform-asm-rw-place.sx:8:5
|
8 | asm volatile { "add %[v], %[v], #1", [v] "+r" -> @x };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@@ -785,9 +785,15 @@ pub const Ops = struct {
const alloc = e.alloc;
var n_inputs: usize = 0;
var n_rw: usize = 0;
for (a.operands) |op| {
if (op.role == .input) n_inputs += 1;
if (op.role == .out_place and asmIsReadWrite(e, op)) n_rw += 1;
}
// Read-write (`+`) place outputs become an LLVM output `=` PLUS a tied
// input seeded with the place's loaded value — so they add one call arg
// (the seed) each, appended after the regular inputs.
const n_args = n_inputs + n_rw;
// Combined LLVM return type: ALL outputs (out_value + out_place) in
// source order, each as its `out_ty`. out_place outputs come back in a
@@ -806,10 +812,12 @@ pub const Ops = struct {
else => c.LLVMStructTypeInContext(e.context, out_llvm.items.ptr, @intCast(n_out), 0),
};
// One LLVM call param per input operand, in source order.
const param_types = alloc.alloc(c.LLVMTypeRef, n_inputs) catch unreachable;
// One LLVM call param per input operand (source order), then one per
// read-write seed (source order) — the arg order MUST match the input
// constraint order (regular inputs, then tied inputs; see below).
const param_types = alloc.alloc(c.LLVMTypeRef, n_args) catch unreachable;
defer alloc.free(param_types);
const call_args = alloc.alloc(c.LLVMValueRef, n_inputs) catch unreachable;
const call_args = alloc.alloc(c.LLVMValueRef, n_args) catch unreachable;
defer alloc.free(call_args);
{
var i: usize = 0;
@@ -821,6 +829,15 @@ pub const Ops = struct {
call_args[i] = e.coerceArg(e.resolveRef(op.operand), llvm_ty);
i += 1;
}
// Read-write seeds: load each `+` place's current value (op.operand
// is its address) and pass it as the tied input's arg.
for (a.operands) |op| {
if (op.role != .out_place or !asmIsReadWrite(e, op)) continue;
const llvm_ty = e.toLLVMType(op.out_ty);
param_types[i] = llvm_ty;
call_args[i] = c.LLVMBuildLoad2(e.builder, llvm_ty, e.resolveRef(op.operand), "asm.rw.seed");
i += 1;
}
}
// ── Constraint string: outputs first, then inputs, then ~{clobber}. ──
@@ -828,6 +845,23 @@ pub const Ops = struct {
defer cons.deinit(alloc);
self.appendAsmConstraints(&cons, a, false); // outputs (out_value / out_place)
self.appendAsmConstraints(&cons, a, true); // inputs
// Tied inputs for read-write (`+`) place outputs: each references the
// LLVM index of the output it ties to (outputs are numbered first, in
// source order). Appended AFTER the regular inputs so existing operand
// indices (`%[name]` → `${N}`) are undisturbed.
{
var out_idx: usize = 0;
for (a.operands) |op| {
if (op.role == .input) continue; // not an output — doesn't advance out_idx
if (op.role == .out_place and asmIsReadWrite(e, op)) {
if (cons.items.len != 0) cons.append(alloc, ',') catch unreachable;
var buf: [16]u8 = undefined;
const ds = std.fmt.bufPrint(&buf, "{d}", .{out_idx}) catch unreachable;
cons.appendSlice(alloc, ds) catch unreachable;
}
out_idx += 1;
}
}
for (a.clobbers) |cl| {
if (cons.items.len != 0) cons.append(alloc, ',') catch unreachable;
cons.appendSlice(alloc, "~{") catch unreachable;
@@ -840,7 +874,7 @@ pub const Ops = struct {
defer rendered.deinit(alloc);
self.renderAsmTemplate(&rendered, a);
const fn_ty = c.LLVMFunctionType(ret_ty, param_types.ptr, @intCast(n_inputs), 0);
const fn_ty = c.LLVMFunctionType(ret_ty, param_types.ptr, @intCast(n_args), 0);
const asm_val = c.LLVMGetInlineAsm(
fn_ty,
rendered.items.ptr,
@@ -853,7 +887,7 @@ pub const Ops = struct {
0, // CanThrow
);
const label: [*:0]const u8 = if (n_out == 0) "" else "asm";
const raw_result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_inputs), label);
const raw_result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_args), label);
// Fast path — no write-through outputs: every output is a value output,
// so the asm's return (void / scalar / `{T…}` struct) IS the sx result
@@ -908,11 +942,26 @@ pub const Ops = struct {
const is_input = op.role == .input;
if (is_input != inputs) continue;
if (cons.items.len != 0) cons.append(alloc, ',') catch unreachable;
const s = e.ir_mod.types.getString(op.constraint);
for (s) |ch| cons.append(alloc, if (ch == ',') '|' else ch) catch unreachable;
var body = e.ir_mod.types.getString(op.constraint);
// Read-write (`+`) place outputs lower to an LLVM output `=` plus a
// tied input (appended separately). LLVM has no `+`, so emit `=` for
// the output half here.
if (!is_input and body.len > 0 and body[0] == '+') {
cons.append(alloc, '=') catch unreachable;
body = body[1..];
}
for (body) |ch| cons.append(alloc, if (ch == ',') '|' else ch) catch unreachable;
}
}
/// Reports whether `op` carries a read-write constraint, i.e. whether its
/// constraint string starts with `+`. An empty constraint yields `false`.
///
/// Only the constraint text is inspected; the operand's role is NOT checked
/// here, so callers that care about place outputs pair this with their own
/// `op.role == .out_place` test.
fn asmIsReadWrite(e: *LLVMEmitter, op: InlineAsm.AsmOperand) bool {
    const constraint = e.ir_mod.types.getString(op.constraint);
    // `startsWith` handles the empty-string case, so no separate length guard.
    return std.mem.startsWith(u8, constraint, "+");
}
/// The positional index of a named operand in the LLVM operand list
/// (outputs first, then inputs) — the `N` in `%[name]` → `${N}`. Lowering
/// guarantees every `%[name]` names an operand, so callers can assume a hit.

View File

@@ -2345,12 +2345,10 @@ pub fn lowerAsmExpr(self: *Lowering, ae: *const ast.AsmExpr, span: ast.Span) Ref
.input => operand_ref = self.lowerExpr(op.payload),
.out_value => out_ty = self.resolveTypeWithBindings(op.payload),
.out_place => {
// Read-write (`+`) and indirect-memory (`*`) place outputs aren't
// implemented yet — reject loudly rather than miscompile (§II.11).
if (op.constraint.len > 0 and op.constraint[0] == '+') {
diags.addFmt(.err, span, "read-write (`+`) asm outputs are not yet implemented; use a write-only `=` output", .{});
return self.emitPlaceholder("inline_asm");
}
// Indirect-memory (`*`) place outputs aren't implemented yet —
// reject loudly rather than miscompile (§II.11). Read-write (`+`)
// outputs ARE implemented (emit ties an input to the output and
// seeds it with the place's loaded value; see `emitInlineAsm`).
if (std.mem.indexOfScalar(u8, op.constraint, '*') != null) {
diags.addFmt(.err, span, "indirect-memory (`*`) asm outputs are not yet implemented", .{});
return self.emitPlaceholder("inline_asm");