feat(asm): indirect-memory =*m place outputs

Implements indirect-memory (`=*m`) `-> @place` outputs — the last
substantive asm feature. Unlike a write-through `=` output (which
returns a value that is then stored), an indirect output passes the
place ADDRESS to the asm and the asm writes through it; there is no
return slot.

emitInlineAsm:
  - indirect outputs are excluded from the LLVM return type;
  - their pointer is passed as an opaque `ptr` call arg, placed FIRST
    (the arg-consuming constraint order is: output-section indirect
    pointers, then inputs, then read-write tied seeds);
  - each indirect arg gets an `elementtype(T)` call-site attribute
    (required in the opaque-pointer era), T = the pointee type;
  - the store-back loop skips indirect outputs (already written).
New asmIsIndirect helper. Lowering stops rejecting `*` (constraint kept
verbatim; `=*m` reaches the constraint string as-is). asmOperandIndex
is unchanged — indirect outputs still count as operands, so `%[name]`
${N} numbering holds.

Verified by running on aarch64: store-through-pointer (str x9, %[out]
→ 42, IR `=*m,~{x9}` with `ptr elementtype(i64)`) and a mixed case
(indirect + value output + input → `=*m,=r,r`, indirect ptr arg first,
${0}/${1}/${2} correct). Example 1652 (1652-platform-asm-indirect-mem)
flipped from the rejection lock to a runnable aarch64 example (IR-only
elsewhere). zig build test green (661 corpus, 446 unit).
This commit is contained in:
agra
2026-06-16 07:09:17 +03:00
parent 2a43713d7f
commit cb6c032c58
6 changed files with 74 additions and 36 deletions

View File

@@ -0,0 +1 @@
{ "target": "macos" }

View File

@@ -0,0 +1,18 @@
; Function Attrs: nounwind
define internal i64 @poke() #0 {
entry:
%alloca = alloca i64, align 8
store i64 0, ptr %alloca, align 8
call void asm sideeffect " mov x9, #42\0A str x9, ${0}\0A", "=*m,~{x9}"(ptr elementtype(i64) %alloca)
%load = load i64, ptr %alloca, align 8
ret i64 %load
}
; Function Attrs: nounwind
define i32 @main() #0 {
entry:
%call = call i64 @poke()
%ca.tr = trunc i64 %call to i32
ret i32 %ca.tr
}

View File

@@ -1,19 +1 @@
error: indirect-memory (`*`) asm outputs are not yet implemented
--> examples/1652-platform-asm-indirect-mem.sx:9:5
|
9 | asm volatile {
| ^^^^^^^^^^^^^^
10 | #string ASM
| ^^^^^^^^^^^^^^^^^^^
11 | mov x9, #42
| ^^^^^^^^^^^^^^^^^^^
12 | str x9, %[out]
| ^^^^^^^^^^^^^^^^^^^^^^
13 | ASM,
| ^^^^
14 | [out] "=*m" -> @x,
| ^^^^^^^^^^^^^^^^^^^^^^^^^^
15 | clobbers(.x9),
| ^^^^^^^^^^^^^^^^^^^^^^
16 | };
| ^^^^^

View File

@@ -786,23 +786,28 @@ pub const Ops = struct {
var n_inputs: usize = 0;
var n_rw: usize = 0;
var n_indirect: usize = 0;
for (a.operands) |op| {
if (op.role == .input) n_inputs += 1;
if (op.role == .out_place and asmIsReadWrite(e, op)) n_rw += 1;
if (op.role == .out_place and asmIsIndirect(e, op)) n_indirect += 1;
}
// Read-write (`+`) place outputs become an LLVM output `=` PLUS a tied
// input seeded with the place's loaded value — so they add one call arg
// (the seed) each, appended after the regular inputs.
const n_args = n_inputs + n_rw;
// Arg layout — MUST match the arg-consuming constraint order. Indirect
// (`=*m`) outputs sit in the OUTPUT section (their pointer is an arg, no
// return slot), so they come first; then regular inputs; then read-write
// (`+`) tied-input seeds (appended last). Direct outputs consume no arg.
// [indirect output pointers] ++ [inputs] ++ [read-write seeds]
const n_args = n_indirect + n_inputs + n_rw;
// Combined LLVM return type: ALL outputs (out_value + out_place) in
// source order, each as its `out_ty`. out_place outputs come back in a
// return slot too — they get `store`d through their address below; only
// out_value outputs join the sx result. 0 → void, 1 → scalar, N → struct.
// Combined LLVM return type: the DIRECT outputs only (out_value +
// write-through / read-write out_place), source order. An indirect
// (`=*m`) output does NOT return a value — the asm writes through its
// pointer arg — so it is excluded here. 0 → void, 1 → scalar, N → struct.
var out_llvm: std.ArrayList(c.LLVMTypeRef) = .empty;
defer out_llvm.deinit(alloc);
for (a.operands) |op| {
if (op.role == .input) continue;
if (asmIsIndirect(e, op)) continue;
out_llvm.append(alloc, e.toLLVMType(op.out_ty)) catch unreachable;
}
const n_out = out_llvm.items.len;
@@ -821,6 +826,16 @@ pub const Ops = struct {
defer alloc.free(call_args);
{
var i: usize = 0;
// Indirect-memory output pointers (source order): the place address,
// through which the asm writes. Passed as an opaque `ptr`; the
// pointee type is carried by an `elementtype` attribute added after
// the call. No return slot.
for (a.operands) |op| {
if (op.role != .out_place or !asmIsIndirect(e, op)) continue;
param_types[i] = e.cached_ptr;
call_args[i] = e.resolveRef(op.operand);
i += 1;
}
for (a.operands) |op| {
if (op.role != .input) continue;
const raw_ty = e.argIRTypeOrFail(op.operand);
@@ -889,6 +904,21 @@ pub const Ops = struct {
const label: [*:0]const u8 = if (n_out == 0) "" else "asm";
const raw_result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_args), label);
// Indirect (`=*m`) output args are opaque pointers — LLVM (opaque-pointer
// era) requires an `elementtype(T)` attribute naming the pointee on each.
// They occupy arg slots 0..n_indirect-1 (call-site attr index is 1-based).
if (n_indirect != 0) {
const et_kind = c.LLVMGetEnumAttributeKindForName("elementtype", 11);
var j: usize = 0;
for (a.operands) |op| {
if (op.role != .out_place or !asmIsIndirect(e, op)) continue;
const et_attr = c.LLVMCreateTypeAttribute(e.context, et_kind, e.toLLVMType(op.out_ty));
const idx: c.LLVMAttributeIndex = @bitCast(@as(i32, @intCast(j + 1)));
c.LLVMAddCallSiteAttribute(raw_result, idx, et_attr);
j += 1;
}
}
// Fast path — no write-through outputs: every output is a value output,
// so the asm's return (void / scalar / `{T…}` struct) IS the sx result
// (the struct already matches sx's tuple representation). No split.
@@ -909,6 +939,9 @@ pub const Ops = struct {
var slot: c_uint = 0;
for (a.operands) |op| {
if (op.role == .input) continue;
// Indirect (`=*m`) outputs have no return slot — the asm already
// wrote through their pointer arg. Skip (no extract, no store-back).
if (asmIsIndirect(e, op)) continue;
const v = if (n_out == 1) raw_result else c.LLVMBuildExtractValue(e.builder, raw_result, slot, "asm.out");
slot += 1;
if (op.role == .out_place) {
@@ -962,6 +995,14 @@ pub const Ops = struct {
return s.len > 0 and s[0] == '+';
}
/// Reports whether `op` carries an indirect-memory constraint, i.e. whether
/// its constraint string (looked up in the module's string table) contains
/// a `*`, as in `=*m`. For such a place output the emitter passes the place
/// address as an opaque pointer call arg (tagged with an `elementtype`
/// attribute) and the asm writes through it, so the operand has no return
/// slot.
///
/// Only the constraint text is inspected; `op.role` is not consulted here, so
/// callers that care about the operand's role must test it themselves.
fn asmIsIndirect(e: *LLVMEmitter, op: InlineAsm.AsmOperand) bool {
    const constraint_text = e.ir_mod.types.getString(op.constraint);
    const star_at = std.mem.indexOfScalar(u8, constraint_text, '*');
    return star_at != null;
}
/// The positional index of a named operand in the LLVM operand list
/// (outputs first, then inputs) — the `N` in `%[name]` → `${N}`. Lowering
/// guarantees every `%[name]` names an operand, so callers can assume a hit.

View File

@@ -2369,14 +2369,10 @@ pub fn lowerAsmExpr(self: *Lowering, ae: *const ast.AsmExpr, span: ast.Span) Ref
.input => operand_ref = self.lowerExpr(op.payload),
.out_value => out_ty = self.resolveTypeWithBindings(op.payload),
.out_place => {
// Indirect-memory (`*`) place outputs aren't implemented yet —
// reject loudly rather than miscompile (§II.11). Read-write (`+`)
// outputs ARE implemented (emit ties an input to the output and
// seeds it with the place's loaded value; see `emitInlineAsm`).
if (std.mem.indexOfScalar(u8, op.constraint, '*') != null) {
diags.addFmt(.err, span, "indirect-memory (`*`) asm outputs are not yet implemented", .{});
return self.emitPlaceholder("inline_asm");
}
// Read-write (`+`) outputs tie an input to the output and seed
// it with the place's loaded value; indirect-memory (`=*m`)
// outputs pass the place address as a pointer arg and the asm
// writes through it — both handled in `emitInlineAsm`.
// `@place` lowers to its address (a pointer); the asm result is
// stored through it. The stored type is the pointee.
operand_ref = self.lowerExpr(op.payload);