feat(asm): indirect-memory =*m place outputs
Implements indirect-memory (`=*m`) `-> @place` outputs — the last
substantive asm feature. Unlike a write-through `=` output (which
returns a value that is then stored), an indirect output passes the
place ADDRESS to the asm and the asm writes through it; there is no
return slot.
emitInlineAsm:
- indirect outputs are excluded from the LLVM return type;
- their pointer is passed as an opaque `ptr` call arg, placed FIRST
(the arg-consuming constraint order is: output-section indirect
pointers, then inputs, then read-write tied seeds);
- each indirect arg gets an `elementtype(T)` call-site attribute
(required in the opaque-pointer era), T = the pointee type;
- the store-back loop skips indirect outputs (already written).
New `asmIsIndirect` helper. Lowering stops rejecting `*` (the constraint is
kept verbatim; `=*m` reaches the constraint string as-is). `asmOperandIndex`
is unchanged — indirect outputs still count as operands, so the `%[name]` →
`${N}` numbering holds.
Verified by running on aarch64: store-through-pointer (str x9, %[out]
→ 42, IR `=*m,~{x9}` with `ptr elementtype(i64)`) and a mixed case
(indirect + value output + input → `=*m,=r,r`, indirect ptr arg first,
${0}/${1}/${2} correct). Example 1652 flipped from the rejection lock to a
runnable aarch64 example (IR-only elsewhere). `zig build test` is green
(661 corpus, 446 unit).
This commit is contained in:
1
examples/expected/1652-platform-asm-indirect-mem.build
Normal file
1
examples/expected/1652-platform-asm-indirect-mem.build
Normal file
@@ -0,0 +1 @@
|
||||
{ "target": "macos" }
|
||||
@@ -1 +1 @@
|
||||
1
|
||||
42
|
||||
|
||||
18
examples/expected/1652-platform-asm-indirect-mem.ir
Normal file
18
examples/expected/1652-platform-asm-indirect-mem.ir
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define internal i64 @poke() #0 {
|
||||
entry:
|
||||
%alloca = alloca i64, align 8
|
||||
store i64 0, ptr %alloca, align 8
|
||||
call void asm sideeffect " mov x9, #42\0A str x9, ${0}\0A", "=*m,~{x9}"(ptr elementtype(i64) %alloca)
|
||||
%load = load i64, ptr %alloca, align 8
|
||||
ret i64 %load
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%call = call i64 @poke()
|
||||
%ca.tr = trunc i64 %call to i32
|
||||
ret i32 %ca.tr
|
||||
}
|
||||
@@ -1,19 +1 @@
|
||||
error: indirect-memory (`*`) asm outputs are not yet implemented
|
||||
--> examples/1652-platform-asm-indirect-mem.sx:9:5
|
||||
|
|
||||
9 | asm volatile {
|
||||
| ^^^^^^^^^^^^^^
|
||||
10 | #string ASM
|
||||
| ^^^^^^^^^^^^^^^^^^^
|
||||
11 | mov x9, #42
|
||||
| ^^^^^^^^^^^^^^^^^^^
|
||||
12 | str x9, %[out]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^
|
||||
13 | ASM,
|
||||
| ^^^^
|
||||
14 | [out] "=*m" -> @x,
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
15 | clobbers(.x9),
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^
|
||||
16 | };
|
||||
| ^^^^^
|
||||
|
||||
|
||||
@@ -786,23 +786,28 @@ pub const Ops = struct {
|
||||
|
||||
var n_inputs: usize = 0;
|
||||
var n_rw: usize = 0;
|
||||
var n_indirect: usize = 0;
|
||||
for (a.operands) |op| {
|
||||
if (op.role == .input) n_inputs += 1;
|
||||
if (op.role == .out_place and asmIsReadWrite(e, op)) n_rw += 1;
|
||||
if (op.role == .out_place and asmIsIndirect(e, op)) n_indirect += 1;
|
||||
}
|
||||
// Read-write (`+`) place outputs become an LLVM output `=` PLUS a tied
|
||||
// input seeded with the place's loaded value — so they add one call arg
|
||||
// (the seed) each, appended after the regular inputs.
|
||||
const n_args = n_inputs + n_rw;
|
||||
// Arg layout — MUST match the arg-consuming constraint order. Indirect
|
||||
// (`=*m`) outputs sit in the OUTPUT section (their pointer is an arg, no
|
||||
// return slot), so they come first; then regular inputs; then read-write
|
||||
// (`+`) tied-input seeds (appended last). Direct outputs consume no arg.
|
||||
// [indirect output pointers] ++ [inputs] ++ [read-write seeds]
|
||||
const n_args = n_indirect + n_inputs + n_rw;
|
||||
|
||||
// Combined LLVM return type: ALL outputs (out_value + out_place) in
|
||||
// source order, each as its `out_ty`. out_place outputs come back in a
|
||||
// return slot too — they get `store`d through their address below; only
|
||||
// out_value outputs join the sx result. 0 → void, 1 → scalar, N → struct.
|
||||
// Combined LLVM return type: the DIRECT outputs only (out_value +
|
||||
// write-through / read-write out_place), source order. An indirect
|
||||
// (`=*m`) output does NOT return a value — the asm writes through its
|
||||
// pointer arg — so it is excluded here. 0 → void, 1 → scalar, N → struct.
|
||||
var out_llvm: std.ArrayList(c.LLVMTypeRef) = .empty;
|
||||
defer out_llvm.deinit(alloc);
|
||||
for (a.operands) |op| {
|
||||
if (op.role == .input) continue;
|
||||
if (asmIsIndirect(e, op)) continue;
|
||||
out_llvm.append(alloc, e.toLLVMType(op.out_ty)) catch unreachable;
|
||||
}
|
||||
const n_out = out_llvm.items.len;
|
||||
@@ -821,6 +826,16 @@ pub const Ops = struct {
|
||||
defer alloc.free(call_args);
|
||||
{
|
||||
var i: usize = 0;
|
||||
// Indirect-memory output pointers (source order): the place address,
|
||||
// through which the asm writes. Passed as an opaque `ptr`; the
|
||||
// pointee type is carried by an `elementtype` attribute added after
|
||||
// the call. No return slot.
|
||||
for (a.operands) |op| {
|
||||
if (op.role != .out_place or !asmIsIndirect(e, op)) continue;
|
||||
param_types[i] = e.cached_ptr;
|
||||
call_args[i] = e.resolveRef(op.operand);
|
||||
i += 1;
|
||||
}
|
||||
for (a.operands) |op| {
|
||||
if (op.role != .input) continue;
|
||||
const raw_ty = e.argIRTypeOrFail(op.operand);
|
||||
@@ -889,6 +904,21 @@ pub const Ops = struct {
|
||||
const label: [*:0]const u8 = if (n_out == 0) "" else "asm";
|
||||
const raw_result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_args), label);
|
||||
|
||||
// Indirect (`=*m`) output args are opaque pointers — LLVM (opaque-pointer
|
||||
// era) requires an `elementtype(T)` attribute naming the pointee on each.
|
||||
// They occupy arg slots 0..n_indirect-1 (call-site attr index is 1-based).
|
||||
if (n_indirect != 0) {
|
||||
const et_kind = c.LLVMGetEnumAttributeKindForName("elementtype", 11);
|
||||
var j: usize = 0;
|
||||
for (a.operands) |op| {
|
||||
if (op.role != .out_place or !asmIsIndirect(e, op)) continue;
|
||||
const et_attr = c.LLVMCreateTypeAttribute(e.context, et_kind, e.toLLVMType(op.out_ty));
|
||||
const idx: c.LLVMAttributeIndex = @bitCast(@as(i32, @intCast(j + 1)));
|
||||
c.LLVMAddCallSiteAttribute(raw_result, idx, et_attr);
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Fast path — no write-through outputs: every output is a value output,
|
||||
// so the asm's return (void / scalar / `{T…}` struct) IS the sx result
|
||||
// (the struct already matches sx's tuple representation). No split.
|
||||
@@ -909,6 +939,9 @@ pub const Ops = struct {
|
||||
var slot: c_uint = 0;
|
||||
for (a.operands) |op| {
|
||||
if (op.role == .input) continue;
|
||||
// Indirect (`=*m`) outputs have no return slot — the asm already
|
||||
// wrote through their pointer arg. Skip (no extract, no store-back).
|
||||
if (asmIsIndirect(e, op)) continue;
|
||||
const v = if (n_out == 1) raw_result else c.LLVMBuildExtractValue(e.builder, raw_result, slot, "asm.out");
|
||||
slot += 1;
|
||||
if (op.role == .out_place) {
|
||||
@@ -962,6 +995,14 @@ pub const Ops = struct {
|
||||
return s.len > 0 and s[0] == '+';
|
||||
}
|
||||
|
||||
/// True if `op` is an indirect-memory (`=*m`) place output: an `out_place`
/// operand whose constraint contains `*`. For such an operand the place
/// address is passed as an opaque pointer arg (with an `elementtype`
/// attribute) and the asm writes through it; there is no return slot.
///
/// The role is checked here so that every caller agrees on which operands
/// are indirect. Callers that only filter out `.input` before asking would
/// otherwise drop the return slot of an `out_value` operand whose constraint
/// happens to contain `*`, while the arg-building loops (which additionally
/// require `.out_place`) pass no pointer for it.
fn asmIsIndirect(e: *LLVMEmitter, op: InlineAsm.AsmOperand) bool {
    // Only place outputs carry an address the asm could write through.
    if (op.role != .out_place) return false;
    const constraint = e.ir_mod.types.getString(op.constraint);
    return std.mem.indexOfScalar(u8, constraint, '*') != null;
}
|
||||
|
||||
/// The positional index of a named operand in the LLVM operand list
|
||||
/// (outputs first, then inputs) — the `N` in `%[name]` → `${N}`. Lowering
|
||||
/// guarantees every `%[name]` names an operand, so callers can assume a hit.
|
||||
|
||||
@@ -2369,14 +2369,10 @@ pub fn lowerAsmExpr(self: *Lowering, ae: *const ast.AsmExpr, span: ast.Span) Ref
|
||||
.input => operand_ref = self.lowerExpr(op.payload),
|
||||
.out_value => out_ty = self.resolveTypeWithBindings(op.payload),
|
||||
.out_place => {
|
||||
// Indirect-memory (`*`) place outputs aren't implemented yet —
|
||||
// reject loudly rather than miscompile (§II.11). Read-write (`+`)
|
||||
// outputs ARE implemented (emit ties an input to the output and
|
||||
// seeds it with the place's loaded value; see `emitInlineAsm`).
|
||||
if (std.mem.indexOfScalar(u8, op.constraint, '*') != null) {
|
||||
diags.addFmt(.err, span, "indirect-memory (`*`) asm outputs are not yet implemented", .{});
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
}
|
||||
// Read-write (`+`) outputs tie an input to the output and seed
|
||||
// it with the place's loaded value; indirect-memory (`=*m`)
|
||||
// outputs pass the place address as a pointer arg and the asm
|
||||
// writes through it — both handled in `emitInlineAsm`.
|
||||
// `@place` lowers to its address (a pointer); the asm result is
|
||||
// stored through it. The stored type is the pointee.
|
||||
operand_ref = self.lowerExpr(op.payload);
|
||||
|
||||
Reference in New Issue
Block a user