feat(asm): Phase 2 — -> @place write-through outputs

An asm result can be STORED through a place (a local / struct field) instead of being returned; a place output does not join the result tuple.

- parser.zig: `-> @place` parses `@place` as an ordinary address-of expression → an out_place operand (the in-function form; reuses the existing `@` prefix).
- inst.zig: AsmOperand gains out_ty (the output slot's value type) so emit can build the combined return struct without re-deriving it from Inst.ty.
- lower/expr.zig: out_place operand = the lowered @place address, out_ty = the pointee type. Read-write (`+`) and indirect-memory (`*`) constraints are rejected loudly (not yet implemented) rather than miscompiled.
- ops.zig emitInlineAsm: the LLVM return type is built from ALL outputs (out_value + out_place); after the call, out_place slots are stored through their addresses and out_value slots rebuild the sx result. Fast path when there are no place outputs (the struct return IS the result — pure-value asm IR unchanged).

Verified: write-to-local (42), struct field, mixed value+place (v=10 b=20), `+` rejected. Locked with 1649-platform-asm-place-output (mixed, runs on aarch64). zig build test green (657 corpus, 446 unit).
2026-06-15 22:47:34 +03:00
parent b8800a234c
commit 967005621a
11 changed files with 198 additions and 24 deletions
--- a/src/backend/llvm/ops.zig
+++ b/src/backend/llvm/ops.zig
@@ -789,8 +789,22 @@ pub const Ops = struct {
            if (op.role == .input) n_inputs += 1;
        }

-        // Result LLVM type: void (no value output) or the single scalar.
-        const ret_ty = if (instruction.ty == .void) e.cached_void else e.toLLVMType(instruction.ty);
+        // Combined LLVM return type: ALL outputs (out_value + out_place) in
+        // source order, each as its `out_ty`. out_place outputs come back in a
+        // return slot too — they get `store`d through their address below; only
+        // out_value outputs join the sx result. 0 → void, 1 → scalar, N → struct.
+        var out_llvm: std.ArrayList(c.LLVMTypeRef) = .empty;
+        defer out_llvm.deinit(alloc);
+        for (a.operands) |op| {
+            if (op.role == .input) continue;
+            out_llvm.append(alloc, e.toLLVMType(op.out_ty)) catch unreachable;
+        }
+        const n_out = out_llvm.items.len;
+        const ret_ty: c.LLVMTypeRef = switch (n_out) {
+            0 => e.cached_void,
+            1 => out_llvm.items[0],
+            else => c.LLVMStructTypeInContext(e.context, out_llvm.items.ptr, @intCast(n_out), 0),
+        };

        // One LLVM call param per input operand, in source order.
        const param_types = alloc.alloc(c.LLVMTypeRef, n_inputs) catch unreachable;
@@ -838,8 +852,48 @@ pub const Ops = struct {
            c.LLVMInlineAsmDialectATT,
            0, // CanThrow
        );
-        const label: [*:0]const u8 = if (instruction.ty == .void) "" else "asm";
-        const result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_inputs), label);
+        const label: [*:0]const u8 = if (n_out == 0) "" else "asm";
+        const raw_result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_inputs), label);
+
+        // Fast path — no write-through outputs: every output is a value output,
+        // so the asm's return (void / scalar / `{T…}` struct) IS the sx result
+        // (the struct already matches sx's tuple representation). No split.
+        var has_place = false;
+        for (a.operands) |op| {
+            if (op.role == .out_place) has_place = true;
+        }
+        if (!has_place) {
+            e.mapRef(raw_result);
+            return;
+        }
+
+        // ── Mixed/place outputs (source order): out_place → `store` the slot
+        // through its address; out_value → collect, then rebuild the sx result
+        // (0 → void/place-only call · 1 → that value · N → tuple `insertvalue`). ──
+        var value_vals: std.ArrayList(c.LLVMValueRef) = .empty;
+        defer value_vals.deinit(alloc);
+        var slot: c_uint = 0;
+        for (a.operands) |op| {
+            if (op.role == .input) continue;
+            const v = if (n_out == 1) raw_result else c.LLVMBuildExtractValue(e.builder, raw_result, slot, "asm.out");
+            slot += 1;
+            if (op.role == .out_place) {
+                _ = c.LLVMBuildStore(e.builder, v, e.resolveRef(op.operand));
+            } else {
+                value_vals.append(alloc, v) catch unreachable;
+            }
+        }
+
+        const result: c.LLVMValueRef = blk: {
+            if (value_vals.items.len == 0) break :blk raw_result;
+            if (value_vals.items.len == 1) break :blk value_vals.items[0];
+            const tuple_ty = e.toLLVMType(instruction.ty);
+            var agg = c.LLVMGetUndef(tuple_ty);
+            for (value_vals.items, 0..) |v, j| {
+                agg = c.LLVMBuildInsertValue(e.builder, agg, v, @intCast(j), "asm.tup");
+            }
+            break :blk agg;
+        };
        // Always mapRef — the IR Ref counter advances regardless of result type.
        e.mapRef(result);
    }