feat(asm): Phase 2 — -> @place write-through outputs
An asm result can be STORED through a place (a local / struct field) instead of returned; the place output does not join the result tuple.

- parser.zig: `-> @place` parses `@place` as an ordinary address-of expression → an out_place operand (the in-function form; reuses the existing `@` prefix).
- inst.zig: AsmOperand gains out_ty (the output slot's value type) so emit can build the combined return struct without re-deriving from Inst.ty.
- lower/expr.zig: out_place operand = the lowered @place address, out_ty = the pointee. Read-write (`+`) and indirect-memory (`*`) constraints are rejected loudly (not yet implemented) rather than miscompiled.
- ops.zig emitInlineAsm: the LLVM return type is built from ALL outputs (out_value + out_place); after the call, out_place slots are stored through their address and out_value slots rebuild the sx result. There is a fast path when there are no place outputs (the struct return IS the result — pure-value asm IR unchanged).

Verified: write-to-local (42), struct field, mixed value+place (v=10 b=20), `+` rejected. Locked with 1649-platform-asm-place-output (mixed, runs on aarch64). zig build test green (657 corpus, 446 unit).
This commit is contained in:
@@ -789,8 +789,22 @@ pub const Ops = struct {
|
||||
if (op.role == .input) n_inputs += 1;
|
||||
}
|
||||
|
||||
// Result LLVM type: void (no value output) or the single scalar.
|
||||
const ret_ty = if (instruction.ty == .void) e.cached_void else e.toLLVMType(instruction.ty);
|
||||
// Combined LLVM return type: ALL outputs (out_value + out_place) in
|
||||
// source order, each as its `out_ty`. out_place outputs come back in a
|
||||
// return slot too — they get `store`d through their address below; only
|
||||
// out_value outputs join the sx result. 0 → void, 1 → scalar, N → struct.
|
||||
var out_llvm: std.ArrayList(c.LLVMTypeRef) = .empty;
|
||||
defer out_llvm.deinit(alloc);
|
||||
for (a.operands) |op| {
|
||||
if (op.role == .input) continue;
|
||||
out_llvm.append(alloc, e.toLLVMType(op.out_ty)) catch unreachable;
|
||||
}
|
||||
const n_out = out_llvm.items.len;
|
||||
const ret_ty: c.LLVMTypeRef = switch (n_out) {
|
||||
0 => e.cached_void,
|
||||
1 => out_llvm.items[0],
|
||||
else => c.LLVMStructTypeInContext(e.context, out_llvm.items.ptr, @intCast(n_out), 0),
|
||||
};
|
||||
|
||||
// One LLVM call param per input operand, in source order.
|
||||
const param_types = alloc.alloc(c.LLVMTypeRef, n_inputs) catch unreachable;
|
||||
@@ -838,8 +852,48 @@ pub const Ops = struct {
|
||||
c.LLVMInlineAsmDialectATT,
|
||||
0, // CanThrow
|
||||
);
|
||||
const label: [*:0]const u8 = if (instruction.ty == .void) "" else "asm";
|
||||
const result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_inputs), label);
|
||||
const label: [*:0]const u8 = if (n_out == 0) "" else "asm";
|
||||
const raw_result = c.LLVMBuildCall2(e.builder, fn_ty, asm_val, call_args.ptr, @intCast(n_inputs), label);
|
||||
|
||||
// Fast path — no write-through outputs: every output is a value output,
|
||||
// so the asm's return (void / scalar / `{T…}` struct) IS the sx result
|
||||
// (the struct already matches sx's tuple representation). No split.
|
||||
var has_place = false;
|
||||
for (a.operands) |op| {
|
||||
if (op.role == .out_place) has_place = true;
|
||||
}
|
||||
if (!has_place) {
|
||||
e.mapRef(raw_result);
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Mixed/place outputs (source order): out_place → `store` the slot
|
||||
// through its address; out_value → collect, then rebuild the sx result
|
||||
// (0 → void/place-only call · 1 → that value · N → tuple `insertvalue`). ──
|
||||
var value_vals: std.ArrayList(c.LLVMValueRef) = .empty;
|
||||
defer value_vals.deinit(alloc);
|
||||
var slot: c_uint = 0;
|
||||
for (a.operands) |op| {
|
||||
if (op.role == .input) continue;
|
||||
const v = if (n_out == 1) raw_result else c.LLVMBuildExtractValue(e.builder, raw_result, slot, "asm.out");
|
||||
slot += 1;
|
||||
if (op.role == .out_place) {
|
||||
_ = c.LLVMBuildStore(e.builder, v, e.resolveRef(op.operand));
|
||||
} else {
|
||||
value_vals.append(alloc, v) catch unreachable;
|
||||
}
|
||||
}
|
||||
|
||||
const result: c.LLVMValueRef = blk: {
|
||||
if (value_vals.items.len == 0) break :blk raw_result;
|
||||
if (value_vals.items.len == 1) break :blk value_vals.items[0];
|
||||
const tuple_ty = e.toLLVMType(instruction.ty);
|
||||
var agg = c.LLVMGetUndef(tuple_ty);
|
||||
for (value_vals.items, 0..) |v, j| {
|
||||
agg = c.LLVMBuildInsertValue(e.builder, agg, v, @intCast(j), "asm.tup");
|
||||
}
|
||||
break :blk agg;
|
||||
};
|
||||
// Always mapRef — the IR Ref counter advances regardless of result type.
|
||||
e.mapRef(result);
|
||||
}
|
||||
|
||||
@@ -368,8 +368,15 @@ pub const InlineAsm = struct {
|
||||
name: StringId,
|
||||
/// Verbatim constraint, e.g. "={rax}", "=r", "+r", "{rdi}", "r".
|
||||
constraint: StringId,
|
||||
/// `input` → the value `Ref`; `out_value` → `.none` (the asm yields it).
|
||||
/// `input` → the value `Ref`; `out_value` → `.none` (the asm yields it);
|
||||
/// `out_place` → the place ADDRESS `Ref` (a pointer; the asm result is
|
||||
/// `store`d through it).
|
||||
operand: Ref,
|
||||
/// The value type carried by an OUTPUT slot — `out_value`: its result
|
||||
/// type; `out_place`: the pointee type stored through `operand`. `.void`
|
||||
/// for inputs (their type comes from the input `Ref`). Lets emit build
|
||||
/// the combined LLVM return struct without re-deriving from `Inst.ty`.
|
||||
out_ty: TypeId = .void,
|
||||
|
||||
pub const Role = enum { out_value, out_place, input };
|
||||
};
|
||||
|
||||
@@ -2339,6 +2339,36 @@ pub fn lowerAsmExpr(self: *Lowering, ae: *const ast.AsmExpr, span: ast.Span) Ref
|
||||
// Effective name (design §II.5): explicit `[name]`, else auto-derived
|
||||
// from a `{reg}` pin, else anonymous (`.empty`).
|
||||
const eff_name: []const u8 = op.name orelse (pinnedRegister(op.constraint) orelse "");
|
||||
var operand_ref: Ref = Ref.none;
|
||||
var out_ty: TypeId = .void;
|
||||
switch (op.role) {
|
||||
.input => operand_ref = self.lowerExpr(op.payload),
|
||||
.out_value => out_ty = self.resolveTypeWithBindings(op.payload),
|
||||
.out_place => {
|
||||
// Read-write (`+`) and indirect-memory (`*`) place outputs aren't
|
||||
// implemented yet — reject loudly rather than miscompile (§II.11).
|
||||
if (op.constraint.len > 0 and op.constraint[0] == '+') {
|
||||
diags.addFmt(.err, span, "read-write (`+`) asm outputs are not yet implemented; use a write-only `=` output", .{});
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
}
|
||||
if (std.mem.indexOfScalar(u8, op.constraint, '*') != null) {
|
||||
diags.addFmt(.err, span, "indirect-memory (`*`) asm outputs are not yet implemented", .{});
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
}
|
||||
// `@place` lowers to its address (a pointer); the asm result is
|
||||
// stored through it. The stored type is the pointee.
|
||||
operand_ref = self.lowerExpr(op.payload);
|
||||
const pty = self.inferExprType(op.payload);
|
||||
out_ty = if (!pty.isBuiltin()) blk: {
|
||||
const info = self.module.types.get(pty);
|
||||
break :blk if (info == .pointer) info.pointer.pointee else .unresolved;
|
||||
} else .unresolved;
|
||||
if (out_ty == .unresolved) {
|
||||
diags.addFmt(.err, span, "asm `-> @place` output target must be an addressable place", .{});
|
||||
return self.emitPlaceholder("inline_asm");
|
||||
}
|
||||
},
|
||||
}
|
||||
ir_ops[i] = .{
|
||||
.role = switch (op.role) {
|
||||
.out_value => .out_value,
|
||||
@@ -2347,8 +2377,8 @@ pub fn lowerAsmExpr(self: *Lowering, ae: *const ast.AsmExpr, span: ast.Span) Ref
|
||||
},
|
||||
.name = if (eff_name.len == 0) types.StringId.empty else self.module.types.internString(eff_name),
|
||||
.constraint = self.module.types.internString(op.constraint),
|
||||
// input → the lowered value Ref; an output yields its value (none).
|
||||
.operand = if (op.role == .input) self.lowerExpr(op.payload) else Ref.none,
|
||||
.operand = operand_ref,
|
||||
.out_ty = out_ty,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -2780,10 +2780,17 @@ pub const Parser = struct {
|
||||
var payload: *Node = undefined;
|
||||
if (self.current.tag == .arrow) {
|
||||
self.advance();
|
||||
if (self.current.tag == .at)
|
||||
return self.fail("`-> @place` write-through asm outputs are not supported yet (Phase 2); use a `-> Type` value output");
|
||||
role = .out_value;
|
||||
payload = try self.parseTypeExpr();
|
||||
if (self.current.tag == .at) {
|
||||
// `-> @place`: write-through output. `@place` is parsed as an
|
||||
// ordinary address-of expression (a pointer); lowering stores
|
||||
// the asm result through it. The output does NOT join the
|
||||
// result tuple.
|
||||
role = .out_place;
|
||||
payload = try self.parseUnary();
|
||||
} else {
|
||||
role = .out_value;
|
||||
payload = try self.parseTypeExpr();
|
||||
}
|
||||
} else if (self.current.tag == .equal) {
|
||||
self.advance();
|
||||
role = .input;
|
||||
|
||||
Reference in New Issue
Block a user