fix(0109): hoist all per-instruction allocas to the function entry block
An alloca built at its use site re-executes on every pass through that block, and LLVM reclaims allocas only at ret — so loop-body locals, nested-loop index slots, and emitter spill temps (ig.tmp, sret slots, ABI coercion temps, byval materialization) grew the stack per iteration and long loops segfaulted on stack exhaustion. New LLVMEmitter.buildEntryAlloca inserts after existing entry-block allocas and restores the builder position; every LLVMBuildAlloca site reachable during instruction emission now routes through it. Initialization stores stay at the use site (per-iteration re-init is unchanged), and entry slots become mem2reg-promotable. The 35 .ir snapshot diffs are pure alloca position moves (type multisets verified identical per file). Regression: examples/0047-basic-loop-local-stack-reuse.sx (segfaulted pre-fix on both the 1M-iteration body-local loop and the 3M-iteration nested loop).
This commit is contained in:
@@ -116,7 +116,7 @@ pub const AbiLowering = struct {
|
||||
}
|
||||
|
||||
pub fn materializeByvalArg(self: AbiLowering, val: c.LLVMValueRef, struct_ty: c.LLVMTypeRef) c.LLVMValueRef {
|
||||
const tmp = c.LLVMBuildAlloca(self.e.builder, struct_ty, "byval.tmp");
|
||||
const tmp = self.e.buildEntryAlloca(struct_ty, "byval.tmp");
|
||||
_ = c.LLVMBuildStore(self.e.builder, val, tmp);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
@@ -326,7 +326,7 @@ pub const Ops = struct {
|
||||
// ── Memory ────────────────────────────────────────────
|
||||
pub fn emitAlloca(self: Ops, elem_ty: TypeId) void {
|
||||
const llvm_ty = self.e.toLLVMType(elem_ty);
|
||||
const result = c.LLVMBuildAlloca(self.e.builder, llvm_ty, "alloca");
|
||||
const result = self.e.buildEntryAlloca(llvm_ty, "alloca");
|
||||
self.e.mapRef(result);
|
||||
}
|
||||
|
||||
@@ -503,7 +503,7 @@ pub const Ops = struct {
|
||||
|
||||
var sret_slot: c.LLVMValueRef = null;
|
||||
if (uses_sret) {
|
||||
sret_slot = c.LLVMBuildAlloca(self.e.builder, raw_ret_ty, "objc.sret");
|
||||
sret_slot = self.e.buildEntryAlloca(raw_ret_ty, "objc.sret");
|
||||
param_types[0] = self.e.cached_ptr;
|
||||
call_args[0] = sret_slot;
|
||||
}
|
||||
@@ -710,7 +710,7 @@ pub const Ops = struct {
|
||||
// outer phi shape. Instead, return both via tuple
|
||||
// through an auxiliary local — simplest is to attach
|
||||
// `cls` to a per-invocation slot. Use a stack alloca.
|
||||
const cls_slot = c.LLVMBuildAlloca(self.e.builder, self.e.cached_ptr, "jni.parent.cls.slot");
|
||||
const cls_slot = self.e.buildEntryAlloca(self.e.cached_ptr, "jni.parent.cls.slot");
|
||||
_ = c.LLVMBuildStore(self.e.builder, cls, cls_slot);
|
||||
// Tag the slot pointer onto the phi result via the
|
||||
// generated metadata: we'll re-extract by re-running
|
||||
@@ -814,7 +814,7 @@ pub const Ops = struct {
|
||||
defer self.e.alloc.free(args);
|
||||
var sret_slot: c.LLVMValueRef = null;
|
||||
if (callee_uses_sret) {
|
||||
sret_slot = c.LLVMBuildAlloca(self.e.builder, callee_raw_ret, "sret.slot");
|
||||
sret_slot = self.e.buildEntryAlloca(callee_raw_ret, "sret.slot");
|
||||
args[0] = sret_slot;
|
||||
}
|
||||
for (call_op.args, 0..) |arg_ref, j| {
|
||||
@@ -1345,7 +1345,7 @@ pub const Ops = struct {
|
||||
const payload_val = self.e.resolveRef(ei.payload);
|
||||
|
||||
// alloca union, store tag, bitcast payload area, store payload
|
||||
const tmp = c.LLVMBuildAlloca(self.e.builder, union_ty, "ei.tmp");
|
||||
const tmp = self.e.buildEntryAlloca(union_ty, "ei.tmp");
|
||||
// Store tag at field 0
|
||||
const tag_ptr = c.LLVMBuildStructGEP2(self.e.builder, union_ty, tmp, 0, "ei.tagp");
|
||||
_ = c.LLVMBuildStore(self.e.builder, tag_val, tag_ptr);
|
||||
@@ -1389,7 +1389,7 @@ pub const Ops = struct {
|
||||
const base_kind = c.LLVMGetTypeKind(base_ty);
|
||||
if (base_kind == c.LLVMStructTypeKind) {
|
||||
// Tagged union: alloca, store, GEP field 1 (payload area), bitcast, load
|
||||
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ep.tmp");
|
||||
const tmp = self.e.buildEntryAlloca(base_ty, "ep.tmp");
|
||||
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
|
||||
const payload_ptr = c.LLVMBuildStructGEP2(self.e.builder, base_ty, tmp, 1, "ep.pp");
|
||||
const typed_ptr = c.LLVMBuildBitCast(self.e.builder, payload_ptr, self.e.cached_ptr, "ep.cast");
|
||||
@@ -1408,13 +1408,13 @@ pub const Ops = struct {
|
||||
const kind = c.LLVMGetTypeKind(base_ty);
|
||||
if (kind == c.LLVMStructTypeKind) {
|
||||
// Tagged union { header, payload_bytes } — access payload at field 1
|
||||
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ug.tmp");
|
||||
const tmp = self.e.buildEntryAlloca(base_ty, "ug.tmp");
|
||||
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
|
||||
const payload_ptr = c.LLVMBuildStructGEP2(self.e.builder, base_ty, tmp, 1, "ug.pp");
|
||||
self.e.mapRef(c.LLVMBuildLoad2(self.e.builder, result_ty, payload_ptr, "ug.val"));
|
||||
} else {
|
||||
// Untagged union [N x i8] — alloca, store, reinterpret-load
|
||||
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ug.tmp");
|
||||
const tmp = self.e.buildEntryAlloca(base_ty, "ug.tmp");
|
||||
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
|
||||
self.e.mapRef(c.LLVMBuildLoad2(self.e.builder, result_ty, tmp, "ug.val"));
|
||||
}
|
||||
@@ -1455,7 +1455,7 @@ pub const Ops = struct {
|
||||
self.e.mapRef(c.LLVMBuildExtractElement(self.e.builder, base, idx32, "ve"));
|
||||
} else if (kind == c.LLVMArrayTypeKind) {
|
||||
// Fixed-size array value — alloca, store, GEP, load
|
||||
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ig.tmp");
|
||||
const tmp = self.e.buildEntryAlloca(base_ty, "ig.tmp");
|
||||
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
|
||||
const elem_ty = self.e.toLLVMType(instruction.ty);
|
||||
var indices = [_]c.LLVMValueRef{ c.LLVMConstInt(self.e.cached_i64, 0, 0), idx };
|
||||
@@ -1487,7 +1487,7 @@ pub const Ops = struct {
|
||||
const kind = c.LLVMGetTypeKind(base_ty);
|
||||
if (kind == c.LLVMArrayTypeKind) {
|
||||
// Fixed-size array value — alloca, store, GEP
|
||||
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "igp.tmp");
|
||||
const tmp = self.e.buildEntryAlloca(base_ty, "igp.tmp");
|
||||
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
|
||||
var indices = [_]c.LLVMValueRef{ c.LLVMConstInt(self.e.cached_i64, 0, 0), idx };
|
||||
self.e.mapRef(c.LLVMBuildGEP2(self.e.builder, base_ty, tmp, &indices, 2, "igp.ptr"));
|
||||
@@ -1587,7 +1587,7 @@ pub const Ops = struct {
|
||||
self.e.mapRef(result);
|
||||
} else if (base_kind == c.LLVMArrayTypeKind) {
|
||||
// Array: alloca, GEP to element at lo, compute len
|
||||
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ss.arr");
|
||||
const tmp = self.e.buildEntryAlloca(base_ty, "ss.arr");
|
||||
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
|
||||
var indices = [_]c.LLVMValueRef{ c.LLVMConstInt(self.e.cached_i64, 0, 0), lo };
|
||||
const new_ptr = c.LLVMBuildGEP2(self.e.builder, base_ty, tmp, &indices, 2, "ss.ptr");
|
||||
@@ -1611,7 +1611,7 @@ pub const Ops = struct {
|
||||
const arr_kind = c.LLVMGetTypeKind(arr_ty);
|
||||
if (arr_kind == c.LLVMArrayTypeKind) {
|
||||
const len = c.LLVMGetArrayLength2(arr_ty);
|
||||
const tmp = c.LLVMBuildAlloca(self.e.builder, arr_ty, "a2s.tmp");
|
||||
const tmp = self.e.buildEntryAlloca(arr_ty, "a2s.tmp");
|
||||
_ = c.LLVMBuildStore(self.e.builder, arr, tmp);
|
||||
var indices = [_]c.LLVMValueRef{ c.LLVMConstInt(self.e.cached_i64, 0, 0), c.LLVMConstInt(self.e.cached_i64, 0, 0) };
|
||||
const elem_ptr = c.LLVMBuildGEP2(self.e.builder, arr_ty, tmp, &indices, 2, "a2s.ptr");
|
||||
|
||||
@@ -1386,6 +1386,33 @@ pub const LLVMEmitter = struct {
|
||||
self.debugInfo().endFunctionDebug();
|
||||
}
|
||||
|
||||
/// Build an alloca in the current function's ENTRY block rather than at the
/// builder's current position.
///
/// An alloca that executes inside a loop body allocates fresh stack on every
/// iteration (LLVM only reclaims it at `ret`), so any alloca reachable during
/// per-instruction emission must be hoisted here. Only entry-block allocas
/// are static frame slots (and mem2reg-promotable).
///
/// Parameters:
///   - `ty`:   LLVM type of the slot to allocate.
///   - `name`: NUL-terminated name given to the resulting alloca value.
///
/// Returns the alloca instruction. The new alloca is inserted after the
/// leading run of allocas already in the entry block. On return the builder
/// is positioned at the END of the block it was in on entry, and its current
/// debug location is the same as on entry, so initialization stores emitted
/// by the caller stay at the use site with the use site's location.
pub fn buildEntryAlloca(self: *LLVMEmitter, ty: c.LLVMTypeRef, name: [*:0]const u8) c.LLVMValueRef {
    const cur_bb = c.LLVMGetInsertBlock(self.builder);
    const func = c.LLVMGetBasicBlockParent(cur_bb);
    const entry_bb = c.LLVMGetEntryBasicBlock(func);

    // Already emitting into the entry block: build in place, no
    // repositioning (and therefore no state to save/restore).
    if (entry_bb == cur_bb) {
        return c.LLVMBuildAlloca(self.builder, ty, name);
    }

    // Positioning the builder before an existing instruction makes the
    // builder adopt that instruction's debug location, and positioning at
    // the end of a block does not reset it. Remember the use-site location
    // so the instructions the caller emits next are not attributed to the
    // entry block (or left without a location at all).
    const saved_debug_loc = c.LLVMGetCurrentDebugLocation2(self.builder);

    // Skip the leading run of allocas; stop at the first other instruction.
    var insert_before = c.LLVMGetFirstInstruction(entry_bb);
    while (insert_before != null) : (insert_before = c.LLVMGetNextInstruction(insert_before)) {
        if (c.LLVMGetInstructionOpcode(insert_before) != c.LLVMAlloca) break;
    }

    if (insert_before != null) {
        c.LLVMPositionBuilderBefore(self.builder, insert_before);
    } else {
        // Entry block is empty or holds only allocas: append.
        c.LLVMPositionBuilderAtEnd(self.builder, entry_bb);
    }
    const result = c.LLVMBuildAlloca(self.builder, ty, name);

    // Restore both halves of the builder state: insertion point, then the
    // debug location that was current at the use site.
    c.LLVMPositionBuilderAtEnd(self.builder, cur_bb);
    c.LLVMSetCurrentDebugLocation2(self.builder, saved_debug_loc);
    return result;
}
|
||||
|
||||
/// After emitting all blocks, fill in PHI incoming values from branch args.
|
||||
fn fixupPhiNodes(self: *LLVMEmitter, func: *const Function, func_idx: u32) void {
|
||||
if (self.pending_phis.items.len == 0) return;
|
||||
@@ -2101,7 +2128,7 @@ pub const LLVMEmitter = struct {
|
||||
}
|
||||
// Struct/Array/Vector types: store to alloca, ptrtoint for the pointer
|
||||
if (kind == c.LLVMStructTypeKind or kind == c.LLVMArrayTypeKind or kind == c.LLVMVectorTypeKind or kind == c.LLVMScalableVectorTypeKind) {
|
||||
const tmp = c.LLVMBuildAlloca(self.builder, ty, "ba.tmp");
|
||||
const tmp = self.buildEntryAlloca(ty, "ba.tmp");
|
||||
_ = c.LLVMBuildStore(self.builder, val, tmp);
|
||||
return c.LLVMBuildPtrToInt(self.builder, tmp, self.cached_i64, "ba.p2i");
|
||||
}
|
||||
@@ -2279,14 +2306,14 @@ pub const LLVMEmitter = struct {
|
||||
}
|
||||
// Struct → Integer (C ABI coercion: store struct to memory, load as integer)
|
||||
if (val_kind == c.LLVMStructTypeKind and param_kind == c.LLVMIntegerTypeKind) {
|
||||
const tmp = c.LLVMBuildAlloca(self.builder, param_ty, "abi.tmp");
|
||||
const tmp = self.buildEntryAlloca(param_ty, "abi.tmp");
|
||||
_ = c.LLVMBuildStore(self.builder, c.LLVMConstNull(param_ty), tmp);
|
||||
_ = c.LLVMBuildStore(self.builder, val, tmp);
|
||||
return c.LLVMBuildLoad2(self.builder, param_ty, tmp, "abi.coerce");
|
||||
}
|
||||
// Integer → Struct (C ABI return coercion: store integer to memory, load as struct)
|
||||
if (val_kind == c.LLVMIntegerTypeKind and param_kind == c.LLVMStructTypeKind) {
|
||||
const tmp = c.LLVMBuildAlloca(self.builder, val_ty, "abi.ret.tmp");
|
||||
const tmp = self.buildEntryAlloca(val_ty, "abi.ret.tmp");
|
||||
_ = c.LLVMBuildStore(self.builder, val, tmp);
|
||||
return c.LLVMBuildLoad2(self.builder, param_ty, tmp, "abi.ret.coerce");
|
||||
}
|
||||
@@ -2295,19 +2322,19 @@ pub const LLVMEmitter = struct {
|
||||
// memory-bitcast pattern as the integer case; the array type carries
|
||||
// 16 bytes of storage so we alloca with param_ty to guarantee size.
|
||||
if (val_kind == c.LLVMStructTypeKind and param_kind == c.LLVMArrayTypeKind) {
|
||||
const tmp = c.LLVMBuildAlloca(self.builder, param_ty, "abi.struct2arr");
|
||||
const tmp = self.buildEntryAlloca(param_ty, "abi.struct2arr");
|
||||
_ = c.LLVMBuildStore(self.builder, val, tmp);
|
||||
return c.LLVMBuildLoad2(self.builder, param_ty, tmp, "abi.coerce.arr");
|
||||
}
|
||||
// Array → Struct (return-side counterpart for 9..16-byte structs)
|
||||
if (val_kind == c.LLVMArrayTypeKind and param_kind == c.LLVMStructTypeKind) {
|
||||
const tmp = c.LLVMBuildAlloca(self.builder, val_ty, "abi.arr2struct");
|
||||
const tmp = self.buildEntryAlloca(val_ty, "abi.arr2struct");
|
||||
_ = c.LLVMBuildStore(self.builder, val, tmp);
|
||||
return c.LLVMBuildLoad2(self.builder, param_ty, tmp, "abi.ret.coerce.arr");
|
||||
}
|
||||
// Array → Ptr (array decay: alloca + GEP to first element)
|
||||
if (val_kind == c.LLVMArrayTypeKind and param_kind == c.LLVMPointerTypeKind) {
|
||||
const tmp = c.LLVMBuildAlloca(self.builder, val_ty, "ca.arr");
|
||||
const tmp = self.buildEntryAlloca(val_ty, "ca.arr");
|
||||
_ = c.LLVMBuildStore(self.builder, val, tmp);
|
||||
const zero = c.LLVMConstInt(self.cached_i64, 0, 0);
|
||||
var indices = [_]c.LLVMValueRef{ zero, zero };
|
||||
@@ -2720,7 +2747,7 @@ pub const LLVMEmitter = struct {
|
||||
field_val = c.LLVMConstInt(self.cached_i64, 0, 0);
|
||||
} else {
|
||||
const base_ty = c.LLVMTypeOf(base_val);
|
||||
const tmp = c.LLVMBuildAlloca(self.builder, base_ty, "fv.utmp");
|
||||
const tmp = self.buildEntryAlloca(base_ty, "fv.utmp");
|
||||
_ = c.LLVMBuildStore(self.builder, base_val, tmp);
|
||||
const payload_ptr = c.LLVMBuildStructGEP2(self.builder, base_ty, tmp, 1, "fv.pp");
|
||||
const field_llvm_ty = self.toLLVMType(field.ty);
|
||||
|
||||
Reference in New Issue
Block a user