fix(0109): hoist all per-instruction allocas to the function entry block

An alloca built at its use site re-executes on every pass through that
block, and LLVM reclaims alloca stack space only when the function
returns. As a result, loop-body locals, nested-loop index slots, and
emitter spill temps (ig.tmp, sret slots, ABI coercion temps, byval
materialization) grew the stack on every iteration, and long loops
segfaulted on stack exhaustion.

New LLVMEmitter.buildEntryAlloca inserts the alloca after the existing
entry-block allocas and then restores the builder position; every
LLVMBuildAlloca site reachable during instruction emission now routes
through it. Initialization stores stay at the use site (per-iteration
re-init is unchanged), and entry slots become mem2reg-promotable. The 35
.ir snapshot diffs are pure alloca position moves (type multisets were
verified identical per file).

Regression: examples/0047-basic-loop-local-stack-reuse.sx (segfaulted
pre-fix on both the 1M-iteration body-local loop and the 3M-iteration
nested loop).
This commit is contained in:
agra
2026-06-10 17:27:11 +03:00
parent e81780e32e
commit 878c4226a6
43 changed files with 1661 additions and 1468 deletions

View File

@@ -326,7 +326,7 @@ pub const Ops = struct {
// ── Memory ────────────────────────────────────────────
pub fn emitAlloca(self: Ops, elem_ty: TypeId) void {
const llvm_ty = self.e.toLLVMType(elem_ty);
const result = c.LLVMBuildAlloca(self.e.builder, llvm_ty, "alloca");
const result = self.e.buildEntryAlloca(llvm_ty, "alloca");
self.e.mapRef(result);
}
@@ -503,7 +503,7 @@ pub const Ops = struct {
var sret_slot: c.LLVMValueRef = null;
if (uses_sret) {
sret_slot = c.LLVMBuildAlloca(self.e.builder, raw_ret_ty, "objc.sret");
sret_slot = self.e.buildEntryAlloca(raw_ret_ty, "objc.sret");
param_types[0] = self.e.cached_ptr;
call_args[0] = sret_slot;
}
@@ -710,7 +710,7 @@ pub const Ops = struct {
// outer phi shape. Instead, return both via tuple
// through an auxiliary local — simplest is to attach
// `cls` to a per-invocation slot. Use a stack alloca.
const cls_slot = c.LLVMBuildAlloca(self.e.builder, self.e.cached_ptr, "jni.parent.cls.slot");
const cls_slot = self.e.buildEntryAlloca(self.e.cached_ptr, "jni.parent.cls.slot");
_ = c.LLVMBuildStore(self.e.builder, cls, cls_slot);
// Tag the slot pointer onto the phi result via the
// generated metadata: we'll re-extract by re-running
@@ -814,7 +814,7 @@ pub const Ops = struct {
defer self.e.alloc.free(args);
var sret_slot: c.LLVMValueRef = null;
if (callee_uses_sret) {
sret_slot = c.LLVMBuildAlloca(self.e.builder, callee_raw_ret, "sret.slot");
sret_slot = self.e.buildEntryAlloca(callee_raw_ret, "sret.slot");
args[0] = sret_slot;
}
for (call_op.args, 0..) |arg_ref, j| {
@@ -1345,7 +1345,7 @@ pub const Ops = struct {
const payload_val = self.e.resolveRef(ei.payload);
// alloca union, store tag, bitcast payload area, store payload
const tmp = c.LLVMBuildAlloca(self.e.builder, union_ty, "ei.tmp");
const tmp = self.e.buildEntryAlloca(union_ty, "ei.tmp");
// Store tag at field 0
const tag_ptr = c.LLVMBuildStructGEP2(self.e.builder, union_ty, tmp, 0, "ei.tagp");
_ = c.LLVMBuildStore(self.e.builder, tag_val, tag_ptr);
@@ -1389,7 +1389,7 @@ pub const Ops = struct {
const base_kind = c.LLVMGetTypeKind(base_ty);
if (base_kind == c.LLVMStructTypeKind) {
// Tagged union: alloca, store, GEP field 1 (payload area), bitcast, load
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ep.tmp");
const tmp = self.e.buildEntryAlloca(base_ty, "ep.tmp");
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
const payload_ptr = c.LLVMBuildStructGEP2(self.e.builder, base_ty, tmp, 1, "ep.pp");
const typed_ptr = c.LLVMBuildBitCast(self.e.builder, payload_ptr, self.e.cached_ptr, "ep.cast");
@@ -1408,13 +1408,13 @@ pub const Ops = struct {
const kind = c.LLVMGetTypeKind(base_ty);
if (kind == c.LLVMStructTypeKind) {
// Tagged union { header, payload_bytes } — access payload at field 1
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ug.tmp");
const tmp = self.e.buildEntryAlloca(base_ty, "ug.tmp");
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
const payload_ptr = c.LLVMBuildStructGEP2(self.e.builder, base_ty, tmp, 1, "ug.pp");
self.e.mapRef(c.LLVMBuildLoad2(self.e.builder, result_ty, payload_ptr, "ug.val"));
} else {
// Untagged union [N x i8] — alloca, store, reinterpret-load
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ug.tmp");
const tmp = self.e.buildEntryAlloca(base_ty, "ug.tmp");
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
self.e.mapRef(c.LLVMBuildLoad2(self.e.builder, result_ty, tmp, "ug.val"));
}
@@ -1455,7 +1455,7 @@ pub const Ops = struct {
self.e.mapRef(c.LLVMBuildExtractElement(self.e.builder, base, idx32, "ve"));
} else if (kind == c.LLVMArrayTypeKind) {
// Fixed-size array value — alloca, store, GEP, load
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ig.tmp");
const tmp = self.e.buildEntryAlloca(base_ty, "ig.tmp");
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
const elem_ty = self.e.toLLVMType(instruction.ty);
var indices = [_]c.LLVMValueRef{ c.LLVMConstInt(self.e.cached_i64, 0, 0), idx };
@@ -1487,7 +1487,7 @@ pub const Ops = struct {
const kind = c.LLVMGetTypeKind(base_ty);
if (kind == c.LLVMArrayTypeKind) {
// Fixed-size array value — alloca, store, GEP
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "igp.tmp");
const tmp = self.e.buildEntryAlloca(base_ty, "igp.tmp");
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
var indices = [_]c.LLVMValueRef{ c.LLVMConstInt(self.e.cached_i64, 0, 0), idx };
self.e.mapRef(c.LLVMBuildGEP2(self.e.builder, base_ty, tmp, &indices, 2, "igp.ptr"));
@@ -1587,7 +1587,7 @@ pub const Ops = struct {
self.e.mapRef(result);
} else if (base_kind == c.LLVMArrayTypeKind) {
// Array: alloca, GEP to element at lo, compute len
const tmp = c.LLVMBuildAlloca(self.e.builder, base_ty, "ss.arr");
const tmp = self.e.buildEntryAlloca(base_ty, "ss.arr");
_ = c.LLVMBuildStore(self.e.builder, base, tmp);
var indices = [_]c.LLVMValueRef{ c.LLVMConstInt(self.e.cached_i64, 0, 0), lo };
const new_ptr = c.LLVMBuildGEP2(self.e.builder, base_ty, tmp, &indices, 2, "ss.ptr");
@@ -1611,7 +1611,7 @@ pub const Ops = struct {
const arr_kind = c.LLVMGetTypeKind(arr_ty);
if (arr_kind == c.LLVMArrayTypeKind) {
const len = c.LLVMGetArrayLength2(arr_ty);
const tmp = c.LLVMBuildAlloca(self.e.builder, arr_ty, "a2s.tmp");
const tmp = self.e.buildEntryAlloca(arr_ty, "a2s.tmp");
_ = c.LLVMBuildStore(self.e.builder, arr, tmp);
var indices = [_]c.LLVMValueRef{ c.LLVMConstInt(self.e.cached_i64, 0, 0), c.LLVMConstInt(self.e.cached_i64, 0, 0) };
const elem_ptr = c.LLVMBuildGEP2(self.e.builder, arr_ty, tmp, &indices, 2, "a2s.ptr");