This commit is contained in:
agra
2026-03-02 09:49:43 +02:00
parent f763765ea2
commit ba9c4d69ce
6 changed files with 460 additions and 103 deletions

View File

@@ -83,6 +83,9 @@ pub const LLVMEmitter = struct {
// Cached field name arrays for reflection (TypeId → LLVM global)
field_name_arrays: std.AutoHashMap(u32, c.LLVMValueRef),
// Target configuration (stored for ABI decisions during emission)
target_config: TargetConfig,
const PendingPhi = struct {
phi: c.LLVMValueRef,
block_id: BlockId, // the block this phi belongs to
@@ -154,6 +157,7 @@ pub const LLVMEmitter = struct {
.any_struct_type = null,
.closure_struct_type = null,
.field_name_arrays = std.AutoHashMap(u32, c.LLVMValueRef).init(alloc),
.target_config = target_config,
};
}
@@ -195,6 +199,9 @@ pub const LLVMEmitter = struct {
/// Compare IR typeSizeBytes against LLVMABISizeOfType for all user-defined types.
fn verifySizes(self: *LLVMEmitter) void {
// Skip for WASM: wasm32 has 4-byte pointers vs IR's assumed 8-byte,
// so struct sizes will differ. LLVM handles emission correctly.
if (self.target_config.isWasm()) return;
const dl = c.LLVMGetModuleDataLayout(self.llvm_module);
if (dl == null) return;
const type_count = self.ir_mod.types.infos.items.len;
@@ -290,6 +297,19 @@ pub const LLVMEmitter = struct {
fn declareFunction(self: *LLVMEmitter, func: *const Function, func_idx: u32) void {
const name = self.ir_mod.types.getString(func.name);
// Skip builtins that are declared via getOrDeclare* with correct C-compatible types.
// The IR lowering creates extern stubs with IR types (e.g. memset → void return),
// but the C ABI may differ (memset returns ptr). Let getOrDeclare* handle these.
if (func.is_extern and isBuiltinLibcName(name)) {
// Still register in func_map so call resolution works
const builtin_fn = self.getOrDeclareBuiltinByName(name);
if (builtin_fn) |bf| {
self.func_map.put(func_idx, bf) catch unreachable;
return;
}
}
const is_main = std.mem.eql(u8, name, "main");
// main always returns i32 at the LLVM level (JIT expects it)
@@ -663,7 +683,7 @@ pub const LLVMEmitter = struct {
},
.heap_alloc => |un| {
// malloc(size) → *void
const size = self.resolveRef(un.operand);
const size = self.coerceArg(self.resolveRef(un.operand), self.sizeType());
const malloc_fn = self.getOrDeclareMalloc();
var args = [_]c.LLVMValueRef{size};
const result = c.LLVMBuildCall2(
@@ -977,7 +997,7 @@ pub const LLVMEmitter = struct {
if (val_w > elt_w) {
field_val = c.LLVMBuildTrunc(self.builder, field_val, elt, "atrunc");
} else if (val_w < elt_w) {
field_val = c.LLVMBuildSExt(self.builder, field_val, elt, "aext");
field_val = c.LLVMBuildZExt(self.builder, field_val, elt, "aext");
}
}
}
@@ -1277,12 +1297,12 @@ pub const LLVMEmitter = struct {
const base = self.resolveRef(ss.base);
var lo = self.resolveRef(ss.lo);
var hi = self.resolveRef(ss.hi);
// Normalize lo/hi to i64 for consistent arithmetic
// Normalize lo/hi to i64 for consistent arithmetic (indices are unsigned)
if (c.LLVMTypeOf(lo) != self.cached_i64) {
lo = c.LLVMBuildSExt(self.builder, lo, self.cached_i64, "ss.lo64");
lo = c.LLVMBuildZExt(self.builder, lo, self.cached_i64, "ss.lo64");
}
if (c.LLVMTypeOf(hi) != self.cached_i64) {
hi = c.LLVMBuildSExt(self.builder, hi, self.cached_i64, "ss.hi64");
hi = c.LLVMBuildZExt(self.builder, hi, self.cached_i64, "ss.hi64");
}
const base_ty = c.LLVMTypeOf(base);
const base_kind = c.LLVMGetTypeKind(base_ty);
@@ -1354,7 +1374,7 @@ pub const LLVMEmitter = struct {
// Builtins that map to libc functions or LLVM intrinsics
switch (bi.builtin) {
.malloc => {
const size = self.resolveRef(bi.args[0]);
const size = self.coerceArg(self.resolveRef(bi.args[0]), self.sizeType());
const malloc_fn = self.getOrDeclareMalloc();
var args = [_]c.LLVMValueRef{size};
self.mapRef(c.LLVMBuildCall2(self.builder, self.getMallocType(), malloc_fn, &args, 1, "malloc"));
@@ -1369,7 +1389,7 @@ pub const LLVMEmitter = struct {
.memcpy => {
const dst = self.resolveRef(bi.args[0]);
const src = self.resolveRef(bi.args[1]);
const len = self.resolveRef(bi.args[2]);
const len = self.coerceArg(self.resolveRef(bi.args[2]), self.sizeType());
const memcpy_fn = self.getOrDeclareMemcpy();
var args = [_]c.LLVMValueRef{ dst, src, len };
_ = c.LLVMBuildCall2(self.builder, self.getMemcpyType(), memcpy_fn, &args, 3, "");
@@ -1378,11 +1398,9 @@ pub const LLVMEmitter = struct {
.memset => {
const dst = self.resolveRef(bi.args[0]);
var val = self.resolveRef(bi.args[1]);
const len = self.resolveRef(bi.args[2]);
// memset expects i32 for byte value — truncate if needed
if (c.LLVMTypeOf(val) != self.cached_i32) {
val = c.LLVMBuildTrunc(self.builder, val, self.cached_i32, "ms.trunc");
}
const len = self.coerceArg(self.resolveRef(bi.args[2]), self.sizeType());
// memset expects i32 for byte value — coerce width
val = self.coerceArg(val, self.cached_i32);
const memset_fn = self.getOrDeclareMemset();
var args = [_]c.LLVMValueRef{ dst, val, len };
_ = c.LLVMBuildCall2(self.builder, self.getMemsetType(), memset_fn, &args, 3, "");
@@ -1408,11 +1426,16 @@ pub const LLVMEmitter = struct {
const str_val = self.resolveRef(bi.args[0]);
const raw_ptr = c.LLVMBuildExtractValue(self.builder, str_val, 0, "str.ptr");
const str_len = c.LLVMBuildExtractValue(self.builder, str_val, 1, "str.len");
// On wasm32, count param is i32 (size_t)
const count = if (self.target_config.isWasm())
c.LLVMBuildTrunc(self.builder, str_len, self.cached_i32, "len.tr")
else
str_len;
const write_fn = self.getOrDeclareWrite();
var write_args = [_]c.LLVMValueRef{
c.LLVMConstInt(self.cached_i32, 1, 0), // fd = stdout
raw_ptr,
str_len,
count,
};
_ = c.LLVMBuildCall2(self.builder, self.getWriteType(), write_fn, &write_args, 3, "");
self.advanceRefCounter();
@@ -1579,15 +1602,26 @@ pub const LLVMEmitter = struct {
const a_ty = c.LLVMTypeOf(a);
const kind = c.LLVMGetTypeKind(a_ty);
if (kind == c.LLVMStructTypeKind) {
const has = c.LLVMBuildExtractValue(self.builder, a, 1, "oc.has");
const unwrapped = c.LLVMBuildExtractValue(self.builder, a, 0, "oc.val");
// Coerce b_val to match unwrapped type
const uw_ty = c.LLVMTypeOf(unwrapped);
const b_ty = c.LLVMTypeOf(b_val);
if (uw_ty != b_ty) {
b_val = self.coerceArg(b_val, uw_ty);
const n_fields = c.LLVMCountStructElementTypes(a_ty);
const f1_ty = if (n_fields >= 2) c.LLVMStructGetTypeAtIndex(a_ty, 1) else null;
const is_ti1 = if (f1_ty) |ft| c.LLVMGetTypeKind(ft) == c.LLVMIntegerTypeKind and c.LLVMGetIntTypeWidth(ft) == 1 else false;
if (is_ti1) {
// Standard optional {T, i1}: extract has_value and unwrap
const has = c.LLVMBuildExtractValue(self.builder, a, 1, "oc.has");
const unwrapped = c.LLVMBuildExtractValue(self.builder, a, 0, "oc.val");
const uw_ty = c.LLVMTypeOf(unwrapped);
const b_ty = c.LLVMTypeOf(b_val);
if (uw_ty != b_ty) {
b_val = self.coerceArg(b_val, uw_ty);
}
self.mapRef(c.LLVMBuildSelect(self.builder, has, unwrapped, b_val, "oc.sel"));
} else {
// ?Closure {fn_ptr, env}: check if fn_ptr is null
const fn_ptr = c.LLVMBuildExtractValue(self.builder, a, 0, "oc.fn");
const is_nonnull = c.LLVMBuildICmp(self.builder, c.LLVMIntNE, fn_ptr, c.LLVMConstNull(c.LLVMTypeOf(fn_ptr)), "oc.nn");
// Select the full closure struct, not just the fn_ptr
self.mapRef(c.LLVMBuildSelect(self.builder, is_nonnull, a, b_val, "oc.sel"));
}
self.mapRef(c.LLVMBuildSelect(self.builder, has, unwrapped, b_val, "oc.sel"));
} else {
// ?*T — select on null
const is_nonnull = c.LLVMBuildICmp(self.builder, c.LLVMIntNE, a, c.LLVMConstNull(a_ty), "oc.nn");
@@ -1927,8 +1961,14 @@ pub const LLVMEmitter = struct {
if (kind == c.LLVMIntegerTypeKind and rhs_kind == c.LLVMIntegerTypeKind) {
const lw = c.LLVMGetIntTypeWidth(lhs_ty);
const rw = c.LLVMGetIntTypeWidth(rhs_ty);
if (lw < rw) lhs = c.LLVMBuildZExt(self.builder, lhs, rhs_ty, "cmp.ext")
else if (rw < lw) rhs = c.LLVMBuildZExt(self.builder, rhs, lhs_ty, "cmp.ext");
const is_unsigned = self.isRefUnsigned(bin.lhs) or self.isRefUnsigned(bin.rhs);
if (is_unsigned) {
if (lw < rw) lhs = c.LLVMBuildZExt(self.builder, lhs, rhs_ty, "cmp.ext")
else if (rw < lw) rhs = c.LLVMBuildZExt(self.builder, rhs, lhs_ty, "cmp.ext");
} else {
if (lw < rw) lhs = c.LLVMBuildSExt(self.builder, lhs, rhs_ty, "cmp.ext")
else if (rw < lw) rhs = c.LLVMBuildSExt(self.builder, rhs, lhs_ty, "cmp.ext");
}
}
// Pointer vs integer: coerce int to null pointer
if (kind == c.LLVMPointerTypeKind and rhs_kind == c.LLVMIntegerTypeKind) {
@@ -1981,7 +2021,6 @@ pub const LLVMEmitter = struct {
const lhs = self.resolveRef(bin.lhs);
const rhs = self.resolveRef(bin.rhs);
const b = self.builder;
const i64_ty = c.LLVMInt64TypeInContext(self.context);
const i32_ty = c.LLVMInt32TypeInContext(self.context);
const i1_ty = c.LLVMInt1TypeInContext(self.context);
const ptr_ty = c.LLVMPointerTypeInContext(self.context, 0);
@@ -2005,12 +2044,14 @@ pub const LLVMEmitter = struct {
// memcmp block
c.LLVMPositionBuilderAtEnd(b, memcmp_bb);
const size_ty = self.sizeType();
const memcmp_fn = c.LLVMGetNamedFunction(self.llvm_module, "memcmp") orelse blk: {
var params = [_]c.LLVMTypeRef{ ptr_ty, ptr_ty, i64_ty };
var params = [_]c.LLVMTypeRef{ ptr_ty, ptr_ty, size_ty };
const fn_type = c.LLVMFunctionType(i32_ty, &params, 3, 0);
break :blk c.LLVMAddFunction(self.llvm_module, "memcmp", fn_type);
};
var args = [_]c.LLVMValueRef{ lhs_ptr, rhs_ptr, lhs_len };
const cmp_len = self.coerceArg(lhs_len, size_ty);
var args = [_]c.LLVMValueRef{ lhs_ptr, rhs_ptr, cmp_len };
const fn_ty = c.LLVMGlobalGetValueType(memcmp_fn);
const cmp_result = c.LLVMBuildCall2(b, fn_ty, memcmp_fn, &args, 3, "memcmp");
const content_eq = c.LLVMBuildICmp(b, c.LLVMIntEQ, cmp_result, c.LLVMConstInt(i32_ty, 0, 0), "str.ceq");
@@ -2090,9 +2131,14 @@ pub const LLVMEmitter = struct {
return c.LLVMAddFunction(self.llvm_module, "free", fn_ty);
}
/// LLVM integer type used wherever a C `size_t` is passed: `cached_i32` when
/// the target config reports WASM, `cached_i64` for every other target.
fn sizeType(self: *LLVMEmitter) c.LLVMTypeRef {
    if (self.target_config.isWasm()) {
        return self.cached_i32;
    }
    return self.cached_i64;
}
/// Builds the LLVM function type for `malloc`: a single size parameter
/// returning `cached_ptr`. The size parameter uses `sizeType()` so its width
/// agrees with the call sites, which coerce the size argument to `sizeType()`.
fn getMallocType(self: *LLVMEmitter) c.LLVMTypeRef {
    // malloc(size_t) → ptr
    var param_types = [_]c.LLVMTypeRef{self.sizeType()};
    return c.LLVMFunctionType(self.cached_ptr, &param_types, 1, 0);
}
@@ -2108,8 +2154,8 @@ pub const LLVMEmitter = struct {
}
/// Builds the LLVM function type for `memcpy`: (dst ptr, src ptr, length)
/// returning `cached_ptr`. The length parameter uses `sizeType()` so it has
/// the same width as the length argument the emitter coerces at call sites.
fn getMemcpyType(self: *LLVMEmitter) c.LLVMTypeRef {
    // memcpy(ptr, ptr, size_t) → ptr
    var param_types = [_]c.LLVMTypeRef{ self.cached_ptr, self.cached_ptr, self.sizeType() };
    return c.LLVMFunctionType(self.cached_ptr, &param_types, 3, 0);
}
@@ -2119,8 +2165,8 @@ pub const LLVMEmitter = struct {
}
/// Builds the LLVM function type for `memset`: (dst ptr, i32 byte value,
/// length) returning `cached_ptr`. The length parameter uses `sizeType()` so
/// it has the same width as the length argument coerced at call sites.
fn getMemsetType(self: *LLVMEmitter) c.LLVMTypeRef {
    // memset(ptr, i32, size_t) → ptr
    var param_types = [_]c.LLVMTypeRef{ self.cached_ptr, self.cached_i32, self.sizeType() };
    return c.LLVMFunctionType(self.cached_ptr, &param_types, 3, 0);
}
@@ -2158,15 +2204,24 @@ pub const LLVMEmitter = struct {
return c.LLVMFunctionType(self.cached_f32, &param_types, 1, 0);
}
/// Returns the module's `memcmp` function, adding a declaration with the
/// signature (ptr, ptr, sizeType()) → i32 when the module has none yet.
fn getOrDeclareMemcmp(self: *LLVMEmitter) c.LLVMValueRef {
    return c.LLVMGetNamedFunction(self.llvm_module, "memcmp") orelse declare: {
        // memcmp(ptr, ptr, size_t) → i32
        var memcmp_params = [_]c.LLVMTypeRef{ self.cached_ptr, self.cached_ptr, self.sizeType() };
        const memcmp_ty = c.LLVMFunctionType(self.cached_i32, &memcmp_params, 3, 0);
        break :declare c.LLVMAddFunction(self.llvm_module, "memcmp", memcmp_ty);
    };
}
/// Returns the module's `write` function, adding a declaration typed by
/// `getWriteType()` when the module has none yet.
fn getOrDeclareWrite(self: *LLVMEmitter) c.LLVMValueRef {
    const existing = c.LLVMGetNamedFunction(self.llvm_module, "write");
    return existing orelse c.LLVMAddFunction(self.llvm_module, "write", self.getWriteType());
}
/// Builds the LLVM function type for `write`: (fd i32, buf ptr, count)
/// returning a signed size. Both the count parameter and the return type use
/// `sizeType()`, so the declaration follows the target's `size_t` width
/// instead of a hard-coded i64.
fn getWriteType(self: *LLVMEmitter) c.LLVMTypeRef {
    // write(fd: i32, buf: ptr, count: size_t) → ssize_t
    const st = self.sizeType();
    var param_types = [_]c.LLVMTypeRef{ self.cached_i32, self.cached_ptr, st };
    return c.LLVMFunctionType(st, &param_types, 3, 0);
}
fn getOrDeclareSnprintf(self: *LLVMEmitter) c.LLVMValueRef {
@@ -2180,6 +2235,28 @@ pub const LLVMEmitter = struct {
return c.LLVMFunctionType(self.cached_i32, &param_types, 3, 1); // 1 = variadic
}
/// Reports whether `name` exactly matches one of the libc functions for which
/// the emitter has a dedicated getOrDeclare* helper with C-compatible types.
/// Matching is byte-exact (case-sensitive).
fn isBuiltinLibcName(name: []const u8) bool {
    const libc_names = [_][]const u8{
        "malloc", "free",  "memcpy",   "memset",
        "memcmp", "write", "snprintf",
    };
    var idx: usize = 0;
    while (idx < libc_names.len) : (idx += 1) {
        if (std.mem.eql(u8, libc_names[idx], name)) return true;
    }
    return false;
}
/// Looks up `name` among the libc builtins that have a dedicated
/// getOrDeclare* helper and returns that helper's result, so the function is
/// declared with its C-compatible type. Returns null for any other name.
fn getOrDeclareBuiltinByName(self: *LLVMEmitter, name: []const u8) ?c.LLVMValueRef {
    const LibcBuiltin = enum { malloc, free, memcpy, memset, memcmp, write, snprintf };
    // Exact, case-sensitive match of `name` against the enum tag names.
    const which = std.meta.stringToEnum(LibcBuiltin, name) orelse return null;
    return switch (which) {
        .malloc => self.getOrDeclareMalloc(),
        .free => self.getOrDeclareFree(),
        .memcpy => self.getOrDeclareMemcpy(),
        .memset => self.getOrDeclareMemset(),
        .memcmp => self.getOrDeclareMemcmp(),
        .write => self.getOrDeclareWrite(),
        .snprintf => self.getOrDeclareSnprintf(),
    };
}
/// Build a string fat pointer {ptr, len} from raw pointer and length.
fn buildStringValue(self: *LLVMEmitter, ptr: c.LLVMValueRef, len: c.LLVMValueRef) c.LLVMValueRef {
const str_ty = self.getStringStructType();
@@ -2324,7 +2401,9 @@ pub const LLVMEmitter = struct {
if (val_w > param_w) {
return c.LLVMBuildTrunc(self.builder, val, param_ty, "ca.tr");
} else {
return c.LLVMBuildSExt(self.builder, val, param_ty, "ca.ext");
// Use ZExt by default — preserves bit pattern for unsigned types.
// Signed widening is handled by explicit widen instructions from the IR.
return c.LLVMBuildZExt(self.builder, val, param_ty, "ca.ext");
}
}
// Float → Float (width mismatch)
@@ -2337,8 +2416,14 @@ pub const LLVMEmitter = struct {
return c.LLVMBuildFPTrunc(self.builder, val, param_ty, "ca.fptrunc");
}
}
// Int → Float
// Int → Float (use UIToFP for i1/bool, SIToFP otherwise as the default)
if (val_kind == c.LLVMIntegerTypeKind and (param_kind == c.LLVMFloatTypeKind or param_kind == c.LLVMDoubleTypeKind)) {
const val_w = c.LLVMGetIntTypeWidth(val_ty);
if (val_w == 1) {
return c.LLVMBuildUIToFP(self.builder, val, param_ty, "ca.uitofp");
}
// Default to SIToFP since most sx integers are signed (s64).
// Explicit unsigned conversions go through the IR widen/narrow path.
return c.LLVMBuildSIToFP(self.builder, val, param_ty, "ca.sitofp");
}
// Float → Int
@@ -2592,13 +2677,22 @@ pub const LLVMEmitter = struct {
// - HFA (homogeneous float aggregate) → leave as-is (LLVM handles it)
fn abiCoerceParamType(self: *LLVMEmitter, ir_ty: TypeId, llvm_ty: c.LLVMTypeRef) c.LLVMTypeRef {
// String/slice → raw pointer
// String/slice → raw pointer (universal across all targets for foreign calls)
if (ir_ty == .string) return self.cached_ptr;
if (!ir_ty.isBuiltin()) {
const info = self.ir_mod.types.get(ir_ty);
if (info == .slice) return self.cached_ptr;
}
// WASM32: i64 → i32 for C ABI (size_t/ssize_t are 32-bit on wasm32)
if (self.target_config.isWasm()) {
if (c.LLVMGetTypeKind(llvm_ty) == c.LLVMIntegerTypeKind and c.LLVMGetIntTypeWidth(llvm_ty) == 64) {
// s64/u64 in extern decls → i32 on wasm32 (matches C's size_t, ssize_t, etc.)
return self.cached_i32;
}
return llvm_ty;
}
// Only coerce struct types
if (c.LLVMGetTypeKind(llvm_ty) != c.LLVMStructTypeKind) return llvm_ty;
@@ -2947,6 +3041,11 @@ pub const LLVMEmitter = struct {
if (ref.isNone()) return false;
const func = &self.ir_mod.functions.items[self.current_func_idx];
const ref_idx = ref.index();
// Check function parameters first (refs 0..N-1)
if (ref_idx < func.params.len) {
const ty = func.params[ref_idx].ty;
return ty == .u8 or ty == .u16 or ty == .u32 or ty == .u64;
}
for (func.blocks.items) |*block| {
const first = block.first_ref;
if (ref_idx >= first and ref_idx < first + @as(u32, @intCast(block.insts.items.len))) {