ir done'ish

This commit is contained in:
agra
2026-03-01 22:38:41 +02:00
parent 6a920dbd2c
commit f763765ea2
17 changed files with 1443 additions and 15017 deletions

View File

@@ -2,8 +2,8 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const llvm = @import("../llvm_api.zig");
const c = llvm.c;
const codegen = @import("../codegen.zig");
const TargetConfig = codegen.TargetConfig;
const target_mod = @import("../target.zig");
const TargetConfig = target_mod.TargetConfig;
const ir_types = @import("types.zig");
const TypeId = ir_types.TypeId;
const TypeInfo = ir_types.TypeInfo;
@@ -188,6 +188,29 @@ pub const LLVMEmitter = struct {
if (func.is_extern or func.blocks.items.len == 0) continue;
self.emitFunction(&func, @intCast(i));
}
// Pass 3: Verify typeSizeBytes matches LLVM's ABI sizes
self.verifySizes();
}
/// Cross-check the IR's `typeSizeBytes` against `LLVMABISizeOfType` for every
/// user-defined aggregate type (struct, union, tagged union, tuple).
///
/// Non-aggregate types are skipped. If the module has no data layout the check
/// is skipped entirely. On the first mismatch this panics with the offending
/// type index and both sizes, so the failure identifies which type disagrees
/// instead of surfacing as an anonymous assertion failure.
fn verifySizes(self: *LLVMEmitter) void {
    const dl = c.LLVMGetModuleDataLayout(self.llvm_module);
    if (dl == null) return;
    const type_count = self.ir_mod.types.infos.items.len;
    for (TypeId.first_user..type_count) |idx| {
        const ty = TypeId.fromIndex(@intCast(idx));
        const info = self.ir_mod.types.get(ty);
        // Only verify aggregate types where sizing is non-trivial
        switch (info) {
            .@"struct", .@"union", .tagged_union, .tuple => {},
            else => continue,
        }
        const llvm_ty = self.toLLVMType(ty);
        const llvm_size = c.LLVMABISizeOfType(dl, llvm_ty);
        const ir_size = self.ir_mod.types.typeSizeBytes(ty);
        if (llvm_size != ir_size) {
            // Report which type disagrees; a bare assert would hide this and
            // would not be checked at all in ReleaseFast builds.
            std.debug.panic(
                "IR/LLVM size mismatch for type #{d}: typeSizeBytes={d}, LLVMABISizeOfType={d}",
                .{ idx, ir_size, llvm_size },
            );
        }
    }
}
/// Run comptime side-effect functions (e.g., `#run main();` at top level).
@@ -241,6 +264,7 @@ pub const LLVMEmitter = struct {
.int => |v| c.LLVMConstInt(llvm_ty, @bitCast(v), 1),
.float => |v| c.LLVMConstReal(llvm_ty, v),
.boolean => |v| c.LLVMConstInt(llvm_ty, @intFromBool(v), 0),
.string => |sid| self.emitConstStringGlobal(self.ir_mod.types.getString(sid)),
else => c.LLVMConstNull(llvm_ty),
};
c.LLVMSetInitializer(llvm_global, init_val);
@@ -269,14 +293,16 @@ pub const LLVMEmitter = struct {
const is_main = std.mem.eql(u8, name, "main");
// main always returns i32 at the LLVM level (JIT expects it)
const ret_ty = if (is_main) self.cached_i32 else self.toLLVMType(func.ret);
const raw_ret_ty = self.toLLVMType(func.ret);
const ret_ty = if (is_main) self.cached_i32 else if (func.is_extern) self.abiCoerceParamType(func.ret, raw_ret_ty) else raw_ret_ty;
// Build parameter types
// Build parameter types — apply C ABI coercion for foreign (extern) functions
const param_count: c_uint = @intCast(func.params.len);
const param_types = self.alloc.alloc(c.LLVMTypeRef, func.params.len) catch unreachable;
defer self.alloc.free(param_types);
for (func.params, 0..) |param, j| {
param_types[j] = self.toLLVMType(param.ty);
const llvm_ty = self.toLLVMType(param.ty);
param_types[j] = if (func.is_extern) self.abiCoerceParamType(param.ty, llvm_ty) else llvm_ty;
}
const fn_type = c.LLVMFunctionType(ret_ty, param_types.ptr, param_count, 0);
@@ -356,7 +382,8 @@ pub const LLVMEmitter = struct {
// (blocks may not be in emission order due to nested control flow)
self.ref_counter = block.first_ref;
for (block.insts.items) |instruction| {
for (block.insts.items, 0..) |instruction, inst_i| {
_ = inst_i;
self.emitInst(&instruction, func_idx);
}
}
@@ -528,18 +555,21 @@ pub const LLVMEmitter = struct {
// ── Bitwise ────────────────────────────────────────────
.bit_and => |bin| {
const lhs = self.resolveRef(bin.lhs);
const rhs = self.resolveRef(bin.rhs);
var lhs = self.resolveRef(bin.lhs);
var rhs = self.resolveRef(bin.rhs);
self.matchBinOpTypes(&lhs, &rhs, instruction.ty);
self.mapRef(c.LLVMBuildAnd(self.builder, lhs, rhs, "and"));
},
.bit_or => |bin| {
const lhs = self.resolveRef(bin.lhs);
const rhs = self.resolveRef(bin.rhs);
var lhs = self.resolveRef(bin.lhs);
var rhs = self.resolveRef(bin.rhs);
self.matchBinOpTypes(&lhs, &rhs, instruction.ty);
self.mapRef(c.LLVMBuildOr(self.builder, lhs, rhs, "or"));
},
.bit_xor => |bin| {
const lhs = self.resolveRef(bin.lhs);
const rhs = self.resolveRef(bin.rhs);
var lhs = self.resolveRef(bin.lhs);
var rhs = self.resolveRef(bin.rhs);
self.matchBinOpTypes(&lhs, &rhs, instruction.ty);
self.mapRef(c.LLVMBuildXor(self.builder, lhs, rhs, "xor"));
},
.bit_not => |un| {
@@ -547,13 +577,15 @@ pub const LLVMEmitter = struct {
self.mapRef(c.LLVMBuildNot(self.builder, operand, "not"));
},
.shl => |bin| {
const lhs = self.resolveRef(bin.lhs);
const rhs = self.resolveRef(bin.rhs);
var lhs = self.resolveRef(bin.lhs);
var rhs = self.resolveRef(bin.rhs);
self.matchBinOpTypes(&lhs, &rhs, instruction.ty);
self.mapRef(c.LLVMBuildShl(self.builder, lhs, rhs, "shl"));
},
.shr => |bin| {
const lhs = self.resolveRef(bin.lhs);
const rhs = self.resolveRef(bin.rhs);
var lhs = self.resolveRef(bin.lhs);
var rhs = self.resolveRef(bin.rhs);
self.matchBinOpTypes(&lhs, &rhs, instruction.ty);
// Use arithmetic shift right for signed, logical for unsigned
const result = if (isSignedType(instruction.ty))
c.LLVMBuildAShr(self.builder, lhs, rhs, "ashr")
@@ -746,6 +778,15 @@ pub const LLVMEmitter = struct {
if (result.asInt()) |v| {
self.mapRef(c.LLVMConstInt(self.toLLVMType(instruction.ty), @bitCast(v), 0));
return;
} else if (result.asFloat()) |v| {
self.mapRef(c.LLVMConstReal(self.toLLVMType(instruction.ty), v));
return;
} else if (result.asBool()) |v| {
self.mapRef(c.LLVMConstInt(self.toLLVMType(instruction.ty), @intFromBool(v), 0));
return;
} else if (result == .string) {
self.mapRef(self.emitStringConstant(result.string));
return;
}
} else |_| {}
}
@@ -770,7 +811,12 @@ pub const LLVMEmitter = struct {
args[j] = self.coerceArg(args[j], param_types[j]);
}
}
const result = c.LLVMBuildCall2(self.builder, fn_ty, callee, args.ptr, arg_count, if (instruction.ty == .void) "" else "call");
var result = c.LLVMBuildCall2(self.builder, fn_ty, callee, args.ptr, arg_count, if (instruction.ty == .void) "" else "call");
// Coerce ABI return value (e.g. i64) back to IR struct type if needed
if (instruction.ty != .void and callee_func.is_extern) {
const expected_ty = self.toLLVMType(instruction.ty);
result = self.coerceArg(result, expected_ty);
}
self.mapRef(result);
},
.call_indirect => |call_op| {
@@ -813,7 +859,11 @@ pub const LLVMEmitter = struct {
if (fn_params) |fp| {
for (0..call_op.args.len) |j| {
if (j < fp.len) {
const llvm_pty = self.toLLVMType(fp[j]);
var llvm_pty = self.toLLVMType(fp[j]);
// Array params in fn-ptr calls decay to pointers (C ABI)
if (c.LLVMGetTypeKind(llvm_pty) == c.LLVMArrayTypeKind) {
llvm_pty = self.cached_ptr;
}
param_tys[j] = llvm_pty;
args[j] = self.coerceArg(args[j], llvm_pty);
} else {
@@ -983,7 +1033,10 @@ pub const LLVMEmitter = struct {
// Safety: verify base is a pointer before GEP
const base_ty_kind = c.LLVMGetTypeKind(c.LLVMTypeOf(base_ptr));
if (base_ty_kind == c.LLVMPointerTypeKind) {
const struct_llvm_ty = self.resolveGepStructType(fa.base, instruction);
const struct_llvm_ty = if (fa.base_type) |bt|
self.toLLVMType(self.resolveAggregate(bt))
else
self.resolveGepStructType(fa.base, instruction);
const st_kind = c.LLVMGetTypeKind(struct_llvm_ty);
if (st_kind == c.LLVMStructTypeKind or st_kind == c.LLVMArrayTypeKind) {
const result = c.LLVMBuildStructGEP2(self.builder, struct_llvm_ty, base_ptr, @intCast(fa.field_index), "gep");
@@ -1006,8 +1059,9 @@ pub const LLVMEmitter = struct {
// Plain enum or builtin integer → integer constant
self.mapRef(c.LLVMConstInt(ty, ei.tag, 0));
} else if (ty_kind == c.LLVMStructTypeKind) {
// Tagged union with no payload — store tag into union struct
const tag_val = c.LLVMConstInt(self.cached_i64, ei.tag, 0);
// Tagged union with no payload — header field 0 holds the tag
const header_ty = c.LLVMStructGetTypeAtIndex(ty, 0);
const tag_val = c.LLVMConstInt(header_ty, ei.tag, 0);
var result = c.LLVMGetUndef(ty);
result = c.LLVMBuildInsertValue(self.builder, result, tag_val, 0, "ei.tag");
self.mapRef(result);
@@ -1015,9 +1069,10 @@ pub const LLVMEmitter = struct {
self.mapRef(c.LLVMConstInt(self.cached_i64, ei.tag, 0));
}
} else {
// Tagged union with payload — { tag, payload_bytes }
// Tagged union with payload — { header, payload_bytes }
const union_ty = self.toLLVMType(instruction.ty);
const tag_val = c.LLVMConstInt(self.cached_i64, ei.tag, 0);
const header_ty = c.LLVMStructGetTypeAtIndex(union_ty, 0);
const tag_val = c.LLVMConstInt(header_ty, ei.tag, 0);
const payload_val = self.resolveRef(ei.payload);
// alloca union, store tag, bitcast payload area, store payload
@@ -1040,7 +1095,17 @@ pub const LLVMEmitter = struct {
const kind = c.LLVMGetTypeKind(val_ty);
if (kind == c.LLVMStructTypeKind) {
// Tagged union — extract field 0 (tag)
self.mapRef(c.LLVMBuildExtractValue(self.builder, val, 0, "etag"));
var tag = c.LLVMBuildExtractValue(self.builder, val, 0, "etag");
// Truncate to declared tag width if needed (e.g. i64 → i32 for u32 tags)
// This is essential for FFI unions where the i64 tag slot contains
// a smaller tag + uninitialized padding (e.g. SDL_Event's u32 type + u32 reserved)
const target_ty = self.toLLVMType(instruction.ty);
const extracted_bits = c.LLVMGetIntTypeWidth(c.LLVMTypeOf(tag));
const target_bits = c.LLVMGetIntTypeWidth(target_ty);
if (target_bits < extracted_bits) {
tag = c.LLVMBuildTrunc(self.builder, tag, target_ty, "etag.trunc");
}
self.mapRef(tag);
} else {
// Plain enum — the value IS the tag
self.mapRef(val);
@@ -1071,28 +1136,34 @@ pub const LLVMEmitter = struct {
const base_ty = c.LLVMTypeOf(base);
const kind = c.LLVMGetTypeKind(base_ty);
if (kind == c.LLVMStructTypeKind) {
// { tag, payload_bytes } — extract payload then bitcast
// Tagged union { header, payload_bytes } — access payload at field 1
const tmp = c.LLVMBuildAlloca(self.builder, base_ty, "ug.tmp");
_ = c.LLVMBuildStore(self.builder, base, tmp);
const payload_ptr = c.LLVMBuildStructGEP2(self.builder, base_ty, tmp, 1, "ug.pp");
const typed_ptr = c.LLVMBuildBitCast(self.builder, payload_ptr, self.cached_ptr, "ug.cast");
self.mapRef(c.LLVMBuildLoad2(self.builder, result_ty, typed_ptr, "ug.val"));
self.mapRef(c.LLVMBuildLoad2(self.builder, result_ty, payload_ptr, "ug.val"));
} else {
// Plain reinterpret
self.mapRef(c.LLVMBuildBitCast(self.builder, base, result_ty, "ug.cast"));
// Untagged union [N x i8] — alloca, store, reinterpret-load
const tmp = c.LLVMBuildAlloca(self.builder, base_ty, "ug.tmp");
_ = c.LLVMBuildStore(self.builder, base, tmp);
self.mapRef(c.LLVMBuildLoad2(self.builder, result_ty, tmp, "ug.val"));
}
},
.union_gep => |fa| {
const base_ptr = self.resolveRef(fa.base);
const base_ty_kind = c.LLVMGetTypeKind(c.LLVMTypeOf(base_ptr));
if (base_ty_kind == c.LLVMPointerTypeKind) {
const union_llvm_ty = self.resolveGepStructType(fa.base, instruction);
const union_llvm_ty = if (fa.base_type) |bt|
self.toLLVMType(self.resolveAggregate(bt))
else
self.resolveGepStructType(fa.base, instruction);
const st_kind = c.LLVMGetTypeKind(union_llvm_ty);
if (st_kind == c.LLVMStructTypeKind) {
// Tagged union — payload is at field 1
const payload_ptr = c.LLVMBuildStructGEP2(self.builder, union_llvm_ty, base_ptr, 1, "ugep.pp");
self.mapRef(c.LLVMBuildBitCast(self.builder, payload_ptr, self.cached_ptr, "ugep.cast"));
self.mapRef(payload_ptr);
} else {
self.mapRef(c.LLVMGetUndef(self.cached_ptr));
// Untagged union — data starts at offset 0
self.mapRef(base_ptr);
}
} else {
self.mapRef(c.LLVMGetUndef(self.cached_ptr));
@@ -1317,21 +1388,19 @@ pub const LLVMEmitter = struct {
_ = c.LLVMBuildCall2(self.builder, self.getMemsetType(), memset_fn, &args, 3, "");
self.advanceRefCounter();
},
.sqrt => {
.sqrt, .sin, .cos, .floor => {
const val = self.resolveRef(bi.args[0]);
const val_ty = c.LLVMTypeOf(val);
const val_kind = c.LLVMGetTypeKind(val_ty);
if (val_kind == c.LLVMFloatTypeKind) {
// f32 → sqrtf
const sqrtf_fn = self.getOrDeclareSqrtf();
const f = self.getOrDeclareMathF32(bi.builtin);
var args = [_]c.LLVMValueRef{val};
self.mapRef(c.LLVMBuildCall2(self.builder, self.getSqrtfType(), sqrtf_fn, &args, 1, "sqrtf"));
self.mapRef(c.LLVMBuildCall2(self.builder, self.getMathF32Type(), f, &args, 1, @tagName(bi.builtin)));
} else {
// f64 → sqrt (default)
const coerced = if (val_kind != c.LLVMDoubleTypeKind) self.coerceArg(val, self.cached_f64) else val;
const sqrt_fn = self.getOrDeclareSqrt();
const f = self.getOrDeclareMathF64(bi.builtin);
var args = [_]c.LLVMValueRef{coerced};
self.mapRef(c.LLVMBuildCall2(self.builder, self.getSqrtType(), sqrt_fn, &args, 1, "sqrt"));
self.mapRef(c.LLVMBuildCall2(self.builder, self.getMathF64Type(), f, &args, 1, @tagName(bi.builtin)));
}
},
.out => {
@@ -1563,6 +1632,7 @@ pub const LLVMEmitter = struct {
const field_count: u32 = switch (field_info) {
.@"struct" => |s| @intCast(s.fields.len),
.@"union" => |u| @intCast(u.fields.len),
.tagged_union => |u| @intCast(u.fields.len),
.@"enum" => |e| @intCast(e.variants.len),
else => 0,
};
@@ -1787,6 +1857,15 @@ pub const LLVMEmitter = struct {
return self.cached_i64;
}
/// Return the type an aggregate access should operate on.
///
/// Builtin types are returned unchanged. For a non-builtin pointer type the
/// pointee is returned (exactly one level of indirection is stripped); any
/// other type is returned as-is.
fn resolveAggregate(self: *LLVMEmitter, ty: TypeId) TypeId {
    if (ty.isBuiltin()) return ty;
    return switch (self.ir_mod.types.get(ty)) {
        .pointer => |ptr_info| ptr_info.pointee,
        else => ty,
    };
}
// ── Comparison helpers ────────────────────────────────────────────
fn emitCmp(self: *LLVMEmitter, bin: ir_inst.BinOp, _: TypeId, int_pred: c_uint, float_pred: c_uint) void {
@@ -1813,19 +1892,27 @@ pub const LLVMEmitter = struct {
}
}
// Struct types (strings, slices): compare fields individually
// Struct types (strings, slices, tagged unions): compare fields individually
if (kind == c.LLVMStructTypeKind and rhs_kind == c.LLVMStructTypeKind) {
const n_fields = c.LLVMCountStructElementTypes(lhs_ty);
if (n_fields >= 2) {
// For {ptr, i64} structs (string/slice): compare ptr and len
// eq: (f0_l == f0_r) && (f1_l == f1_r)
// ne: (f0_l != f0_r) || (f1_l != f1_r)
const is_eq = (int_pred == c.LLVMIntEQ);
const f0_l = c.LLVMBuildExtractValue(self.builder, lhs, 0, "sc.l0");
const f0_r = c.LLVMBuildExtractValue(self.builder, rhs, 0, "sc.r0");
const cmp0 = c.LLVMBuildICmp(self.builder, @intCast(int_pred), f0_l, f0_r, "sc.c0");
// Check if field 1 is an array (tagged union payload) — skip comparison
// For tagged unions {tag, [N x i8]}, the tag comparison alone is sufficient
const f1_ty = c.LLVMStructGetTypeAtIndex(lhs_ty, 1);
const f1_kind = c.LLVMGetTypeKind(f1_ty);
if (f1_kind == c.LLVMArrayTypeKind) {
// Tagged union: compare tag only
self.mapRef(cmp0);
return;
}
const f1_l = c.LLVMBuildExtractValue(self.builder, lhs, 1, "sc.l1");
const f1_r = c.LLVMBuildExtractValue(self.builder, rhs, 1, "sc.r1");
const cmp0 = c.LLVMBuildICmp(self.builder, @intCast(int_pred), f0_l, f0_r, "sc.c0");
const cmp1 = c.LLVMBuildICmp(self.builder, @intCast(int_pred), f1_l, f1_r, "sc.c1");
const result = if (is_eq)
c.LLVMBuildAnd(self.builder, cmp0, cmp1, "sc.and")
@@ -1862,6 +1949,8 @@ pub const LLVMEmitter = struct {
var rhs = self.resolveRef(bin.rhs);
const lhs_ty = c.LLVMTypeOf(lhs);
const kind = c.LLVMGetTypeKind(lhs_ty);
// Determine signedness from IR operand type
const is_unsigned = self.isRefUnsigned(bin.lhs) or self.isRefUnsigned(bin.rhs);
// Coerce operands to same type if needed
if (kind == c.LLVMIntegerTypeKind) {
const rhs_ty = c.LLVMTypeOf(rhs);
@@ -1869,16 +1958,21 @@ pub const LLVMEmitter = struct {
if (rhs_kind == c.LLVMIntegerTypeKind) {
const lw = c.LLVMGetIntTypeWidth(lhs_ty);
const rw = c.LLVMGetIntTypeWidth(rhs_ty);
if (lw < rw) lhs = c.LLVMBuildSExt(self.builder, lhs, rhs_ty, "cmp.ext")
else if (rw < lw) rhs = c.LLVMBuildSExt(self.builder, rhs, lhs_ty, "cmp.ext");
if (is_unsigned) {
if (lw < rw) lhs = c.LLVMBuildZExt(self.builder, lhs, rhs_ty, "cmp.ext")
else if (rw < lw) rhs = c.LLVMBuildZExt(self.builder, rhs, lhs_ty, "cmp.ext");
} else {
if (lw < rw) lhs = c.LLVMBuildSExt(self.builder, lhs, rhs_ty, "cmp.ext")
else if (rw < lw) rhs = c.LLVMBuildSExt(self.builder, rhs, lhs_ty, "cmp.ext");
}
}
}
const result = if (kind == c.LLVMFloatTypeKind or kind == c.LLVMDoubleTypeKind)
c.LLVMBuildFCmp(self.builder, @intCast(float_pred), lhs, rhs, "fcmp")
else if (is_unsigned)
c.LLVMBuildICmp(self.builder, @intCast(unsigned_pred), lhs, rhs, "icmp")
else
// Default to signed comparison (most common in sx)
c.LLVMBuildICmp(self.builder, @intCast(signed_pred), lhs, rhs, "icmp");
_ = unsigned_pred;
self.mapRef(result);
}
@@ -2030,24 +2124,36 @@ pub const LLVMEmitter = struct {
return c.LLVMFunctionType(self.cached_ptr, &param_types, 3, 0);
}
fn getOrDeclareSqrt(self: *LLVMEmitter) c.LLVMValueRef {
if (c.LLVMGetNamedFunction(self.llvm_module, "sqrt")) |f| return f;
return c.LLVMAddFunction(self.llvm_module, "sqrt", self.getSqrtType());
/// Look up (or declare on first use) the f64 math function that backs the
/// given builtin: `sqrt`, `sin`, `cos` or `floor`. Any other builtin id is a
/// caller error and hits `unreachable`.
fn getOrDeclareMathF64(self: *LLVMEmitter, id: ir_inst.BuiltinId) c.LLVMValueRef {
    const symbol: [*:0]const u8 = switch (id) {
        .sqrt => "sqrt",
        .sin => "sin",
        .cos => "cos",
        .floor => "floor",
        else => unreachable,
    };
    // Reuse an existing declaration in the module; otherwise add one.
    return c.LLVMGetNamedFunction(self.llvm_module, symbol) orelse
        c.LLVMAddFunction(self.llvm_module, symbol, self.getMathF64Type());
}
fn getSqrtType(self: *LLVMEmitter) c.LLVMTypeRef {
// sqrt(f64) → f64
/// LLVM function type shared by the unary f64 math functions: (f64) -> f64.
fn getMathF64Type(self: *LLVMEmitter) c.LLVMTypeRef {
    const f64_ty = self.cached_f64;
    var params = [_]c.LLVMTypeRef{f64_ty};
    return c.LLVMFunctionType(f64_ty, &params, params.len, 0);
}
fn getOrDeclareSqrtf(self: *LLVMEmitter) c.LLVMValueRef {
if (c.LLVMGetNamedFunction(self.llvm_module, "sqrtf")) |f| return f;
return c.LLVMAddFunction(self.llvm_module, "sqrtf", self.getSqrtfType());
/// Look up (or declare on first use) the f32 math function that backs the
/// given builtin: `sqrtf`, `sinf`, `cosf` or `floorf`. Any other builtin id is
/// a caller error and hits `unreachable`.
fn getOrDeclareMathF32(self: *LLVMEmitter, id: ir_inst.BuiltinId) c.LLVMValueRef {
    const symbol: [*:0]const u8 = switch (id) {
        .sqrt => "sqrtf",
        .sin => "sinf",
        .cos => "cosf",
        .floor => "floorf",
        else => unreachable,
    };
    // Reuse an existing declaration in the module; otherwise add one.
    return c.LLVMGetNamedFunction(self.llvm_module, symbol) orelse
        c.LLVMAddFunction(self.llvm_module, symbol, self.getMathF32Type());
}
fn getSqrtfType(self: *LLVMEmitter) c.LLVMTypeRef {
// sqrtf(f32) → f32
/// LLVM function type shared by the unary f32 math functions: (f32) -> f32.
fn getMathF32Type(self: *LLVMEmitter) c.LLVMTypeRef {
    const f32_ty = self.cached_f32;
    var params = [_]c.LLVMTypeRef{f32_ty};
    return c.LLVMFunctionType(f32_ty, &params, params.len, 0);
}
@@ -2279,6 +2385,31 @@ pub const LLVMEmitter = struct {
}
}
}
// Struct → Integer (C ABI coercion: store struct to memory, load as integer)
if (val_kind == c.LLVMStructTypeKind and param_kind == c.LLVMIntegerTypeKind) {
const tmp = c.LLVMBuildAlloca(self.builder, param_ty, "abi.tmp");
_ = c.LLVMBuildStore(self.builder, c.LLVMConstNull(param_ty), tmp);
_ = c.LLVMBuildStore(self.builder, val, tmp);
return c.LLVMBuildLoad2(self.builder, param_ty, tmp, "abi.coerce");
}
// Integer → Struct (C ABI return coercion: store integer to memory, load as struct)
if (val_kind == c.LLVMIntegerTypeKind and param_kind == c.LLVMStructTypeKind) {
const tmp = c.LLVMBuildAlloca(self.builder, val_ty, "abi.ret.tmp");
_ = c.LLVMBuildStore(self.builder, val, tmp);
return c.LLVMBuildLoad2(self.builder, param_ty, tmp, "abi.ret.coerce");
}
// Array → Ptr (array decay: alloca + GEP to first element)
if (val_kind == c.LLVMArrayTypeKind and param_kind == c.LLVMPointerTypeKind) {
const tmp = c.LLVMBuildAlloca(self.builder, val_ty, "ca.arr");
_ = c.LLVMBuildStore(self.builder, val, tmp);
const zero = c.LLVMConstInt(self.cached_i64, 0, 0);
var indices = [_]c.LLVMValueRef{ zero, zero };
return c.LLVMBuildGEP2(self.builder, val_ty, tmp, &indices, 2, "ca.decay");
}
// Int → Ptr (null literal: inttoptr)
if (val_kind == c.LLVMIntegerTypeKind and param_kind == c.LLVMPointerTypeKind) {
return c.LLVMBuildIntToPtr(self.builder, val, param_ty, "ca.itp");
}
return val;
}
@@ -2390,19 +2521,48 @@ pub const LLVMEmitter = struct {
}
return c.LLVMStructTypeInContext(self.context, field_llvm_types.ptr, n, 0);
},
.@"enum" => self.cached_i64, // enums are i64 by default
.@"enum" => |e| {
// Use backing type if declared (e.g. enum u32 → i32), else i64
if (e.backing_type) |bt| return self.toLLVMType(bt);
return self.cached_i64;
},
.@"union" => |u| {
// Union: tag (i64) + largest-field payload
// For simplicity, use { i64, [N x i8] } where N = max field size
var max_size: u32 = 0;
// Untagged union — just [N x i8]
var max_size: usize = 0;
for (u.fields) |field| {
const sz = self.ir_mod.types.sizeOf(field.ty);
const sz = self.ir_mod.types.typeSizeBytes(field.ty);
if (sz > max_size) max_size = sz;
}
if (max_size == 0) max_size = 8;
return c.LLVMArrayType2(self.cached_i8, @intCast(max_size));
},
.tagged_union => |u| {
// Tagged union — { header, [N x i8] }
var max_size: usize = 0;
for (u.fields) |field| {
const sz = self.ir_mod.types.typeSizeBytes(field.ty);
if (sz > max_size) max_size = sz;
}
if (max_size == 0) max_size = 8;
var header_size: usize = self.ir_mod.types.typeSizeBytes(u.tag_type);
if (u.backing_type) |bt| {
const bi = self.ir_mod.types.get(bt);
if (bi == .@"struct" and bi.@"struct".fields.len > 1) {
header_size = 0;
const fields = bi.@"struct".fields;
for (fields[0 .. fields.len - 1]) |f| {
header_size += self.ir_mod.types.typeSizeBytes(f.ty);
}
const backing_payload = self.ir_mod.types.typeSizeBytes(fields[fields.len - 1].ty);
if (backing_payload > max_size) max_size = backing_payload;
}
}
const header_llvm = c.LLVMIntTypeInContext(self.context, @intCast(header_size * 8));
var field_types: [2]c.LLVMTypeRef = .{
self.cached_i64, // tag
c.LLVMArrayType2(self.cached_i8, max_size), // payload
header_llvm,
c.LLVMArrayType2(self.cached_i8, @intCast(max_size)),
};
return c.LLVMStructTypeInContext(self.context, &field_types, 2, 0);
},
@@ -2423,6 +2583,56 @@ pub const LLVMEmitter = struct {
};
}
// ── C ABI coercion for foreign functions ──────────────────────────

/// Pick the LLVM type used for a parameter or return value in the signature
/// of a foreign (extern) function.
///
/// Rules, applied in order:
///   - `string` and slice types      → raw pointer
///   - non-struct LLVM types         → unchanged
///   - struct of 1–4 fields that are
///     all `float` or all `double`   → unchanged (homogeneous float aggregate)
///   - struct with ABI size ≤ 8      → i64
///   - struct with ABI size ≤ 16     → [2 x i64]
///   - anything larger               → unchanged (indirect passing is not
///                                     handled here)
fn abiCoerceParamType(self: *LLVMEmitter, ir_ty: TypeId, llvm_ty: c.LLVMTypeRef) c.LLVMTypeRef {
    // Strings and slices cross the boundary as a bare pointer.
    if (ir_ty == .string) return self.cached_ptr;
    if (!ir_ty.isBuiltin() and self.ir_mod.types.get(ir_ty) == .slice) return self.cached_ptr;

    // Everything below applies to struct types only.
    if (c.LLVMGetTypeKind(llvm_ty) != c.LLVMStructTypeKind) return llvm_ty;

    // Homogeneous float aggregate: every field has the same kind, and that
    // kind is float or double. Such structs are left untouched.
    const field_count = c.LLVMCountStructElementTypes(llvm_ty);
    if (field_count >= 1 and field_count <= 4) {
        const lead_kind = c.LLVMGetTypeKind(c.LLVMStructGetTypeAtIndex(llvm_ty, 0));
        if (lead_kind == c.LLVMFloatTypeKind or lead_kind == c.LLVMDoubleTypeKind) {
            var homogeneous = true;
            for (1..field_count) |i| {
                const field_ty = c.LLVMStructGetTypeAtIndex(llvm_ty, @intCast(i));
                if (c.LLVMGetTypeKind(field_ty) != lead_kind) {
                    homogeneous = false;
                    break;
                }
            }
            if (homogeneous) return llvm_ty;
        }
    }

    // Remaining structs are classified purely by their ABI size.
    const data_layout = c.LLVMGetModuleDataLayout(self.llvm_module);
    const abi_size = c.LLVMABISizeOfType(data_layout, llvm_ty);
    if (abi_size <= 8) return self.cached_i64;
    if (abi_size <= 16) return c.LLVMArrayType2(self.cached_i64, 2);

    // Larger than 16 bytes: left as-is.
    return llvm_ty;
}
// ── Cached composite types ──────────────────────────────────────
fn getStringStructType(self: *LLVMEmitter) c.LLVMTypeRef {
@@ -2457,6 +2667,25 @@ pub const LLVMEmitter = struct {
// ── String constant emission ────────────────────────────────────
/// Build a constant string `{ ptr, i64 }` value without using the builder
/// (safe to call during global initialization, before any function body is emitted).
///
/// The bytes of `str` are placed in a private, constant, unnamed-addr global
/// with a trailing NUL appended by LLVM. The returned aggregate holds a
/// pointer to that global and the length of `str` (excluding the NUL).
///
/// `str` is handed to LLVM by pointer + length, so no temporary
/// NUL-terminated copy is allocated.
fn emitConstStringGlobal(self: *LLVMEmitter, str: []const u8) c.LLVMValueRef {
    // DontNullTerminate = 0: LLVM appends the terminator, giving str.len + 1 bytes.
    const str_const = c.LLVMConstStringInContext(self.context, str.ptr, @intCast(str.len), 0);
    const arr_ty = c.LLVMArrayType2(self.cached_i8, str.len + 1);
    const str_global_val = c.LLVMAddGlobal(self.llvm_module, arr_ty, "str.data");
    c.LLVMSetInitializer(str_global_val, str_const);
    c.LLVMSetGlobalConstant(str_global_val, 1);
    c.LLVMSetLinkage(str_global_val, c.LLVMPrivateLinkage);
    c.LLVMSetUnnamedAddress(str_global_val, c.LLVMGlobalUnnamedAddr);
    // Build constant { ptr, i64 } aggregate
    const len_val = c.LLVMConstInt(self.cached_i64, str.len, 0);
    var fields = [_]c.LLVMValueRef{ str_global_val, len_val };
    return c.LLVMConstStructInContext(self.context, &fields, 2, 0);
}
fn emitStringConstant(self: *LLVMEmitter, str: []const u8) c.LLVMValueRef {
// LLVMBuildGlobalStringPtr needs a null-terminated C string
const str_z = self.alloc.dupeZ(u8, str) catch unreachable;
@@ -2490,6 +2719,9 @@ pub const LLVMEmitter = struct {
.@"union" => |u| {
for (u.fields) |f| name_ids.append(self.alloc, f.name) catch unreachable;
},
.tagged_union => |u| {
for (u.fields) |f| name_ids.append(self.alloc, f.name) catch unreachable;
},
.@"enum" => |e| {
for (e.variants) |v| name_ids.append(self.alloc, v) catch unreachable;
},
@@ -2538,6 +2770,7 @@ pub const LLVMEmitter = struct {
const fields = switch (info) {
.@"struct" => |s| s.fields,
.@"union" => |u| u.fields,
.tagged_union => |u| u.fields,
else => &[_]TypeInfo.StructInfo.Field{},
};
@@ -2572,7 +2805,7 @@ pub const LLVMEmitter = struct {
var case_values = std.ArrayList(c.LLVMValueRef).empty;
defer case_values.deinit(self.alloc);
const is_union = info == .@"union";
const is_union = info == .@"union" or info == .tagged_union;
for (fields, 0..) |field, i| {
const case_bb = c.LLVMAppendBasicBlockInContext(self.context, current_func, "fv.case");
c.LLVMAddCase(switch_inst, c.LLVMConstInt(self.cached_i64, @intCast(i), 0), case_bb);
@@ -2651,6 +2884,8 @@ pub const LLVMEmitter = struct {
if (c.LLVMVerifyModule(self.llvm_module, c.LLVMReturnStatusAction, &err_msg) != 0) {
if (err_msg != null) {
const msg = std.mem.span(err_msg);
// Dump IR to /tmp for debugging
_ = c.LLVMPrintModuleToFile(self.llvm_module, "/tmp/sx_debug.ll", null);
std.debug.print("LLVM verification failed: {s}\n", .{msg});
c.LLVMDisposeMessage(err_msg);
}
@@ -2707,6 +2942,20 @@ pub const LLVMEmitter = struct {
return error.EmitFailed;
}
}
/// Report whether the instruction that produced `ref` has result type
/// u8, u16, u32 or u64.
///
/// The instruction is found by scanning the blocks of the function at
/// `current_func_idx` for the one whose ref range contains `ref`. Returns
/// false for a none ref, or when no block contains it.
fn isRefUnsigned(self: *LLVMEmitter, ref: Ref) bool {
    if (ref.isNone()) return false;
    const func = &self.ir_mod.functions.items[self.current_func_idx];
    const ref_idx = ref.index();
    for (func.blocks.items) |*block| {
        // Each block owns the half-open ref range [range_start, range_end).
        const range_start = block.first_ref;
        const range_end = range_start + @as(u32, @intCast(block.insts.items.len));
        if (ref_idx < range_start or ref_idx >= range_end) continue;

        const result_ty = block.insts.items[ref_idx - range_start].ty;
        return result_ty == .u8 or result_ty == .u16 or result_ty == .u32 or result_ty == .u64;
    }
    return false;
}
};
// ── Type classification helpers ─────────────────────────────────────