This commit is contained in:
agra
2026-02-11 21:22:03 +02:00
parent 9d96f05d3b
commit 9a2501f662
5 changed files with 449 additions and 56 deletions

View File

@@ -12,6 +12,88 @@ const errors = @import("errors.zig");
const sema = @import("sema.zig");
const comptime_mod = @import("comptime.zig");
/// Describes the compilation target and how the final binary is linked.
/// Every field has a default, so `.{}` means "build for the host".
pub const TargetConfig = struct {
    /// Target triple (e.g. "aarch64-apple-darwin"). Null = host default.
    triple: ?[*:0]const u8 = null,
    /// CPU name (e.g. "generic", "apple-m1"). Null = "generic".
    cpu: ?[*:0]const u8 = null,
    /// CPU features string (e.g. "+avx2"). Null = "".
    features: ?[*:0]const u8 = null,
    /// Optimization level.
    opt_level: OptLevel = .default,
    /// Library search paths (-L flags).
    lib_paths: []const []const u8 = &.{},
    /// Output path override.
    output_path: ?[]const u8 = null,
    /// Linker command (null = "cc" on Unix, "link.exe" on Windows).
    linker: ?[]const u8 = null,
    /// Sysroot for cross-compilation (passed as --sysroot to linker).
    sysroot: ?[]const u8 = null,

    /// Optimization level, mirroring LLVM's code generation levels.
    pub const OptLevel = enum {
        none,
        less,
        default,
        aggressive,

        /// Map this level onto the corresponding `LLVMCodeGenOptLevel` value.
        pub fn toLLVM(self: OptLevel) c.LLVMCodeGenOptLevel {
            return switch (self) {
                .none => c.LLVMCodeGenLevelNone,
                .less => c.LLVMCodeGenLevelLess,
                .default => c.LLVMCodeGenLevelDefault,
                .aggressive => c.LLVMCodeGenLevelAggressive,
            };
        }
    };

    /// Check if target triple indicates aarch64/arm64 (runtime check, not comptime).
    pub fn isAarch64(self: TargetConfig) bool {
        return self.tripleHasPrefix("aarch64", "arm64");
    }

    /// Check if target triple indicates x86_64/x86-64.
    pub fn isX86_64(self: TargetConfig) bool {
        return self.tripleHasPrefix("x86_64", "x86-64");
    }

    /// Check if target triple indicates Windows (contains "windows" or "win32").
    pub fn isWindows(self: TargetConfig) bool {
        return self.tripleContains("windows") or self.tripleContains("win32");
    }

    /// True when the effective triple starts with either prefix.
    /// When `triple` is null, the LLVM default (host) triple is queried and
    /// released again before returning.
    fn tripleHasPrefix(self: TargetConfig, prefix1: []const u8, prefix2: []const u8) bool {
        if (self.triple) |t| {
            const span = std.mem.span(t);
            return std.mem.startsWith(u8, span, prefix1) or std.mem.startsWith(u8, span, prefix2);
        }
        const dt = c.LLVMGetDefaultTargetTriple();
        defer c.LLVMDisposeMessage(dt);
        const span = std.mem.span(dt);
        return std.mem.startsWith(u8, span, prefix1) or std.mem.startsWith(u8, span, prefix2);
    }

    /// True when the effective triple contains `needle` anywhere.
    /// When `triple` is null, the LLVM default (host) triple is queried and
    /// released again before returning.
    fn tripleContains(self: TargetConfig, needle: []const u8) bool {
        if (self.triple) |t| {
            return std.mem.indexOf(u8, std.mem.span(t), needle) != null;
        }
        const dt = c.LLVMGetDefaultTargetTriple();
        defer c.LLVMDisposeMessage(dt);
        return std.mem.indexOf(u8, std.mem.span(dt), needle) != null;
    }

    /// CPU name to hand to LLVM; "generic" when none was configured.
    pub fn getCpu(self: TargetConfig) [*:0]const u8 {
        return self.cpu orelse "generic";
    }

    /// CPU feature string to hand to LLVM; empty when none was configured.
    pub fn getFeatures(self: TargetConfig) [*:0]const u8 {
        return self.features orelse "";
    }

    /// Linker command to invoke. An explicit `linker` always wins; otherwise
    /// the default follows the target OS as documented on the `linker` field:
    /// "link.exe" for Windows targets, "cc" everywhere else.
    pub fn getLinker(self: TargetConfig) []const u8 {
        if (self.linker) |explicit| return explicit;
        return if (self.isWindows()) "link.exe" else "cc";
    }
};
pub const CodeGen = struct {
context: c.LLVMContextRef,
module: c.LLVMModuleRef,
@@ -95,6 +177,8 @@ pub const CodeGen = struct {
foreign_libraries: std.ArrayList([]const u8),
// Set of foreign function names (for ABI lowering at call sites)
foreign_fns: std.StringHashMap(void),
// Target configuration (triple, cpu, opt level, lib paths, linker)
target_config: TargetConfig = .{},
const DeferredFn = struct {
fd: ast.FnDecl,
@@ -167,10 +251,19 @@ pub const CodeGen = struct {
ty: Type, // sx type
};
pub fn init(allocator: std.mem.Allocator, module_name: [*:0]const u8) CodeGen {
pub fn init(allocator: std.mem.Allocator, module_name: [*:0]const u8, target_config: TargetConfig) CodeGen {
const ctx = c.LLVMContextCreate();
const module = c.LLVMModuleCreateWithNameInContext(module_name, ctx);
const builder = c.LLVMCreateBuilderInContext(ctx);
// Set target triple on module so it appears in IR output
if (target_config.triple) |t| {
c.LLVMSetTarget(module, t);
} else {
const default_triple = c.LLVMGetDefaultTargetTriple();
c.LLVMSetTarget(module, default_triple);
c.LLVMDisposeMessage(default_triple);
}
return .{
.context = ctx,
.module = module,
@@ -200,6 +293,7 @@ pub const CodeGen = struct {
.deferred_fn_bodies = std.ArrayList(DeferredFn).empty,
.foreign_libraries = std.ArrayList([]const u8).empty,
.foreign_fns = std.StringHashMap(void).init(allocator),
.target_config = target_config,
};
}
@@ -1143,6 +1237,8 @@ pub const CodeGen = struct {
const is_main = std.mem.eql(u8, name, "main");
const ret_llvm_type = if (is_main)
c.LLVMInt32TypeInContext(self.context)
else if (is_foreign and ret_sx_type.isStruct())
self.getForeignReturnABIType(ret_sx_type)
else
self.typeToLLVM(ret_sx_type);
@@ -1175,19 +1271,33 @@ pub const CodeGen = struct {
);
}
/// For foreign (C ABI) functions, struct parameters must be lowered to their
/// ABI-equivalent types. LLVM does NOT do this automatically on all targets.
/// Dispatches to architecture-specific lowering based on the *target* config
/// (a runtime check), never on the architecture the compiler itself was built
/// for, so cross-compilation picks the right rules.
///
/// Non-struct types, and struct types with no registered layout, are returned
/// via the regular `typeToLLVM` mapping.
fn getForeignParamABIType(self: *CodeGen, sx_ty: Type) c.LLVMTypeRef {
    if (!sx_ty.isStruct()) return self.typeToLLVM(sx_ty);

    // Resolve a possible alias to the underlying struct name before the lookup.
    const sname = self.type_aliases.get(sx_ty.struct_type) orelse sx_ty.struct_type;
    const info = self.struct_types.get(sname) orelse return self.typeToLLVM(sx_ty);

    const cfg = self.target_config;
    if (cfg.isAarch64()) return self.aarch64ParamABI(info);
    if (cfg.isX86_64()) {
        return if (cfg.isWindows())
            self.win64ParamABI(info)
        else
            self.x86_64SysVParamABI(info);
    }

    // Unknown architecture: pass struct type as-is (let LLVM backend handle it)
    return info.llvm_type;
}
/// AArch64 ABI: struct parameter lowering.
/// - HFA (1-4 same float/double fields): [N x float/double]
/// - Non-HFA ≤ 8 bytes: i64
/// - Non-HFA 9-16 bytes: [2 x i64]
/// - > 16 bytes: pass as-is (indirect, not yet fully handled)
fn aarch64ParamABI(self: *CodeGen, info: StructInfo) c.LLVMTypeRef {
// Check HFA: 1-4 fields all of the same float type
const field_types = info.field_types;
if (field_types.len >= 1 and field_types.len <= 4) {
@@ -1215,10 +1325,119 @@ pub const CodeGen = struct {
const size = c.LLVMStoreSizeOfType(data_layout, info.llvm_type);
if (size <= 8) return c.LLVMInt64TypeInContext(self.context);
if (size <= 16) return c.LLVMArrayType2(c.LLVMInt64TypeInContext(self.context), 2);
// > 16 bytes: pass by pointer (indirect) — not yet handled, fall back to struct type
return info.llvm_type;
}
/// x86-64 SysV ABI: struct parameter lowering.
/// Each 8-byte "eightbyte" is classified as INTEGER or SSE:
/// - If all fields in the eightbyte are float/double: SSE (passed in XMM register)
/// - If any field is integer/pointer: INTEGER (passed in GPR)
/// - Structs > 16 bytes: passed in memory; the struct type is returned unchanged
///
/// Fields are assigned to an eightbyte by their starting offset only.
/// NOTE(review): a field that itself spans both eightbytes (nested aggregate,
/// array) is attributed entirely to the first one — confirm whether such
/// fields can reach this function.
///
/// If the temporary field lists cannot be allocated, the unlowered struct
/// type is returned.
fn x86_64SysVParamABI(self: *CodeGen, info: StructInfo) c.LLVMTypeRef {
    const data_layout = c.LLVMGetModuleDataLayout(self.module);
    const size = c.LLVMStoreSizeOfType(data_layout, info.llvm_type);

    // > 16 bytes: MEMORY class
    if (size > 16) return info.llvm_type;

    // Single eightbyte (≤ 8 bytes)
    if (size <= 8) return self.classifyEightbyte(info.field_types, size);

    // Two eightbytes (9-16 bytes): classify each half independently.
    // The lists are scratch space only, so release them on every exit path.
    var first_eb_types = std.ArrayList(Type).empty;
    defer first_eb_types.deinit(self.allocator);
    var second_eb_types = std.ArrayList(Type).empty;
    defer second_eb_types.deinit(self.allocator);

    // End offset (exclusive) of the last byte occupied by a field of the second
    // eightbyte. Derived from real field offsets so that padding between
    // fields is included; summing field sizes would under-count it.
    var second_eb_end: u64 = 8;

    for (info.field_types, 0..) |ft, idx| {
        const offset = c.LLVMOffsetOfElement(data_layout, info.llvm_type, @intCast(idx));
        if (offset < 8) {
            first_eb_types.append(self.allocator, ft) catch return info.llvm_type;
        } else {
            second_eb_types.append(self.allocator, ft) catch return info.llvm_type;
            const field_end = offset + c.LLVMStoreSizeOfType(data_layout, self.typeToLLVM(ft));
            second_eb_end = @max(second_eb_end, field_end);
        }
    }

    // No sized field starts in the second half: cover whatever remains of the struct.
    const second_eb_size: u64 = if (second_eb_end > 8) second_eb_end - 8 else size - 8;

    const eb1 = self.classifyEightbyte(first_eb_types.items, 8);
    const eb2 = self.classifyEightbyte(second_eb_types.items, second_eb_size);

    // Compose the two eightbytes into a struct type
    var members: [2]c.LLVMTypeRef = .{ eb1, eb2 };
    return c.LLVMStructTypeInContext(self.context, &members, 2, 0);
}
/// Classify a single x86-64 eightbyte: if all fields are float/double, return
/// an SSE type; otherwise return an integer type matching the byte size.
///
/// - exactly doubles          -> `double`
/// - one float                -> `float`
/// - two floats               -> `<2 x float>` (both share ONE XMM register;
///   an array type would be split into two separate scalar registers)
/// - anything else, or empty  -> `i(byte_size * 8)`
///
/// `byte_size` is the number of bytes this eightbyte covers (1..8).
fn classifyEightbyte(self: *CodeGen, field_types_in_eb: []const Type, byte_size: u64) c.LLVMTypeRef {
    var float_count: u32 = 0;
    var double_count: u32 = 0;
    var all_sse = true;

    for (field_types_in_eb) |ft| {
        if (ft == .f32) {
            float_count += 1;
        } else if (ft == .f64) {
            double_count += 1;
        } else {
            // One INTEGER-class field makes the whole eightbyte INTEGER.
            all_sse = false;
            break;
        }
    }

    if (all_sse) {
        // A double fills the eightbyte on its own.
        if (double_count > 0 and float_count == 0) {
            return c.LLVMDoubleTypeInContext(self.context);
        }
        if (double_count == 0 and float_count == 1) {
            return c.LLVMFloatTypeInContext(self.context);
        }
        if (double_count == 0 and float_count == 2) {
            // 2 floats = 8 bytes packed into the low half of a single XMM register.
            return c.LLVMVectorType(c.LLVMFloatTypeInContext(self.context), 2);
        }
        // Empty chunk, or a float/double mix that cannot fit in 8 bytes:
        // fall through to the integer representation.
    }

    // INTEGER class: coerce to integer matching the byte size
    return c.LLVMIntTypeInContext(self.context, @intCast(byte_size * 8));
}
/// Windows x64 ABI: struct parameter lowering.
/// A struct whose store size is exactly 1, 2, 4 or 8 bytes is coerced to an
/// integer of the same width so it travels in a single register.
/// Every other size returns the struct type unchanged.
/// NOTE(review): the ABI passes those by pointer; this function does not
/// introduce the indirection itself — confirm the call site / backend does.
fn win64ParamABI(self: *CodeGen, info: StructInfo) c.LLVMTypeRef {
    const layout = c.LLVMGetModuleDataLayout(self.module);
    const byte_count = c.LLVMStoreSizeOfType(layout, info.llvm_type);

    return switch (byte_count) {
        // Power-of-two sizes up to 8 bytes fit a general-purpose register.
        1, 2, 4, 8 => c.LLVMIntTypeInContext(self.context, @intCast(byte_count * 8)),
        else => info.llvm_type,
    };
}
/// For foreign functions returning structs, apply the same ABI lowering as parameters.
/// This is a pure delegation: whatever type `getForeignParamABIType` selects for
/// `sx_ty` under the current target config is used for the return value as well.
/// The rules for return values match parameter rules on both AArch64 and x86-64 SysV
/// for small structs (≤ 16 bytes).
/// NOTE(review): larger structs are expected to go through sret — confirm that
/// the backend / call site takes care of it, since nothing here does.
fn getForeignReturnABIType(self: *CodeGen, sx_ty: Type) c.LLVMTypeRef {
// Reuse the same classification as parameters — the rules are identical
// for small struct returns on both AArch64 and x86-64 SysV.
return self.getForeignParamABIType(sx_ty);
}
/// Convert a struct value to its C ABI representation for a foreign call.
/// Stores the struct to memory, then loads as the ABI type.
fn convertStructToABI(self: *CodeGen, struct_val: c.LLVMValueRef, struct_ty: c.LLVMTypeRef, abi_ty: c.LLVMTypeRef) c.LLVMValueRef {
@@ -5894,11 +6113,13 @@ pub const CodeGen = struct {
std.debug.print("{s}\n", .{ir[0..len]});
}
pub fn emitObject(self: *CodeGen, output_path: [*:0]const u8) !void {
fn emitToFile(self: *CodeGen, output_path: [*:0]const u8, file_type: c.LLVMCodeGenFileType) !void {
llvm.initAllTargets();
const triple = c.LLVMGetDefaultTargetTriple();
defer c.LLVMDisposeMessage(triple);
const cfg = self.target_config;
const triple_owned = cfg.triple == null;
const triple = cfg.triple orelse c.LLVMGetDefaultTargetTriple();
defer if (triple_owned) c.LLVMDisposeMessage(@constCast(triple));
var target: c.LLVMTargetRef = null;
var err_msg: [*c]u8 = null;
@@ -5912,9 +6133,9 @@ pub const CodeGen = struct {
const tm = c.LLVMCreateTargetMachine(
target,
triple,
"generic",
"",
c.LLVMCodeGenLevelDefault,
cfg.getCpu(),
cfg.getFeatures(),
cfg.opt_level.toLLVM(),
c.LLVMRelocPIC,
c.LLVMCodeModelDefault,
);
@@ -5923,24 +6144,58 @@ pub const CodeGen = struct {
c.LLVMSetTarget(self.module, triple);
var err_msg2: [*c]u8 = null;
if (c.LLVMTargetMachineEmitToFile(tm, self.module, output_path, c.LLVMObjectFile, &err_msg2) != 0) {
if (c.LLVMTargetMachineEmitToFile(tm, self.module, output_path, file_type, &err_msg2) != 0) {
defer c.LLVMDisposeMessage(err_msg2);
const msg = std.mem.span(err_msg2);
return self.emitErrorFmt("failed to emit object file: {s}", .{msg});
return self.emitErrorFmt("failed to emit file: {s}", .{msg});
}
}
pub fn link(allocator: std.mem.Allocator, io: std.Io, output_obj: []const u8, output_bin: []const u8, libraries: []const []const u8) !void {
var argv = std.ArrayList([]const u8).empty;
try argv.appendSlice(allocator, &.{ "cc", output_obj, "-o", output_bin });
/// Emit the module to `output_path` as an object file.
/// Delegates to `emitToFile` with `c.LLVMObjectFile`; any error it returns
/// is propagated to the caller.
pub fn emitObject(self: *CodeGen, output_path: [*:0]const u8) !void {
return self.emitToFile(output_path, c.LLVMObjectFile);
}
if (libraries.len > 0) {
// Add Homebrew library path on macOS
try argv.append(allocator, "-L/opt/homebrew/lib");
/// Emit the module to `output_path` as an assembly file.
/// Delegates to `emitToFile` with `c.LLVMAssemblyFile`; any error it returns
/// is propagated to the caller.
pub fn emitAssembly(self: *CodeGen, output_path: [*:0]const u8) !void {
return self.emitToFile(output_path, c.LLVMAssemblyFile);
}
pub fn link(allocator: std.mem.Allocator, io: std.Io, output_obj: []const u8, output_bin: []const u8, libraries: []const []const u8, target_config: TargetConfig) !void {
var argv = std.ArrayList([]const u8).empty;
if (target_config.isWindows()) {
// Windows: MSVC-style linker flags
const linker = target_config.linker orelse "link.exe";
try argv.appendSlice(allocator, &.{ linker, output_obj });
try argv.append(allocator, try std.fmt.allocPrint(allocator, "/OUT:{s}", .{output_bin}));
for (target_config.lib_paths) |lp| {
try argv.append(allocator, try std.fmt.allocPrint(allocator, "/LIBPATH:{s}", .{lp}));
}
for (libraries) |lib| {
try argv.append(allocator, try std.fmt.allocPrint(allocator, "{s}.lib", .{lib}));
}
} else {
// Unix: cc-style linker flags
try argv.appendSlice(allocator, &.{ target_config.getLinker(), output_obj, "-o", output_bin });
if (target_config.sysroot) |sr| {
try argv.append(allocator, try std.fmt.allocPrint(allocator, "--sysroot={s}", .{sr}));
}
// User-supplied library paths first
for (target_config.lib_paths) |lp| {
try argv.append(allocator, try std.fmt.allocPrint(allocator, "-L{s}", .{lp}));
}
// Auto-detect host OS library paths when linking foreign libraries
if (libraries.len > 0 and target_config.triple == null) {
for (host_lib_paths) |path| {
try argv.append(allocator, try std.fmt.allocPrint(allocator, "-L{s}", .{path}));
}
}
for (libraries) |lib| {
const flag = try std.fmt.allocPrint(allocator, "-l{s}", .{lib});
try argv.append(allocator, flag);
try argv.append(allocator, try std.fmt.allocPrint(allocator, "-l{s}", .{lib}));
}
}
@@ -5952,4 +6207,22 @@ pub const CodeGen = struct {
if (result != .exited) return error.LinkError;
if (result.exited != 0) return error.LinkError;
}
/// Library search directories that are conventional on the machine the
/// compiler was built for. Resolved entirely at compile time from the host
/// OS and CPU architecture; empty on hosts other than macOS and Linux.
const host_lib_paths: []const []const u8 = switch (@import("builtin").os.tag) {
    .macos => switch (@import("builtin").cpu.arch) {
        // Apple Silicon Homebrew prefix first, then the classic location.
        .aarch64 => &.{ "/opt/homebrew/lib", "/usr/local/lib" },
        // Intel Mac Homebrew
        else => &.{"/usr/local/lib"},
    },
    .linux => &.{ "/usr/local/lib", "/usr/lib" },
    else => &.{},
};
};