comptime format

This commit is contained in:
agra
2026-02-18 18:57:51 +02:00
parent 383f09a305
commit fbf8a62362
5 changed files with 435 additions and 29 deletions

View File

@@ -131,6 +131,8 @@ pub const CodeGen = struct {
scope_stack: std.ArrayList(Scope),
// Compile-time globals: maps name to global variable info for #run results
comptime_globals: std.StringHashMap(ComptimeGlobal),
// Local compile-time constant values (for :: decls with known values)
local_comptime_constants: std.StringHashMap(comptime_mod.Value),
// Top-level #run expressions for side effects only
comptime_side_effects: std.ArrayList(*Node),
// Generic function templates: maps name to AST for deferred monomorphization
@@ -384,6 +386,7 @@ pub const CodeGen = struct {
.current_function = null,
.scope_stack = std.ArrayList(Scope).empty,
.comptime_globals = std.StringHashMap(ComptimeGlobal).init(allocator),
.local_comptime_constants = std.StringHashMap(comptime_mod.Value).init(allocator),
.comptime_side_effects = std.ArrayList(*Node).empty,
.generic_templates = std.StringHashMap(ast.FnDecl).init(allocator),
.generic_instances = std.StringHashMap(c.LLVMValueRef).init(allocator),
@@ -1256,6 +1259,94 @@ pub const CodeGen = struct {
};
}
/// Try to evaluate a `::` call expression entirely at compile time.
/// Works for any function whose arguments are all comptime-known, i.e. every
/// argument can be resolved by `resolveComptimeArg`.
///
/// Returns the value produced by the comptime VM (not necessarily a string),
/// or null to fall through to runtime codegen. Null is returned when the
/// callee cannot be named or found, an argument is not comptime-known, the
/// call has more arguments than the VM's `u8` argument count can express, or
/// the VM reports any error.
///
/// The caller must have checked that `cd.value.data` is a `.call` node.
fn tryComptimeCallEval(self: *CodeGen, cd: ast.ConstDecl) ?comptime_mod.Value {
    const call_node = cd.value.data.call;

    // The VM takes the argument count as a u8. Reject oversized calls up front
    // rather than tripping a checked `@intCast` after the args were pushed.
    const arg_count = std.math.cast(u8, call_node.args.len) orelse return null;

    // Resolve callee name: either a plain identifier or `object.field`.
    const callee_name: ?[]const u8 = if (call_node.callee.data == .identifier)
        call_node.callee.data.identifier.name
    else if (call_node.callee.data == .field_access) blk: {
        const fa = call_node.callee.data.field_access;
        if (fa.object.data == .identifier) {
            // The qualified name is handed to the VM as the function key, so it
            // is intentionally not freed here.
            const qualified = std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ fa.object.data.identifier.name, fa.field }) catch return null;
            break :blk qualified;
        }
        break :blk @as(?[]const u8, null);
    } else null;
    const cn = callee_name orelse return null;

    // Look up the function — either generic template or regular fn_decl
    const fd = self.findFnDecl(cn) orelse return null;

    // Set up the VM, then resolve each argument and push it straight onto the
    // VM stack (push copies the value, so no intermediate slice is needed).
    var vm = comptime_mod.VM.init(self.allocator, if (self.sema_result) |sr| sr else null, self.root_decls, self);
    for (call_node.args) |arg| {
        const val = self.resolveComptimeArg(arg) orelse return null;
        vm.push(val) catch return null;
    }

    // Compile and invoke the function — the VM handles #insert, variadics, etc.
    vm.compileFunctionAndInvoke(cn, fd, arg_count) catch return null;

    // Run the VM to completion; any VM error means "not evaluable at comptime".
    const result = vm.run() catch return null;
    return result;
}
/// Look up a function declaration by name.
/// Generic templates are consulted first. Otherwise the root declarations are
/// scanned in order and the first `fn_decl` whose name matches is returned,
/// whether it is a root declaration itself or a direct member of a root-level
/// namespace. Yields null when nothing matches.
fn findFnDecl(self: *CodeGen, name: []const u8) ?ast.FnDecl {
    if (self.generic_templates.get(name)) |template| return template;

    for (self.root_decls) |root| {
        if (root.data == .fn_decl) {
            const candidate = root.data.fn_decl;
            if (std.mem.eql(u8, candidate.name, name)) return candidate;
        } else if (root.data == .namespace_decl) {
            // Only direct members of the namespace are inspected.
            for (root.data.namespace_decl.decls) |member| {
                if (member.data != .fn_decl) continue;
                if (std.mem.eql(u8, member.data.fn_decl.name, name)) {
                    return member.data.fn_decl;
                }
            }
        }
    }

    return null;
}
/// Turn an AST node into a comptime `Value`, or null if it is not comptime-known.
/// Supported shapes:
///   - string literals (raw text is used as-is, otherwise it is unescaped),
///   - integer literals,
///   - identifiers recorded in `local_comptime_constants`,
///   - `<expr>.len` where `<expr>` resolves to a string or array value.
fn resolveComptimeArg(self: *CodeGen, node: *Node) ?comptime_mod.Value {
    switch (node.data) {
        .string_literal => |lit| {
            if (lit.is_raw) return .{ .string_val = lit.raw };
            const text = unescape.unescapeString(self.allocator, lit.raw) catch return null;
            return .{ .string_val = text };
        },
        .int_literal => |lit| return .{ .int_val = lit.value },
        .identifier => |ident| return self.local_comptime_constants.get(ident.name),
        .field_access => |access| {
            // The object is resolved first, even if the field turns out to be
            // something other than `len`.
            const target = self.resolveComptimeArg(access.object) orelse return null;
            if (!std.mem.eql(u8, access.field, "len")) return null;
            switch (target) {
                .string_val => |text| return .{ .int_val = @intCast(text.len) },
                .array_val => |arr| return .{ .int_val = @intCast(arr.elements.len) },
                else => return null,
            }
        },
        else => return null,
    }
}
/// Substitute comptime param identifiers in an AST expression with their literal nodes.
/// Used before comptimeEval in #insert to resolve comptime function params.
fn substituteComptimeNodes(self: *CodeGen, node: *Node) !*Node {
@@ -1325,7 +1416,7 @@ pub const CodeGen = struct {
.void_val => self.constInt32(0),
.pointer_val => c.LLVMConstNull(self.ptrType()),
.null_val => c.LLVMConstNull(self.ptrType()),
.struct_val, .array_val, .type_val, .function_val, .byte_ptr_val, .union_val => unreachable,
.struct_val, .array_val, .type_val, .function_val, .byte_ptr_val, .union_val, .any_val => unreachable,
};
}
@@ -2849,6 +2940,28 @@ pub const CodeGen = struct {
return null;
}
// Try comptime evaluation for :: call expressions (all args must be comptime-known)
if (cd.value.data == .call) {
if (self.tryComptimeCallEval(cd)) |result| {
if (result == .string_val) {
const llvm_val = self.comptimeValueToLLVM(result, .string_type);
const llvm_ty = self.getStringStructType();
const alloca = try self.buildNamedAlloca(llvm_ty, cd.name);
_ = c.LLVMBuildStore(self.builder, llvm_val, alloca);
try self.registerVariable(cd.name, alloca, .string_type);
try self.local_comptime_constants.put(cd.name, result);
return null;
} else if (result == .int_val) {
const llvm_val = self.constInt64(@bitCast(result.int_val));
const alloca = try self.buildNamedAlloca(self.i64Type(), cd.name);
_ = c.LLVMBuildStore(self.builder, llvm_val, alloca);
try self.registerVariable(cd.name, alloca, Type.s(64));
try self.local_comptime_constants.put(cd.name, result);
return null;
}
}
}
var sx_ty: Type = Type.s(64);
if (cd.type_annotation) |ta| {
@@ -2894,6 +3007,16 @@ pub const CodeGen = struct {
const alloca = try self.buildNamedAlloca(llvm_ty, cd.name);
_ = c.LLVMBuildStore(self.builder, init_val, alloca);
try self.registerVariable(cd.name, alloca, sx_ty);
// Track comptime value for :: string/int literals (for comptime format evaluation)
if (cd.value.data == .string_literal) {
const sl = cd.value.data.string_literal;
const content = if (sl.is_raw) sl.raw else unescape.unescapeString(self.allocator, sl.raw) catch return null;
try self.local_comptime_constants.put(cd.name, .{ .string_val = content });
} else if (cd.value.data == .int_literal) {
try self.local_comptime_constants.put(cd.name, .{ .int_val = cd.value.data.int_literal.value });
}
return null;
}

View File

@@ -23,8 +23,14 @@ pub const Value = union(enum) {
pointer_val: PointerValue,
byte_ptr_val: BytePtr,
union_val: UnionValue,
any_val: AnyValue,
null_val: void,
/// Payload of `any_val`: a boxed value paired with an integer type tag.
/// `boxAsAny` builds these by copying the value into a fresh allocation.
pub const AnyValue = struct {
tag: i64, // matches ANY_TAG_* constants from codegen
value: *Value, // the inner value (heap-allocated)
};
pub const PointerValue = struct {
target: [*]Value,
};
@@ -149,9 +155,27 @@ pub const Value = union(enum) {
try buf.append(allocator, '}');
return buf.items;
},
.any_val => |v| v.value.format(allocator),
.null_val => allocator.dupe(u8, "null"),
};
}
/// Box a value as an Any with the appropriate type tag.
///
/// The tag uses the same numbering as the `type_of` builtin: void 0, bool 1,
/// int 3, float32 4, float 5, string 6, type 10. Kinds without a dedicated
/// tag fall back to 0.
///
/// Boxing is idempotent: a value that is already an `any_val` is returned
/// unchanged (no allocation). Re-wrapping it would hide its real tag behind
/// tag 0 and would need two `unwrap_any` steps to reach the inner value.
///
/// Otherwise the value is copied into memory obtained from `allocator`;
/// returns an error only if that allocation fails.
pub fn boxAsAny(self: Value, allocator: std.mem.Allocator) !Value {
    const tag: i64 = switch (self) {
        .any_val => return self,
        .void_val => 0,
        .bool_val => 1,
        .int_val => 3,
        .float32_val => 4,
        .float_val => 5,
        .string_val => 6,
        .type_val => 10,
        else => 0,
    };
    const heap_val = try allocator.create(Value);
    heap_val.* = self;
    return .{ .any_val = .{ .tag = tag, .value = heap_val } };
}
};
/// Bytecode instruction for the comptime VM.
@@ -234,6 +258,12 @@ pub const Instruction = union(enum) {
concat,
format_to_string, // convert top-of-stack value to string representation
// Any
unwrap_any, // pop any_val, push inner value (no-op if not any_val)
// Code insertion
eval_insert: InsertInfo, // pop string, parse as code, compile + execute inline
// Unions
make_union: UnionMake,
get_union_field: UnionFieldAccess,
@@ -243,6 +273,7 @@ pub const Instruction = union(enum) {
pub const BuiltinCall = struct { id: BuiltinId, arg_count: u8 };
pub const StructMake = struct { type_name: []const u8, field_count: u16, field_names: []const []const u8 };
pub const FnRef = struct { name: []const u8, param_count: u8 };
pub const InsertInfo = struct { local_names: []const []const u8 };
pub const UnionMake = struct { type_name: []const u8, word_count: u16 };
pub const UnionFieldAccess = struct { word_offset: u16, field_type: UnionFieldType };
};
@@ -251,7 +282,7 @@ pub const UnionFieldType = enum { int, float, bool_k, pointer, string };
pub const ValueKind = enum { int, float, f32_k, bool_k, string };
pub const BuiltinId = enum { print, out, sqrt, size_of, cast, malloc, free, memcpy, memset };
pub const BuiltinId = enum { print, out, sqrt, size_of, cast, malloc, free, memcpy, memset, type_of };
/// A compiled function or expression — a flat sequence of instructions.
pub const Chunk = struct {
@@ -266,6 +297,7 @@ const Node = ast.Node;
const sema = @import("sema.zig");
const codegen_mod = @import("codegen.zig");
const llvm = @import("llvm_api.zig");
const Parser = @import("parser.zig").Parser;
/// Compute byte size of a Type. Uses LLVM data layout via codegen if available,
/// otherwise falls back to known sizes for primitive types.
@@ -359,7 +391,7 @@ pub const Compiler = struct {
};
}
fn emit(self: *Compiler, instruction: Instruction) !void {
pub fn emit(self: *Compiler, instruction: Instruction) !void {
try self.instructions.append(self.allocator, instruction);
}
@@ -517,7 +549,7 @@ pub const Compiler = struct {
try self.emit(.{ .push_string = idx });
}
fn compileNode(self: *Compiler, node: *Node) anyerror!void {
pub fn compileNode(self: *Compiler, node: *Node) anyerror!void {
switch (node.data) {
.int_literal => |lit| {
try self.emit(.{ .push_int = lit.value });
@@ -635,7 +667,7 @@ pub const Compiler = struct {
switch (unop.op) {
.negate => try self.emit(.negate),
.not => try self.emit(.not),
.xx => {}, // cast — handle later
.xx => try self.emit(.unwrap_any), // autocast — unwraps any_val to inner value
.address_of => unreachable, // handled above
}
}
@@ -973,7 +1005,23 @@ pub const Compiler = struct {
},
.defer_stmt => {}, // defer not meaningful in comptime
.push_stmt => {}, // push not meaningful in comptime
.insert_expr => {}, // handled by codegen, not VM
.insert_expr => |ins| {
// Compile the inner expression (evaluates to a string at runtime).
// Then emit eval_insert which at VM execution time will:
// 1. Pop the string result
// 2. Parse it as code
// 3. Compile a sub-chunk (with current locals)
// 4. Execute inline in the current frame
try self.compileNode(ins.expr);
// Snapshot current local names so the VM can set up the sub-compiler
var names = std.ArrayList([]const u8).empty;
for (self.locals.items) |local| {
try names.append(self.allocator, local.name);
}
try self.emit(.{ .eval_insert = .{
.local_names = try names.toOwnedSlice(self.allocator),
} });
},
else => {
return error.UnsupportedExpression;
},
@@ -987,13 +1035,21 @@ pub const VM = struct {
sp: u16 = 0,
frames: [64]CallFrame = undefined,
fp: u8 = 0,
functions: std.StringHashMap(Chunk),
insert_stack: [16]InsertSave = undefined,
insert_sp: u8 = 0,
insert_locals: std.ArrayList(Compiler.Local) = std.ArrayList(Compiler.Local).empty,
functions: std.StringHashMap(*Chunk),
globals: std.StringHashMap(Value),
allocator: std.mem.Allocator,
sema_result: ?*const sema.SemaResult,
root_decls: []const *Node,
codegen: ?*codegen_mod.CodeGen,
/// Saved execution position of the chunk that issued an `eval_insert`.
/// An entry is stored on `insert_stack` before the current frame is switched
/// to the generated sub-chunk; when that sub-chunk runs past its last
/// instruction, `run` pops the entry and restores the frame's chunk and ip.
pub const InsertSave = struct {
chunk: *const Chunk,
ip: u32,
};
pub const CallFrame = struct {
chunk: *const Chunk,
ip: u32,
@@ -1002,7 +1058,7 @@ pub const VM = struct {
pub fn init(allocator: std.mem.Allocator, sema_result: ?*const sema.SemaResult, root_decls: []const *Node, cg: ?*codegen_mod.CodeGen) VM {
return .{
.functions = std.StringHashMap(Chunk).init(allocator),
.functions = std.StringHashMap(*Chunk).init(allocator),
.globals = std.StringHashMap(Value).init(allocator),
.allocator = allocator,
.sema_result = sema_result,
@@ -1011,7 +1067,7 @@ pub const VM = struct {
};
}
fn push(self: *VM, value: Value) !void {
pub fn push(self: *VM, value: Value) !void {
if (self.sp >= 256) return error.StackOverflow;
self.stack[self.sp] = value;
self.sp += 1;
@@ -1036,10 +1092,18 @@ pub const VM = struct {
return self.run();
}
fn run(self: *VM) !Value {
pub fn run(self: *VM) !Value {
while (true) {
const frame = &self.frames[self.fp - 1];
if (frame.ip >= frame.chunk.code.len) {
// If we're inside an #insert, restore parent chunk and continue
if (self.insert_sp > 0) {
self.insert_sp -= 1;
const saved = self.insert_stack[self.insert_sp];
frame.chunk = saved.chunk;
frame.ip = saved.ip;
continue;
}
// End of chunk — return top of stack or void
if (self.sp > frame.base_slot) {
return self.pop();
@@ -1383,6 +1447,67 @@ pub const VM = struct {
try self.push(.{ .string_val = s });
},
// Any
.unwrap_any => {
const val = try self.pop();
if (val == .any_val) {
try self.push(val.any_val.value.*);
} else {
try self.push(val); // pass through for non-Any values
}
},
// Code insertion
.eval_insert => |info| {
// Pop the code string (result of evaluating the inner expression)
const code_val = try self.pop();
if (code_val != .string_val) return error.CompileError;
const code_z = self.allocator.dupeZ(u8, code_val.string_val) catch return error.OutOfMemory;
// Compile with parent's locals + any locals created by previous inserts
var compiler = Compiler.init(self.allocator, self.sema_result, self.root_decls, self.codegen);
for (info.local_names) |name| {
compiler.locals.append(self.allocator, .{ .name = name, .depth = 0 }) catch return error.OutOfMemory;
}
for (self.insert_locals.items) |local| {
compiler.locals.append(self.allocator, local) catch return error.OutOfMemory;
}
const pre_local_count = compiler.locals.items.len;
// Parse and compile each statement
var parser = Parser.init(self.allocator, code_z);
while (parser.current.tag != .eof) {
const stmt = parser.parseStmt() catch return error.CompileError;
compiler.compileNode(stmt) catch return error.CompileError;
}
// NO ret — sub-chunk runs inline, ends when ip >= code.len
// Track new locals created by this insert for subsequent inserts
if (compiler.locals.items.len > pre_local_count) {
for (compiler.locals.items[pre_local_count..]) |local| {
self.insert_locals.append(self.allocator, local) catch return error.OutOfMemory;
}
}
const sub_code = compiler.instructions.toOwnedSlice(self.allocator) catch return error.OutOfMemory;
const sub_strings = compiler.strings.toOwnedSlice(self.allocator) catch return error.OutOfMemory;
const sub_chunk = self.allocator.create(Chunk) catch return error.OutOfMemory;
sub_chunk.* = .{
.code = sub_code,
.strings = sub_strings,
.local_count = @intCast(compiler.locals.items.len),
.name = "insert",
};
// Save parent chunk/ip on insert stack, swap to sub-chunk
if (self.insert_sp >= 16) return error.StackOverflow;
self.insert_stack[self.insert_sp] = .{ .chunk = frame.chunk, .ip = frame.ip };
self.insert_sp += 1;
frame.chunk = sub_chunk;
frame.ip = 0;
continue; // re-enter the run loop, now executing sub-chunk
},
// Unions
.make_union => |um| {
const words = try self.allocator.alloc(Value, um.word_count);
@@ -1551,7 +1676,7 @@ pub const VM = struct {
fn callFunction(self: *VM, name: []const u8, arg_count: u8) !void {
// Look up chunk in cache
if (self.functions.getPtr(name)) |ptr| {
if (self.functions.get(name)) |ptr| {
return self.invokeChunk(ptr, arg_count);
}
@@ -1584,6 +1709,8 @@ pub const VM = struct {
self.fp += 1;
}
/// Dispatch a builtin call selected by `id`; `arg_count` is the number of
/// arguments the caller supplied (e.g. `type_of` pops one value and pushes its int tag).
fn callBuiltin(self: *VM, id: BuiltinId, arg_count: u8) !void {
switch (id) {
.out => {
@@ -1744,6 +1871,26 @@ pub const VM = struct {
}
try self.push(.{ .void_val = {} });
},
.type_of => {
// type_of(val) — return the type tag (matching ANY_TAG_* constants)
if (arg_count >= 1) {
const val = try self.pop();
const tag: i64 = switch (val) {
.any_val => |av| av.tag,
.void_val => 0,
.bool_val => 1,
.int_val => 3,
.float32_val => 4,
.float_val => 5,
.string_val => 6,
.type_val => 10,
else => 0,
};
try self.push(.{ .int_val = tag });
} else {
try self.push(.{ .int_val = 0 });
}
},
}
}
@@ -1790,13 +1937,50 @@ pub const VM = struct {
return result;
}
fn compileFunctionAndInvoke(self: *VM, name: []const u8, fd: ast.FnDecl, arg_count: u8) !void {
pub fn compileFunctionAndInvoke(self: *VM, name: []const u8, fd: ast.FnDecl, arg_count: u8) !void {
// Check for variadic parameter and pack extra args into an array
var effective_arg_count = arg_count;
var variadic_idx: ?usize = null;
var fixed_count: u8 = 0;
for (fd.params, 0..) |param, i| {
if (param.is_variadic) {
variadic_idx = i;
break;
}
fixed_count += 1;
}
if (variadic_idx != null and arg_count >= fixed_count) {
// Pop all args from stack (in reverse order)
const total = @as(usize, arg_count);
var all_args = try self.allocator.alloc(Value, total);
var i: usize = total;
while (i > 0) {
i -= 1;
all_args[i] = try self.pop();
}
// Push fixed args back
for (all_args[0..fixed_count]) |arg| {
try self.push(arg);
}
// Box variadic args as any_val and pack into array_val
const variadic_count = total - fixed_count;
var elements = try self.allocator.alloc(Value, variadic_count);
for (0..variadic_count) |vi| {
elements[vi] = try all_args[fixed_count + vi].boxAsAny(self.allocator);
}
try self.push(.{ .array_val = .{ .elements = elements } });
effective_arg_count = fixed_count + 1; // fixed params + 1 array
}
var compiler = Compiler.init(self.allocator, self.sema_result, self.root_decls, self.codegen);
const chunk = try compiler.compileFunction(fd);
try self.functions.put(name, chunk);
if (self.functions.getPtr(name)) |ptr| {
return self.invokeChunk(ptr, arg_count);
}
const heap_chunk = try self.allocator.create(Chunk);
heap_chunk.* = chunk;
try self.functions.put(name, heap_chunk);
return self.invokeChunk(heap_chunk, effective_arg_count);
}
fn resolveGlobal(self: *VM, name: []const u8) VMError!Value {
@@ -1859,6 +2043,26 @@ pub const VM = struct {
if (Type.fromName(name)) |ty|
return self.cacheTypeGlobal(name, ty);
// Type category tags for match expressions on Any values
const type_tag: ?i64 = if (std.mem.eql(u8, name, "void")) 0
else if (std.mem.eql(u8, name, "bool")) 1
else if (std.mem.eql(u8, name, "int")) 3
else if (std.mem.eql(u8, name, "float")) 5
else if (std.mem.eql(u8, name, "string")) 6
else if (std.mem.eql(u8, name, "type")) 10
else if (std.mem.eql(u8, name, "struct")) 11
else if (std.mem.eql(u8, name, "enum")) 12
else if (std.mem.eql(u8, name, "vector")) 13
else if (std.mem.eql(u8, name, "array")) 14
else if (std.mem.eql(u8, name, "slice")) 15
else if (std.mem.eql(u8, name, "pointer")) 16
else null;
if (type_tag) |tag| {
const val = Value{ .int_val = tag };
self.globals.put(name, val) catch {};
return val;
}
return error.UndefinedVariable;
}
};