Files
sx/src/ir/inst.zig
agra b5241243e6 ERR/E3.0 (slice 3b): comptime trace resolution
#run failures now print the same `func at file:line:col` trace as
runtime, resolved in-process via the interpreter's IR/source tables.

- Read-side context-split op `.trace_resolve` (mirror of .trace_frame),
  lowered from a name-recognized `__trace_resolve_frame(u64) -> Frame`.
- emit_llvm: inttoptr the operand to *Frame + load (the value
  .trace_frame stamped in).
- interp: unpack (func_id << 32 | span.start); resolve func/file from
  module.functions and line/col via SourceLoc.compute over a new
  source_map (setSourceMap wired at every production interp site).
- trace.sx: frame_at -> u64; to_string routes each frame through
  __trace_resolve_frame, so one source works in both machines.

Compiled path behavior unchanged (243/244/247 identical; it now loads
via the op). New examples/253-comptime-trace.sx exercises the comptime
path. Gates: zig build, zig build test, run_examples.sh -> 291 passed.
2026-06-01 15:33:50 +03:00

580 lines
22 KiB
Zig

const std = @import("std");
const types = @import("types.zig");
const TypeId = types.TypeId;
const StringId = types.StringId;
// ── Handles ─────────────────────────────────────────────────────────────
/// Handle naming an SSA value, i.e. the result of an instruction.
pub const Ref = enum(u32) {
    /// Reserved handle meaning "no value" / operand not used.
    none = std.math.maxInt(u32),
    _,

    /// Returns the raw `u32` stored in this handle.
    pub fn index(self: Ref) u32 {
        const raw: u32 = @intFromEnum(self);
        return raw;
    }

    /// Wraps a raw `u32` as a handle. `maxInt(u32)` maps to `.none`.
    pub fn fromIndex(i: u32) Ref {
        return @as(Ref, @enumFromInt(i));
    }

    /// True exactly when this handle is the `.none` sentinel.
    pub fn isNone(self: Ref) bool {
        return switch (self) {
            .none => true,
            _ => false,
        };
    }
};
/// Opaque `u32` handle identifying a basic block (used as a branch target).
pub const BlockId = enum(u32) {
    _,

    /// Returns the raw `u32` stored in this handle.
    pub fn index(self: BlockId) u32 {
        return @as(u32, @intFromEnum(self));
    }

    /// Wraps a raw `u32` as a `BlockId`.
    pub fn fromIndex(i: u32) BlockId {
        const id: BlockId = @enumFromInt(i);
        return id;
    }
};
/// Opaque `u32` handle identifying a function.
pub const FuncId = enum(u32) {
    _,

    /// Returns the raw `u32` stored in this handle.
    pub fn index(self: FuncId) u32 {
        const raw: u32 = @intFromEnum(self);
        return raw;
    }

    /// Wraps a raw `u32` as a `FuncId`.
    pub fn fromIndex(i: u32) FuncId {
        return @as(FuncId, @enumFromInt(i));
    }
};
/// Opaque `u32` handle identifying a global.
pub const GlobalId = enum(u32) {
    _,

    /// Returns the raw `u32` stored in this handle.
    pub fn index(self: GlobalId) u32 {
        return @as(u32, @intFromEnum(self));
    }

    /// Wraps a raw `u32` as a `GlobalId`.
    pub fn fromIndex(i: u32) GlobalId {
        const id: GlobalId = @enumFromInt(i);
        return id;
    }
};
// ── Span ────────────────────────────────────────────────────────────────
/// Source range attached to an instruction. Both ends default to 0.
/// NOTE(review): presumably `start`/`end` are offsets into the source text
/// (the `trace_frame` docs pack `span.start` into a frame value) — confirm
/// the unit and whether `end` is inclusive.
pub const Span = struct {
start: u32 = 0,
end: u32 = 0,
};
// ── Instruction ─────────────────────────────────────────────────────────
/// One IR instruction: an operation, its type, and its source span.
pub const Inst = struct {
/// The operation performed, together with its operands.
op: Op,
/// Type attached to the instruction. The `Op` docs treat it as the result
/// type (e.g. `trace_resolve`'s result type is the `Frame` `TypeId`).
ty: TypeId,
/// Source span; defaults to the all-zero span when not supplied.
span: Span = .{},
};
// ── Op (tagged union) ───────────────────────────────────────────────────
/// The operation an instruction performs, plus that operation's payload.
/// Variants with a payload type carry their operands (`Ref`s), ids, or
/// immediate constants in it; bare variants carry nothing.
pub const Op = union(enum) {
// ── Constants ───────────────────────────────────────────────────
const_int: i64,
const_float: f64,
const_bool: bool,
const_string: StringId,
const_null,
const_undef, // `---` undefined initializer
/// ERR E4.1 — `is_comptime()` builtin. The SAME lowered IR is run by both
/// the comptime interpreter and the compiled backend, so this can't fold at
/// lower time: the interp evaluates it to `true`, emit_llvm emits constant
/// `false`. Lets stdlib (`process.exit`, `assert`) take a comptime-only
/// diagnostic branch that dead-codes out of compiled binaries.
is_comptime,
/// ERR E4.1 — `trace.print_interpreter_frames()`. At comptime the interp
/// walks its sx call-frame chain and appends it to the output; in compiled
/// code it's a no-op (only ever reached from a dead `is_comptime()` branch,
/// where there is no interpreter stack to walk).
interp_print_frames,
/// ERR E3.0 slice 3a — a return-trace frame value (`u64`) for the push site.
/// Niladic + span-stamped: it carries NO operands; each backend derives the
/// frame from its own context. `emit_llvm` resolves this instruction's span
/// + the current function → `{file,line,col,func}`, interns a `Frame` global,
/// and yields its address (`ptrtoint`). `interp` yields a packed
/// `(func_id << 32 | span.start)` for the comptime resolver (slice 3b). The
/// result feeds the existing `sx_trace_push(u64)` call.
trace_frame,
/// ERR E3.0 slice 3b — the read-side resolver: a raw trace-buffer `u64` →
/// a `Frame` value. The mirror of `trace_frame`'s context split.
/// `emit_llvm` reinterprets the operand as `*Frame` and loads it (the value
/// `trace_frame` stamped in). `interp` unpacks `(func_id, span.start)` and
/// resolves it via the module's functions + the source map into a `Frame`
/// aggregate. Result type is the `Frame` `TypeId`.
trace_resolve: UnaryOp,
/// Comptime-only Type value. Carried as a `Value.type_tag(TypeId)`
/// in the interpreter. NEVER emitted to LLVM — types are erased
/// after lowering. `emit_llvm` bails loudly if it sees one,
/// surfacing a "Type value reached runtime" diagnostic instead of
/// silently lowering to a stale int.
const_type: TypeId,
// ── Arithmetic ──────────────────────────────────────────────────
add: BinOp,
sub: BinOp,
mul: BinOp,
div: BinOp,
mod: BinOp,
neg: UnaryOp, // unary -x
// ── Bitwise ─────────────────────────────────────────────────────
bit_and: BinOp,
bit_or: BinOp,
bit_xor: BinOp,
bit_not: UnaryOp,
shl: BinOp,
shr: BinOp,
// ── Comparison ──────────────────────────────────────────────────
cmp_eq: BinOp,
cmp_ne: BinOp,
cmp_lt: BinOp,
cmp_le: BinOp,
cmp_gt: BinOp,
cmp_ge: BinOp,
str_eq: BinOp, // string/slice equality via memcmp
str_ne: BinOp, // string/slice inequality via memcmp
// ── Logical ─────────────────────────────────────────────────────
bool_and: BinOp, // short-circuit &&
bool_or: BinOp, // short-circuit ||
bool_not: UnaryOp,
// ── Conversions ─────────────────────────────────────────────────
widen: Conversion, // safe widening (s32 → s64)
narrow: Conversion, // truncation via `xx` (s64 → s32)
bitcast: Conversion, // reinterpret bits
int_to_float: Conversion,
float_to_int: Conversion,
// ── Memory ──────────────────────────────────────────────────────
alloca: TypeId, // stack allocation, result is *T
load: UnaryOp, // load from pointer
store: Store, // store value to pointer
// ── Struct ops ──────────────────────────────────────────────────
struct_init: Aggregate, // construct struct from field values
struct_get: FieldAccess, // read struct field by index
struct_gep: FieldAccess, // get pointer to struct field (GEP)
// ── Enum ops ────────────────────────────────────────────────────
enum_init: EnumInit, // construct enum value (tag + optional payload)
enum_tag: UnaryOp, // extract tag from enum/union
enum_payload: FieldAccess, // extract payload from tagged union
// ── Union ops ───────────────────────────────────────────────────
union_get: FieldAccess, // read union field (reinterpret)
union_gep: FieldAccess, // pointer to union field
// ── Array/Slice ops ─────────────────────────────────────────────
index_get: BinOp, // arr[idx] → value
index_gep: BinOp, // &arr[idx] → pointer
length: UnaryOp, // .len on slice/string/array
data_ptr: UnaryOp, // .ptr on slice/string
subslice: Subslice, // arr[lo..hi]
array_to_slice: UnaryOp, // [N]T → []T
// ── Tuple ops ───────────────────────────────────────────────────
tuple_init: Aggregate, // construct tuple from values
tuple_get: FieldAccess, // read tuple element by index
// ── Optional ops ────────────────────────────────────────────────
optional_wrap: UnaryOp, // T → ?T
optional_unwrap: UnaryOp, // ?T → T (UB if null)
optional_has_value: UnaryOp, // ?T → bool
optional_coalesce: BinOp, // a ?? b
// ── Pointer ops ─────────────────────────────────────────────────
addr_of: UnaryOp, // @x → *T
deref: UnaryOp, // p.* → T
// ── Vector ops ──────────────────────────────────────────────────
vec_splat: UnaryOp, // scalar → vector (broadcast)
vec_extract: BinOp, // vec[idx] → scalar
vec_insert: TriOp, // vec, idx, val → new_vec
// ── Calls ───────────────────────────────────────────────────────
/// Direct call to a known function (`Call.callee` is a `FuncId`).
call: Call,
/// Call whose callee is an SSA value (`CallIndirect.callee` is a `Ref`).
call_indirect: CallIndirect,
/// Closure call; shares the `CallIndirect` payload shape with
/// `call_indirect`.
call_closure: CallIndirect,
/// Call to the builtin selected by `BuiltinCall.builtin`.
call_builtin: BuiltinCall,
/// Call identified by a qualified name (see `CompilerCall.name`).
compiler_call: CompilerCall,
/// `#objc_call(ReturnT)(recv, sel, args...)` — dispatched through
/// `objc_msgSend`. emit_llvm.zig synthesizes a per-call-site LLVM
/// function type from the arg/result Refs and reuses a single
/// declared `@objc_msgSend` symbol across all return-type
/// variants. Encoded as its own opcode (instead of `.call` /
/// `.call_indirect`) so the IR doesn't need a separate FuncId
/// per signature shape.
objc_msg_send: ObjcMsgSend,
/// `#jni_call(ReturnT)(env, target, name, sig, args...)` and
/// `#jni_static_call(ReturnT)(env, class, name, sig, args...)`.
/// emit_llvm.zig expands this into the JNI vtable indirection:
/// `(*env)->GetObjectClass` (instance only) → `GetMethodID` /
/// `GetStaticMethodID` → `Call<Type>Method` / `CallStatic<Type>Method`.
/// Method-ID caching across call sites is added in step 1.17.
jni_msg_send: JniMsgSend,
// ── Closure creation ────────────────────────────────────────────
/// Builds a closure from a trampoline function and an env pointer
/// (see `ClosureCreate`).
closure_create: ClosureCreate,
// ── Globals ─────────────────────────────────────────────────────
/// Reads a global's value (contrast with `global_addr`, which yields the
/// address without loading).
global_get: GlobalId,
global_addr: GlobalId, // address of a global (pointer, not load)
/// Writes `GlobalSet.value` to `GlobalSet.global`.
global_set: GlobalSet,
func_ref: FuncId, // reference to a function (for function pointers)
// ── Block params (SSA phi alternative) ──────────────────────────
/// Names parameter `param_index` of block `block` (see `BlockParam`).
block_param: BlockParam,
// ── Any type ────────────────────────────────────────────────────
box_any: BoxAny, // T → Any (erase type)
unbox_any: UnaryOp, // Any → T (restore type)
// ── Reflection ─────────────────────────────────────────────────
field_name_get: FieldReflect, // field_name(T, i) → string (runtime index)
field_value_get: FieldReflect, // field_value(s, i) → Any (runtime struct + index)
error_tag_name_get: UnaryOp, // error_tag_name(e) → string (runtime tag id → name, via the always-linked tag-name table)
// ── Terminators ─────────────────────────────────────────────────
/// Unconditional branch to `Branch.target`, passing `Branch.args` as the
/// block param values.
br: Branch,
/// Two-way branch on `CondBranch.cond`, with separate targets and args
/// for the then/else edges.
cond_br: CondBranch,
/// Multi-way branch: per-case targets plus a default (see `SwitchBranch`).
switch_br: SwitchBranch,
/// Return carrying an operand.
ret: UnaryOp,
/// Return carrying no operand.
ret_void,
@"unreachable",
// ── Misc ────────────────────────────────────────────────────────
/// No-op placeholder for unlowered AST nodes.
placeholder: StringId, // name of the unlowered construct
};
// ── Operand structs ─────────────────────────────────────────────────────
/// Payload for ops that take exactly one SSA operand (e.g. `neg`, `load`,
/// `ret`).
pub const UnaryOp = struct {
operand: Ref,
};
/// Payload for ops that take two SSA operands (arithmetic, comparison,
/// indexing, ...): left-hand side first, right-hand side second.
pub const BinOp = struct {
lhs: Ref,
rhs: Ref,
};
/// Payload for ops that take three SSA operands. The only user visible in
/// this file is `vec_insert` (`vec, idx, val → new_vec`).
/// NOTE(review): presumably `a`/`b`/`c` map to vec/idx/val in that order —
/// confirm against the lowering code.
pub const TriOp = struct {
a: Ref,
b: Ref,
c: Ref,
};
/// Payload of `Op.store`: stores a value to a pointer.
pub const Store = struct {
/// Destination pointer.
ptr: Ref,
/// Value being stored.
val: Ref,
/// Declared type of the value being stored. Threaded through so the
/// interp's raw-pointer store knows the destination byte width — a
/// `.int` Value alone is ambiguous (s8/s16/s32/s64/u*/usize/pointer
/// all flatten to `.int`). The LLVM emitter ignores this (LLVM knows
/// the width from the SSA value's type already).
val_ty: TypeId = .void,
};
/// Payload of the conversion ops (`widen`, `narrow`, `bitcast`,
/// `int_to_float`, `float_to_int`): the value being converted plus its
/// source and destination types.
pub const Conversion = struct {
operand: Ref,
from: TypeId,
to: TypeId,
};
/// Payload of the by-index member ops (`struct_get`, `struct_gep`,
/// `enum_payload`, `union_get`, `union_gep`, `tuple_get`).
pub const FieldAccess = struct {
/// The aggregate being accessed.
base: Ref,
/// Index of the member within the aggregate.
field_index: u32,
/// The IR type of the aggregate being accessed (struct, union, etc.).
/// Used by the LLVM emitter to resolve the correct type for GEP operations
/// without guessing from LLVM value chains.
base_type: ?TypeId = null,
};
/// Payload of `struct_init` / `tuple_init`: the values the struct or tuple
/// is constructed from.
pub const Aggregate = struct {
fields: []const Ref,
};
/// Payload of `enum_init`: constructs an enum value from a tag and an
/// optional payload.
pub const EnumInit = struct {
tag: u32,
payload: Ref, // Ref.none if no payload
};
/// Payload of `subslice` (`arr[lo..hi]`): the sliced value and both bounds.
pub const Subslice = struct {
base: Ref,
lo: Ref,
hi: Ref,
};
/// Payload of `Op.call`: a call whose callee is named by `FuncId`.
pub const Call = struct {
callee: FuncId,
args: []const Ref,
};
/// Payload shared by `call_indirect` and `call_closure`: the callee is an
/// SSA value (`Ref`) rather than a `FuncId`.
pub const CallIndirect = struct {
callee: Ref,
args: []const Ref,
};
/// `#objc_call` dispatch through `objc_msgSend`. emit_llvm reads
/// `recv`/`sel`/each arg's IR type to build the per-call-site LLVM
/// function type; the instruction's own `ty` field (`Inst.ty`) is the
/// Obj-C return type. One declared `@objc_msgSend` symbol is shared
/// across every distinct signature shape.
pub const ObjcMsgSend = struct {
/// Receiver operand of the `#objc_call`.
recv: Ref,
/// Selector operand of the `#objc_call`.
sel: Ref,
args: []const Ref, // additional args after recv + sel
};
/// JNI dispatch payload. `env` is `JNIEnv*` (typed as ptr); `target`
/// is a `jobject` for instance calls and a `jclass` for static calls.
/// `name` and `sig` are pointers to NUL-terminated bytes (typically
/// `[*]u8` from a string-literal `.ptr`). When the source-level
/// `name` and `sig` are string literals, `cache_key` carries their
/// content so emit_llvm.zig can intern a shared `jclass GlobalRef` +
/// `jmethodID` slot keyed on `(name, sig)`; otherwise the lookup
/// stays uncached. The dispatch sequence is expanded in
/// emit_llvm.zig — see `Op.jni_msg_send`.
pub const JniMsgSend = struct {
/// The `JNIEnv*` value.
env: Ref,
/// `jobject` (instance call) or `jclass` (static call).
target: Ref,
/// Pointer to the NUL-terminated method name.
name: Ref,
/// Pointer to the NUL-terminated JNI signature.
sig: Ref,
/// Arguments passed after `env`, `target`, `name` and `sig`.
args: []const Ref,
/// `true` for `#jni_static_call`, `false` for `#jni_call`.
is_static: bool,
/// `true` when this is a `super.method(args)` dispatch from inside a
/// `#jni_main` Activity method body — lowers to `CallNonvirtual<T>Method`
/// against `parent_class_path`. Mutually exclusive with `is_static`.
is_nonvirtual: bool = false,
/// `true` when this is a `Foo.new(args)` constructor dispatch — lowers
/// to `FindClass(parent_class_path) + GetMethodID("<init>", sig) +
/// NewObject(env, clazz, mid, args...)`. Returns a fresh jobject.
/// Mutually exclusive with the other dispatch flags.
is_constructor: bool = false,
/// Foreign path of the parent class (e.g. `android/app/Activity`) when
/// `is_nonvirtual` is true, OR of the class being constructed when
/// `is_constructor` is true. emit_llvm uses `FindClass` to materialise
/// the jclass at the call site (per-call; caching is follow-up).
parent_class_path: ?[]const u8 = null,
/// Literal `(name, sig)` contents used to key the cached lookup; `null`
/// leaves the lookup uncached.
cache_key: ?CacheKey = null,
};
/// Key for `JniMsgSend.cache_key`: the contents of the source-level `name`
/// and `sig` string literals of a JNI call.
pub const CacheKey = struct {
name_str: []const u8,
sig_str: []const u8,
};
/// Payload of `Op.call_builtin`: which builtin to invoke and its arguments.
pub const BuiltinCall = struct {
builtin: BuiltinId,
args: []const Ref,
};
/// Identifies the builtin invoked by a `BuiltinCall`
/// (the payload of `Op.call_builtin`).
pub const BuiltinId = enum(u16) {
out,
sqrt,
sin,
cos,
floor,
size_of,
align_of,
cast,
type_of,
alloc,
dealloc,
// Comptime-only reflection builtins. Today's `tryLowerReflectionCall`
// folds these at lower time when the type argument is statically
// resolvable — emits a `const_string` / `const_bool` directly.
// These BuiltinId entries are the FALLBACK path: when the arg is
// a runtime/interp-time value (e.g. `args[i]` inside a builder
// body, carrying a `.type_tag(TypeId)` only at interp execution),
// lowering emits a `call_builtin` to one of these. The interp
// implements them; emit_llvm bails (Type is comptime-only).
type_name,
type_eq,
has_impl,
};
/// Payload of `Op.compiler_call`: a call identified by a qualified name.
/// NOTE(review): `name` is a raw `u32` while other interned names in this
/// file use `StringId` — confirm whether this is intentional.
pub const CompilerCall = struct {
name: u32, // StringPool id for qualified name (e.g. "BuildOptions.add_link_flag")
args: []const Ref,
};
/// Payload of `Op.closure_create`: the trampoline function and the env
/// pointer the closure is built from.
pub const ClosureCreate = struct {
func: FuncId, // trampoline function
env: Ref, // allocated env pointer (or Ref.none for no captures)
};
/// Payload of `Op.global_set`: the global to write and the value to write.
pub const GlobalSet = struct {
global: GlobalId,
value: Ref,
};
/// Payload of `Op.block_param`: names one parameter of a block (block
/// params are this IR's alternative to SSA phi nodes).
pub const BlockParam = struct {
/// The block that owns the parameter.
block: BlockId,
/// Position of the parameter within that block's `params`.
param_index: u32,
};
/// Payload of `Op.box_any` (`T → Any`): the value being boxed and the
/// static type it had before erasure.
pub const BoxAny = struct {
operand: Ref,
source_type: TypeId,
};
/// Payload of the runtime reflection ops `field_name_get` and
/// `field_value_get`.
pub const FieldReflect = struct {
base: Ref, // struct value (for field_value_get) or Ref.none (for field_name_get)
index: Ref, // runtime field index
struct_type: TypeId, // compile-time resolved struct type
};
/// Payload of `Op.br`: an unconditional branch to `target`.
pub const Branch = struct {
target: BlockId,
args: []const Ref, // block param values
};
/// Payload of `Op.cond_br`: a two-way branch on `cond`. Each edge has its
/// own target block and its own argument list.
pub const CondBranch = struct {
cond: Ref,
then_target: BlockId,
then_args: []const Ref,
else_target: BlockId,
else_args: []const Ref,
};
/// Payload of `Op.switch_br`: a multi-way branch with a list of cases and a
/// default edge.
pub const SwitchBranch = struct {
/// The value being switched on.
operand: Ref,
cases: []const Case,
/// Target of the default edge.
default: BlockId,
/// Arguments passed along the default edge.
default_args: []const Ref,
/// One switch arm: a constant, its target block, and the edge's args.
/// NOTE(review): presumably taken when `operand` equals `value` — the
/// matching rule lives in the backends, not in this file.
pub const Case = struct {
value: i64,
target: BlockId,
args: []const Ref,
};
};
// ── Block ───────────────────────────────────────────────────────────────
/// A basic block: a named list of instructions plus the types of its block
/// parameters.
pub const Block = struct {
    /// Interned block name.
    name: StringId,
    /// Block parameter types (SSA phi alternative).
    params: []const TypeId,
    /// Instructions of this block; freed by `deinit`.
    insts: std.ArrayList(Inst),
    /// Ref index of the first instruction in this block.
    first_ref: u32 = 0,

    /// Creates a block with no instructions. `params` is stored as given,
    /// not copied. `first_ref` keeps its default of 0.
    pub fn init(name: StringId, params: []const TypeId) Block {
        const no_insts = std.ArrayList(Inst).empty;
        const fresh: Block = .{
            .insts = no_insts,
            .params = params,
            .name = name,
        };
        return fresh;
    }

    /// Releases the instruction list using `alloc`. `params` is not freed.
    pub fn deinit(self: *Block, alloc: std.mem.Allocator) void {
        std.ArrayList(Inst).deinit(&self.insts, alloc);
    }
};
// ── Function ────────────────────────────────────────────────────────────
/// An IR function: signature, attributes, and its list of basic blocks.
pub const Function = struct {
    /// Interned function name.
    name: StringId,
    /// Parameters in declaration order.
    params: []const Param,
    /// Return type.
    ret: TypeId,
    /// Basic blocks; each one is released by `deinit`.
    blocks: std.ArrayList(Block),
    is_extern: bool = false,
    is_comptime: bool = false,
    linkage: Linkage = .internal,
    call_conv: CallingConvention = .default,
    source_file: ?[]const u8 = null,
    /// Variadic tail at the IR signature level. Only `#foreign` decls reach
    /// IR with this set — sx-side `..T` params are slice-packed before
    /// lowering, so anything that survives is the C calling convention's
    /// `...`. emit_llvm passes `is_var_arg=1` to `LLVMFunctionType`; call
    /// sites apply the standard default argument promotions (s8/s16/bool →
    /// s32, f32 → f64) to extras past the fixed param count.
    is_variadic: bool = false,
    /// True if `params[0]` is the synthetic `__sx_ctx: *Context`
    /// parameter that every default-conv sx function receives. Callers
    /// read this flag to decide whether to prepend their current
    /// `__sx_ctx` value to the args of a call. Foreign decls and
    /// `callconv(.c)` functions have it false.
    has_implicit_ctx: bool = false,

    /// A single named, typed parameter.
    pub const Param = struct {
        name: StringId,
        ty: TypeId,
    };

    /// Symbol linkage of the function.
    pub const Linkage = enum {
        internal,
        external,
        private,
    };

    /// Alias for the calling-convention type declared in `types.zig`.
    pub const CallingConvention = types.TypeInfo.CallConv;

    /// Creates a function with the given signature and no blocks. Every
    /// other field keeps its default. `params` is stored as given, not
    /// copied.
    pub fn init(name: StringId, params: []const Param, ret: TypeId) Function {
        const no_blocks = std.ArrayList(Block).empty;
        const fresh: Function = .{
            .blocks = no_blocks,
            .ret = ret,
            .params = params,
            .name = name,
        };
        return fresh;
    }

    /// Releases every block, then the block list itself, using `alloc`.
    /// `params` and `source_file` are not freed.
    pub fn deinit(self: *Function, alloc: std.mem.Allocator) void {
        var i: usize = 0;
        while (i < self.blocks.items.len) : (i += 1) {
            self.blocks.items[i].deinit(alloc);
        }
        self.blocks.deinit(alloc);
    }
};
// ── Global ──────────────────────────────────────────────────────────────
/// An IR global: name, type, optional initializer, and attributes.
pub const Global = struct {
/// Interned global name.
name: StringId,
/// Type of the global.
ty: TypeId,
/// Constant initializer, or `null` when none is recorded.
init_val: ?ConstantValue = null,
is_extern: bool = false,
is_const: bool = false,
/// Thread-local storage. `global_get` / `global_set` emit normal LLVM
/// load/store instructions; LLVM handles the per-thread access through
/// the `thread_local` attribute on the global.
is_thread_local: bool = false,
/// For comptime globals: the function to interpret to get the init value.
comptime_func: ?FuncId = null,
};
// ── ConstantValue ───────────────────────────────────────────────────────
/// A constant value, used as the optional initializer of a `Global`
/// (`Global.init_val`).
pub const ConstantValue = union(enum) {
int: i64,
float: f64,
boolean: bool,
string: StringId,
null_val,
undef,
zeroinit,
/// Nested constant made of other constants.
aggregate: []const ConstantValue,
/// Vtable constant: struct of function pointers, used for protocol vtable globals.
vtable: []const FuncId,
/// Function pointer leaf, for static initializers that include
/// function addresses inside nested aggregates (e.g. the inline
/// Allocator value `{ ctx, alloc_fn, dealloc_fn }` for the
/// process-wide default Context).
func_ref: FuncId,
};