const std = @import("std");
const types = @import("types.zig");
const TypeId = types.TypeId;
const StringId = types.StringId;

// ── Handles ─────────────────────────────────────────────────────────────

/// Reference to an SSA value (instruction result).
pub const Ref = enum(u32) {
    /// Sentinel for "no value" / unused operand.
    none = std.math.maxInt(u32),
    _,

    /// Raw integer behind this handle (`maxInt(u32)` for `.none`).
    pub fn index(self: Ref) u32 {
        return @intFromEnum(self);
    }

    /// Wraps a raw integer as a `Ref`. No range check is performed, so
    /// `maxInt(u32)` yields `.none`.
    pub fn fromIndex(i: u32) Ref {
        return @enumFromInt(i);
    }

    /// True when this is the `.none` sentinel.
    pub fn isNone(self: Ref) bool {
        return self == .none;
    }
};

/// Typed `u32` handle naming a basic block (e.g. the `target` of a branch).
pub const BlockId = enum(u32) {
    _,

    /// Raw integer behind this handle.
    pub fn index(self: BlockId) u32 {
        return @intFromEnum(self);
    }

    /// Wraps a raw integer as a `BlockId`; no validation is performed.
    pub fn fromIndex(i: u32) BlockId {
        return @enumFromInt(i);
    }
};

/// Typed `u32` handle naming a function (e.g. the `callee` of a `Call`).
pub const FuncId = enum(u32) {
    _,

    /// Raw integer behind this handle.
    pub fn index(self: FuncId) u32 {
        return @intFromEnum(self);
    }

    /// Wraps a raw integer as a `FuncId`; no validation is performed.
    pub fn fromIndex(i: u32) FuncId {
        return @enumFromInt(i);
    }
};

/// Typed `u32` handle naming a global (operand of the `global_*` ops).
pub const GlobalId = enum(u32) {
    _,

    /// Raw integer behind this handle.
    pub fn index(self: GlobalId) u32 {
        return @intFromEnum(self);
    }

    /// Wraps a raw integer as a `GlobalId`; no validation is performed.
    pub fn fromIndex(i: u32) GlobalId {
        return @enumFromInt(i);
    }
};

// ── Span ────────────────────────────────────────────────────────────────

/// Source range attached to an instruction. Both ends default to 0.
/// NOTE(review): presumably byte offsets into the source file — confirm
/// against the producer.
pub const Span = struct {
    start: u32 = 0,
    end: u32 = 0,
};

// ── Instruction ─────────────────────────────────────────────────────────

/// One IR instruction: the operation, the type recorded for it, and an
/// optional source span (defaults to the empty `0..0` span).
pub const Inst = struct {
    op: Op,
    ty: TypeId,
    span: Span = .{},
};

// ── Op (tagged union) ───────────────────────────────────────────────────

/// Every operation an `Inst` can carry. The active tag selects the opcode;
/// the payload (if any) holds that opcode's operands.
pub const Op = union(enum) {
    // ── Constants ───────────────────────────────────────────────────
    const_int: i64,
    const_float: f64,
    const_bool: bool,
    const_string: StringId,
    const_null,
    const_undef, // `---` undefined initializer
    /// ERR E4.1 — `is_comptime()` builtin. The SAME lowered IR is run by both
    /// the comptime interpreter and the compiled backend, so this can't fold at
    /// lower time: the interp evaluates it to `true`, emit_llvm emits constant
    /// `false`. Lets stdlib (`process.exit`, `assert`) take a comptime-only
    /// diagnostic branch that dead-codes out of compiled binaries.
    is_comptime,
    /// Comptime-only Type value. Carried as a `Value.type_tag(TypeId)`
    /// in the interpreter. NEVER emitted to LLVM — types are erased
    /// after lowering. `emit_llvm` bails loudly if it sees one,
    /// surfacing a "Type value reached runtime" diagnostic instead of
    /// silently lowering to a stale int.
    const_type: TypeId,

    // ── Arithmetic ──────────────────────────────────────────────────
    add: BinOp,
    sub: BinOp,
    mul: BinOp,
    div: BinOp,
    mod: BinOp,
    neg: UnaryOp, // unary -x

    // ── Bitwise ─────────────────────────────────────────────────────
    bit_and: BinOp,
    bit_or: BinOp,
    bit_xor: BinOp,
    bit_not: UnaryOp,
    shl: BinOp,
    shr: BinOp,

    // ── Comparison ──────────────────────────────────────────────────
    cmp_eq: BinOp,
    cmp_ne: BinOp,
    cmp_lt: BinOp,
    cmp_le: BinOp,
    cmp_gt: BinOp,
    cmp_ge: BinOp,
    str_eq: BinOp, // string/slice equality via memcmp
    str_ne: BinOp, // string/slice inequality via memcmp

    // ── Logical ─────────────────────────────────────────────────────
    bool_and: BinOp, // short-circuit &&
    bool_or: BinOp, // short-circuit ||
    bool_not: UnaryOp,

    // ── Conversions ─────────────────────────────────────────────────
    widen: Conversion, // safe widening (s32 → s64)
    narrow: Conversion, // truncation via `xx` (s64 → s32)
    bitcast: Conversion, // reinterpret bits
    int_to_float: Conversion,
    float_to_int: Conversion,

    // ── Memory ──────────────────────────────────────────────────────
    alloca: TypeId, // stack allocation, result is *T
    load: UnaryOp, // load from pointer
    store: Store, // store value to pointer

    // ── Struct ops ──────────────────────────────────────────────────
    struct_init: Aggregate, // construct struct from field values
    struct_get: FieldAccess, // read struct field by index
    struct_gep: FieldAccess, // get pointer to struct field (GEP)

    // ── Enum ops ────────────────────────────────────────────────────
    enum_init: EnumInit, // construct enum value (tag + optional payload)
    enum_tag: UnaryOp, // extract tag from enum/union
    enum_payload: FieldAccess, // extract payload from tagged union

    // ── Union ops ───────────────────────────────────────────────────
    union_get: FieldAccess, // read union field (reinterpret)
    union_gep: FieldAccess, // pointer to union field

    // ── Array/Slice ops ─────────────────────────────────────────────
    index_get: BinOp, // arr[idx] → value
    index_gep: BinOp, // &arr[idx] → pointer
    length: UnaryOp, // .len on slice/string/array
    data_ptr: UnaryOp, // .ptr on slice/string
    subslice: Subslice, // arr[lo..hi]
    array_to_slice: UnaryOp, // [N]T → []T

    // ── Tuple ops ───────────────────────────────────────────────────
    tuple_init: Aggregate, // construct tuple from values
    tuple_get: FieldAccess, // read tuple element by index

    // ── Optional ops ────────────────────────────────────────────────
    optional_wrap: UnaryOp, // T → ?T
    optional_unwrap: UnaryOp, // ?T → T (UB if null)
    optional_has_value: UnaryOp, // ?T → bool
    optional_coalesce: BinOp, // a ?? b

    // ── Pointer ops ─────────────────────────────────────────────────
    addr_of: UnaryOp, // @x → *T
    deref: UnaryOp, // p.* → T

    // ── Vector ops ──────────────────────────────────────────────────
    vec_splat: UnaryOp, // scalar → vector (broadcast)
    vec_extract: BinOp, // vec[idx] → scalar
    vec_insert: TriOp, // vec, idx, val → new_vec

    // ── Calls ───────────────────────────────────────────────────────
    call: Call,
    call_indirect: CallIndirect,
    call_closure: CallIndirect,
    call_builtin: BuiltinCall,
    compiler_call: CompilerCall,
    /// `#objc_call(ReturnT)(recv, sel, args...)` — dispatched through
    /// `objc_msgSend`. emit_llvm.zig synthesizes a per-call-site LLVM
    /// function type from the arg/result Refs and reuses a single
    /// declared `@objc_msgSend` symbol across all return-type
    /// variants. Encoded as its own opcode (instead of `.call` /
    /// `.call_indirect`) so the IR doesn't need a separate FuncId
    /// per signature shape.
    objc_msg_send: ObjcMsgSend,
    /// `#jni_call(ReturnT)(env, target, name, sig, args...)` and
    /// `#jni_static_call(ReturnT)(env, class, name, sig, args...)`.
    /// emit_llvm.zig expands this into the JNI vtable indirection:
    /// `(*env)->GetObjectClass` (instance only) → `GetMethodID` /
    /// `GetStaticMethodID` → `CallMethod` / `CallStaticMethod`.
    /// Method-ID caching across call sites is added in step 1.17.
    jni_msg_send: JniMsgSend,

    // ── Closure creation ────────────────────────────────────────────
    closure_create: ClosureCreate,

    // ── Globals ─────────────────────────────────────────────────────
    global_get: GlobalId,
    global_addr: GlobalId, // address of a global (pointer, not load)
    global_set: GlobalSet,
    func_ref: FuncId, // reference to a function (for function pointers)

    // ── Block params (SSA phi alternative) ──────────────────────────
    block_param: BlockParam,

    // ── Any type ────────────────────────────────────────────────────
    box_any: BoxAny, // T → Any (erase type)
    unbox_any: UnaryOp, // Any → T (restore type)

    // ── Reflection ──────────────────────────────────────────────────
    field_name_get: FieldReflect, // field_name(T, i) → string (runtime index)
    field_value_get: FieldReflect, // field_value(s, i) → Any (runtime struct + index)
    error_tag_name_get: UnaryOp, // error_tag_name(e) → string (runtime tag id → name, via the always-linked tag-name table)

    // ── Terminators ─────────────────────────────────────────────────
    br: Branch,
    cond_br: CondBranch,
    switch_br: SwitchBranch,
    ret: UnaryOp,
    ret_void,
    @"unreachable",

    // ── Misc ────────────────────────────────────────────────────────
    /// No-op placeholder for unlowered AST nodes.
    placeholder: StringId, // name of the unlowered construct
};

// ── Operand structs ─────────────────────────────────────────────────────

/// Payload for ops that take a single operand.
pub const UnaryOp = struct {
    operand: Ref,
};

/// Payload for ops that take two operands.
pub const BinOp = struct {
    lhs: Ref,
    rhs: Ref,
};

/// Payload for ops that take three operands (e.g. `vec_insert`:
/// vec, idx, val).
pub const TriOp = struct {
    a: Ref,
    b: Ref,
    c: Ref,
};

/// Payload for `store`: write `val` through `ptr`.
pub const Store = struct {
    ptr: Ref,
    val: Ref,
    /// Declared type of the value being stored. Threaded through so the
    /// interp's raw-pointer store knows the destination byte width — a
    /// `.int` Value alone is ambiguous (s8/s16/s32/s64/u*/usize/pointer
    /// all flatten to `.int`). The LLVM emitter ignores this (LLVM knows
    /// the width from the SSA value's type already).
    val_ty: TypeId = .void,
};

/// Payload for the conversion ops: convert `operand` from type `from` to
/// type `to`.
pub const Conversion = struct {
    operand: Ref,
    from: TypeId,
    to: TypeId,
};

/// Payload for the field/element access ops on structs, enums, unions
/// and tuples.
pub const FieldAccess = struct {
    base: Ref,
    field_index: u32,
    /// The IR type of the aggregate being accessed (struct, union, etc.).
    /// Used by the LLVM emitter to resolve the correct type for GEP operations
    /// without guessing from LLVM value chains.
    base_type: ?TypeId = null,
};

/// Payload for `struct_init` / `tuple_init`: the values the aggregate is
/// built from. The slice is borrowed; this struct does not free it.
pub const Aggregate = struct {
    fields: []const Ref,
};

/// Payload for `enum_init`: the tag plus an optional payload value.
pub const EnumInit = struct {
    tag: u32,
    payload: Ref, // Ref.none if no payload
};

/// Payload for `subslice` (`arr[lo..hi]`).
pub const Subslice = struct {
    base: Ref,
    lo: Ref,
    hi: Ref,
};

/// Payload for `call`: a call whose callee is a known `FuncId`.
pub const Call = struct {
    callee: FuncId,
    args: []const Ref,
};

/// Payload for `call_indirect` and `call_closure`: the callee is itself
/// an SSA value.
pub const CallIndirect = struct {
    callee: Ref,
    args: []const Ref,
};

/// `#objc_call` dispatch through `objc_msgSend`. emit_llvm reads
/// `recv`/`sel`/each arg's IR type to build the per-call-site LLVM
/// function type; the instruction's own `ty` field (`Inst.ty`) is the
/// Obj-C return type. One declared `@objc_msgSend` symbol is shared
/// across every distinct signature shape.
pub const ObjcMsgSend = struct {
    recv: Ref,
    sel: Ref,
    args: []const Ref, // additional args after recv + sel
};

/// JNI dispatch payload. `env` is `JNIEnv*` (typed as ptr); `target`
/// is a `jobject` for instance calls and a `jclass` for static calls.
/// `name` and `sig` are pointers to NUL-terminated bytes (typically
/// `[*]u8` from a string-literal `.ptr`). When the source-level
/// `name` and `sig` are string literals, `cache_key` carries their
/// content so emit_llvm.zig can intern a shared `jclass GlobalRef` +
/// `jmethodID` slot keyed on `(name, sig)`; otherwise the lookup
/// stays uncached. The dispatch sequence is expanded in
/// emit_llvm.zig — see `Op.jni_msg_send`.
pub const JniMsgSend = struct {
    env: Ref,
    target: Ref,
    name: Ref,
    sig: Ref,
    args: []const Ref,
    is_static: bool,
    /// `true` when this is a `super.method(args)` dispatch from inside a
    /// `#jni_main` Activity method body — lowers to `CallNonvirtualMethod`
    /// against `parent_class_path`. Mutually exclusive with `is_static`.
    is_nonvirtual: bool = false,
    /// `true` when this is a `Foo.new(args)` constructor dispatch — lowers
    /// to `FindClass(parent_class_path) + GetMethodID("<init>", sig) +
    /// NewObject(env, clazz, mid, args...)`. Returns a fresh jobject.
    /// Mutually exclusive with the other dispatch flags.
    is_constructor: bool = false,
    /// Foreign path of the parent class (e.g. `android/app/Activity`) when
    /// `is_nonvirtual` is true, OR of the class being constructed when
    /// `is_constructor` is true. emit_llvm uses `FindClass` to materialise
    /// the jclass at the call site (per-call; caching is follow-up).
    parent_class_path: ?[]const u8 = null,
    cache_key: ?CacheKey = null,
};

/// Literal `(name, sig)` pair carried by `JniMsgSend.cache_key`. Both
/// slices are borrowed.
pub const CacheKey = struct {
    name_str: []const u8,
    sig_str: []const u8,
};

/// Payload for `call_builtin`: which builtin, and its arguments.
pub const BuiltinCall = struct {
    builtin: BuiltinId,
    args: []const Ref,
};

/// Identifies the builtin invoked by a `call_builtin` op.
pub const BuiltinId = enum(u16) {
    out,
    sqrt,
    sin,
    cos,
    floor,
    size_of,
    align_of,
    cast,
    type_of,
    alloc,
    dealloc,
    // Comptime-only reflection builtins. Today's `tryLowerReflectionCall`
    // folds these at lower time when the type argument is statically
    // resolvable — emits a `const_string` / `const_bool` directly.
    // These BuiltinId entries are the FALLBACK path: when the arg is
    // a runtime/interp-time value (e.g. `args[i]` inside a builder
    // body, carrying a `.type_tag(TypeId)` only at interp execution),
    // lowering emits a `call_builtin` to one of these. The interp
    // implements them; emit_llvm bails (Type is comptime-only).
    type_name,
    type_eq,
    has_impl,
};

/// Payload for `compiler_call`.
pub const CompilerCall = struct {
    name: u32, // StringPool id for qualified name (e.g. "BuildOptions.add_link_flag")
    args: []const Ref,
};

/// Payload for `closure_create`.
pub const ClosureCreate = struct {
    func: FuncId, // trampoline function
    env: Ref, // allocated env pointer (or Ref.none for no captures)
};

/// Payload for `global_set`: store `value` into `global`.
pub const GlobalSet = struct {
    global: GlobalId,
    value: Ref,
};

/// Payload for `block_param`: names parameter `param_index` of `block`.
pub const BlockParam = struct {
    block: BlockId,
    param_index: u32,
};

/// Payload for `box_any` (T → Any): the value being boxed and its
/// original type.
pub const BoxAny = struct {
    operand: Ref,
    source_type: TypeId,
};

/// Payload for `field_name_get` / `field_value_get`.
pub const FieldReflect = struct {
    base: Ref, // struct value (for field_value_get) or Ref.none (for field_name_get)
    index: Ref, // runtime field index
    struct_type: TypeId, // compile-time resolved struct type
};

/// Payload for `br`: unconditional jump to `target`.
pub const Branch = struct {
    target: BlockId,
    args: []const Ref, // block param values
};

/// Payload for `cond_br`: two-way branch on `cond`, each edge carrying
/// its own argument list.
pub const CondBranch = struct {
    cond: Ref,
    then_target: BlockId,
    then_args: []const Ref,
    else_target: BlockId,
    else_args: []const Ref,
};

/// Payload for `switch_br`: multi-way branch on `operand` with a default
/// edge.
pub const SwitchBranch = struct {
    operand: Ref,
    cases: []const Case,
    default: BlockId,
    default_args: []const Ref,

    /// One switch arm: the matched value, its target block, and the
    /// arguments passed along that edge.
    pub const Case = struct {
        value: i64,
        target: BlockId,
        args: []const Ref,
    };
};

// ── Block ───────────────────────────────────────────────────────────────

/// A basic block: a name, its parameter types, and its instruction list.
pub const Block = struct {
    name: StringId,
    params: []const TypeId, // block parameter types (SSA phi alternative)
    insts: std.ArrayList(Inst),
    first_ref: u32 = 0, // ref index of the first instruction in this block

    /// Creates a block with an empty instruction list. `params` is stored
    /// as given (not copied), so the caller keeps ownership of that slice.
    pub fn init(name: StringId, params: []const TypeId) Block {
        return .{
            .name = name,
            .params = params,
            .insts = std.ArrayList(Inst).empty,
        };
    }

    /// Releases the instruction list. `alloc` must be the allocator the
    /// list was grown with. `params` is not freed here.
    pub fn deinit(self: *Block, alloc: std.mem.Allocator) void {
        self.insts.deinit(alloc);
    }
};

// ── Function ────────────────────────────────────────────────────────────

/// An IR function: signature, attributes, and its list of blocks.
pub const Function = struct {
    name: StringId,
    params: []const Param,
    ret: TypeId,
    blocks: std.ArrayList(Block),
    is_extern: bool = false,
    is_comptime: bool = false,
    linkage: Linkage = .internal,
    call_conv: CallingConvention = .default,
    source_file: ?[]const u8 = null,
    /// Variadic tail at the IR signature level. Only `#foreign` decls reach
    /// IR with this set — sx-side `..T` params are slice-packed before
    /// lowering, so anything that survives is the C calling convention's
    /// `...`. emit_llvm passes `is_var_arg=1` to `LLVMFunctionType`; call
    /// sites apply the standard default argument promotions (s8/s16/bool →
    /// s32, f32 → f64) to extras past the fixed param count.
    is_variadic: bool = false,
    /// True if `params[0]` is the synthetic `__sx_ctx: *Context`
    /// parameter that every default-conv sx function receives. Callers
    /// read this flag to decide whether to prepend their current
    /// `__sx_ctx` value to the args of a call. Foreign decls and
    /// `callconv(.c)` functions have it false.
    has_implicit_ctx: bool = false,

    /// One formal parameter: its name and type.
    pub const Param = struct {
        name: StringId,
        ty: TypeId,
    };

    /// Symbol linkage recorded for the function.
    pub const Linkage = enum {
        internal,
        external,
        private,
    };

    /// Calling-convention tag, shared with the type system's definition.
    pub const CallingConvention = types.TypeInfo.CallConv;

    /// Creates a function with no blocks; all other fields take their
    /// defaults. `params` is stored as given (not copied).
    pub fn init(name: StringId, params: []const Param, ret: TypeId) Function {
        return .{
            .name = name,
            .params = params,
            .ret = ret,
            .blocks = std.ArrayList(Block).empty,
        };
    }

    /// Deinitializes every block, then releases the block list itself.
    /// `alloc` must be the allocator the lists were grown with. `params`
    /// and `source_file` are not freed here.
    pub fn deinit(self: *Function, alloc: std.mem.Allocator) void {
        for (self.blocks.items) |*block| {
            block.deinit(alloc);
        }
        self.blocks.deinit(alloc);
    }
};

// ── Global ──────────────────────────────────────────────────────────────

/// A module-level global: name, type, optional static initializer, and
/// attributes.
pub const Global = struct {
    name: StringId,
    ty: TypeId,
    init_val: ?ConstantValue = null,
    is_extern: bool = false,
    is_const: bool = false,
    /// Thread-local storage. `global_get` / `global_set` emit normal LLVM
    /// load/store instructions; LLVM handles the per-thread access through
    /// the `thread_local` attribute on the global.
    is_thread_local: bool = false,
    /// For comptime globals: the function to interpret to get the init value.
    comptime_func: ?FuncId = null,
};

// ── ConstantValue ───────────────────────────────────────────────────────

/// A static initializer value, as stored in `Global.init_val`.
pub const ConstantValue = union(enum) {
    int: i64,
    float: f64,
    boolean: bool,
    string: StringId,
    null_val,
    undef,
    zeroinit,
    aggregate: []const ConstantValue,
    /// Vtable constant: struct of function pointers, used for protocol vtable globals.
    vtable: []const FuncId,
    /// Function pointer leaf, for static initializers that include
    /// function addresses inside nested aggregates (e.g. the inline
    /// Allocator value `{ ctx, alloc_fn, dealloc_fn }` for the
    /// process-wide default Context).
    func_ref: FuncId,
};