ir
This commit is contained in:
484
src/ir/types.zig
Normal file
484
src/ir/types.zig
Normal file
@@ -0,0 +1,484 @@
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
// ── TypeId ──────────────────────────────────────────────────────────────
|
||||
// Opaque handle into the TypeTable. First 16 slots are reserved for builtins.
|
||||
|
||||
pub const TypeId = enum(u32) {
    // Fixed handles for the builtin types occupy values 0 through 15.
    void = 0,
    bool = 1,
    s8 = 2,
    s16 = 3,
    s32 = 4,
    s64 = 5,
    u8 = 6,
    u16 = 7,
    u32 = 8,
    u64 = 9,
    f32 = 10,
    f64 = 11,
    string = 12, // [:0]u8
    any = 13,
    noreturn = 14,
    _reserved = 15,
    _, // non-exhaustive: values from 16 upward are user-defined types

    /// Lowest raw value that is not one of the builtin slots.
    pub const first_user: u32 = 16;

    /// Returns the raw `u32` value behind this handle.
    pub fn index(self: TypeId) u32 {
        const raw: u32 = @intFromEnum(self);
        return raw;
    }

    /// Wraps a raw `u32` as a `TypeId`. No validation is performed.
    pub fn fromIndex(i: u32) TypeId {
        return @as(TypeId, @enumFromInt(i));
    }

    /// Reports whether the handle's raw value lies below `first_user`.
    pub fn isBuiltin(self: TypeId) bool {
        return @intFromEnum(self) < first_user;
    }
};
|
||||
|
||||
// ── TypeInfo ────────────────────────────────────────────────────────────
|
||||
// Resolved type information stored in the TypeTable.
|
||||
// Unlike the AST-level `types.Type` which uses string names for references,
|
||||
// TypeInfo uses TypeId handles, making it fully resolved and internable.
|
||||
|
||||
pub const TypeInfo = union(enum) {
    // NOTE: the order of these tags is significant — the intern-map hash
    // feeds the tag's integer value into the hasher, so keep it stable.

    /// Signed integer; payload is the bit width (1–64).
    signed: u8,
    /// Unsigned integer; payload is the bit width.
    unsigned: u8,
    f32,
    f64,
    void,
    bool,
    /// [:0]u8 — fat pointer {ptr, len}
    string,

    @"struct": StructInfo,
    @"enum": EnumInfo,
    @"union": UnionInfo,
    array: ArrayInfo,
    slice: SliceInfo,
    pointer: PointerInfo,
    many_pointer: ManyPointerInfo,
    vector: VectorInfo,
    function: FunctionInfo,
    closure: ClosureInfo,
    optional: OptionalInfo,
    tuple: TupleInfo,
    any,
    protocol: ProtocolInfo,
    noreturn,

    // ── Named aggregates ────────────────────────────────────────────────

    /// Payload of `.@"struct"`: a name plus its ordered fields.
    pub const StructInfo = struct {
        name: StringId,
        fields: []const Field,

        /// One named, typed member. Also reused by `UnionInfo.fields`.
        pub const Field = struct {
            name: StringId,
            ty: TypeId,
        };
    };

    /// Payload of `.@"enum"`: a name plus the names of its variants.
    pub const EnumInfo = struct {
        name: StringId,
        variants: []const StringId,
    };

    /// Payload of `.@"union"`.
    pub const UnionInfo = struct {
        name: StringId,
        fields: []const StructInfo.Field,
        /// Enum type used as the tag of a tagged union; null if untagged.
        tag_type: ?TypeId,
    };

    /// Payload of `.protocol`: a name plus its method signatures.
    pub const ProtocolInfo = struct {
        name: StringId,
        methods: []const Method,

        pub const Method = struct {
            name: StringId,
            sig: TypeId, // function type
        };
    };

    // ── Sequences and indirection ───────────────────────────────────────

    /// Payload of `.array`: element type and element count.
    pub const ArrayInfo = struct {
        element: TypeId,
        length: u32,
    };

    /// Payload of `.vector`: element type and element count.
    pub const VectorInfo = struct {
        element: TypeId,
        length: u32,
    };

    /// Payload of `.slice`.
    pub const SliceInfo = struct {
        element: TypeId,
    };

    /// Payload of `.pointer`.
    pub const PointerInfo = struct {
        pointee: TypeId,
    };

    /// Payload of `.many_pointer`.
    pub const ManyPointerInfo = struct {
        element: TypeId,
    };

    /// Payload of `.optional`.
    pub const OptionalInfo = struct {
        child: TypeId,
    };

    // ── Callables and tuples ────────────────────────────────────────────

    /// Payload of `.function`: parameter types and return type.
    pub const FunctionInfo = struct {
        params: []const TypeId,
        ret: TypeId,
    };

    /// Payload of `.closure`: same shape as `FunctionInfo`, distinct tag.
    pub const ClosureInfo = struct {
        params: []const TypeId,
        ret: TypeId,
    };

    /// Payload of `.tuple`: element types plus optional per-element names.
    pub const TupleInfo = struct {
        fields: []const TypeId,
        names: ?[]const StringId,
    };
};
|
||||
|
||||
// ── StringId ────────────────────────────────────────────────────────────
|
||||
|
||||
/// Handle for a string stored in a `StringPool`.
pub const StringId = enum(u32) {
    /// Value 0 always denotes the empty string.
    empty = 0,
    _,

    /// Returns the raw `u32` value behind this handle.
    pub fn index(self: StringId) u32 {
        const raw: u32 = @intFromEnum(self);
        return raw;
    }
};
|
||||
|
||||
// ── StringPool ──────────────────────────────────────────────────────────
|
||||
// Intern strings for type/field/variant names. Deduplicates by content.
|
||||
|
||||
pub const StringPool = struct {
    /// Maps string content → StringId for dedup. Keys point to owned allocations in `strings`.
    map: std.StringHashMap(StringId),
    /// Owned string data indexed by StringId. Each entry is separately heap-allocated,
    /// except slot 0, which is the `""` literal.
    strings: std.ArrayList([]const u8),
    /// Raw value that the next newly interned string will receive.
    next_id: u32,

    // The public API returns plain values rather than error unions, so an
    // allocation failure cannot be propagated. Panic with a message instead
    // of `catch unreachable`, which is undefined behaviour in ReleaseFast.
    const oom_message = "StringPool: out of memory";

    /// Creates a pool whose slot 0 holds the empty string.
    ///
    /// `alloc` backs the dedup map; pass the same allocator to `intern` and
    /// `deinit`. Panics if the initial allocation fails.
    pub fn init(alloc: Allocator) StringPool {
        var pool = StringPool{
            .map = std.StringHashMap(StringId).init(alloc),
            .strings = std.ArrayList([]const u8).empty,
            .next_id = 1, // 0 is reserved for empty
        };
        // Slot 0 = empty string (a literal, not heap-allocated).
        pool.strings.append(alloc, "") catch @panic(oom_message);
        return pool;
    }

    /// Releases every interned string, the index list, and the dedup map.
    ///
    /// `alloc` must be the allocator that was passed to `intern`.
    pub fn deinit(self: *StringPool, alloc: Allocator) void {
        // Skip slot 0: it is a string literal and was never allocated.
        // `Allocator.free` accepts const slices, so no cast is needed.
        for (self.strings.items[1..]) |owned| {
            alloc.free(owned);
        }
        self.strings.deinit(alloc);
        self.map.deinit();
    }

    /// Returns the id for `str`, copying it into the pool on first sight.
    ///
    /// The empty string always maps to `.empty`. The caller keeps ownership
    /// of `str`; the pool stores its own copy. Panics on allocation failure.
    pub fn intern(self: *StringPool, alloc: Allocator, str: []const u8) StringId {
        if (str.len == 0) return .empty;
        if (self.map.get(str)) |existing| return existing;

        const id: StringId = @enumFromInt(self.next_id);

        // Allocate a stable copy — used as both map key and lookup value.
        const owned = alloc.dupe(u8, str) catch @panic(oom_message);
        self.strings.append(alloc, owned) catch @panic(oom_message);
        self.map.put(owned, id) catch @panic(oom_message);
        self.next_id += 1;

        return id;
    }

    /// Returns the string stored for `id`, or `""` when `id` is out of range.
    ///
    /// The returned slice is owned by the pool.
    pub fn get(self: *const StringPool, id: StringId) []const u8 {
        const idx = id.index();
        if (idx >= self.strings.items.len) return "";
        return self.strings.items[idx];
    }
};
|
||||
|
||||
// ── TypeTable ───────────────────────────────────────────────────────────
|
||||
// Holds all resolved types. Builtins in slots 0–15, user types interned from 16+.
|
||||
|
||||
pub const TypeTable = struct {
    /// `TypeInfo` for every handle, indexed by `TypeId.index()`.
    infos: std.ArrayList(TypeInfo),
    /// String pool behind `internString` / `getString`.
    strings: StringPool,
    /// Maps TypeInfo → TypeId for dedup of structural types
    intern_map: std.HashMap(TypeKey, TypeId, TypeKeyContext, 80),
    /// Allocator used for every allocation the table makes.
    alloc: Allocator,
    /// Parameter slices copied by `functionType` / `closureType`.
    /// Tracked separately so `deinit` frees exactly what the table allocated
    /// and never touches slices a caller handed to `intern` directly.
    owned_params: std.ArrayList([]const TypeId) = .empty,

    // The public API returns plain values rather than error unions, so an
    // allocation failure cannot be propagated. Panic with a message instead
    // of `catch unreachable`, which is undefined behaviour in ReleaseFast.
    const oom_message = "TypeTable: out of memory";

    /// Creates a table with builtin slots 0–15 populated.
    ///
    /// Builtins (except the reserved placeholder in slot 15) are also
    /// registered in the intern map, so interning a `TypeInfo` that is
    /// structurally equal to a builtin returns the builtin handle instead
    /// of creating a duplicate user type. Panics on allocation failure.
    pub fn init(alloc: Allocator) TypeTable {
        var table = TypeTable{
            .infos = std.ArrayList(TypeInfo).empty,
            .strings = StringPool.init(alloc),
            .intern_map = std.HashMap(TypeKey, TypeId, TypeKeyContext, 80).init(alloc),
            .alloc = alloc,
            .owned_params = .empty,
        };

        // Pre-populate builtin slots 0–15 (must match TypeId enum order)
        const builtins = [_]TypeInfo{
            .void, // 0
            .bool, // 1
            .{ .signed = 8 }, // 2: s8
            .{ .signed = 16 }, // 3: s16
            .{ .signed = 32 }, // 4: s32
            .{ .signed = 64 }, // 5: s64
            .{ .unsigned = 8 }, // 6: u8
            .{ .unsigned = 16 }, // 7: u16
            .{ .unsigned = 32 }, // 8: u32
            .{ .unsigned = 64 }, // 9: u64
            .f32, // 10
            .f64, // 11
            .string, // 12
            .any, // 13
            .noreturn, // 14
            .void, // 15: reserved (placeholder)
        };
        for (builtins, 0..) |info, slot| {
            table.infos.append(alloc, info) catch @panic(oom_message);
            // The reserved slot reuses `.void` as a placeholder; registering
            // it would shadow the real `void` handle in slot 0.
            if (slot == TypeId._reserved.index()) continue;
            table.intern_map.put(.{ .info = info }, TypeId.fromIndex(@intCast(slot))) catch
                @panic(oom_message);
        }

        return table;
    }

    /// Releases everything the table allocated, including parameter slices
    /// copied by `functionType` and `closureType`.
    pub fn deinit(self: *TypeTable) void {
        for (self.owned_params.items) |params| {
            self.alloc.free(params);
        }
        self.owned_params.deinit(self.alloc);
        self.infos.deinit(self.alloc);
        self.strings.deinit(self.alloc);
        self.intern_map.deinit();
    }

    /// Look up the TypeInfo for a given TypeId.
    ///
    /// `id` must be a handle that exists in this table; the lookup is a
    /// plain slice index.
    pub fn get(self: *const TypeTable, id: TypeId) TypeInfo {
        return self.infos.items[id.index()];
    }

    /// Intern a TypeInfo, returning the existing TypeId if structurally equal.
    ///
    /// Slices inside `info` are stored as-is, not copied, so they must stay
    /// valid for as long as the table is used. Panics on allocation failure.
    pub fn intern(self: *TypeTable, info: TypeInfo) TypeId {
        const key = TypeKey{ .info = info };
        if (self.intern_map.get(key)) |existing| return existing;

        const id = TypeId.fromIndex(@intCast(self.infos.items.len));
        self.infos.append(self.alloc, info) catch @panic(oom_message);
        self.intern_map.putNoClobber(key, id) catch @panic(oom_message);
        return id;
    }

    /// Copies `params` into table-owned memory and records the copy so that
    /// `deinit` can free it.
    fn ownParams(self: *TypeTable, params: []const TypeId) []const TypeId {
        const copy = self.alloc.dupe(TypeId, params) catch @panic(oom_message);
        self.owned_params.append(self.alloc, copy) catch @panic(oom_message);
        return copy;
    }

    // ── Convenience constructors ────────────────────────────────────────

    /// Interns the single-item pointer type to `pointee`.
    pub fn ptrTo(self: *TypeTable, pointee: TypeId) TypeId {
        return self.intern(.{ .pointer = .{ .pointee = pointee } });
    }

    /// Interns the many-item pointer type to `element`.
    pub fn manyPtrTo(self: *TypeTable, element: TypeId) TypeId {
        return self.intern(.{ .many_pointer = .{ .element = element } });
    }

    /// Interns the slice type of `element`.
    pub fn sliceOf(self: *TypeTable, element: TypeId) TypeId {
        return self.intern(.{ .slice = .{ .element = element } });
    }

    /// Interns the array type of `length` elements of `element`.
    pub fn arrayOf(self: *TypeTable, element: TypeId, length: u32) TypeId {
        return self.intern(.{ .array = .{ .element = element, .length = length } });
    }

    /// Interns the optional type wrapping `child`.
    pub fn optionalOf(self: *TypeTable, child: TypeId) TypeId {
        return self.intern(.{ .optional = .{ .child = child } });
    }

    /// Interns the function type `(params) -> ret`.
    ///
    /// `params` is only borrowed for the call. It is copied into
    /// table-owned memory solely when the type is not interned yet, so
    /// repeated requests for the same signature allocate nothing.
    pub fn functionType(self: *TypeTable, params: []const TypeId, ret: TypeId) TypeId {
        // Probe with the borrowed slice first; hashing and equality only
        // read the slice contents.
        const probe = TypeKey{ .info = .{ .function = .{ .params = params, .ret = ret } } };
        if (self.intern_map.get(probe)) |existing| return existing;

        const owned = self.ownParams(params);
        return self.intern(.{ .function = .{ .params = owned, .ret = ret } });
    }

    /// Interns the closure type `(params) -> ret`.
    ///
    /// Ownership rules are the same as for `functionType`.
    pub fn closureType(self: *TypeTable, params: []const TypeId, ret: TypeId) TypeId {
        const probe = TypeKey{ .info = .{ .closure = .{ .params = params, .ret = ret } } };
        if (self.intern_map.get(probe)) |existing| return existing;

        const owned = self.ownParams(params);
        return self.intern(.{ .closure = .{ .params = owned, .ret = ret } });
    }

    /// Interns the vector type of `length` lanes of `element`.
    pub fn vectorOf(self: *TypeTable, element: TypeId, length: u32) TypeId {
        return self.intern(.{ .vector = .{ .element = element, .length = length } });
    }

    /// Size in bytes for a type (pointer-sized = 8 on 64-bit).
    ///
    /// Struct, union, enum, tuple and protocol types return 0 as a
    /// placeholder because their size depends on layout information that
    /// is not stored here.
    pub fn sizeOf(self: *const TypeTable, id: TypeId) u32 {
        const info = self.get(id);
        return switch (info) {
            .void, .noreturn => 0,
            .bool => 1,
            // Round the bit width up to whole bytes so that, e.g., a 12-bit
            // integer occupies 2 bytes rather than 1. Never less than 1.
            .signed, .unsigned => |bits| @max(1, (@as(u32, bits) + 7) / 8),
            .f32 => 4,
            .f64 => 8,
            .string => 16, // {ptr, len}
            .pointer, .many_pointer, .function => 8,
            .closure => 16, // {fn_ptr, env}
            .optional => |opt| self.sizeOf(opt.child) + 8, // child + has_value flag (aligned)
            .slice => 16, // {ptr, len}
            .array => |arr| arr.length * self.sizeOf(arr.element),
            .vector => |vec| vec.length * self.sizeOf(vec.element),
            .any => 16, // {type_tag, data_ptr}
            // Sizes of composite types depend on layout — return 0 as placeholder.
            // Real size computation needs struct layout info from codegen/sema.
            .@"struct", .@"union", .@"enum", .tuple, .protocol => 0,
        };
    }

    /// Intern a string into the pool.
    pub fn internString(self: *TypeTable, str: []const u8) StringId {
        return self.strings.intern(self.alloc, str);
    }

    /// Look up a string from its id.
    pub fn getString(self: *const TypeTable, id: StringId) []const u8 {
        return self.strings.get(id);
    }

    /// Returns a display name for a TypeId.
    ///
    /// Named builtins yield their keyword (e.g. "s32", "Any"); struct, enum,
    /// union and protocol types yield their interned name. Every other type
    /// (pointers, slices, functions, the reserved slot, ...) currently
    /// yields "?". The returned slice is a literal or owned by the table.
    pub fn typeName(self: *const TypeTable, id: TypeId) []const u8 {
        return switch (id) {
            // Fast path for builtins
            .void => "void",
            .bool => "bool",
            .s8 => "s8",
            .s16 => "s16",
            .s32 => "s32",
            .s64 => "s64",
            .u8 => "u8",
            .u16 => "u16",
            .u32 => "u32",
            .u64 => "u64",
            .f32 => "f32",
            .f64 => "f64",
            .string => "string",
            .any => "Any",
            .noreturn => "noreturn",
            // Everything else — look at the stored TypeInfo.
            else => switch (self.get(id)) {
                .@"struct" => |s| self.getString(s.name),
                .@"enum" => |e| self.getString(e.name),
                .@"union" => |u| self.getString(u.name),
                .protocol => |p| self.getString(p.name),
                else => "?",
            },
        };
    }
};
|
||||
|
||||
// ── Intern map support ──────────────────────────────────────────────────
|
||||
// We use a custom hash/eql context so structurally identical types dedup.
|
||||
|
||||
/// Key type of the intern map: a thin wrapper around the `TypeInfo` being
/// looked up, hashed and compared through `TypeKeyContext`.
const TypeKey = struct {
    info: TypeInfo,
};
|
||||
|
||||
/// Hash-map context that hashes and compares `TypeKey`s by the structure of
/// the wrapped `TypeInfo`.
const TypeKeyContext = struct {
    /// Hashes the wrapped `TypeInfo` with a zero-seeded Wyhash.
    pub fn hash(_: TypeKeyContext, key: TypeKey) u64 {
        var hasher = std.hash.Wyhash.init(0);
        hashTypeInfo(&hasher, key.info);
        const digest = hasher.final();
        return digest;
    }

    /// Two keys are equal when `typeInfoEql` says their infos are.
    pub fn eql(_: TypeKeyContext, a: TypeKey, b: TypeKey) bool {
        const same = typeInfoEql(a.info, b.info);
        return same;
    }
};
|
||||
|
||||
/// Feeds the in-memory bytes of a single value into the hasher.
fn hashRaw(h: *std.hash.Wyhash, value: anytype) void {
    h.update(std.mem.asBytes(&value));
}

/// Mixes the parts of `info` that define its identity into `h`.
///
/// The active tag is hashed first as one byte. Struct, enum, union and
/// protocol types contribute only their name; tuples contribute only their
/// field types.
fn hashTypeInfo(h: *std.hash.Wyhash, info: TypeInfo) void {
    const tag: u8 = @intFromEnum(std.meta.activeTag(info));
    hashRaw(h, tag);

    switch (info) {
        // Payload-free tags: the tag byte alone identifies them.
        .f32, .f64, .void, .bool, .string, .any, .noreturn => {},

        .signed => |width| hashRaw(h, width),
        .unsigned => |width| hashRaw(h, width),

        .pointer => |ptr| hashRaw(h, ptr.pointee),
        .many_pointer => |ptr| hashRaw(h, ptr.element),
        .slice => |sl| hashRaw(h, sl.element),
        .optional => |opt| hashRaw(h, opt.child),

        .array => |arr| {
            hashRaw(h, arr.element);
            hashRaw(h, arr.length);
        },
        .vector => |vec| {
            hashRaw(h, vec.element);
            hashRaw(h, vec.length);
        },

        .function => |func| {
            for (func.params) |param| hashRaw(h, param);
            hashRaw(h, func.ret);
        },
        .closure => |clo| {
            for (clo.params) |param| hashRaw(h, param);
            hashRaw(h, clo.ret);
        },

        // Named types are identified by name only.
        .@"struct" => |st| hashRaw(h, st.name),
        .@"enum" => |en| hashRaw(h, en.name),
        .@"union" => |un| hashRaw(h, un.name),
        .protocol => |proto| hashRaw(h, proto.name),

        .tuple => |tup| {
            for (tup.fields) |field| hashRaw(h, field);
        },
    }
}
|
||||
|
||||
/// Structural equality used by the intern map.
///
/// Infos with different tags are never equal. Struct, enum, union and
/// protocol types compare by name only; function and closure types compare
/// parameter lists and return type; tuples compare field types only.
fn typeInfoEql(a: TypeInfo, b: TypeInfo) bool {
    if (std.meta.activeTag(a) != std.meta.activeTag(b)) return false;

    // From here on both values carry the same tag, so reading the matching
    // payload of `b` is safe.
    switch (a) {
        .f32, .f64, .void, .bool, .string, .any, .noreturn => return true,

        .signed => |width| return width == b.signed,
        .unsigned => |width| return width == b.unsigned,

        .pointer => |lhs| return lhs.pointee == b.pointer.pointee,
        .many_pointer => |lhs| return lhs.element == b.many_pointer.element,
        .slice => |lhs| return lhs.element == b.slice.element,
        .optional => |lhs| return lhs.child == b.optional.child,

        .array => |lhs| {
            const rhs = b.array;
            return lhs.element == rhs.element and lhs.length == rhs.length;
        },
        .vector => |lhs| {
            const rhs = b.vector;
            return lhs.element == rhs.element and lhs.length == rhs.length;
        },

        .function => |lhs| {
            const rhs = b.function;
            if (!std.mem.eql(TypeId, lhs.params, rhs.params)) return false;
            return lhs.ret == rhs.ret;
        },
        .closure => |lhs| {
            const rhs = b.closure;
            if (!std.mem.eql(TypeId, lhs.params, rhs.params)) return false;
            return lhs.ret == rhs.ret;
        },

        .@"struct" => |lhs| return lhs.name == b.@"struct".name,
        .@"enum" => |lhs| return lhs.name == b.@"enum".name,
        .@"union" => |lhs| return lhs.name == b.@"union".name,
        .protocol => |lhs| return lhs.name == b.protocol.name,

        // NOTE(review): `names` is not compared, so tuples that differ only
        // in element names intern to the same TypeId — confirm this is the
        // intended semantics.
        .tuple => |lhs| return std.mem.eql(TypeId, lhs.fields, b.tuple.fields),
    }
}
|
||||
Reference in New Issue
Block a user