This commit is contained in:
agra
2026-02-26 02:25:02 +02:00
parent 7209e8e69d
commit dd14f1206b
23 changed files with 5433 additions and 9 deletions

484
src/ir/types.zig Normal file
View File

@@ -0,0 +1,484 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
// ── TypeId ──────────────────────────────────────────────────────────────
// Opaque handle into the TypeTable. First 16 slots are reserved for builtins.
pub const TypeId = enum(u32) {
    // Builtin slots 0-15
    void = 0,
    bool = 1,
    s8 = 2,
    s16 = 3,
    s32 = 4,
    s64 = 5,
    u8 = 6,
    u16 = 7,
    u32 = 8,
    u64 = 9,
    f32 = 10,
    f64 = 11,
    string = 12, // [:0]u8
    any = 13,
    noreturn = 14,
    _reserved = 15,
    _, // user-defined types start at 16

    /// First index that does not belong to the builtin range.
    pub const first_user: u32 = 16;

    /// Raw integer value behind this handle.
    pub fn index(self: TypeId) u32 {
        const raw: u32 = @intFromEnum(self);
        return raw;
    }

    /// Wrap a raw integer as a handle. No range check is performed.
    pub fn fromIndex(i: u32) TypeId {
        const id: TypeId = @enumFromInt(i);
        return id;
    }

    /// True when the handle's index is below `first_user`.
    pub fn isBuiltin(self: TypeId) bool {
        return @intFromEnum(self) < first_user;
    }
};
// ── TypeInfo ────────────────────────────────────────────────────────────
// Resolved type information stored in the TypeTable.
// Unlike the AST-level `types.Type` which uses string names for references,
// TypeInfo uses TypeId handles, making it fully resolved and internable.
pub const TypeInfo = union(enum) {
// NOTE: hashTypeInfo hashes the integer value of the active tag, so the order
// of the variants below feeds into the intern map's hash.
signed: u8, // bit width: 1-64
unsigned: u8, // bit width, same convention as `signed`
f32,
f64,
void,
bool,
string, // [:0]u8 — fat pointer {ptr, len}
@"struct": StructInfo,
@"enum": EnumInfo,
@"union": UnionInfo,
array: ArrayInfo,
slice: SliceInfo,
pointer: PointerInfo,
many_pointer: ManyPointerInfo,
vector: VectorInfo,
function: FunctionInfo,
closure: ClosureInfo,
optional: OptionalInfo,
tuple: TupleInfo,
any,
protocol: ProtocolInfo,
noreturn,
/// Named struct with its fields in declaration order.
/// hashTypeInfo/typeInfoEql look only at `name`, so two StructInfo values
/// with the same name are treated as the same type regardless of `fields`.
pub const StructInfo = struct {
name: StringId,
fields: []const Field,
/// One field: interned name plus resolved type.
pub const Field = struct {
name: StringId,
ty: TypeId,
};
};
/// Named enum and its variant names. Compared by `name` only when interned.
pub const EnumInfo = struct {
name: StringId,
variants: []const StringId,
};
/// Named union; reuses StructInfo.Field for its members.
/// Compared by `name` only when interned.
pub const UnionInfo = struct {
name: StringId,
fields: []const StructInfo.Field,
tag_type: ?TypeId, // tagged union enum type, null if untagged
};
/// Array of `length` elements of type `element`.
pub const ArrayInfo = struct {
element: TypeId,
length: u32,
};
/// Slice whose elements have type `element`.
pub const SliceInfo = struct {
element: TypeId,
};
/// Pointer to a value of type `pointee`.
pub const PointerInfo = struct {
pointee: TypeId,
};
/// Many-item pointer whose elements have type `element`.
pub const ManyPointerInfo = struct {
element: TypeId,
};
/// Vector of `length` lanes of type `element`.
pub const VectorInfo = struct {
element: TypeId,
length: u32,
};
/// Function signature: parameter types in order, plus return type.
pub const FunctionInfo = struct {
params: []const TypeId,
ret: TypeId,
};
/// Closure signature. Same shape as FunctionInfo but a distinct variant,
/// so a closure never interns to the same TypeId as a plain function.
pub const ClosureInfo = struct {
params: []const TypeId,
ret: TypeId,
};
/// Optional wrapping a value of type `child`.
pub const OptionalInfo = struct {
child: TypeId,
};
/// Tuple of positional field types with optional field names.
/// NOTE(review): hashTypeInfo/typeInfoEql ignore `names`, so tuples that
/// differ only in field names intern to the same TypeId — confirm intended.
pub const TupleInfo = struct {
fields: []const TypeId,
names: ?[]const StringId,
};
/// Named protocol and its method signatures. Compared by `name` only when interned.
pub const ProtocolInfo = struct {
name: StringId,
methods: []const Method,
/// One protocol method: interned name plus its signature type.
pub const Method = struct {
name: StringId,
sig: TypeId, // function type
};
};
};
// ── StringId ────────────────────────────────────────────────────────────
/// Handle for an interned string. Value 0 is the empty string.
pub const StringId = enum(u32) {
    empty = 0,
    _,

    /// Raw integer value behind this handle.
    pub fn index(self: StringId) u32 {
        const raw: u32 = @intFromEnum(self);
        return raw;
    }
};
// ── StringPool ──────────────────────────────────────────────────────────
// Intern strings for type/field/variant names. Deduplicates by content.
pub const StringPool = struct {
    /// Maps string content → StringId for dedup. Keys point to owned allocations in `strings`.
    map: std.StringHashMap(StringId),
    /// Owned string data indexed by StringId. Slot 0 is the "" literal; every
    /// later entry is a separate heap allocation.
    strings: std.ArrayList([]const u8),
    /// Id that the next newly interned string will receive.
    next_id: u32,

    const oom_msg = "StringPool: out of memory";

    /// Create a pool whose slot 0 already holds the empty string.
    /// `alloc` backs the dedup map and the initial list growth.
    /// Panics on allocation failure (the signature has no error channel).
    pub fn init(alloc: Allocator) StringPool {
        var pool = StringPool{
            .map = std.StringHashMap(StringId).init(alloc),
            .strings = std.ArrayList([]const u8).empty,
            .next_id = 1, // 0 is reserved for empty
        };
        // Slot 0 = empty string (not heap-allocated).
        // A defined panic instead of `unreachable`: allocation can really fail,
        // and `unreachable` would be undefined behaviour in ReleaseFast.
        pool.strings.append(alloc, "") catch @panic(oom_msg);
        return pool;
    }

    /// Release every interned string, the id list, and the dedup map.
    /// `alloc` must be the allocator that was passed to `intern`.
    pub fn deinit(self: *StringPool, alloc: Allocator) void {
        // Skip slot 0, which is a string literal rather than a heap copy.
        // `Allocator.free` accepts const slices, so no cast is needed.
        for (self.strings.items[1..]) |s| alloc.free(s);
        self.strings.deinit(alloc);
        self.map.deinit();
    }

    /// Return the id for `str`, copying it into the pool on first sight.
    /// The empty string always maps to `.empty` without touching the map.
    /// Panics on allocation failure.
    pub fn intern(self: *StringPool, alloc: Allocator, str: []const u8) StringId {
        if (str.len == 0) return .empty;

        // One hash lookup serves both the hit and the miss path.
        const slot = self.map.getOrPut(str) catch @panic(oom_msg);
        if (slot.found_existing) return slot.value_ptr.*;

        // The new entry is still keyed on the caller's bytes; replace the key
        // with a stable copy that the pool owns.
        const owned = alloc.dupe(u8, str) catch @panic(oom_msg);
        const id: StringId = @enumFromInt(self.next_id);
        slot.key_ptr.* = owned;
        slot.value_ptr.* = id;

        self.strings.append(alloc, owned) catch @panic(oom_msg);
        self.next_id += 1;
        return id;
    }

    /// Look up the text for `id`. An id past the end of the pool yields "".
    pub fn get(self: *const StringPool, id: StringId) []const u8 {
        const idx = id.index();
        if (idx >= self.strings.items.len) return "";
        return self.strings.items[idx];
    }
};
// ── TypeTable ───────────────────────────────────────────────────────────
// Holds all resolved types. Builtins in slots 0-15, user types interned from 16+.
pub const TypeTable = struct {
    /// Resolved type records, indexed by `TypeId.index()`.
    infos: std.ArrayList(TypeInfo),
    /// Interned names referenced from `TypeInfo` payloads.
    strings: StringPool,
    /// Maps TypeInfo → TypeId for dedup of structural types
    intern_map: std.HashMap(TypeKey, TypeId, TypeKeyContext, 80),
    /// Allocator used for every allocation the table makes.
    alloc: Allocator,
    /// Parameter slices duplicated by `functionType`/`closureType`. Tracked
    /// separately so `deinit` frees exactly the slices the table allocated and
    /// never a slice a caller handed to `intern` directly.
    owned_params: std.ArrayList([]const TypeId) = std.ArrayList([]const TypeId).empty,

    const oom_msg = "TypeTable: out of memory";

    /// Create a table with the builtin slots 0-15 populated and registered for
    /// interning. Panics on allocation failure (the signature has no error channel).
    pub fn init(alloc: Allocator) TypeTable {
        var table = TypeTable{
            .infos = std.ArrayList(TypeInfo).empty,
            .strings = StringPool.init(alloc),
            .intern_map = std.HashMap(TypeKey, TypeId, TypeKeyContext, 80).init(alloc),
            .alloc = alloc,
        };
        // Pre-populate builtin slots 0-15 (must match TypeId enum order)
        const builtins = [_]TypeInfo{
            .void, // 0
            .bool, // 1
            .{ .signed = 8 }, // 2: s8
            .{ .signed = 16 }, // 3: s16
            .{ .signed = 32 }, // 4: s32
            .{ .signed = 64 }, // 5: s64
            .{ .unsigned = 8 }, // 6: u8
            .{ .unsigned = 16 }, // 7: u16
            .{ .unsigned = 32 }, // 8: u32
            .{ .unsigned = 64 }, // 9: u64
            .f32, // 10
            .f64, // 11
            .string, // 12
            .any, // 13
            .noreturn, // 14
            .void, // 15: reserved (placeholder)
        };
        for (&builtins, 0..) |info, slot| {
            table.infos.append(alloc, info) catch @panic(oom_msg);
            // The reserved slot reuses `.void` as a placeholder; registering it
            // would collide with slot 0 in the intern map.
            if (slot == @intFromEnum(TypeId._reserved)) continue;
            // Register the builtin so `intern` hands back the canonical id
            // (e.g. `.{ .signed = 32 }` → `TypeId.s32`) instead of a duplicate.
            table.intern_map.putNoClobber(
                .{ .info = info },
                TypeId.fromIndex(@intCast(slot)),
            ) catch @panic(oom_msg);
        }
        return table;
    }

    /// Free everything the table allocated, including parameter slices copied
    /// by `functionType`/`closureType`.
    pub fn deinit(self: *TypeTable) void {
        for (self.owned_params.items) |params| self.alloc.free(params);
        self.owned_params.deinit(self.alloc);
        self.infos.deinit(self.alloc);
        self.strings.deinit(self.alloc);
        self.intern_map.deinit();
    }

    /// Look up the TypeInfo for a given TypeId.
    /// `id` must index an existing slot; an out-of-range id is an
    /// out-of-bounds slice access.
    pub fn get(self: *const TypeTable, id: TypeId) TypeInfo {
        return self.infos.items[id.index()];
    }

    /// Intern a TypeInfo, returning the existing TypeId if structurally equal.
    /// Slices inside `info` are stored as given (not copied), so they must
    /// outlive the table. Panics on allocation failure.
    pub fn intern(self: *TypeTable, info: TypeInfo) TypeId {
        const key = TypeKey{ .info = info };
        if (self.intern_map.get(key)) |existing| {
            return existing;
        }
        const id = TypeId.fromIndex(@intCast(self.infos.items.len));
        self.infos.append(self.alloc, info) catch @panic(oom_msg);
        self.intern_map.putNoClobber(key, id) catch @panic(oom_msg);
        return id;
    }

    /// Copy `params` into table-owned memory and remember it for `deinit`.
    fn dupeParams(self: *TypeTable, params: []const TypeId) []const TypeId {
        const owned = self.alloc.dupe(TypeId, params) catch @panic(oom_msg);
        self.owned_params.append(self.alloc, owned) catch @panic(oom_msg);
        return owned;
    }

    // ── Convenience constructors ────────────────────────────────────────

    /// Intern the single-item pointer type to `pointee`.
    pub fn ptrTo(self: *TypeTable, pointee: TypeId) TypeId {
        return self.intern(.{ .pointer = .{ .pointee = pointee } });
    }

    /// Intern the many-item pointer type over `element`.
    pub fn manyPtrTo(self: *TypeTable, element: TypeId) TypeId {
        return self.intern(.{ .many_pointer = .{ .element = element } });
    }

    /// Intern the slice type over `element`.
    pub fn sliceOf(self: *TypeTable, element: TypeId) TypeId {
        return self.intern(.{ .slice = .{ .element = element } });
    }

    /// Intern the array type of `length` elements of `element`.
    pub fn arrayOf(self: *TypeTable, element: TypeId, length: u32) TypeId {
        return self.intern(.{ .array = .{ .element = element, .length = length } });
    }

    /// Intern the optional type wrapping `child`.
    pub fn optionalOf(self: *TypeTable, child: TypeId) TypeId {
        return self.intern(.{ .optional = .{ .child = child } });
    }

    /// Intern a function type. `params` is only borrowed for the call; the
    /// table stores its own copy the first time a signature is seen.
    pub fn functionType(self: *TypeTable, params: []const TypeId, ret: TypeId) TypeId {
        // Probe with the caller's slice first so a repeated request neither
        // allocates nor leaks a copy.
        const probe = TypeKey{ .info = .{ .function = .{ .params = params, .ret = ret } } };
        if (self.intern_map.get(probe)) |existing| return existing;
        const owned_params = self.dupeParams(params);
        return self.intern(.{ .function = .{ .params = owned_params, .ret = ret } });
    }

    /// Intern a closure type. Same ownership rules as `functionType`.
    pub fn closureType(self: *TypeTable, params: []const TypeId, ret: TypeId) TypeId {
        const probe = TypeKey{ .info = .{ .closure = .{ .params = params, .ret = ret } } };
        if (self.intern_map.get(probe)) |existing| return existing;
        const owned_params = self.dupeParams(params);
        return self.intern(.{ .closure = .{ .params = owned_params, .ret = ret } });
    }

    /// Intern the vector type of `length` lanes of `element`.
    pub fn vectorOf(self: *TypeTable, element: TypeId, length: u32) TypeId {
        return self.intern(.{ .vector = .{ .element = element, .length = length } });
    }

    /// Size in bytes for a type (pointer-sized = 8 on 64-bit).
    /// Composite types (struct/union/enum/tuple/protocol) return 0 as a
    /// placeholder because their layout is not known here.
    pub fn sizeOf(self: *const TypeTable, id: TypeId) u32 {
        const info = self.get(id);
        return switch (info) {
            .void, .noreturn => 0,
            .bool => 1,
            // Round the bit width up to whole bytes (12 bits → 2 bytes) and
            // never report less than one byte. Widened to u32 so `+ 7` cannot
            // overflow the u8 payload.
            .signed, .unsigned => |bits| @max(1, (@as(u32, bits) + 7) / 8),
            .f32 => 4,
            .f64 => 8,
            .string => 16, // {ptr, len}
            .pointer, .many_pointer, .function => 8,
            .closure => 16, // {fn_ptr, env}
            .optional => |opt| self.sizeOf(opt.child) + 8, // child + has_value flag (aligned)
            .slice => 16, // {ptr, len}
            .array => |arr| arr.length * self.sizeOf(arr.element),
            .vector => |vec| vec.length * self.sizeOf(vec.element),
            .any => 16, // {type_tag, data_ptr}
            .@"struct", .@"union", .@"enum", .tuple, .protocol => {
                // Sizes of composite types depend on layout — return 0 as placeholder.
                // Real size computation needs struct layout info from codegen/sema.
                return 0;
            },
        };
    }

    /// Intern a string into the pool.
    pub fn internString(self: *TypeTable, str: []const u8) StringId {
        return self.strings.intern(self.alloc, str);
    }

    /// Look up a string from its id.
    pub fn getString(self: *const TypeTable, id: StringId) []const u8 {
        return self.strings.get(id);
    }

    /// Display name for a TypeId.
    /// Builtin ids map to fixed names ("s32", "Any", ...). Named user types
    /// (struct/enum/union/protocol) return their interned name. Every other
    /// type — pointers, slices, functions, the reserved slot, ... — returns "?";
    /// no structural formatting such as "*bool" is produced.
    pub fn typeName(self: *const TypeTable, id: TypeId) []const u8 {
        // Fast path for builtins
        return switch (id) {
            .void => "void",
            .bool => "bool",
            .s8 => "s8",
            .s16 => "s16",
            .s32 => "s32",
            .s64 => "s64",
            .u8 => "u8",
            .u16 => "u16",
            .u32 => "u32",
            .u64 => "u64",
            .f32 => "f32",
            .f64 => "f64",
            .string => "string",
            .any => "Any",
            .noreturn => "noreturn",
            // User types — named kinds resolve through the string pool.
            else => switch (self.get(id)) {
                .@"struct" => |s| self.getString(s.name),
                .@"enum" => |e| self.getString(e.name),
                .@"union" => |u| self.getString(u.name),
                .protocol => |p| self.getString(p.name),
                else => "?",
            },
        };
    }
};
// ── Intern map support ──────────────────────────────────────────────────
// We use a custom hash/eql context so structurally identical types dedup.
/// Key type of the intern map: a thin wrapper around a `TypeInfo`.
/// Hashing and equality for it are supplied by `TypeKeyContext`.
const TypeKey = struct {
info: TypeInfo,
};
/// Hash-map context that makes `TypeKey` hash and compare structurally by
/// delegating to `hashTypeInfo` and `typeInfoEql`.
const TypeKeyContext = struct {
    /// Hash of the wrapped TypeInfo, using a Wyhash seeded with 0.
    pub fn hash(_: TypeKeyContext, key: TypeKey) u64 {
        var hasher = std.hash.Wyhash.init(0);
        hashTypeInfo(&hasher, key.info);
        const digest = hasher.final();
        return digest;
    }

    /// True when both keys wrap structurally equal TypeInfo values.
    pub fn eql(_: TypeKeyContext, a: TypeKey, b: TypeKey) bool {
        const same = typeInfoEql(a.info, b.info);
        return same;
    }
};
/// Feed a structural fingerprint of `info` into the running hasher `h`.
/// Named kinds (struct/enum/union/protocol) contribute only their name;
/// tuples contribute only their field types (not their field names).
fn hashTypeInfo(h: *std.hash.Wyhash, info: TypeInfo) void {
    // The active tag goes in first so payload-free variants still differ.
    const tag_byte: u8 = @intFromEnum(std.meta.activeTag(info));
    h.update(&.{tag_byte});

    switch (info) {
        // Nothing beyond the tag to mix in.
        .f32, .f64, .void, .bool, .string, .any, .noreturn => {},

        .signed, .unsigned => |width| h.update(&.{width}),

        .pointer => |ptr| h.update(std.mem.asBytes(&ptr.pointee)),
        .optional => |opt| h.update(std.mem.asBytes(&opt.child)),

        // `inline` lets prongs with different payload types share one body.
        inline .many_pointer, .slice => |seq| h.update(std.mem.asBytes(&seq.element)),

        inline .array, .vector => |seq| {
            h.update(std.mem.asBytes(&seq.element));
            h.update(std.mem.asBytes(&seq.length));
        },

        inline .function, .closure => |callable| {
            for (callable.params) |param| h.update(std.mem.asBytes(&param));
            h.update(std.mem.asBytes(&callable.ret));
        },

        inline .@"struct", .@"enum", .@"union", .protocol => |named| {
            h.update(std.mem.asBytes(&named.name));
        },

        .tuple => |tup| {
            for (tup.fields) |field_ty| h.update(std.mem.asBytes(&field_ty));
        },
    }
}
/// Structural equality for two TypeInfo values, matching what `hashTypeInfo`
/// hashes: named kinds compare by name only, tuples by field types only.
fn typeInfoEql(a: TypeInfo, b: TypeInfo) bool {
    // Different variants can never be equal; after this check it is safe to
    // read the same payload field from `b` as is active in `a`.
    if (std.meta.activeTag(a) != std.meta.activeTag(b)) return false;

    const ids = struct {
        /// Same length and pairwise-equal elements.
        fn same(xs: anytype, ys: @TypeOf(xs)) bool {
            if (xs.len != ys.len) return false;
            for (xs, ys) |x, y| {
                if (x != y) return false;
            }
            return true;
        }
    };

    return switch (a) {
        .f32, .f64, .void, .bool, .string, .any, .noreturn => true,

        .signed => |width| width == b.signed,
        .unsigned => |width| width == b.unsigned,

        .pointer => |lhs| lhs.pointee == b.pointer.pointee,
        .many_pointer => |lhs| lhs.element == b.many_pointer.element,
        .slice => |lhs| lhs.element == b.slice.element,
        .optional => |lhs| lhs.child == b.optional.child,

        .array => |lhs| lhs.length == b.array.length and lhs.element == b.array.element,
        .vector => |lhs| lhs.length == b.vector.length and lhs.element == b.vector.element,

        .function => |lhs| lhs.ret == b.function.ret and ids.same(lhs.params, b.function.params),
        .closure => |lhs| lhs.ret == b.closure.ret and ids.same(lhs.params, b.closure.params),

        .@"struct" => |lhs| lhs.name == b.@"struct".name,
        .@"enum" => |lhs| lhs.name == b.@"enum".name,
        .@"union" => |lhs| lhs.name == b.@"union".name,
        .protocol => |lhs| lhs.name == b.protocol.name,

        .tuple => |lhs| ids.same(lhs.fields, b.tuple.fields),
    };
}