compiler-API: welded structs by reflection + memory-order validation

Replace the previously explored byte-layout-override engine (offset-ordered LLVM structs /
weld plans / byte-blobs — all unnecessary) with a much simpler design: a welded
`struct abi(.zig) extern compiler { … }` is a bodied header declaring its fields
in the bound compiler type's MEMORY order. The compiler reflects the real Zig
type (field names via @typeInfo, offsets via @offsetOf, size via @sizeOf —
nothing hand-maintained) and validates the header matches, with loud diagnostics.

Once validation passes, the header is an ordinary struct whose natural layout already equals the Zig
layout — no reorder, no padding, no index/remap tables, no special LLVM path — so
@ptrCast'ing it to the compiler's own type and dereferencing is byte-identical.
When types.zig shifts, the header stops matching and the developer gets a specific
message to fix it.

- compiler_lib.zig: weldStruct reflects field names and bakes bound_types fields
  in ascending-offset (memory) order; deleted computeWeldPlan/WeldPlan/WeldElement.
- nominal.zig validateWeldedStruct: precise diagnostics — field-not-found,
  wrong-field-order (+ expected memory order), type-layout (size) mismatch,
  total-size mismatch.
- Examples: 0627 (StructInfo in memory order, byte-identical, usable),
  1186 (source-order StructInfo -> wrong-field-order diagnostic); 1183 refreshed.
- Design doc + checkpoint updated.
This commit is contained in:
agra
2026-06-17 15:45:23 +03:00
parent 88c4cbcfa5
commit 40d075ca98
14 changed files with 230 additions and 218 deletions

View File

@@ -49,26 +49,31 @@ pub const BoundType = struct {
const FieldZig = types.TypeInfo.StructInfo.Field; // { name: StringId, ty: TypeId } — two u32s
const StructInfoZig = types.TypeInfo.StructInfo; // { name, fields: []Field, is_protocol, nominal_id } — Zig-reordered
/// Bake a `BoundType` from a real Zig struct type `T`. Field offsets/sizes come
/// from `@offsetOf`/`@sizeOf` on `T`; `sx_field_names` supplies the sx-visible
/// names positionally (must match `T`'s field order and count — a mismatch is a
/// compile error, never a silent truncation).
fn weldStruct(
comptime sx_name: []const u8,
comptime T: type,
comptime sx_field_names: []const []const u8,
) BoundType {
/// Bake a `BoundType` by REFLECTING the real Zig struct type `T` — field names
/// from `@typeInfo`, offsets from `@offsetOf`, sizes from `@sizeOf`. Nothing is
/// maintained by hand: a `types.zig` change re-bakes on the next compiler build.
/// Fields are returned in ascending-OFFSET (memory) order, which is the order an
/// sx welded header must declare them in to be byte-identical (Zig may reorder a
/// struct's fields from source order). The sx-visible field name IS the Zig
/// field identifier.
fn weldStruct(comptime sx_name: []const u8, comptime T: type) BoundType {
const zig_fields = @typeInfo(T).@"struct".fields;
if (zig_fields.len != sx_field_names.len)
@compileError("compiler-lib weld '" ++ sx_name ++ "': sx field count != Zig field count");
comptime var layouts: [zig_fields.len]FieldLayout = undefined;
inline for (zig_fields, 0..) |zf, i| {
layouts[i] = .{
.name = sx_field_names[i],
.name = zf.name,
.offset = @offsetOf(T, zf.name),
.size = @sizeOf(zf.type),
};
}
// Sort into memory order so the sx header is checked against the layout the
// compiler actually uses (declaration order != memory order under Zig's
// auto-layout).
comptime std.sort.insertion(FieldLayout, &layouts, {}, struct {
fn lt(_: void, a: FieldLayout, b: FieldLayout) bool {
return a.offset < b.offset;
}
}.lt);
const frozen = layouts;
return .{
.sx_name = sx_name,
@@ -78,14 +83,13 @@ fn weldStruct(
};
}
/// The welded-type export list. `Field` (two u32s, natural layout) proved the
/// weld in Phase 1; `StructInfo` (Phase 2) is the first NON-natural layout —
/// Zig reorders its fields (`fields`@0, `name`@16, `nominal_id`@20,
/// `is_protocol`@24), so it exercises the offset-override engine. `EnumInfo` /
/// `TaggedUnionInfo` / `TupleInfo` join later.
/// The welded-type export list. Each entry reflects a real internal Zig type;
/// the sx header that binds it must mirror these fields IN THIS (memory) ORDER.
/// `Field` (two u32s) is naturally ordered; `StructInfo` is Zig-reordered
/// (`fields`@0, `name`@16, `nominal_id`@20, `is_protocol`@24).
pub const bound_types = [_]BoundType{
weldStruct("Field", FieldZig, &.{ "name", "ty" }),
weldStruct("StructInfo", StructInfoZig, &.{ "name", "fields", "is_protocol", "nominal_id" }),
weldStruct("Field", FieldZig),
weldStruct("StructInfo", StructInfoZig),
};
/// Look up a welded type by its sx name. Returns null when the name is not on
@@ -150,91 +154,6 @@ pub fn validateStructLayout(
return null;
}
// ── Weld plan (byte-layout override) ────────────────────────────────────────
//
// A welded struct must be laid out byte-identically to the bound Zig type, whose
// fields Zig may REORDER (and pad). The sx struct's natural layout generally
// won't match — so the compiler imposes the Zig layout: it builds the struct's
// LLVM type as the fields in ascending-OFFSET order, with explicit padding
// elements filling the gaps, and remaps each sx field index to its LLVM element
// index. `computeWeldPlan` is that pure layout math; the LLVM type builder + the
// struct-GEP / field-access sites consume the plan (later sub-steps), and the
// interp serializes comptime struct Values through the same offsets.
/// One element of a welded struct's LLVM layout: either a real field (carrying
/// its sx field index) or a padding gap. Always in ascending `offset` order.
/// Produced by `computeWeldPlan`: a field element copies the offset/size of the
/// `FieldLayout` it was planned from; a gap element covers the bytes between the
/// end of the previous field and the next field (or the struct's total size).
pub const WeldElement = struct {
/// The sx field index this element holds, or null for a padding gap.
/// For a field this is the index into the `fields` slice given to
/// `computeWeldPlan`, not the element's position in the plan.
sx_field: ?usize,
/// Byte offset of this element within the struct (the bound Zig offset).
/// For a gap this is where the preceding field ended.
offset: usize,
/// Byte width of this element (the field's size, or the gap width).
size: usize,
};
/// The byte-layout plan for a welded struct: its LLVM elements in offset order
/// (fields + padding) and the sx-field → LLVM-element-index remap. Owns its
/// slices — `deinit` with the same allocator passed to `computeWeldPlan`.
pub const WeldPlan = struct {
/// Fields and padding gaps, in ascending byte-offset order.
elements: []const WeldElement,
/// `sx_to_llvm[i]` is the index into `elements` of sx field `i`.
/// Has one entry per input field; padding elements have no entry here.
sx_to_llvm: []const usize,
/// The `total_size` handed to `computeWeldPlan`, stored unchanged.
total_size: usize,
/// Free both owned slices with `alloc`. The fields are not reset, so the
/// plan must not be read (or `deinit`ed again) afterwards.
pub fn deinit(self: *WeldPlan, alloc: std.mem.Allocator) void {
alloc.free(self.elements);
alloc.free(self.sx_to_llvm);
}
};
/// Compute the byte-layout plan for a struct whose fields carry their bound Zig
/// offsets (`fields[i].offset`/`.size`, e.g. from a `BoundType`). `total_size` is
/// the bound Zig `@sizeOf`. The result lists LLVM elements in ascending-offset
/// order — real fields interleaved with padding gaps — plus the sx-field →
/// element-index remap that struct-GEP uses. Pure; allocates the result slices.
/// Build the `WeldPlan` for a struct from its bound field layouts.
///
/// `fields[i]` carries the byte `offset` and `size` of sx field `i`; `total_size`
/// is the size of the whole struct. The returned plan lists the fields in
/// ascending-offset order (ties keep their input order), inserts a padding
/// element for every gap between consecutive fields and for any tail up to
/// `total_size`, and records in `sx_to_llvm[i]` where sx field `i` landed.
///
/// The caller owns the result and releases it with `WeldPlan.deinit` using the
/// same `alloc`. Allocation failures are propagated; anything allocated so far
/// is freed before returning the error.
///
/// No validation is performed: overlapping fields, or a `total_size` smaller
/// than the end of the last field, are not diagnosed — they simply produce no
/// padding element at that position.
pub fn computeWeldPlan(
    alloc: std.mem.Allocator,
    fields: []const FieldLayout,
    total_size: usize,
) !WeldPlan {
    // Scratch permutation: sx field indices, to be sorted by bound offset.
    const by_offset = try alloc.alloc(usize, fields.len);
    defer alloc.free(by_offset);
    for (0..fields.len) |i| by_offset[i] = i;

    // Insertion sort is stable, so equal offsets keep their sx order.
    const ByOffset = struct {
        fn lessThan(layouts: []const FieldLayout, lhs: usize, rhs: usize) bool {
            return layouts[lhs].offset < layouts[rhs].offset;
        }
    };
    std.sort.insertion(usize, by_offset, fields, ByOffset.lessThan);

    var planned = std.ArrayList(WeldElement).empty;
    errdefer planned.deinit(alloc);

    const remap = try alloc.alloc(usize, fields.len);
    errdefer alloc.free(remap);

    // `covered` is the byte offset just past the most recently placed field.
    var covered: usize = 0;
    for (by_offset) |field_index| {
        const layout = fields[field_index];

        // A field starting beyond `covered` leaves a hole: emit padding for it.
        if (layout.offset > covered) {
            try planned.append(alloc, .{
                .sx_field = null,
                .offset = covered,
                .size = layout.offset - covered,
            });
        }

        // The field lands at the next free element slot.
        remap[field_index] = planned.items.len;
        try planned.append(alloc, .{
            .sx_field = field_index,
            .offset = layout.offset,
            .size = layout.size,
        });
        covered = layout.offset + layout.size;
    }

    // Alignment tail: pad from the last field's end up to the bound size.
    if (total_size > covered) {
        try planned.append(alloc, .{
            .sx_field = null,
            .offset = covered,
            .size = total_size - covered,
        });
    }

    // Last fallible step; on failure the errdefers above release everything.
    const owned_elements = try planned.toOwnedSlice(alloc);
    return .{
        .elements = owned_elements,
        .sx_to_llvm = remap,
        .total_size = total_size,
    };
}
// ── Functions (comptime-only, host-call bridged) ────────────────────────────
/// A welded `compiler` function: dispatched under the comptime interpreter to its