From d515696e61268fbfdbb5cc7adc3bcf8091828074 Mon Sep 17 00:00:00 2001 From: agra Date: Fri, 5 Jun 2026 23:33:22 +0300 Subject: [PATCH] fix(lsp): identifier array dimension no longer panics the analyzer [0099] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `Analyzer.resolveTypeNode` read the array `.length` node's `.int_literal` union field unconditionally. For a named-const dimension (`MAX :: 4; [MAX]u8`) that node is an `identifier`, so the access tripped Zig's checked-union panic and `sx lsp` aborted on didOpen. The main compiler was unaffected (it folds the dim through the IR). - New `arrayDimLength` helper switches on the dimension node tag: int_literal → value; identifier → a recorded module-const int value; anything else / out-of-u32-range → unknown. Never assumes a node shape. - `Type.ArrayTypeInfo.length` is now `?u32`; null is an explicit "editor couldn't fold this dimension" marker (rendered `[_]T`), never a fabricated concrete length. - New `const_int_values` registry records integer-literal consts at registration time for the identifier path. Regression: first `src/lsp/*.test.zig` (the minimal LSP harness), wired into the test graph via `src/root.zig`. Drives `analyzeDocument` over `[MAX]u8` (folds to 4, no panic), `[64]u8` (happy-path guard), and `[N]u8` (explicit unknown). Fail-before/pass-after verified. Sibling audit of the resolveTypeNode/fieldType family: the array dim was the only unchecked union-field access; all other arms recurse or tag-check first. Noted a non-crashing display gap in server.zig hover rendering for step B. --- issues/0099-lsp-array-dim-identifier-panic.md | 100 ++++++++++++++++++ src/lsp/document.test.zig | 86 +++++++++++++++ src/root.zig | 2 + src/sema.zig | 32 +++++- src/types.zig | 11 +- 5 files changed, 228 insertions(+), 3 deletions(-) create mode 100644 issues/0099-lsp-array-dim-identifier-panic.md create mode 100644 src/lsp/document.test.zig diff --git a/issues/0099-lsp-array-dim-identifier-panic.md b/issues/0099-lsp-array-dim-identifier-panic.md new file mode 100644 index 0000000..85bc8bf --- /dev/null +++ b/issues/0099-lsp-array-dim-identifier-panic.md @@ -0,0 +1,100 @@ +# 0099 — LSP analyzer panics on an identifier array dimension (`[MAX]u8`) + +**RESOLVED.** The editor analyzer (`src/sema.zig`) read the array-dimension node's +`.int_literal` union field unconditionally. When the dimension is a **named const** +(`[MAX]u8`), the `.length` node is an `identifier`, so the access tripped Zig's +checked-union panic and `sx lsp` aborted (`SIGABRT`) the moment the file was opened +(didOpen → `analyzeDocument`). The main compiler (`sx run`) was never affected — it +resolves `[MAX]u8` through the IR's `foldArrayDim` machinery. + +## Symptom + +- **Observed:** opening a buffer with a named-const dimension in an editor crashes the + language server: `src/sema.zig` aborts with + `access of union field 'int_literal' while field 'identifier' is active`. +- **Expected:** the analyzer resolves `[MAX]u8` to length 4 (or, when it can't fold the + dimension, records an explicit "unknown" length) and never crashes. + +## Reproduction + +```sx +MAX :: 4; +Thing :: struct { buf: [MAX]u8; } +``` + +`sx run` compiles this fine; `sx lsp` aborts on didOpen. (The crash is reached via +`registerTopLevelDecl` → `fieldType` → `resolveTypeNode` while building the editor +index — not a diagnostic pass.) + +## Root cause + +`Analyzer.resolveTypeNode` in `src/sema.zig`, the `array_type_expr` arm: + +```zig +const length: u32 = @intCast(ate.length.data.int_literal.value); +``` + +`ate.length` is an arbitrary expression node. For a literal `[64]u8` it is +`int_literal`; for `[MAX]u8` it is `identifier`. The code assumed `int_literal` +unconditionally — a textbook unchecked-union-field access. `@intCast` of the +non-existent payload never even runs; the union-field read panics first. + +## Fix + +- New helper `Analyzer.arrayDimLength(len_node)` switches on the dimension node's tag: + `int_literal` → its value; `identifier` → the value recorded for that name in a new + `const_int_values` registry (populated when a top-level `const_decl` has an + `int_literal` value); anything else (unknown name, non-const expression) → `null`. A + value outside `u32` range also yields `null`. No node shape is ever assumed without a + tag check, so the path cannot panic. +- `Type.ArrayTypeInfo.length` changed from `u32` to `?u32`. `null` is the explicit + "editor couldn't fold this dimension" marker — never a fabricated concrete length + (this is hover/metadata, not codegen). `displayName` renders an unknown length as + `[_]T`. +- `src/sema.zig` `substType` passes the (now optional) length through unchanged; the + only other consumer of the editor `ArrayTypeInfo` is `displayName`, updated above. + +The named-const value is captured at registration time, so resolution shares the +analyzer's existing intra-pass forward-reference limitation: a const declared *after* the +struct that uses it resolves to the safe "unknown" length rather than its value. This +matches how the analyzer already treats a struct field that references a later-declared +struct type, and never crashes. + +## Sibling audit (`resolveTypeNode` / `fieldType` family) + +Audited every arm for the same unchecked `.data.` pattern. The array dimension +was the **only** crashing site: + +- `slice_type_expr`, `optional_type_expr`, `pointer_type_expr`, + `many_pointer_type_expr`, `function_type_expr` — all recurse through + `resolveTypeNode`, which dispatches on the node tag; no raw field access. +- `parameterized_type_expr` returns `.void_type`; `type_expr` / `identifier` read + `.name` / `.is_raw` only *after* `tn.data == .type_expr or tn.data == .identifier`. +- `fieldType`, `typeExprName`, `typeExprIsRaw`, `resolveTypeAnnotation`, + `resolveTypeRef` all tag-check (switch capture or explicit `==`) before any payload + read; `resolveTypeAnnotation`'s `array_type_expr` case delegates to `resolveTypeNode`, + so it is fixed transitively. + +**Non-crashing display gap noted (not fixed here — out of scope for A):** +`src/lsp/server.zig` (~line 2801) renders a struct's array field in hover/completion +detail directly off the AST. It already guards `if (ate.length.data == .int_literal)`, +so it does **not** crash, but it renders a named-const dimension as an empty `[]u8` +instead of `[4]u8`. Tracked for the broad LSP `.data` sweep (next step B), not addressed +in this crash fix. + +## Regression + +First `src/lsp/*.test.zig` — establishes the minimal LSP test harness +(`src/lsp/document.test.zig`), wired into the `zig build test` graph via +`src/root.zig`'s `lsp.document_tests` reference. Tests drive the real didOpen path +(`DocumentStore.analyzeDocument`) and inspect the editor index: + +- `[MAX]u8` (named const) → no panic; folded length is `4`, element `u8`. +- `[64]u8` (int literal) → length `64` (happy-path guard). +- `[N]u8` (undeclared name) → explicit unknown length (`null`), no panic, no fabricated + value. + +Verified fail-before / pass-after: with the pre-fix unconditional `.int_literal` +dereference restored, the identifier test aborts with the exact original panic; with the +tag-switching fix it passes. Full gate green: `zig build`, `zig build test` (incl. the +new LSP test), `bash tests/run_examples.sh` (453 passed, 0 failed). diff --git a/src/lsp/document.test.zig b/src/lsp/document.test.zig new file mode 100644 index 0000000..188b151 --- /dev/null +++ b/src/lsp/document.test.zig @@ -0,0 +1,86 @@ +const std = @import("std"); +const sx = struct { + pub const sema = @import("../sema.zig"); + pub const types = @import("../types.zig"); +}; +const doc_mod = @import("document.zig"); + +// Minimal LSP test harness (issue 0099): drive the editor analyzer through the +// real didOpen path (`DocumentStore.analyzeDocument`) and inspect the resulting +// editor index. This is the FIRST `src/lsp/*.test.zig`; it is pulled into the +// `zig build test` graph via the `_ = lsp.document;` reference in `src/root.zig` +// (its tests live one struct deeper than `refAllDecls` reaches). + +var g_test_threaded: ?std.Io.Threaded = null; +fn test_io() std.Io { + if (g_test_threaded == null) { + g_test_threaded = std.Io.Threaded.init(std.heap.page_allocator, .{}); + } + return g_test_threaded.?.io(); +} + +/// The editor `Type` recorded for field `field` of struct `type_name` in the +/// document's index, or null if the document/struct/field isn't present. +fn fieldTypeOf(doc: *doc_mod.Document, type_name: []const u8, field: []const u8) ?sx.types.Type { + const sema = doc.sema orelse return null; + const info = sema.struct_types.get(type_name) orelse return null; + for (info.field_names, info.field_types) |fname, fty| { + if (std.mem.eql(u8, fname, field)) return fty; + } + return null; +} + +test "analyzeDocument: identifier array dimension folds to the const value (issue 0099)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var store = doc_mod.DocumentStore.init(alloc, test_io(), &.{}); + const src: [:0]const u8 = + \\MAX :: 4; + \\Thing :: struct { buf: [MAX]u8; } + ; + const doc = try store.openOrUpdate("main.sx", src, 1); + // Pre-fix this aborts inside `resolveTypeNode`: the array `.length` node is + // an `identifier` (named const), but the code read `.int_literal` + // unconditionally. Reaching the assertions at all proves the crash is gone. + try store.analyzeDocument(doc); + + const buf_ty = fieldTypeOf(doc, "Thing", "buf") orelse return error.SkipZigTest; + try std.testing.expect(buf_ty == .array_type); + try std.testing.expectEqual(@as(?u32, 4), buf_ty.array_type.length); + try std.testing.expectEqualStrings("u8", buf_ty.array_type.element_name); +} + +test "analyzeDocument: int-literal array dimension still resolves to its length" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var store = doc_mod.DocumentStore.init(alloc, test_io(), &.{}); + const src: [:0]const u8 = "Buf :: struct { data: [64]u8; }"; + const doc = try store.openOrUpdate("main.sx", src, 1); + try store.analyzeDocument(doc); + + const data_ty = fieldTypeOf(doc, "Buf", "data") orelse return error.SkipZigTest; + try std.testing.expect(data_ty == .array_type); + try std.testing.expectEqual(@as(?u32, 64), data_ty.array_type.length); + try std.testing.expectEqualStrings("u8", data_ty.array_type.element_name); +} + +test "analyzeDocument: unresolvable array dimension records an explicit unknown length" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var store = doc_mod.DocumentStore.init(alloc, test_io(), &.{}); + // `N` is never declared as an integer const → the dimension is unknown. + // Must not panic and must not fabricate a concrete length. + const src: [:0]const u8 = "Holder :: struct { slots: [N]u8; }"; + const doc = try store.openOrUpdate("main.sx", src, 1); + try store.analyzeDocument(doc); + + const slots_ty = fieldTypeOf(doc, "Holder", "slots") orelse return error.SkipZigTest; + try std.testing.expect(slots_ty == .array_type); + try std.testing.expectEqual(@as(?u32, null), slots_ty.array_type.length); +} diff --git a/src/root.zig b/src/root.zig index 230c390..97b1137 100644 --- a/src/root.zig +++ b/src/root.zig @@ -22,6 +22,7 @@ pub const lsp = struct { pub const transport = @import("lsp/transport.zig"); pub const types = @import("lsp/types.zig"); pub const document = @import("lsp/document.zig"); + pub const document_tests = @import("lsp/document.test.zig"); }; test { @@ -35,6 +36,7 @@ test { // struct deeper, so reference them directly to pull in their tests. _ = lsp.server; _ = lsp.document; + _ = lsp.document_tests; _ = lsp.types; _ = lsp.transport; } diff --git a/src/sema.zig b/src/sema.zig index 9e2f7be..a081f4e 100644 --- a/src/sema.zig +++ b/src/sema.zig @@ -114,6 +114,13 @@ pub const Analyzer = struct { struct_types: std.StringHashMap(StructTypeInfo), enum_types: std.StringHashMap([]const []const u8), type_aliases: std.StringHashMap([]const u8), + /// Module-global integer consts, by bare name → value. Lets an array + /// dimension written as a named const (`MAX :: 4; [MAX]u8`) fold to a + /// concrete editor length instead of panicking on the `.int_literal` + /// union access (issue 0099). Populated at registration time, so it shares + /// the analyzer's existing intra-pass forward-reference limitation (a const + /// declared after the struct that uses it resolves to "unknown" length). + const_int_values: std.StringHashMap(i64), type_map: TypeMap, pub fn init(allocator: std.mem.Allocator) Analyzer { @@ -130,6 +137,7 @@ pub const Analyzer = struct { .struct_types = std.StringHashMap(StructTypeInfo).init(allocator), .enum_types = std.StringHashMap([]const []const u8).init(allocator), .type_aliases = std.StringHashMap([]const u8).init(allocator), + .const_int_values = std.StringHashMap(i64).init(allocator), .type_map = TypeMap.init(allocator), }; } @@ -217,6 +225,11 @@ pub const Analyzer = struct { const ty = self.resolveTypeAnnotation(cd.type_annotation) orelse inferValueType(cd.value); const kind = classifyConstDecl(cd); try self.addSymbol(cd.name, kind, ty, node.span); + // Record integer-literal consts so a named array dimension + // (`MAX :: 4; [MAX]u8`) can fold to a concrete length (issue 0099). + if (cd.value.data == .int_literal) { + try self.const_int_values.put(cd.name, cd.value.data.int_literal.value); + } // Populate type_aliases registry if (cd.value.data == .type_expr) { try self.type_aliases.put(cd.name, cd.value.data.type_expr.name); @@ -354,6 +367,23 @@ pub const Analyzer = struct { try self.fn_signatures.put(key, .{ .param_types = &.{}, .return_type = ret }); } + /// Fold an array dimension node to a concrete editor length, or null + /// ("unknown") when it isn't a compile-time integer this index can resolve. + /// Metadata-only — NEVER panic on an unexpected node shape and never + /// fabricate a misleading concrete length (issue 0099). A literal dim is + /// taken directly; a bare identifier naming an integer const folds to its + /// recorded value; anything else (unknown name, non-const expression, + /// out-of-`u32`-range value) is unknown. + fn arrayDimLength(self: *Analyzer, len_node: *Node) ?u32 { + const v: i64 = switch (len_node.data) { + .int_literal => |lit| lit.value, + .identifier => |id| self.const_int_values.get(id.name) orelse return null, + else => return null, + }; + if (v < 0 or v > std.math.maxInt(u32)) return null; + return @intCast(v); + } + /// Resolve a type annotation node to an editor `Type` (metadata only — the /// compiler resolves type nodes to canonical `TypeId` in `src/ir/`). /// Handles primitives, type_expr, array_type_expr, parameterized_type_expr, @@ -364,7 +394,7 @@ pub const Analyzer = struct { // Array type: [N]T if (tn.data == .array_type_expr) { const ate = tn.data.array_type_expr; - const length: u32 = @intCast(ate.length.data.int_literal.value); + const length = self.arrayDimLength(ate.length); const elem_type = self.resolveTypeNode(ate.element_type); const elem_name = elem_type.displayName(self.allocator) catch return .void_type; return .{ .array_type = .{ .element_name = elem_name, .length = length, .is_raw = typeExprIsRaw(ate.element_type) } }; diff --git a/src/types.zig b/src/types.zig index 30adae6..5120671 100644 --- a/src/types.zig +++ b/src/types.zig @@ -76,7 +76,11 @@ pub const Type = union(enum) { pub const ArrayTypeInfo = struct { element_name: []const u8, - length: u32, + /// null = the dimension could not be folded to a compile-time integer + /// by the editor index (an identifier const it couldn't resolve, or a + /// non-const expression). Explicit "unknown" rather than a fabricated + /// concrete length — this is hover/metadata, not codegen (issue 0099). + length: ?u32, is_raw: bool, }; @@ -296,7 +300,10 @@ pub const Type = union(enum) { .slice_type => |info| return fmtAlloc(allocator, "[]{s}", .{info.element_name}), .pointer_type => |info| return fmtAlloc(allocator, "*{s}", .{info.pointee_name}), .many_pointer_type => |info| return fmtAlloc(allocator, "[*]{s}", .{info.element_name}), - .array_type => |info| return fmtAlloc(allocator, "[{d}]{s}", .{ info.length, info.element_name }), + .array_type => |info| { + if (info.length) |n| return fmtAlloc(allocator, "[{d}]{s}", .{ n, info.element_name }); + return fmtAlloc(allocator, "[_]{s}", .{info.element_name}); + }, .vector_type => |info| return fmtAlloc(allocator, "Vector({d},{s})", .{ info.length, info.element_name }), .function_type => |info| { var buf = std.ArrayList(u8).empty;