fix(diagnostics): reject reserved/builtin type names used as identifiers (issue 0076)

A value binding (local/global `var` or a parameter) spelled as a
reserved/builtin type name parses as a `.type_expr` rather than an
`.identifier` (parser.zig, via `Type.fromName`), so the address-of
family in lower.zig never saw a scoped local and mis-lowered it —
loading the aggregate and passing it by value to a `ptr` parameter
(LLVM verifier abort, or a silent `*self`-mutation-losing copy).

Add a declaration-site diagnostic in semantic_diagnostics.zig
(`UnknownTypeChecker.checkBindingName`): reject any parameter name or
`var` binding name (`:=` / typed-local / global forms) whose spelling
collides with a reserved type name. `isReservedTypeName` defers to the
parser's own classifier (`types.Type.fromName`) so the rejected set
never drifts from the set that would parse as a type — the named
builtins (bool/string/void/f32/f64/usize/isize/Any) and `[su]N` over
sx's 1-64 range. Bare value names (`s`, `self`, `index`) are untouched.
No lowering special-case; the `.identifier`-only address-of paths are
correct once type-shaped names can never be bound. The rejected
attempt-1 `bareVarName` approach was never landed.

Tests:
- 0125-types-type-named-var-rejected: `:=` form (s2) rejected
  (repurposed from the old test that asserted the now-illegal behavior).
- 1119-diagnostics-reserved-type-name-as-identifier: parameter (u8),
  typed-local (s64, bool), `:=` (string) forms rejected.
- 0135-types-self-streaming-nonreserved: positive — `*self` streaming
  with non-reserved names accumulates correctly via both call styles.
- 0904-optionals: renamed incidental locals s1/s2 -> filled/empty.
This commit is contained in:
agra
2026-06-03 19:00:39 +03:00
parent 4ab3608f77
commit f49a49cd07
18 changed files with 262 additions and 31 deletions

View File

@@ -0,0 +1,13 @@
// A local declared with a reserved/builtin type-name spelling (`s2` is the
// arbitrary-width `sN` integer type) is rejected at the declaration site.
// Previously such a name parsed as a `.type_expr`, so address-of sites
// mis-lowered it (load-by-value to a `ptr` param → LLVM verifier abort, or a
// silent `*self`-mutation-losing copy). Regression (issue 0076). Expected:
// error at the declaration; exit 1.
#import "modules/std.sx";
main :: () -> s32 {
s2 := 42;
print("s2: {}\n", s2);
return 0;
}

View File

@@ -1,19 +0,0 @@
#import "modules/std.sx";
#import "modules/math/math.sx";
#import "modules/compiler.sx";
#import "modules/test.sx";
pkg :: #import "modules/testpkg";
main :: () {
// ========================================================
// 21. TYPE-NAMED VARIABLES (s2, u8, etc.)
// ========================================================
print("=== 21. Type-Named Vars ===\n");
{
s2 := 42;
print("s2: {}\n", s2);
s2 = s2 + 1;
print("s2+1: {}\n", s2);
}
}

View File

@@ -0,0 +1,30 @@
// A `*self`-mutating streaming pattern with NON-reserved binding names
// (`hasher`, `ctx`) compiles and accumulates state correctly through BOTH
// call styles — explicit address-of `update(@h, ...)` and autoref
// `h.update(...)` — across multiple mutating calls. Proves the
// `.identifier`-only address-of paths in lowering are correct as-is, with no
// type-shaped-name special-case (companion to the issue-0076 rejection of
// type-named identifiers).
#import "modules/std.sx";
Hasher :: struct { total: s64 = 0; count: s64 = 0; }
// Folds one sample into the hasher through its pointer receiver: adds `n` to
// `total` and increments `count` by one. The mutation is only visible to the
// caller if the receiver really is passed by address.
update :: (self: *Hasher, n: s64) {
self.total += n;
self.count += 1;
}
// Drives two independently named `Hasher` values through four and two mutating
// calls respectively, alternating the explicit `@x` and autoref `x.update(...)`
// receiver forms, then prints the accumulated state.
main :: () -> s32 {
hasher := Hasher.{ total = 0, count = 0 };
update(@hasher, 10); // explicit address-of receiver
hasher.update(20); // autoref receiver
update(@hasher, 30);
hasher.update(40);
// 10 + 20 + 30 + 40 = 100 over 4 calls when every call mutated the same value.
print("hasher total={} count={}\n", hasher.total, hasher.count);
ctx := Hasher.{ total = 100, count = 0 };
ctx.update(5);
update(@ctx, 7);
// 100 + 5 + 7 = 112 over 2 calls.
print("ctx total={} count={}\n", ctx.total, ctx.count);
return 0;
}

View File

@@ -23,9 +23,9 @@ S :: struct {
} }
main :: () { main :: () {
s1 := S.{ a = 42, b = "hi", c = true }; filled := S.{ a = 42, b = "hi", c = true };
print("{}\n", s1); print("{}\n", filled);
s2 := S.{ a = null, b = null, c = null }; empty := S.{ a = null, b = null, c = null };
print("{}\n", s2); print("{}\n", empty);
0; 0;
} }

View File

@@ -0,0 +1,16 @@
// A value binding (parameter or local `var`) spelled as a reserved/builtin
// type name is rejected at the declaration site, across every declaration
// form: a parameter name (`u8`), a typed local (`s64`, `bool`), and a `:=`
// local (`string`). Such a spelling parses as a `.type_expr` rather than an
// `.identifier`, so the address-of family in lowering mis-lowers it (issue
// 0076). Expected: one error per offending name; exit 1.
#import "modules/std.sx";
takes_u8 :: (u8: s32) -> s32 { return u8; }
main :: () -> s32 {
s64 : s32 = 3;
bool : bool = true;
string := "x";
return 0;
}

View File

@@ -0,0 +1 @@
1

View File

@@ -0,0 +1,5 @@
error: 's2' is a reserved type name and cannot be used as an identifier
--> /Users/agra/projects/sx/examples/0125-types-type-named-var-rejected.sx:10:5
|
10 | s2 := 42;
| ^^^^^^^^^

View File

@@ -1,3 +0,0 @@
=== 21. Type-Named Vars ===
s2: 42
s2+1: 43

View File

@@ -0,0 +1,2 @@
hasher total=100 count=4
ctx total=112 count=2

View File

@@ -0,0 +1,23 @@
error: 'u8' is a reserved type name and cannot be used as an identifier
--> /Users/agra/projects/sx/examples/1119-diagnostics-reserved-type-name-as-identifier.sx:9:14
|
9 | takes_u8 :: (u8: s32) -> s32 { return u8; }
| ^^
error: 's64' is a reserved type name and cannot be used as an identifier
--> /Users/agra/projects/sx/examples/1119-diagnostics-reserved-type-name-as-identifier.sx:12:5
|
12 | s64 : s32 = 3;
| ^^^^^^^^^^^^^^
error: 'bool' is a reserved type name and cannot be used as an identifier
--> /Users/agra/projects/sx/examples/1119-diagnostics-reserved-type-name-as-identifier.sx:13:5
|
13 | bool : bool = true;
| ^^^^^^^^^^^^^^^^^^^
error: 'string' is a reserved type name and cannot be used as an identifier
--> /Users/agra/projects/sx/examples/1119-diagnostics-reserved-type-name-as-identifier.sx:14:5
|
14 | string := "x";
| ^^^^^^^^^^^^^^

View File

@@ -0,0 +1,125 @@
# 0076 — builtin/reserved type name wrongly accepted as an identifier
> **Status: RESOLVED.**
>
> **Root cause:** the language accepted a value binding (local/global `var` or a
> parameter) spelled as a reserved/builtin type name. The parser turns such a
> spelling into a `.type_expr` rather than an `.identifier` (`parser.zig`, via
> `Type.fromName`), so the address-of family in `src/ir/lower.zig` never saw a
> scoped local and fell through to value lowering — loading the whole aggregate
> and passing it by value to a `ptr` parameter (LLVM verifier abort, or a silent
> `*self`-mutation-losing copy).
>
> **Fix:** a declaration-site diagnostic in the existing semantic pass
> `src/ir/semantic_diagnostics.zig` (`UnknownTypeChecker`). New
> `checkBindingName` rejects any parameter name or `var` binding name (local or
> global, `:=` / typed-local forms) whose spelling collides with a reserved type
> name; `isReservedTypeName` defers to the parser's own classifier
> (`types.Type.fromName`) so the rejected set never drifts from the set that
> would parse as a type — the named builtins (`bool`, `string`, `void`, `f32`,
> `f64`, `usize`, `isize`, `Any`) and `[su]N` over sx's 1–64 range. Bare value
> names (`s`, `self`, `index`) are untouched. No lowering special-case is added;
> the `.identifier`-only address-of paths are correct once type-shaped names can
> never be bound. The rejected `bareVarName` approach was never landed.
>
> **Regression tests:**
> - `examples/0125-types-type-named-var-rejected.sx` — `:=` form (`s2`) rejected.
> - `examples/1119-diagnostics-reserved-type-name-as-identifier.sx` — parameter
> (`u8`), typed-local (`s64`, `bool`), and `:=` (`string`) forms rejected.
> - `examples/0135-types-self-streaming-nonreserved.sx` — positive: `*self`
> streaming with non-reserved names (`hasher`, `ctx`) accumulates correctly via
> both `update(@h, …)` and `h.update(…)`.
>
> Pre-existing example `examples/0904-...` declared locals `s1`/`s2` (incidental
> names); renamed to `filled`/`empty`. Scope: main-file decls only, matching the
> pass's existing trusted-imports convention.
## Symptom (how it first surfaced)
A local variable whose name is lexically a type — e.g. `s2` (the `sN`
arbitrary-width signed-int syntax: `Type.fromName("s2")` → `s(2)`), or `u8`,
`s64`, etc. — is accepted as a variable. Because such a name parses as a
`.type_expr` (not `.identifier`), the address-of family of lowering sites
(`@s2`, the autoref `s2.update(...)` receiver, a bare `f(s2)` at a `*T` param,
global function-pointer args) does NOT recognize it as a scoped local and falls
through to value lowering — loading the whole aggregate and passing it **by
value** to a `ptr` parameter:
```
LLVM verification failed: Call parameter type does not match function signature!
call void @update(ptr @__sx_default_context,
{ [8 x i64], [64 x i8], i64, i64 } %load, ...)
```
For some struct shapes it compiles but silently passes a **copy** (callee
`*self` mutations lost). A non-type-shaped name (`hasher`, `ctx`) never triggers
any of this — the `.identifier` paths already work correctly.
## Root cause
The language is **accepting reserved/builtin type names as identifiers** in the
first place. `sN`/`uN` (arbitrary-width ints) and the named builtins
(`bool`, `string`, `void`, `f32`, `f64`, `s8`/`s16`/`s32`/`s64`,
`u8`/`u16`/`u32`/`u64`, …) are reserved type names; declaring a variable with
such a name is meaningless and produces the mis-lowering above. Patching each
address-of site to tolerate the name (the rejected `bareVarName` approach) is
whack-a-mole — there is always another site, and it entrenches a name that
should never have been allowed.
## Proper fix (the required direction)
Emit a **diagnostic error** when an identifier is declared with a name that
collides with a **builtin/reserved type name** — including the arbitrary-width
`[su][0-9]+` (`sN`/`uN`) family AND the named builtins (`bool`, `string`,
`void`, `f32`, `f64`, the fixed-width int types, etc.). Scope ruling (Agra):
**all builtin/reserved type names** are rejected as identifiers. (User-defined
struct/type-name shadowing, if intentionally supported elsewhere, is out of
scope for this issue — this is specifically about builtin/reserved type names.)
Diagnostic at the declaration site, e.g.:
`error: 'u8' is a reserved type name and cannot be used as an identifier`
with the declaration's span.
Suspected area: name binding / declaration handling — where a `:=` / typed
local / parameter name is introduced. Reject the name there, before it ever
reaches lowering. Do NOT add lowering special-cases for type-shaped names; the
`.identifier`-only checks at the address-of sites are then correct as-is (no
type-shaped name can reach them).
## Reproduction
```sx
#import "modules/std.sx";
Sha256 :: struct { h:[8]u64; block:[64]u8; block_len:s64=0; total_len:u64=0; }
init :: () -> Sha256 { s:Sha256=---; s.block_len=0; s.total_len=0; s }
update :: (self:*Sha256, data:string) { self.total_len += data.len; }
main :: () -> s32 { s2 := init(); update(@s2, "."); print("total_len={}\n", s2.total_len); return 0; }
```
`./zig-out/bin/sx run <file>` today → LLVM verifier abort.
**Expected after fix:** a clean compile-time diagnostic that `s2` is a reserved
type name and cannot be an identifier (exit non-zero, readable error — NOT an
LLVM abort, NOT a silent copy). The same program with a non-reserved name
(`hasher := init(); update(@hasher, ".")`) must compile and print `total_len=1`.
## Verification
1. Pinned diagnostics test(s) asserting the error for representative reserved
names used as identifiers: `s2`, `u8`, `s64`, `bool`, `string` (declaration
forms: `:=`, typed local, and a parameter name). Capture the diagnostic text
in `expected/`.
2. A positive test: the same `*self` streaming pattern with NON-reserved names
(`hasher`, `ctx`) compiles and accumulates state correctly via both
`update(@h, ...)` and `h.update(...)` — proving the `.identifier` paths are
correct and no lowering special-case is needed.
3. `zig build && zig build test && bash tests/run_examples.sh` all green. If any
existing example/test declares a variable with a reserved type name, it is now
illegal — fix the test's variable name (do NOT weaken the diagnostic). Report
how many such sites existed.
## Provenance
Discovered by the `distribution` flow (P1.2 pure-sx SHA-256), whose minimal repro
happened to name a local `s2`. Real SHA-256 code with names like `hasher`/`ctx`
is unaffected on the current compiler — so the P1.2 "blocker" was a
naming artifact, and this issue is really a missing-diagnostic correctness bug.

View File

@@ -2,6 +2,7 @@ const std = @import("std");
const ast = @import("../ast.zig"); const ast = @import("../ast.zig");
const errors = @import("../errors.zig"); const errors = @import("../errors.zig");
const types = @import("types.zig"); const types = @import("types.zig");
const name_class = @import("../types.zig");
const program_index_mod = @import("program_index.zig"); const program_index_mod = @import("program_index.zig");
const type_resolver = @import("type_resolver.zig"); const type_resolver = @import("type_resolver.zig");
@@ -10,10 +11,17 @@ const TypeTable = types.TypeTable;
const ProgramIndex = program_index_mod.ProgramIndex; const ProgramIndex = program_index_mod.ProgramIndex;
const TypeResolver = type_resolver.TypeResolver; const TypeResolver = type_resolver.TypeResolver;
/// Unknown-type diagnostic pass (issue 0064), extracted from `Lowering` /// Declaration-name / type-position diagnostic pass. Two checks, both over the
/// (architecture phase A2.4). Rejects an identifier used in a type position /// main file's decls, before lowering:
/// that names no declared type, primitive, or in-scope generic type parameter. ///
/// Without it, `TypeResolver.resolveNamed`'s empty-struct-stub fallback silently /// 1. Unknown-type diagnostic (issue 0064), extracted from `Lowering`
/// (architecture phase A2.4): an identifier used in a type position that
/// names no declared type, primitive, or in-scope generic type parameter.
/// 2. Reserved-type-name binding (issue 0076): a value binding (local/global
/// `var` or a parameter) spelled as a reserved/builtin type name. See
/// `isReservedTypeName`.
///
/// Without (1)'s check, `TypeResolver.resolveNamed`'s empty-struct-stub fallback silently
/// fabricates a 0-field struct named after the unknown identifier — so a value /// fabricates a 0-field struct named after the unknown identifier — so a value
/// param mistakenly used as a type (`(T: Type, …) -> T`, missing the `$`) or a /// param mistakenly used as a type (`(T: Type, …) -> T`, missing the `$`) or a
/// typo'd type name compiles and runs, rendering as `T{}`. Main-file decls only; /// typo'd type name compiles and runs, rendering as `T{}`. Main-file decls only;
@@ -46,6 +54,7 @@ pub const UnknownTypeChecker = struct {
switch (decl.data) { switch (decl.data) {
.fn_decl => self.checkFnSignatureTypes(&decl.data.fn_decl, &declared), .fn_decl => self.checkFnSignatureTypes(&decl.data.fn_decl, &declared),
.struct_decl => |sd| self.checkStructFieldTypes(&sd, &declared), .struct_decl => |sd| self.checkStructFieldTypes(&sd, &declared),
.var_decl => |vd| self.checkBindingName(vd.name, decl.span),
.const_decl => |cd| switch (cd.value.data) { .const_decl => |cd| switch (cd.value.data) {
.fn_decl => self.checkFnSignatureTypes(&cd.value.data.fn_decl, &declared), .fn_decl => self.checkFnSignatureTypes(&cd.value.data.fn_decl, &declared),
.struct_decl => |sd| self.checkStructFieldTypes(&sd, &declared), .struct_decl => |sd| self.checkStructFieldTypes(&sd, &declared),
@@ -224,6 +233,7 @@ pub const UnknownTypeChecker = struct {
} }
} }
} }
for (params) |p| self.checkBindingName(p.name, p.name_span);
for (params) |p| self.checkTypeNodeForUnknown(p.type_expr, declared, in_scope.items, type_vals.items); for (params) |p| self.checkTypeNodeForUnknown(p.type_expr, declared, in_scope.items, type_vals.items);
if (return_type) |rt| self.checkTypeNodeForUnknown(rt, declared, in_scope.items, type_vals.items); if (return_type) |rt| self.checkTypeNodeForUnknown(rt, declared, in_scope.items, type_vals.items);
self.walkBodyTypes(body, declared, in_scope, type_vals); self.walkBodyTypes(body, declared, in_scope, type_vals);
@@ -275,6 +285,7 @@ pub const UnknownTypeChecker = struct {
.multi_assign => |ma| for (ma.values) |v| self.walkBodyTypes(v, declared, in_scope, type_vals), .multi_assign => |ma| for (ma.values) |v| self.walkBodyTypes(v, declared, in_scope, type_vals),
.destructure_decl => |dd| self.walkBodyTypes(dd.value, declared, in_scope, type_vals), .destructure_decl => |dd| self.walkBodyTypes(dd.value, declared, in_scope, type_vals),
.var_decl => |vd| { .var_decl => |vd| {
self.checkBindingName(vd.name, node.span);
if (vd.type_annotation) |ta| self.checkTypeNodeForUnknown(ta, declared, in_scope.items, type_vals.items); if (vd.type_annotation) |ta| self.checkTypeNodeForUnknown(ta, declared, in_scope.items, type_vals.items);
if (vd.value) |v| self.walkBodyTypes(v, declared, in_scope, type_vals); if (vd.value) |v| self.walkBodyTypes(v, declared, in_scope, type_vals);
}, },
@@ -416,8 +427,35 @@ pub const UnknownTypeChecker = struct {
} }
self.diagnostics.addFmt(.err, span, "unknown type '{s}'", .{name}); self.diagnostics.addFmt(.err, span, "unknown type '{s}'", .{name});
} }
/// Declaration-site guard for issue 0076: report an error when the name of a
/// value binding (a parameter, or a local/global `var`) is spelled like a
/// reserved/builtin type name.
///
/// Why here and not in lowering: the parser produces a `.type_expr` instead of
/// an `.identifier` for such a spelling (`parser.zig`, via
/// `name_class.Type.fromName`). The address-of family in `lower.zig` (`@x`,
/// the autoref `x.method(...)` receiver, a bare `f(x)` at a `*T` param) then
/// never sees a scoped local and falls through to value lowering, which loads
/// the whole aggregate and hands it by value to a `ptr` parameter — an LLVM
/// verifier abort, or a silent mutation-losing copy. Refusing the name before
/// lowering leaves the `.identifier`-only address-of paths correct with no
/// lowering special-case.
///
/// `name` is the binding's spelling; `span` is where the diagnostic is
/// anchored (callers pass the parameter's name span or the declaration span).
fn checkBindingName(self: UnknownTypeChecker, name: []const u8, span: ?ast.Span) void {
    // Ordinary value names need no diagnostic.
    if (!isReservedTypeName(name)) return;

    self.diagnostics.addFmt(
        .err,
        span,
        "'{s}' is a reserved type name and cannot be used as an identifier",
        .{name},
    );
}
}; };
/// Returns true when `name` is a reserved/builtin type spelling, i.e. when the
/// parser would classify that same spelling as a type.
///
/// The decision is delegated to `name_class.Type.fromName`, the classifier
/// `parser.zig` uses to pick `.type_expr` over `.identifier`, so the set
/// rejected here cannot drift from the set that parses as a type: the named
/// builtins (`bool`, `string`, `void`, `f32`, `f64`, `usize`, `isize`, `Any`)
/// and the `[su]N` arbitrary-width ints over sx's supported 1-64 range. Bare
/// value names such as `s`, `buf`, `index`, or `self` are not type spellings
/// and yield false.
fn isReservedTypeName(name: []const u8) bool {
    const classified = name_class.Type.fromName(name);
    return classified != null;
}
fn isBuiltinTypeName(name: []const u8) bool { fn isBuiltinTypeName(name: []const u8) bool {
if (TypeResolver.resolvePrimitive(name) != null) return true; if (TypeResolver.resolvePrimitive(name) != null) return true;
// Arbitrary-width integers / floats: u1, s7, u128, f16, f80, … // Arbitrary-width integers / floats: u1, s7, u128, f16, f80, …