feat(lang): backtick raw-identifier escape + #import c foreign-name exemption [F0.6]

Reserved type-name spellings (s1, s2, u8, …) can now be used as value
identifiers in two ways, resolving issue 0089:

1. Backtick raw identifier: a leading backtick (`s2) lexes to an
   .identifier token carrying a new Token.is_raw flag, with the backtick
   excluded from the text. A raw identifier is never type-classified — the
   parser skips Type.fromName for it — so it is always a value identifier.
   The flag threads to VarDecl.is_raw / Param.is_raw at binding sites, and
   the reserved-type-name check (UnknownTypeChecker) skips raw bindings.
   Because the token tag stays .identifier, the escape works in every
   position (local, global, param, field, fn name, struct member, later
   reference) with no per-site parser change.

2. #import c exemption: c_import.zig synthesizes foreign decls with
   Param.is_raw = true, so generated C param names that collide with
   reserved type names (s1, s2) import unedited.

A bare reserved-name binding in sx still errors (issue 0076 preserved):
the is_raw-gated skip only fires for backtick / foreign names, and a raw
binding's address-of / autoref lowering stays correct because every
occurrence is an .identifier, never a .type_expr.

Tests: examples/0151 (backtick, every position),
examples/1220 (foreign exemption, compiled+run), lexer unit tests.
1119 (bare-binding rejection) stays green. specs.md + readme.md updated.
This commit is contained in:
agra
2026-06-04 17:40:42 +03:00
parent 7911494809
commit 0dbdc530ba
19 changed files with 317 additions and 14 deletions

View File

@@ -0,0 +1,33 @@
// Backtick raw-identifier escape: a leading backtick makes the following
// identifier RAW — its text excludes the backtick and it is NEVER
// type-classified, so a reserved type-name spelling (`s2`, `u8`, …) can be
// used as a value identifier. Exercised in every position: global, local,
// param, struct field + member access, function name + call, and a later
// reference. A *bare* `s2` is still the reserved type name (see
// examples/1119), so the escape is the only way to spell these as values.
// Regression (issue 0089).
#import "modules/std.sx";
// Global named with a reserved type spelling. The leading backtick makes the
// name a raw value identifier, so it is not read as the u8 type.
`u8 := 100;
// Function whose name is a reserved type spelling, with a reserved-name param.
// Both the function name and the parameter are written raw (backtick-prefixed),
// including the reference to the parameter inside the body. Returns twice its
// argument.
`s2 :: (`s1: s64) -> s64 { return `s1 * 2; }
// Struct whose field names are both reserved type spellings, declared raw.
Point :: struct {
`s2: f64; // field name is a reserved type spelling
`u16: s64; // second raw field name; its declared type is s64
}
// Entry point: uses a raw identifier as a local, then reads the raw global,
// calls the raw-named function, and initializes / reads the raw struct fields.
main :: () {
// Local with a reserved type spelling; later reference resolves to it.
`s64 := 7;
// Both sides of the assignment spell the local with the backtick.
`s64 = `s64 + 1;
print("local = {}\n", `s64);
print("global = {}\n", `u8);
print("fn = {}\n", `s2(21)); // calls the `s2 function
// Raw field names appear in the struct literal and in member access.
p := Point.{ `s2 = 2.5, `u16 = 9 };
print("field = {} {}\n", p.`s2, p.`u16);
}

View File

@@ -0,0 +1,9 @@
#include "1220-ffi-c-import-reserved-name-params.h"
/* Selects one of two operands: yields s1 when `which` is 0, and s2 for any
   other value. The parameter names s1/s2 are intentional. */
int ffi_pick(int s1, int s2, int which) {
    if (which == 0) {
        return s1;
    }
    return s2;
}
/* Adds the two operands and returns the result. The parameter names s1/s2
   are intentional. */
int ffi_sum(int s1, int s2) {
    const int total = s1 + s2;
    return total;
}

View File

@@ -0,0 +1,5 @@
/* Foreign C declarations whose parameter names (`s1`, `s2`) collide with
sx's reserved signed-int type spellings. The `#import c` exemption must
accept these generated names unedited (issue 0089). */
/* Returns s1 when which == 0, otherwise s2. */
int ffi_pick(int s1, int s2, int which);
/* Returns s1 + s2. */
int ffi_sum(int s1, int s2);

View File

@@ -0,0 +1,20 @@
// `#import c` foreign-name exemption: a C header's parameter names `s1`/`s2`
// collide with sx's reserved signed-int type spellings. Foreign decls are
// treated as RAW — their names are never type-classified nor reserved-checked
// — so the generated `#foreign` bindings import and call without hand-edits
// (no backticks needed). Before issue 0089 this errored with "'s1' is a
// reserved type name and cannot be used as an identifier".
// Regression (issue 0089).
#import "modules/std.sx";
#import c {
#include "1220-ffi-c-import-reserved-name-params.h";
#source "1220-ffi-c-import-reserved-name-params.c";
};
// Entry point: calls the two imported C functions positionally and prints the
// results. No backticks are written here; the colliding names s1/s2 exist only
// as parameter names on the C side.
main :: () -> s32 {
print("pick(10,20,0) = {}\n", ffi_pick(10, 20, 0));
print("pick(10,20,1) = {}\n", ffi_pick(10, 20, 1));
print("sum(10,20) = {}\n", ffi_sum(10, 20));
// Trailing expression; presumably the s32 result of main (verify against the
// language's block-value rules).
0
}

View File

@@ -0,0 +1 @@
0

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,4 @@
local = 8
global = 100
fn = 42
field = 2.500000 9

View File

@@ -0,0 +1 @@
0

View File

@@ -0,0 +1,3 @@
pick(10,20,0) = 10
pick(10,20,1) = 20
sum(10,20) = 30

View File

@@ -0,0 +1,91 @@
# 0089 — backtick raw-identifier escape + `#import c` foreign-name exemption from the reserved-type-name rule
> **✅ RESOLVED** (foundation step F0.6). Two mechanisms, per Agra's design ruling:
>
> 1. **Backtick raw identifier.** The lexer recognises a leading backtick
> (`` `s2 ``) and emits an `.identifier` token whose span excludes the backtick,
> carrying a new `Token.is_raw` flag ([src/lexer.zig], [src/token.zig]). A raw
> identifier is NEVER type-classified — the parser skips `Type.fromName` for it
> in expression position ([src/parser.zig] `parsePrimary`), so it is always a
> value identifier. The flag threads to `VarDecl.is_raw` / `Param.is_raw`
> ([src/ast.zig]) at the binding sites, and `UnknownTypeChecker` skips the
> reserved-name check for raw bindings ([src/ir/semantic_diagnostics.zig]).
> Because the token tag stays `.identifier`, the escape works in every position
> (local, global, param, field, function name, struct member, later reference)
> with no per-site parser change.
> 2. **`#import c` foreign-name exemption.** `c_import.zig` synthesizes foreign
> `#foreign` decls with `Param.is_raw = true`, so generated C param names that
> collide with reserved type names (`s1`, `s2`) import unedited.
>
> A *bare* reserved-name binding in sx still errors (issue 0076 preserved): the
> `is_raw`-gated skip only fires for backtick / foreign names. Regression tests:
> `examples/0151-types-backtick-raw-identifier.sx` (backtick, every position),
> `examples/1220-ffi-c-import-reserved-name-params.{sx,h,c}` (foreign exemption),
> `examples/1119-diagnostics-reserved-type-name-as-identifier.sx` (negative —
> bare binding still rejected). Backtick lexer unit tests in `src/lexer.zig`.
>
> The original report is preserved below.
---
## Symptom
Importing non-sx source whose names collide with sx reserved type names is
rejected. `library/modules/stb_truetype.sx` is a `#import c { ... }` block over a
vendored C header (`vendors/stb_truetype/stb_truetype.h`); C identifiers `s1`,
`s2` (which collide with sx's signed-int type keywords `s1`..`sN`) produce:
```
error: 's1' is a reserved type name and cannot be used as an identifier
error: 's2' is a reserved type name and cannot be used as an identifier
```
The user cannot hand-edit these — they are generated from the vendored C header.
Separately, sx-authored code has NO way to deliberately use a reserved-name-spelled
identifier even when it wants to.
## Root cause
The parser classifies any reserved-type-name spelling (`s2`, `u8`, `f64`, …) as a
`.type_expr` via `name_class.Type.fromName`, never as an `.identifier`. The F0.1 /
issue-0076 fix added `UnknownTypeChecker.checkBindingName`
(`src/ir/semantic_diagnostics.zig`) to reject a value binding / param spelled as
a reserved type name (the `.type_expr`-vs-`.identifier` mismatch otherwise breaks
address-of / autoref lowering). F0.1 deliberately extended this check to imported
declarations — which is what now fires on the C-imported `s1`/`s2`.
## Desired behaviour (Agra ruling)
External / imported source does NOT need to conform to sx naming standards. Two
mechanisms:
1. **Auto-exempt imports.** `#import c` (and other foreign) declarations are
treated as RAW identifiers: foreign names are never type-classified and never
reserved-checked, so generated bindings "just work" with zero user edits.
2. **Backtick raw-identifier for sx code.** A leading backtick makes the following
identifier raw — an identifier that is NEVER type-classified, so it bypasses the
reserved-name rule:
```sx
`s2 := 2.5; // OK — identifier "s2", distinct from the s2 signed-int type
s2 := 2.5; // ERROR — bare s2 is still the reserved type name
```
Prefix form (single leading backtick on the identifier). The raw identifier's
TEXT is `s2` (the backtick is not part of the name). A bare `s2` used as a TYPE
remains the signed-int type.
## Reproduction
sx-side (minimal):
```sx
#import "modules/std.sx";
main :: () {
`s2 := 2.5; // must compile: identifier s2 = 2.5
print("{}\n", `s2); // 2.5
}
```
Import-side: a `#import c` block over a header declaring `int s1, s2;` (or
`stb_truetype.sx`) must NOT emit the reserved-type-name error.

View File

@@ -105,6 +105,19 @@ y : s32 = 0; // explicit type
z : s32 = ---; // uninitialized
```
Builtin type names (`s2`, `u8`, `bool`, `string`, …) are reserved and can't be used
as bare value identifiers. A leading backtick escapes one into a raw identifier — its
text drops the backtick and it's never read as a type — so reserved spellings (and
keywords) work as ordinary names:
```sx
`s2 := 2.5; // value identifier "s2", distinct from the s2 type
print("{}\n", `s2); // 2.5
```
Foreign declarations from `#import c { … }` are exempt automatically: C names that
collide with reserved type names (e.g. `s1`, `s2`) import unedited.
### Structs
```sx

View File

@@ -13,6 +13,50 @@ Line comments start with `//` and extend to end of line.
- UPPER_SNAKE_CASE for constants: `SOME_INT`, `SOME_STR`
- PascalCase for types: `Foo`
#### Reserved type names
A spelling that names a builtin type — the arbitrary-width integers `s1`..`s64` /
`u1`..`u64`, plus `bool`, `string`, `void`, `f32`, `f64`, `usize`, `isize`, `Any` —
is reserved. A bare value binding (`:=` / typed local / parameter name) spelled as
one of these is rejected: such a spelling parses as a *type*, not a value, so the
address-of / autoref paths would mis-lower it.
```sx
s2 := 2.5; // ERROR: 's2' is a reserved type name and cannot be used as an identifier
```
#### Backtick raw-identifier escape
A leading backtick makes the following identifier **raw**: its text excludes the
backtick and it is never type-classified, so a reserved-type-name spelling can be
used as an ordinary value identifier. The backtick is required at every occurrence
of that identifier (declaration and each reference); a *bare* `s2` is still the
signed-int type.
```sx
`s2 := 2.5; // OK — value identifier "s2", distinct from the s2 type
print("{}\n", `s2); // 2.5
```
The escape works in every identifier position — local, global, parameter, struct
field, function name, and a later reference:
```sx
`u8 := 100; // global
`s2 :: (`s1: s64) -> s64 { `s1 } // function name + parameter
P :: struct { `s2: f64; } // struct field
```
A backtick may also escape a keyword spelling (`` `for ``, `` `struct ``), yielding
an identifier with that text.
**`#import c` exemption.** Foreign declarations synthesized by an `#import c { … }`
block are treated as raw automatically: a generated C parameter or name that
collides with a reserved type name (e.g. `s1`, `s2`) imports unedited, with no
backticks and no reserved-name error. The exemption is scoped to the foreign decls —
it does not make a foreign `s2` usable as the sx `s2` type, nor relax the rule for
hand-written sx code.
### Literals
| Kind | Examples | Type |

View File

@@ -148,6 +148,10 @@ pub const Param = struct {
/// Optional default value expression. When the caller omits this
/// parameter, lowering substitutes this expression in its place.
default_expr: ?*Node = null,
/// True when the param name was written as a backtick raw identifier
/// (`` `s2 ``) or synthesized by a `#import c` foreign decl. A raw name is
/// exempt from the reserved-type-name binding check (issue 0089).
is_raw: bool = false,
};
pub const Block = struct {
@@ -303,6 +307,10 @@ pub const VarDecl = struct {
is_foreign: bool = false,
foreign_lib: ?[]const u8 = null,
foreign_name: ?[]const u8 = null,
/// True when the binding name was written as a backtick raw identifier
/// (`` `s2 := … ``). A raw name is exempt from the reserved-type-name
/// binding check (issue 0089).
is_raw: bool = false,
};
pub const Assignment = struct {

View File

@@ -127,6 +127,10 @@ pub fn processCImport(
.name = pname,
.name_span = .{ .start = 0, .end = 0 },
.type_expr = ptype_node,
// Foreign C param names (`s1`, `s2`, …) are RAW — exempt from
// the reserved-type-name binding check; generated bindings
// must import without hand-edits (issue 0089).
.is_raw = true,
});
}

View File

@@ -117,7 +117,7 @@ pub const UnknownTypeChecker = struct {
switch (node.data) {
// ── Binding-introducing nodes: check the name(s), then recurse. ──
.var_decl => |vd| {
self.checkBindingName(vd.name, vd.name_span);
if (!vd.is_raw) self.checkBindingName(vd.name, vd.name_span);
if (vd.value) |v| self.checkBindingNames(v);
},
.destructure_decl => |dd| {
@@ -133,7 +133,7 @@ pub const UnknownTypeChecker = struct {
self.checkBindingNames(lm.body);
},
.param => |p| {
self.checkBindingName(p.name, p.name_span);
if (!p.is_raw) self.checkBindingName(p.name, p.name_span);
if (p.default_expr) |de| self.checkBindingNames(de);
},
.if_expr => |ie| {
@@ -316,7 +316,9 @@ pub const UnknownTypeChecker = struct {
/// (a lambda default), so recurse into it.
fn checkParamNames(self: UnknownTypeChecker, params: []const ast.Param) void {
for (params) |p| {
self.checkBindingName(p.name, p.name_span);
// A backtick raw param (`` (`s2: T) ``) or a `#import c` foreign
// param is exempt from the reserved-type-name rule (issue 0089).
if (!p.is_raw) self.checkBindingName(p.name, p.name_span);
if (p.default_expr) |de| self.checkBindingNames(de);
}
}

View File

@@ -50,6 +50,24 @@ pub const Lexer = struct {
return self.lexString(start);
}
// Raw-identifier escape: `ident — a leading backtick forces the
// following identifier to be RAW (never type-classified, never
// reserved-checked). The emitted token's span excludes the backtick, so
// its text is the bare name, and a backticked keyword spelling
// (`` `for ``, `` `struct ``) is still an `.identifier`, never a keyword.
if (c == '`') {
const id_start = start + 1;
if (id_start < self.source.len and isIdentStart(self.source[id_start])) {
self.index = id_start;
var tok = self.lexIdentifier(id_start);
tok.tag = .identifier;
tok.is_raw = true;
return tok;
}
self.index += 1;
return self.makeToken(.invalid, start, self.index);
}
// Directives: #import, #insert, #run, #builtin, #foreign, #library, #string
if (c == '#') {
@@ -485,6 +503,38 @@ test "lex type-like identifiers" {
}
}
test "lex backtick raw identifier" {
    const source: [:0]const u8 = "`s2 `string `for";
    var lex = Lexer.init(source);

    // Expected token texts, in source order. Every token must come back as a
    // raw `.identifier` (including the keyword spelling `for`), and its slice
    // must not include the leading backtick.
    const expected_texts = [_][]const u8{ "s2", "string", "for" };
    for (expected_texts) |want| {
        const tok = lex.next();
        try std.testing.expectEqual(Tag.identifier, tok.tag);
        try std.testing.expect(tok.is_raw);
        try std.testing.expectEqualStrings(want, tok.slice(source));
    }

    // Nothing else follows the three raw identifiers.
    try std.testing.expectEqual(Tag.eof, lex.next().tag);
}
test "lex bare identifier is not raw" {
    // Without a leading backtick the token is still an identifier, but the
    // raw flag must stay at its default of false.
    var lexer = Lexer.init("s2");
    const bare = lexer.next();
    try std.testing.expectEqual(Tag.identifier, bare.tag);
    try std.testing.expect(!bare.is_raw);
}
test "lex lone backtick is invalid" {
    // A backtick that is not immediately followed by an identifier start
    // character (here: a space) yields an `.invalid` token.
    var lexer = Lexer.init("` 5");
    const first = lexer.next();
    try std.testing.expectEqual(Tag.invalid, first.tag);
}
test "lex hash_run" {
var lex = Lexer.init("#run");
try std.testing.expectEqual(Tag.hash_run, lex.next().tag);

View File

@@ -146,6 +146,7 @@ pub const Parser = struct {
}
const name = self.tokenSlice(self.current);
const name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end };
const name_is_raw = self.current.is_raw;
self.advance();
// IDENT :: ...
@@ -158,7 +159,7 @@ pub const Parser = struct {
// IDENT : type = value; (typed variable)
if (self.current.tag == .colon) {
self.advance();
return self.parseTypedBinding(name, name_span, start);
return self.parseTypedBinding(name, name_span, start, name_is_raw);
}
// IDENT := value; (variable)
@@ -166,7 +167,7 @@ pub const Parser = struct {
self.advance();
const value = try self.parseExpr();
try self.expectSemicolonAfter(value);
return try self.createNode(start, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = null, .value = value } });
return try self.createNode(start, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = null, .value = value, .is_raw = name_is_raw } });
}
return self.fail("expected '::', ':=', or ':' after identifier");
@@ -383,7 +384,7 @@ pub const Parser = struct {
} });
}
fn parseTypedBinding(self: *Parser, name: []const u8, name_span: ast.Span, start_pos: u32) anyerror!*Node {
fn parseTypedBinding(self: *Parser, name: []const u8, name_span: ast.Span, start_pos: u32, name_is_raw: bool) anyerror!*Node {
// After `name :`
// Parse type
const type_node = try self.parseTypeExpr();
@@ -401,13 +402,13 @@ pub const Parser = struct {
self.advance();
const value = try self.parseExpr();
try self.expectSemicolonAfter(value);
return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = type_node, .value = value } });
return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = type_node, .value = value, .is_raw = name_is_raw } });
}
if (self.current.tag == .semicolon) {
// name : type; (default-initialized variable)
self.advance();
return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = type_node, .value = null } });
return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = type_node, .value = null, .is_raw = name_is_raw } });
}
if (self.current.tag == .hash_foreign) {
@@ -433,6 +434,7 @@ pub const Parser = struct {
.is_foreign = true,
.foreign_lib = lib_ref,
.foreign_name = c_name,
.is_raw = name_is_raw,
} });
}
@@ -1778,11 +1780,12 @@ pub const Parser = struct {
}
const param_name = self.tokenSlice(self.current);
const param_name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end };
const param_is_raw = self.current.is_raw;
self.advance();
// Optional type annotation: if no ':', infer type from context
if (self.current.tag != .colon) {
const inferred_node = try self.createNode(param_name_span.start, .{ .inferred_type = {} });
try params.append(self.allocator, .{ .name = param_name, .name_span = param_name_span, .type_expr = inferred_node, .is_variadic = is_variadic, .is_comptime = is_ct_param });
try params.append(self.allocator, .{ .name = param_name, .name_span = param_name_span, .type_expr = inferred_node, .is_variadic = is_variadic, .is_comptime = is_ct_param, .is_raw = param_is_raw });
continue;
}
self.advance(); // consume ':'
@@ -1822,7 +1825,7 @@ pub const Parser = struct {
.type_expr, .parameterized_type_expr => true,
else => false,
};
try params.append(self.allocator, .{ .name = param_name, .name_span = param_name_span, .type_expr = param_type, .is_variadic = is_variadic, .is_comptime = is_comptime_param, .is_pack = is_pack, .default_expr = default_expr });
try params.append(self.allocator, .{ .name = param_name, .name_span = param_name_span, .type_expr = param_type, .is_variadic = is_variadic, .is_comptime = is_comptime_param, .is_pack = is_pack, .default_expr = default_expr, .is_raw = param_is_raw });
}
for (params.items, 0..) |param, i| {
if (param.is_variadic and i != params.items.len - 1) {
@@ -2023,6 +2026,7 @@ pub const Parser = struct {
const start = self.current.loc.start;
const name = self.tokenSlice(self.current);
const name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end };
const name_is_raw = self.current.is_raw;
self.advance();
if (self.current.tag == .colon_colon) {
@@ -2033,11 +2037,11 @@ pub const Parser = struct {
self.advance();
const value = try self.parseExpr();
try self.expectSemicolonAfter(value);
return try self.createNode(start, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = null, .value = value } });
return try self.createNode(start, .{ .var_decl = .{ .name = name, .name_span = name_span, .type_annotation = null, .value = value, .is_raw = name_is_raw } });
}
if (self.current.tag == .colon) {
self.advance();
return self.parseTypedBinding(name, name_span, start);
return self.parseTypedBinding(name, name_span, start, name_is_raw);
}
// Multi-target assignment: ident, expr, ... = expr, expr, ...;
@@ -2686,8 +2690,11 @@ pub const Parser = struct {
},
.identifier => {
const name = self.tokenSlice(self.current);
// Check if this identifier is a type name (e.g. s32, u8, s128)
if (Type.fromName(name) != null) {
// A backtick raw identifier (`` `s2 ``) is NEVER type-classified —
// it is always a value identifier, bypassing the reserved-type-name
// rule (issue 0089). Only a bare spelling is checked for a type name
// (e.g. s32, u8, s128).
if (!self.current.is_raw and Type.fromName(name) != null) {
self.advance();
return try self.createNode(start, .{ .type_expr = .{ .name = name } });
}

View File

@@ -210,6 +210,12 @@ pub const Tag = enum {
pub const Token = struct {
tag: Tag,
loc: Loc,
/// True when an `.identifier` was introduced by a leading backtick
/// (`` `s2 ``): a RAW identifier whose text excludes the backtick and which
/// the parser must NEVER type-classify (it bypasses the reserved-type-name
/// rule). `loc` already spans only the un-backticked name, so `slice` returns
/// the bare text.
is_raw: bool = false,
pub const Loc = struct {
start: u32,