Files
sx/src/ast.zig
agra 0dbdc530ba feat(lang): backtick raw-identifier escape + #import c foreign-name exemption [F0.6]
Reserved type-name spellings (s1, s2, u8, …) can now be used as value
identifiers two ways, resolving issue 0089:

1. Backtick raw identifier: a leading backtick (`s2) lexes to an
   .identifier token carrying a new Token.is_raw flag, with the backtick
   excluded from the text. A raw identifier is never type-classified — the
   parser skips Type.fromName for it — so it is always a value identifier.
   The flag threads to VarDecl.is_raw / Param.is_raw at binding sites, and
   the reserved-type-name check (UnknownTypeChecker) skips raw bindings.
   Because the token tag stays .identifier, the escape works in every
   position (local, global, param, field, fn name, struct member, later
   reference) with no per-site parser change.

2. #import c exemption: c_import.zig synthesizes foreign decls with
   Param.is_raw = true, so generated C param names that collide with
   reserved type names (s1, s2) import unedited.

A bare reserved-name binding in sx still errors (issue 0076 preserved):
the is_raw-gated skip only fires for backtick / foreign names, and a raw
binding's address-of / autoref lowering stays correct because every
occurrence is an .identifier, never a .type_expr.

Tests: examples/0151 (backtick, every position),
examples/1220 (foreign exemption, compiled+run), lexer unit tests.
1119 (bare-binding rejection) stays green. specs.md + readme.md updated.
2026-06-04 17:40:42 +03:00

742 lines
23 KiB
Zig

const std = @import("std");
/// A source range recorded as a pair of `u32` positions.
/// NOTE(review): presumably byte offsets into the source text with `end`
/// exclusive — confirm against the lexer.
pub const Span = struct {
    /// Position at which the range begins.
    start: u32,
    /// Position at which the range ends.
    end: u32,
};
/// A single syntax-tree node: the source range it covers, a kind-specific
/// payload, and an optional source-file name.
pub const Node = struct {
    /// Source range covered by this node.
    span: Span,
    /// Kind-specific payload; the active tag identifies the node kind.
    data: Data,
    /// Source file associated with this node; `null` unless set.
    source_file: ?[]const u8 = null,

    /// Tagged union with one variant per node kind. Variants declared as
    /// `void` carry no payload.
    pub const Data = union(enum) {
        root: Root,
        fn_decl: FnDecl,
        block: Block,
        int_literal: IntLiteral,
        float_literal: FloatLiteral,
        bool_literal: BoolLiteral,
        string_literal: StringLiteral,
        identifier: Identifier,
        enum_literal: EnumLiteral,
        binary_op: BinaryOp,
        chained_comparison: ChainedComparison,
        unary_op: UnaryOp,
        call: Call,
        field_access: FieldAccess,
        if_expr: IfExpr,
        match_expr: MatchExpr,
        match_arm: MatchArm,
        const_decl: ConstDecl,
        var_decl: VarDecl,
        assignment: Assignment,
        multi_assign: MultiAssign,
        destructure_decl: DestructureDecl,
        enum_decl: EnumDecl,
        struct_decl: StructDecl,
        struct_literal: StructLiteral,
        union_decl: UnionDecl,
        error_set_decl: ErrorSetDecl,
        lambda: Lambda,
        type_expr: TypeExpr,
        param: Param,
        defer_stmt: DeferStmt,
        push_stmt: PushStmt,
        comptime_expr: ComptimeExpr,
        insert_expr: InsertExpr,
        return_stmt: ReturnStmt,
        import_decl: ImportDecl,
        namespace_decl: NamespaceDecl,
        array_type_expr: ArrayTypeExpr,
        slice_type_expr: SliceTypeExpr,
        array_literal: ArrayLiteral,
        parameterized_type_expr: ParameterizedTypeExpr,
        index_expr: IndexExpr,
        slice_expr: SliceExpr,
        pointer_type_expr: PointerTypeExpr,
        many_pointer_type_expr: ManyPointerTypeExpr,
        optional_type_expr: OptionalTypeExpr,
        error_type_expr: ErrorTypeExpr,
        raise_stmt: RaiseStmt,
        try_expr: TryExpr,
        catch_expr: CatchExpr,
        onfail_stmt: OnFailStmt,
        /// `#caller_location` marker. Used as a parameter default it
        /// resolves to a `Source_Location` describing the call site
        /// (ERR E4.1b). It has no payload: the location lives in the owning
        /// node's `span`/`source_file`, which default expansion rewrites to
        /// the call site.
        caller_location: void,
        pack_index_type_expr: PackIndexTypeExpr,
        comptime_pack_ref: ComptimePackRef,
        force_unwrap: ForceUnwrap,
        null_coalesce: NullCoalesce,
        deref_expr: DerefExpr,
        null_literal: void,
        while_expr: WhileExpr,
        for_expr: ForExpr,
        spread_expr: SpreadExpr,
        break_expr: void,
        continue_expr: void,
        undef_literal: void,
        inferred_type: void,
        builtin_expr: void,
        compiler_expr: void,
        foreign_expr: ForeignExpr,
        library_decl: LibraryDecl,
        framework_decl: FrameworkDecl,
        function_type_expr: FunctionTypeExpr,
        closure_type_expr: ClosureTypeExpr,
        tuple_type_expr: TupleTypeExpr,
        tuple_literal: TupleLiteral,
        ufcs_alias: UfcsAlias,
        c_import_decl: CImportDecl,
        protocol_decl: ProtocolDecl,
        impl_block: ImplBlock,
        ffi_intrinsic_call: FfiIntrinsicCall,
        foreign_class_decl: ForeignClassDecl,
        jni_env_block: JniEnvBlock,

        /// Returns the payload's `name` for the declaration variants listed
        /// in the switch, and `null` for every other variant.
        /// `c_import_decl` stores an optional name, so it can also produce
        /// `null`.
        pub fn declName(self: Data) ?[]const u8 {
            return switch (self) {
                // `inline` expands to one prong per tag, so each capture
                // keeps its own payload type.
                inline .fn_decl,
                .const_decl,
                .var_decl,
                .enum_decl,
                .struct_decl,
                .union_decl,
                .error_set_decl,
                .namespace_decl,
                .ufcs_alias,
                .c_import_decl,
                .protocol_decl,
                .foreign_class_decl,
                => |decl| decl.name,
                else => null,
            };
        }
    };
};
/// Payload of the `root` node variant.
pub const Root = struct {
    /// Declaration nodes held by the root.
    decls: []const *Node,
};
/// Calling-convention selector stored in the `call_conv` field of function
/// declarations, lambdas and function type expressions.
pub const CallingConvention = enum {
    default,
    c,
};
/// Payload of a `fn_decl` node: a named function.
pub const FnDecl = struct {
    /// Declared name of the function.
    name: []const u8,
    /// Parameters of the function.
    params: []const Param,
    /// Node for the return type, or `null` when none is recorded.
    return_type: ?*Node,
    /// Node holding the function body.
    body: *Node,
    /// Type parameters; empty unless set.
    type_params: []const StructTypeParam = &.{},
    /// NOTE(review): presumably set when the function was written in arrow
    /// form — confirm against the parser.
    is_arrow: bool = false,
    /// Calling convention; `.default` unless set.
    call_conv: CallingConvention = .default,
};
/// One parameter entry, as stored in `FnDecl.params` and `Lambda.params`.
pub const Param = struct {
    /// Parameter name.
    name: []const u8,
    /// Span of the parameter name.
    name_span: Span,
    /// Node for the parameter's type annotation.
    type_expr: *Node,
    /// Defaults to `false`; always implied when `is_pack` is set.
    is_variadic: bool = false,
    /// Defaults to `false`; the comptime type-pack form (`..$xs`) sets it.
    is_comptime: bool = false,
    /// Marks a heterogeneous, protocol-constrained variadic pack written
    /// `..xs: Protocol` (no `[]`, no `$`). The annotation is a bare protocol
    /// that each trailing argument conforms to with its own type-arg. This
    /// differs from a slice variadic (`..xs: []T`, `is_pack == false`) and
    /// from the comptime type-pack (`..$xs`, `is_comptime == true`).
    /// Setting it always implies `is_variadic`.
    is_pack: bool = false,
    /// Optional default-value expression. If the caller leaves this
    /// parameter out, lowering substitutes this expression for it.
    default_expr: ?*Node = null,
    /// Set when the name was spelled as a backtick raw identifier
    /// (`` `s2 ``) or was synthesized by a `#import c` foreign decl. Raw
    /// names are exempt from the reserved-type-name binding check
    /// (issue 0089).
    is_raw: bool = false,
};
/// Payload of a `block` node: a statement list plus value-production info.
pub const Block = struct {
    /// Statement nodes of the block.
    stmts: []const *Node,
    /// Set when the last statement is the block's value, i.e. a trailing
    /// expression written WITHOUT `;`. A trailing `;`, or a last statement
    /// that is not an expression, discards the value and the block is void.
    /// Match-arm and else-arm bodies are constructed with this forced to
    /// true, because the arm's `;` terminates the arm rather than
    /// discarding a value.
    produces_value: bool = false,
    /// Span of the discarding `;` when `produces_value` is false *because*
    /// the last statement was an expression ended by `;` (rather than a
    /// decl, a return, or an empty block). This lets a value-position
    /// diagnostic point exactly at the semicolon to remove. `null` in every
    /// other case.
    discarded_semi: ?Span = null,
};
/// Payload of an `int_literal` node.
pub const IntLiteral = struct {
    /// The literal's value, stored as a signed 64-bit integer.
    value: i64,
};
/// Payload of a `float_literal` node.
pub const FloatLiteral = struct {
    /// The literal's value, stored as a 64-bit float.
    value: f64,
};
/// Payload of a `bool_literal` node.
pub const BoolLiteral = struct {
    /// The literal's truth value.
    value: bool,
};
/// Payload of a `string_literal` node.
pub const StringLiteral = struct {
    /// Text of the literal.
    /// NOTE(review): presumably the unprocessed source spelling — confirm
    /// whether escapes are already resolved.
    raw: []const u8,
    /// Defaults to `false`.
    /// NOTE(review): presumably marks a raw-string literal — confirm against
    /// the lexer.
    is_raw: bool = false,
};
/// Payload of an `identifier` node.
pub const Identifier = struct {
    /// The identifier's text.
    name: []const u8,
};
/// Payload of an `enum_literal` node.
pub const EnumLiteral = struct {
    /// Literal name, stored without the leading dot.
    name: []const u8,
};
/// Payload of a `binary_op` node: an operator applied to two operand nodes.
pub const BinaryOp = struct {
    /// Which operator is applied.
    op: Op,
    /// Left-hand operand.
    lhs: *Node,
    /// Right-hand operand.
    rhs: *Node,

    /// The set of binary operators. `ChainedComparison.ops` also uses this
    /// type.
    pub const Op = enum {
        add,
        sub,
        mul,
        div,
        mod,
        eq,
        neq,
        lt,
        lte,
        gt,
        gte,
        and_op,
        or_op,
        bit_and,
        bit_or,
        bit_xor,
        shl,
        shr,
        in_op,
    };
};
/// Payload of a `chained_comparison` node.
/// NOTE(review): presumably `ops[i]` sits between `operands[i]` and
/// `operands[i + 1]` — confirm against the parser.
pub const ChainedComparison = struct {
    /// Operand nodes of the chain.
    operands: []const *Node,
    /// Operators of the chain.
    ops: []const BinaryOp.Op,
};
/// Payload of a `unary_op` node: an operator applied to one operand node.
pub const UnaryOp = struct {
    /// Which operator is applied.
    op: Op,
    /// The node the operator applies to.
    operand: *Node,

    /// The set of unary operators.
    pub const Op = enum {
        negate,
        not,
        bit_not,
        xx,
        address_of,
    };
};
/// Payload of a `call` node.
pub const Call = struct {
    /// Node for the thing being called.
    callee: *Node,
    /// Argument nodes.
    args: []const *Node,
};
/// Selects which FFI intrinsic an `FfiIntrinsicCall` represents:
///
/// - `#objc_call(T)(recv, "sel:", args...)`
/// - `#jni_call(T)(env, target, "name", "(Sig)R", args...)`
/// - `#jni_static_call(T)(class, "name", "(Sig)R", args...)`
///
/// The first pair of parentheses holds the return type `T`; the second pair
/// holds the actual call arguments. Codegen switches on the kind to choose
/// the lowering (objc_msgSend / CallXxxMethod / CallStaticXxxMethod).
pub const FfiIntrinsicKind = enum {
    objc_call,
    jni_call,
    jni_static_call,
};
/// Payload of an `ffi_intrinsic_call` node.
pub const FfiIntrinsicCall = struct {
    /// Which intrinsic this call is.
    kind: FfiIntrinsicKind,
    /// Node for the return type.
    return_type: *Node,
    /// Argument nodes of the call.
    args: []const *Node,
};
/// Payload of a `field_access` node.
pub const FieldAccess = struct {
    /// Node for the value being accessed.
    object: *Node,
    /// Name of the accessed field.
    field: []const u8,
    /// Defaults to `false`.
    /// NOTE(review): presumably marks an optional-chaining access — confirm
    /// against the parser.
    is_optional: bool = false,
};
/// Payload of an `if_expr` node.
pub const IfExpr = struct {
    /// Condition node.
    condition: *Node,
    /// Node for the branch taken on the "then" side.
    then_branch: *Node,
    /// Node for the else side, or `null` when there is none.
    else_branch: ?*Node,
    /// True for the `if cond then a else b` form.
    is_inline: bool,
    /// True for `inline if` — compile-time branch elimination.
    is_comptime: bool = false,
    /// Binding name for the optional-binding form `if val := expr { ... }`.
    binding_name: ?[]const u8 = null,
    /// Span of `binding_name`; set iff `binding_name` is set.
    binding_span: ?Span = null,
};
/// Payload of a `match_expr` node.
pub const MatchExpr = struct {
    /// Node for the value being matched.
    subject: *Node,
    /// Arms of the match.
    arms: []const MatchArm,
    /// Defaults to `false`.
    /// NOTE(review): presumably marks a compile-time match — confirm
    /// against the parser.
    is_comptime: bool = false,
};
/// One arm of a match (element of `MatchExpr.arms`; also the payload of a
/// `match_arm` node).
pub const MatchArm = struct {
    /// Pattern node; `null` denotes the else (default) arm.
    pattern: ?*Node,
    /// Node for the arm's body.
    body: *Node,
    /// NOTE(review): meaning not visible in this file — confirm against the
    /// parser.
    is_break: bool,
    /// Payload binding name, as in `case .variant: (name) { ... }`.
    capture: ?[]const u8 = null,
    /// Span of `capture`; set iff `capture` is set.
    capture_span: ?Span = null,
};
/// Payload of a `const_decl` node.
pub const ConstDecl = struct {
    /// Declared name.
    name: []const u8,
    /// Node for the type annotation, or `null` when there is none.
    type_annotation: ?*Node,
    /// Node for the value.
    value: *Node,
};
/// Payload of a `var_decl` node.
pub const VarDecl = struct {
    /// Declared name.
    name: []const u8,
    /// Span of the declared name.
    name_span: Span,
    /// Node for the type annotation, or `null` when there is none.
    type_annotation: ?*Node,
    /// Node for the initial value, or `null` when there is none.
    value: ?*Node,
    /// Defaults to `false`.
    /// NOTE(review): presumably marks a foreign variable — confirm against
    /// the parser.
    is_foreign: bool = false,
    /// Defaults to `null`.
    /// NOTE(review): presumably the library a foreign variable comes from —
    /// confirm.
    foreign_lib: ?[]const u8 = null,
    /// Defaults to `null`.
    /// NOTE(review): presumably the foreign-side symbol name — confirm.
    foreign_name: ?[]const u8 = null,
    /// Set when the binding name was spelled as a backtick raw identifier
    /// (`` `s2 := … ``). Raw names are exempt from the reserved-type-name
    /// binding check (issue 0089).
    is_raw: bool = false,
};
/// Payload of an `assignment` node.
pub const Assignment = struct {
    /// Node being assigned to.
    target: *Node,
    /// Which assignment operator is used.
    op: Op,
    /// Node for the assigned value.
    value: *Node,

    /// The set of assignment operators: plain `assign` and the compound
    /// forms.
    pub const Op = enum {
        assign,
        add_assign,
        sub_assign,
        mul_assign,
        div_assign,
        mod_assign,
        and_assign,
        or_assign,
        xor_assign,
        shl_assign,
        shr_assign,
    };
};
/// Payload of a `multi_assign` node.
pub const MultiAssign = struct {
    /// Nodes being assigned to.
    targets: []const *Node,
    /// Nodes for the assigned values.
    values: []const *Node,
};
/// Payload of a `destructure_decl` node.
pub const DestructureDecl = struct {
    /// Names introduced by the declaration.
    names: []const []const u8,
    /// One span per entry in `names`, in the same order.
    name_spans: []const Span,
    /// Node for the value being destructured.
    value: *Node,
};
/// Payload of an `enum_decl` node.
pub const EnumDecl = struct {
    /// Declared enum name.
    name: []const u8,
    /// Names of the variants.
    variant_names: []const []const u8,
    /// Payload type per variant: a `null` entry means that variant has no
    /// payload; an empty slice means the enum is payload-less.
    variant_types: []const ?*Node = &.{},
    /// Defaults to `false`.
    /// NOTE(review): presumably marks a flags enum — confirm against the
    /// parser.
    is_flags: bool = false,
    /// Explicit value per variant (`null` entry = auto); an empty slice
    /// means every value is auto.
    variant_values: []const ?*Node = &.{},
    /// Optional backing type, as in `enum u8 { ... }`.
    backing_type: ?*Node = null,
};
/// Payload of a `union_decl` node.
pub const UnionDecl = struct {
    /// Declared union name.
    name: []const u8,
    /// Names of the union's fields.
    field_names: []const []const u8,
    /// Type nodes of the union's fields.
    field_types: []const *Node,
};
/// A named error set, written `Foo :: error { TagA, TagB }`. Unlike enum
/// variants, tags are bare identifiers with no payload and no explicit
/// value.
pub const ErrorSetDecl = struct {
    /// Name of the error set.
    name: []const u8,
    /// Names of the tags in the set.
    tag_names: []const []const u8,
};
/// One type/value parameter, as stored in the `type_params` fields of
/// structs, functions, lambdas and protocols.
pub const StructTypeParam = struct {
    /// Parameter name without the `$`, e.g. "N" or "T".
    name: []const u8,
    /// A type_expr node: "u32" for a value param, "Type" for a type param.
    constraint: *Node,
    /// Protocol constraints, e.g. ["Eq", "Hashable"] for `$T/Eq/Hashable`.
    protocol_constraints: []const []const u8 = &.{},
    /// `..$Ts: []Type` — a pack type-param that binds the remaining type
    /// args as a sequence (it must come last). Field types refer to it via
    /// `(..$Ts)` etc.
    is_variadic: bool = false,
};
/// One entry of `StructDecl.using_entries`: a struct type whose fields are
/// inlined.
pub const UsingEntry = struct {
    /// Position in `field_names` at which the used fields are spliced.
    insert_index: u32,
    /// Name of the struct type to inline.
    type_name: []const u8,
};
/// Payload of a `struct_decl` node.
pub const StructDecl = struct {
    /// Declared struct name.
    name: []const u8,
    /// Names of the fields.
    field_names: []const []const u8,
    /// type_expr nodes for the fields.
    field_types: []const *Node,
    /// Default value per field; `null` where a field has none.
    field_defaults: []const ?*Node,
    /// Type parameters; empty unless set.
    type_params: []const StructTypeParam = &.{},
    /// Entries describing inlined struct types; empty unless set.
    using_entries: []const UsingEntry = &.{},
    /// fn_decl nodes for the struct's methods.
    methods: []const *Node = &.{},
    /// const_decl nodes for struct-level constants.
    constants: []const *Node = &.{},
};
/// One field initializer inside a struct literal.
pub const StructFieldInit = struct {
    /// `null` for a positional initializer; non-null for named/shorthand.
    name: ?[]const u8,
    /// Node for the initializer's value.
    value: *Node,
};
/// Payload of a `struct_literal` node.
pub const StructLiteral = struct {
    /// Struct name; `null` for the anonymous form `.{ ... }`.
    struct_name: ?[]const u8,
    /// Type node for the `GenericType(args).{ ... }` form.
    type_expr: ?*Node = null,
    /// Field initializers of the literal.
    field_inits: []const StructFieldInit,
    /// Optional `{ stmts }` block following the struct literal.
    init_block: ?*Node = null,
};
/// Payload of a `lambda` node: a function without a name.
pub const Lambda = struct {
    /// Parameters of the lambda.
    params: []const Param,
    /// Node for the return type, or `null` when none is recorded.
    return_type: ?*Node,
    /// Node holding the body.
    body: *Node,
    /// Type parameters; empty unless set.
    type_params: []const StructTypeParam = &.{},
    /// Calling convention; `.default` unless set.
    call_conv: CallingConvention = .default,
};
/// Payload of a `type_expr` node: a type referenced by name.
pub const TypeExpr = struct {
    /// Name of the type.
    name: []const u8,
    /// Defaults to `false`.
    /// NOTE(review): presumably set for `$T`-style generic names — confirm
    /// against the parser.
    is_generic: bool = false,
    /// Protocol constraints, e.g. ["Eq", "Hashable"] for `$T/Eq/Hashable`.
    protocol_constraints: []const []const u8 = &.{},
};
/// `$<pack_name>[<index>]` written in type position. It resolves to the
/// i-th element type of the active pack binding. This is step 3 of the
/// variadic heterogeneous type packs feature, used in trampoline bodies,
/// generic conversions, and struct fields parameterised over the pack.
pub const PackIndexTypeExpr = struct {
    /// Name of the pack being indexed.
    pack_name: []const u8,
    /// Index of the element type to select.
    index: u32,
};
/// `$<pack_name>` with no indexing, written in expression position. It
/// evaluates to a comptime `[]Type` slice, i.e. the WHOLE pack as data.
/// This is the final slice of step 4: builder fns can walk the pack types
/// and emit per-position code (the shape that step 5's generic Into(Block)
/// needs for its trampoline body).
pub const ComptimePackRef = struct {
    /// Name of the referenced pack.
    pack_name: []const u8,
};
/// Payload of a `defer_stmt` node.
pub const DeferStmt = struct {
    /// Node for the deferred expression.
    expr: *Node,
};
// ── Error handling (ERR stream) ──────────────────────────────────────────
/// `raise EXPR;` — like `return`, it ends control flow, and it populates
/// the error channel.
pub const RaiseStmt = struct {
    /// A tag-typed expression: either `error.X` (a field access on the
    /// `error` keyword) or a tag-bound variable (`raise e`).
    tag: *Node,
};
/// `try X` — a failable attempt. It is a unary prefix that binds tighter
/// than any binary operator. Sema (E1.4) rejects an operand that is not
/// failable.
pub const TryExpr = struct {
    /// Node for the attempted expression.
    operand: *Node,
};
/// `X catch [e] BODY` — a postfix, inline failure handler. The binding is
/// optional and written as a bare name (no parens).
pub const CatchExpr = struct {
    /// Node for the expression whose failure is handled.
    operand: *Node,
    /// Optional binding name.
    binding: ?[]const u8 = null,
    /// Span of `binding`; set iff `binding` is set.
    binding_span: ?Span = null,
    /// A block, a bare expression, or — when `is_match_body` is set — a
    /// `match_expr` whose subject is the binding.
    body: *Node,
    /// Set when `body` is a `match_expr` produced by the
    /// `== { case ... }` sugar.
    is_match_body: bool = false,
};
/// `onfail [e] BODY` — cleanup that runs when the enclosing block exits
/// with an error.
pub const OnFailStmt = struct {
    /// Optional binding, written as a bare name.
    binding: ?[]const u8 = null,
    /// Span of `binding`; set iff `binding` is set.
    binding_span: ?Span = null,
    /// Either a block (`onfail [e] { ... }`) or a bare expression
    /// (`onfail EXPR;`).
    body: *Node,
};
/// Payload of a `push_stmt` node.
pub const PushStmt = struct {
    /// Node for the context expression.
    context_expr: *Node,
    /// Node for the statement's body.
    body: *Node,
};
/// Payload of a `comptime_expr` node.
pub const ComptimeExpr = struct {
    /// Node for the wrapped expression.
    expr: *Node,
};
/// Payload of an `insert_expr` node.
pub const InsertExpr = struct {
    /// Node for the wrapped expression.
    expr: *Node,
};
/// Payload of a `return_stmt` node.
pub const ReturnStmt = struct {
    /// Node for the returned value, or `null` when there is none.
    value: ?*Node,
};
/// Payload of an `import_decl` node.
pub const ImportDecl = struct {
    /// Path of the import.
    path: []const u8,
    /// Optional name attached to the import; `null` when absent.
    name: ?[]const u8,
};
/// Payload of an `array_type_expr` node.
pub const ArrayTypeExpr = struct {
    /// int_literal node giving the size.
    length: *Node,
    /// type_expr node giving the element type.
    element_type: *Node,
};
/// Payload of a `slice_type_expr` node.
pub const SliceTypeExpr = struct {
    /// type_expr node giving the element type.
    element_type: *Node,
};
/// Payload of an `array_literal` node.
pub const ArrayLiteral = struct {
    /// Element nodes of the literal.
    elements: []const *Node,
    /// Optional type node attached to the literal; `null` unless set.
    type_expr: ?*Node = null,
};
/// Payload of a `parameterized_type_expr` node: a named type applied to
/// arguments.
pub const ParameterizedTypeExpr = struct {
    /// Type name, e.g. "Vector", or later generic struct names.
    name: []const u8,
    /// Argument nodes, e.g. [int_literal(3), type_expr("f32")].
    args: []const *Node,
};
/// Payload of an `index_expr` node.
pub const IndexExpr = struct {
    /// Node for the value being indexed.
    object: *Node,
    /// Node for the index.
    index: *Node,
};
/// Payload of a `slice_expr` node.
pub const SliceExpr = struct {
    /// Node for the value being sliced.
    object: *Node,
    /// Node for the start bound; `null` unless set.
    start: ?*Node = null,
    /// Node for the end bound; `null` unless set.
    end: ?*Node = null,
};
/// Payload of a `pointer_type_expr` node.
pub const PointerTypeExpr = struct {
    /// Node for the pointed-to type.
    pointee_type: *Node,
};
/// Payload of a `many_pointer_type_expr` node.
pub const ManyPointerTypeExpr = struct {
    /// Node for the element type.
    element_type: *Node,
};
/// Payload of an `optional_type_expr` node.
pub const OptionalTypeExpr = struct {
    /// Node for the wrapped type.
    inner_type: *Node,
};
/// Error channel of a multi-return result list: either a bare `!`
/// (inferred set) or `!Named` (a declared `error { ... }` set). It occurs
/// only as the trailing result element; the parser enforces that position
/// and sema (E1) restricts it to return positions.
pub const ErrorTypeExpr = struct {
    /// `null` means an inferred set (bare `!`); non-null names the set
    /// (`!Named`).
    name: ?[]const u8 = null,
};
/// Payload of a `force_unwrap` node.
pub const ForceUnwrap = struct {
    /// Node for the expression being unwrapped.
    operand: *Node,
};
/// Payload of a `null_coalesce` node.
pub const NullCoalesce = struct {
    /// Left-hand operand.
    lhs: *Node,
    /// Right-hand operand.
    rhs: *Node,
};
/// Payload of a `deref_expr` node.
pub const DerefExpr = struct {
    /// Node for the expression being dereferenced.
    operand: *Node,
};
/// Payload of a `while_expr` node.
pub const WhileExpr = struct {
    /// Condition node.
    condition: *Node,
    /// Node for the loop body.
    body: *Node,
    /// Binding name for the optional-binding form
    /// `while val := expr { ... }`.
    binding_name: ?[]const u8 = null,
    /// Span of `binding_name`; set iff `binding_name` is set.
    binding_span: ?Span = null,
};
/// Payload of a `for_expr` node. Covers both the collection form
/// (`for coll : (x) { }`) and the range form (`for start..end (i) { }`).
pub const ForExpr = struct {
    /// The iterated collection, or the start of the range in the range
    /// form.
    iterable: *Node,
    /// Node for the loop body.
    body: *Node,
    /// Capture name. In the range form this is the cursor, and it is empty
    /// when omitted (`for 0..N { }`).
    capture_name: []const u8,
    /// Span of `capture_name`; `null` when omitted, e.g. `for 0..N { }`.
    capture_span: ?Span = null,
    /// Optional index name; `null` unless set.
    index_name: ?[]const u8 = null,
    /// Span of `index_name`; set iff `index_name` is set.
    index_span: ?Span = null,
    /// The (exclusive) end of the range form `for start..end (i) { }`.
    /// `null` for the iterate-a-collection form (`for coll : (x) { }`).
    range_end: ?*Node = null,
    /// `inline for` — comptime-unrolled; range bounds must be comptime.
    is_inline: bool = false,
    /// `for xs: (*x)` — `x` is bound to a pointer into the collection (no
    /// per-element copy) instead of a value copy of each element.
    capture_by_ref: bool = false,
};
/// Payload of a `spread_expr` node.
pub const SpreadExpr = struct {
    /// Node for the expression being spread.
    operand: *Node,
};
/// Payload of a `namespace_decl` node.
pub const NamespaceDecl = struct {
    /// Declared namespace name.
    name: []const u8,
    /// Declaration nodes held by the namespace.
    decls: []const *Node,
};
/// Payload of a `foreign_expr` node.
pub const ForeignExpr = struct {
    /// Identifier name of the library constant.
    library_ref: ?[]const u8 = null,
    /// Override for the C symbol name.
    c_name: ?[]const u8 = null,
};
/// Payload of a `library_decl` node.
pub const LibraryDecl = struct {
    /// Name of the library.
    lib_name: []const u8,
    /// Name of the sx-side constant.
    name: []const u8,
};
/// Payload of a `framework_decl` node.
pub const FrameworkDecl = struct {
    /// Framework name, e.g. "Foundation".
    name: []const u8,
};
/// Payload of a `function_type_expr` node.
pub const FunctionTypeExpr = struct {
    /// Type nodes of the parameters.
    param_types: []const *Node,
    /// Optional parameter names, kept for documentation.
    param_names: ?[]const ?[]const u8 = null,
    /// Return type node; `null` means a void return.
    return_type: ?*Node,
    /// Calling convention; `.default` unless set.
    call_conv: CallingConvention = .default,
};
/// Payload of a `closure_type_expr` node.
pub const ClosureTypeExpr = struct {
    /// Type nodes of the parameters.
    param_types: []const *Node,
    /// Optional parameter names, kept for documentation.
    param_names: ?[]const ?[]const u8 = null,
    /// Return type node; `null` means a void return.
    return_type: ?*Node,
    /// Variadic heterogeneous type pack that trails the param list.
    /// `Closure(..$args) -> R` ⇒ pack_name = "args", param_types = [].
    /// `Closure(Prefix, ..$args)` ⇒ pack_name = "args",
    /// param_types = [Prefix].
    pack_name: ?[]const u8 = null,
    /// Projection applied to the pack: `Closure(..sources.T) -> R` ⇒
    /// pack_name = "sources", pack_projection = "T". `null` for a bare
    /// `..pack`.
    pack_projection: ?[]const u8 = null,
};
/// Payload of a `tuple_type_expr` node.
pub const TupleTypeExpr = struct {
    /// Type nodes of the tuple's fields.
    field_types: []const *Node,
    /// Field names; `null` for a positional tuple.
    field_names: ?[]const []const u8,
};
/// Payload of a `tuple_literal` node.
pub const TupleLiteral = struct {
    /// Elements of the literal.
    elements: []const TupleElement,
};
/// One element of a `TupleLiteral`.
pub const TupleElement = struct {
    /// Element name; `null` for a positional element.
    name: ?[]const u8,
    /// Node for the element's value.
    value: *Node,
};
/// Payload of a `ufcs_alias` node.
pub const UfcsAlias = struct {
    /// Name of the alias.
    name: []const u8,
    /// Name the alias points at.
    target: []const u8,
};
/// Payload of a `c_import_decl` node.
pub const CImportDecl = struct {
    /// Entries of the import's include list.
    includes: []const []const u8,
    /// Entries of the import's source list.
    sources: []const []const u8,
    /// Entries of the import's define list.
    defines: []const []const u8,
    /// Entries of the import's flag list.
    flags: []const []const u8,
    /// Optional name of the import; `null` unless set.
    name: ?[]const u8 = null,
    /// Filled in during import resolution; empty until then.
    bitcode_paths: []const []const u8 = &.{},
};
/// One method entry of a `ProtocolDecl`.
pub const ProtocolMethodDecl = struct {
    /// Method name.
    name: []const u8,
    /// type_expr nodes for the parameter types (implicit self excluded).
    params: []const *Node,
    /// Parameter names (implicit self excluded).
    param_names: []const []const u8,
    /// One span per `param_names` entry; empty for synthesized methods.
    param_name_spans: []const Span = &.{},
    /// Return type node; `null` means a void return.
    return_type: ?*Node,
    /// `null` marks a required method; non-null is the default
    /// implementation.
    default_body: ?*Node,
};
/// Payload of a `protocol_decl` node.
pub const ProtocolDecl = struct {
    /// Declared protocol name.
    name: []const u8,
    /// Methods of the protocol.
    methods: []const ProtocolMethodDecl,
    /// `#inline` — embedded fn ptrs instead of a vtable pointer.
    is_inline: bool = false,
    /// Type parameters, for `protocol(Target: Type) { ... }`.
    type_params: []const StructTypeParam = &.{},
};
/// Identifies the foreign runtime and entity kind of a `ForeignClassDecl`
/// (its `runtime` field).
pub const ForeignRuntime = enum {
    jni_class,
    jni_interface,
    objc_class,
    objc_protocol,
    swift_class,
    swift_struct,
    swift_protocol,
};
/// A method member of a foreign class declaration
/// (`ForeignClassMember.method`).
pub const ForeignMethodDecl = struct {
    /// Method name.
    name: []const u8,
    /// type_expr nodes — for instance methods the first is `*Self`.
    params: []const *Node,
    /// Parameter names.
    param_names: []const []const u8,
    /// One span per `param_names` entry; empty for synthesized methods.
    param_name_spans: []const Span = &.{},
    /// Return type node; `null` means void.
    return_type: ?*Node,
    /// True for `static name :: ...`.
    is_static: bool = false,
    /// `#jni_method_descriptor("(Sig)Ret")` — JNI runtime only.
    jni_descriptor_override: ?[]const u8 = null,
    /// `#selector("explicit:string")` — Obj-C runtime only (Phase 3.2).
    selector_override: ?[]const u8 = null,
    /// sx-side implementation (defined-class only). `null` means a
    /// `;`-terminated decl that references an inherited / external method.
    body: ?*Node = null,
};
/// A field member of a foreign class declaration
/// (`ForeignClassMember.field`).
pub const ForeignFieldDecl = struct {
    /// Field name.
    name: []const u8,
    /// type_expr node for the field's type.
    field_type: *Node,
    /// True iff the declaration has a `#property[(...)]` directive (M2.2).
    /// On foreign classes this means getter/setter dispatch is synthesized
    /// through `objc_msgSend`. On sx-defined classes it adds
    /// runtime-introspectable property metadata plus ARC-aware setter
    /// emission (Month 4 wires the latter).
    is_property: bool = false,
    /// Comma-separated modifier names taken from
    /// `#property(strong, weak, ...)`. They are stored verbatim; semantic
    /// interpretation lands in M4.2.
    property_modifiers: []const []const u8 = &.{},
};
/// One member of a `ForeignClassDecl`.
pub const ForeignClassMember = union(enum) {
    /// A method declaration.
    method: ForeignMethodDecl,
    /// A field declaration; JNI runtime only (sema-checked in a later
    /// step).
    field: ForeignFieldDecl,
    /// sx-side alias name (right of `#extends`).
    extends: []const u8,
    /// sx-side alias name (right of `#implements`).
    implements: []const u8,
};
/// Payload of a `foreign_class_decl` node.
pub const ForeignClassDecl = struct {
    /// sx-side alias (left of `::`).
    name: []const u8,
    /// Directive argument: "java/path/Foo" / "NSString" / "Foundation.URL".
    foreign_path: []const u8,
    /// Which foreign runtime / entity kind this declaration targets.
    runtime: ForeignRuntime,
    /// Members of the class; empty unless set.
    members: []const ForeignClassMember = &.{},
    /// `#foreign #...` prefix — the class is provided by the foreign
    /// runtime and is only referenced here.
    is_foreign: bool = false,
    /// `#jni_main` / `#objc_main` — the class is the launchable entry
    /// (Activity / UIApplicationDelegate / ...).
    is_main: bool = false,
};
/// Payload of a `jni_env_block` node.
pub const JniEnvBlock = struct {
    /// Expression that yields the *JNIEnv for this scope.
    env: *Node,
    /// Block (or expression) that runs with `env` scoped via TL push/pop.
    body: *Node,
};
/// Payload of an `impl_block` node: a protocol implementation for a target
/// type.
pub const ImplBlock = struct {
    /// Name of the implemented protocol.
    protocol_name: []const u8,
    /// Name of the type the protocol is implemented for.
    target_type: []const u8,
    /// Type parameters of the target, for `impl P for List($T)`.
    target_type_params: []const StructTypeParam = &.{},
    /// fn_decl nodes of the implementation.
    methods: []const *Node,
    /// Type args on the protocol side, for `impl Into(Block) for Source`.
    protocol_type_args: []const *Node = &.{},
    /// Populated for parameterised-protocol impls; carries non-identifier
    /// source spellings (e.g. `Closure() -> void`).
    target_type_expr: ?*Node = null,
};