This commit is contained in:
agra
2026-02-16 01:58:30 +02:00
parent b20676375d
commit c8ceceed0f
6 changed files with 104 additions and 236 deletions

View File

@@ -119,9 +119,9 @@ main :: {
// String with escapes
print("escapes: hello\tworld\n");
// Multi-line string (backtick)
ml := `line1
line2`;
// Multi-line string
ml := "line1
line2";
print("multiline: {}\n", ml);
// Heredoc string

View File

@@ -19,20 +19,19 @@ Line comments start with `//` and extend to end of line.
|-----------|---------------------|---------|
| Integer | `0`, `42`, `0xFF`, `0b1010` | `s64` |
| Float | `0.3`, `0.9` | `f32` |
| String | `"Hello"`, `"z: {z}"` | `string` |
| Multi-line String | `` `line1\nline2` `` | `string` |
| String | `"Hello"`, `"z: {z}"` | `string` (may span multiple lines) |
| Heredoc String | `#string END`...`END` | `string` |
| Boolean | `true`, `false` | `bool` |
| Enum | `.variant1` | inferred from context |
| Undefined | `---` | context-dependent |
**Multi-line strings** use backtick delimiters (`` ` ``). They may span multiple lines and support the same escape sequences as regular strings (`\n`, `\t`, `\r`, `\\`, `\"`, `` \` ``, `\0`). Content between backticks is taken verbatim (no indentation stripping).
String literals support escape sequences (`\n`, `\t`, `\r`, `\\`, `\"`, `\0`) and may span multiple lines directly:
```sx
shader_src := `#version 330 core
shader_src := "#version 330 core
void main() {
gl_Position = vec4(0.0);
}
`;
";
```
**Heredoc strings** use `#string DELIMITER` syntax (inspired by Jai). Content is completely raw — no escape processing. The delimiter is any identifier. Content starts after the newline following the delimiter and ends when the delimiter appears at column 0 of a line.

View File

@@ -127,16 +127,14 @@ pub const CodeGen = struct {
current_function: c.LLVMValueRef,
// Return type of the current function being generated
current_return_type: Type = .void_type,
// Scope save stack: each entry records shadowed names to restore on scope exit
scope_saves: std.ArrayList(std.ArrayList(ScopeEntry)),
// Defer stack: parallel to scope_saves, each entry holds deferred expressions
defer_stack: std.ArrayList(std.ArrayList(*Node)),
// Scope stack: each entry records shadowed names and deferred expressions for one scope
scope_stack: std.ArrayList(Scope),
// Compile-time globals: maps name to global variable info for #run results
comptime_globals: std.StringHashMap(ComptimeGlobal),
// Top-level #run expressions for side effects only
comptime_side_effects: std.ArrayList(*Node),
// Generic function templates: maps name to AST for deferred monomorphization
generic_templates: std.StringHashMap(GenericTemplate),
generic_templates: std.StringHashMap(ast.FnDecl),
// Instantiated generic functions: maps mangled name to LLVM function
generic_instances: std.StringHashMap(c.LLVMValueRef),
// Active type parameter bindings during generic instantiation (null when not instantiating)
@@ -146,7 +144,7 @@ pub const CodeGen = struct {
// Active comptime param AST nodes during generic function instantiation (for #insert substitution)
comptime_param_nodes: ?std.StringHashMap(*Node) = null,
// Generic struct templates: maps name to AST for deferred instantiation
generic_struct_templates: std.StringHashMap(GenericStructTemplate),
generic_struct_templates: std.StringHashMap(ast.StructDecl),
// Known namespace names (for import resolution)
namespaces: std.StringHashMap(void),
// Functions declared with #builtin (only available when imported)
@@ -231,13 +229,8 @@ pub const CodeGen = struct {
element_type_name: []const u8, // element type of the variadic slice (e.g. "s32")
};
const GenericTemplate = struct {
fd: ast.FnDecl,
};
const GenericStructTemplate = struct {
sd: ast.StructDecl,
};
// GenericTemplate and GenericStructTemplate used to be single-field wrappers;
// now the hashmaps store ast.FnDecl / ast.StructDecl directly.
const ComptimeGlobal = struct {
global: c.LLVMValueRef, // LLVM global variable
@@ -293,6 +286,11 @@ pub const CodeGen = struct {
prev: ?NamedValue, // null = name didn't exist before this scope
};
/// Bookkeeping for one active scope; entries live on `scope_stack`.
const Scope = struct {
/// Names shadowed inside this scope, each paired with the value it had
/// before (see `ScopeEntry`), so they can be restored when the scope ends.
saves: std.ArrayList(ScopeEntry),
/// Expressions registered by `defer` statements in this scope, in the
/// order they were encountered; they are generated in reverse on exit.
defers: std.ArrayList(*Node),
};
const NamedValue = struct {
ptr: c.LLVMValueRef, // alloca pointer
ty: Type, // sx type
@@ -380,13 +378,12 @@ pub const CodeGen = struct {
.enum_backing_types = std.StringHashMap(c.LLVMTypeRef).init(allocator),
.builtins = null,
.current_function = null,
.scope_saves = std.ArrayList(std.ArrayList(ScopeEntry)).empty,
.defer_stack = std.ArrayList(std.ArrayList(*Node)).empty,
.scope_stack = std.ArrayList(Scope).empty,
.comptime_globals = std.StringHashMap(ComptimeGlobal).init(allocator),
.comptime_side_effects = std.ArrayList(*Node).empty,
.generic_templates = std.StringHashMap(GenericTemplate).init(allocator),
.generic_templates = std.StringHashMap(ast.FnDecl).init(allocator),
.generic_instances = std.StringHashMap(c.LLVMValueRef).init(allocator),
.generic_struct_templates = std.StringHashMap(GenericStructTemplate).init(allocator),
.generic_struct_templates = std.StringHashMap(ast.StructDecl).init(allocator),
.namespaces = std.StringHashMap(void).init(allocator),
.builtin_functions = std.StringHashMap(void).init(allocator),
.fn_signatures = std.StringHashMap([]const u8).init(allocator),
@@ -451,40 +448,24 @@ pub const CodeGen = struct {
return self.emitErrorFmt("unknown enum type '{s}'", .{name});
}
fn lookupStructInfo(self: *CodeGen, name: []const u8) ?StructInfo {
fn lookupType(self: *CodeGen, name: []const u8, comptime tag: std.meta.Tag(TypeRegistryEntry)) ?switch (tag) {
.struct_info => StructInfo,
.tagged_enum => TaggedEnumInfo,
.union_info => UnionInfo,
.plain_enum => []const []const u8,
.alias => []const u8,
} {
if (self.type_registry.get(name)) |e| {
if (e == .struct_info) return e.struct_info;
if (e == tag) return @field(e, @tagName(tag));
}
return null;
}
/// Return the tagged-enum info registered under `name`, or null when the
/// name is not in the type registry or is registered as another kind.
fn lookupTaggedEnumInfo(self: *CodeGen, name: []const u8) ?TaggedEnumInfo {
    const entry = self.type_registry.get(name) orelse return null;
    return switch (entry) {
        .tagged_enum => |info| info,
        else => null,
    };
}
/// Return the union info registered under `name`, or null when the name is
/// not in the type registry or is registered as another kind.
fn lookupUnionInfo(self: *CodeGen, name: []const u8) ?UnionInfo {
    const entry = self.type_registry.get(name) orelse return null;
    return switch (entry) {
        .union_info => |info| info,
        else => null,
    };
}
/// Return the variant-name list stored for the plain enum `name`, or null
/// when the name is not in the type registry or is not a plain enum.
fn lookupEnumVariants(self: *CodeGen, name: []const u8) ?[]const []const u8 {
    const entry = self.type_registry.get(name) orelse return null;
    return switch (entry) {
        .plain_enum => |variants| variants,
        else => null,
    };
}
/// Return the string stored for the alias entry `name`, or null when the
/// name is not in the type registry or is not registered as an alias.
fn lookupAlias(self: *CodeGen, name: []const u8) ?[]const u8 {
    const entry = self.type_registry.get(name) orelse return null;
    return switch (entry) {
        .alias => |target| target,
        else => null,
    };
}
// Typed convenience wrappers: each forwards to `lookupType` with a fixed
// registry tag and returns that tag's payload, or null when `name` is
// unregistered or registered under a different tag.
fn lookupStructInfo(self: *CodeGen, name: []const u8) ?StructInfo { return self.lookupType(name, .struct_info); }
fn lookupTaggedEnumInfo(self: *CodeGen, name: []const u8) ?TaggedEnumInfo { return self.lookupType(name, .tagged_enum); }
fn lookupUnionInfo(self: *CodeGen, name: []const u8) ?UnionInfo { return self.lookupType(name, .union_info); }
fn lookupEnumVariants(self: *CodeGen, name: []const u8) ?[]const []const u8 { return self.lookupType(name, .plain_enum); }
fn lookupAlias(self: *CodeGen, name: []const u8) ?[]const u8 { return self.lookupType(name, .alias); }
fn isRegisteredType(self: *CodeGen, name: []const u8) bool {
return self.type_registry.contains(name);
@@ -991,50 +972,45 @@ pub const CodeGen = struct {
fn pushScope(self: *CodeGen) !void {
var saves = std.ArrayList(ScopeEntry).empty;
try saves.ensureTotalCapacity(self.allocator, 8);
try self.scope_saves.append(self.allocator, saves);
var defers = std.ArrayList(*Node).empty;
try defers.ensureTotalCapacity(self.allocator, 4);
try self.defer_stack.append(self.allocator, defers);
try self.scope_stack.append(self.allocator, .{ .saves = saves, .defers = defers });
}
fn popScope(self: *CodeGen) !void {
if (self.scope_stack.items.len == 0) return;
const scope = self.scope_stack.items[self.scope_stack.items.len - 1];
// 1. Execute deferred expressions in LIFO order
if (self.defer_stack.items.len > 0) {
const defers = self.defer_stack.items[self.defer_stack.items.len - 1];
var i: usize = defers.items.len;
while (i > 0) {
i -= 1;
_ = try self.genExpr(defers.items[i]);
}
_ = self.defer_stack.pop();
var i: usize = scope.defers.items.len;
while (i > 0) {
i -= 1;
_ = try self.genExpr(scope.defers.items[i]);
}
// 2. Restore shadowed variables
if (self.scope_saves.items.len > 0) {
const saves = self.scope_saves.items[self.scope_saves.items.len - 1];
// Restore in reverse order
var i: usize = saves.items.len;
while (i > 0) {
i -= 1;
const entry = saves.items[i];
if (entry.prev) |prev| {
self.named_values.putAssumeCapacity(entry.name, prev);
} else {
_ = self.named_values.remove(entry.name);
}
// 2. Restore shadowed variables in reverse order
i = scope.saves.items.len;
while (i > 0) {
i -= 1;
const entry = scope.saves.items[i];
if (entry.prev) |prev| {
self.named_values.putAssumeCapacity(entry.name, prev);
} else {
_ = self.named_values.remove(entry.name);
}
_ = self.scope_saves.pop();
}
_ = self.scope_stack.pop();
}
/// Emit all pending deferred expressions from all active scopes (LIFO order,
/// innermost scope first). Does NOT pop the scope stack — used before `return`
/// so that popScope() can still clean up the data structures later.
fn emitAllDefers(self: *CodeGen) !void {
var i: usize = self.defer_stack.items.len;
var i: usize = self.scope_stack.items.len;
while (i > 0) {
i -= 1;
const defers = self.defer_stack.items[i];
const defers = self.scope_stack.items[i].defers;
var j: usize = defers.items.len;
while (j > 0) {
j -= 1;
@@ -1044,8 +1020,8 @@ pub const CodeGen = struct {
}
fn saveShadowed(self: *CodeGen, name: []const u8) !void {
if (self.scope_saves.items.len == 0) return;
const top = &self.scope_saves.items[self.scope_saves.items.len - 1];
if (self.scope_stack.items.len == 0) return;
const top = &self.scope_stack.items[self.scope_stack.items.len - 1].saves;
const prev = self.named_values.get(name);
try top.append(self.allocator, .{ .name = name, .prev = prev });
}
@@ -1072,7 +1048,7 @@ pub const CodeGen = struct {
// External C function — register LLVM declaration (no body)
try self.registerFnDecl(fd, fd.name);
} else if (fd.type_params.len > 0) {
try self.generic_templates.put(fd.name, .{ .fd = fd });
try self.generic_templates.put(fd.name, fd);
} else {
try self.registerFnDecl(fd, fd.name);
}
@@ -1480,8 +1456,8 @@ pub const CodeGen = struct {
/// Instantiate a generic struct template with concrete arguments.
/// Returns the struct_type for the instantiated struct (possibly cached).
fn instantiateGenericStruct(self: *CodeGen, template_name: []const u8, tmpl: GenericStructTemplate, args: []const *Node) !Type {
const sd = tmpl.sd;
fn instantiateGenericStruct(self: *CodeGen, template_name: []const u8, tmpl: ast.StructDecl, args: []const *Node) !Type {
const sd = tmpl;
// Build bindings from template params + args
var type_bindings = std.StringHashMap(Type).init(self.allocator);
@@ -1575,8 +1551,8 @@ pub const CodeGen = struct {
/// Instantiate a type-returning function (e.g. Complex(u32)) by walking the body AST
/// to find `return struct { ... }` or `return union { ... }` and registering with bindings active.
fn instantiateTypeFunction(self: *CodeGen, alias_name: []const u8, template_name: []const u8, tmpl: GenericTemplate, args: []const *Node) !Type {
const fd = tmpl.fd;
fn instantiateTypeFunction(self: *CodeGen, alias_name: []const u8, template_name: []const u8, tmpl: ast.FnDecl, args: []const *Node) !Type {
const fd = tmpl;
// Build type bindings from params + args
var type_bindings = std.StringHashMap(Type).init(self.allocator);
@@ -1985,7 +1961,7 @@ pub const CodeGen = struct {
}
try self.fn_param_types.put(qualified, try param_types.toOwnedSlice(self.allocator));
} else if (fd.type_params.len > 0) {
try self.generic_templates.put(qualified, .{ .fd = fd });
try self.generic_templates.put(qualified, fd);
} else {
try self.registerFnDecl(fd, qualified);
}
@@ -2294,8 +2270,7 @@ pub const CodeGen = struct {
}
} else {
// Explicit return already emitted defers; just clean up scope stacks
if (self.defer_stack.items.len > 0) _ = self.defer_stack.pop();
if (self.scope_saves.items.len > 0) _ = self.scope_saves.pop();
if (self.scope_stack.items.len > 0) _ = self.scope_stack.pop();
}
}
@@ -2361,7 +2336,7 @@ pub const CodeGen = struct {
// Local declaration inside a function body
if (fd.type_params.len > 0) {
// Generic template / type function: register for lazy instantiation
try self.generic_templates.put(fd.name, .{ .fd = fd });
try self.generic_templates.put(fd.name, fd);
} else {
// Non-generic local function
// Save outer function state
@@ -2467,8 +2442,8 @@ pub const CodeGen = struct {
},
.defer_stmt => |ds| {
// Don't generate now — push onto current defer list for later execution
if (self.defer_stack.items.len > 0) {
const top = &self.defer_stack.items[self.defer_stack.items.len - 1];
if (self.scope_stack.items.len > 0) {
const top = &self.scope_stack.items[self.scope_stack.items.len - 1].defers;
try top.append(self.allocator, ds.expr);
}
return null;
@@ -3547,7 +3522,7 @@ pub const CodeGen = struct {
fn registerStructType(self: *CodeGen, sd: ast.StructDecl) anyerror!void {
// Generic struct: store as template instead of registering now
if (sd.type_params.len > 0) {
try self.generic_struct_templates.put(sd.name, .{ .sd = sd });
try self.generic_struct_templates.put(sd.name, sd);
return;
}
@@ -5640,8 +5615,8 @@ pub const CodeGen = struct {
);
}
fn genGenericCall(self: *CodeGen, qualified_name: []const u8, template: GenericTemplate, call_node: ast.Call) !c.LLVMValueRef {
const fd = template.fd;
fn genGenericCall(self: *CodeGen, qualified_name: []const u8, template: ast.FnDecl, call_node: ast.Call) !c.LLVMValueRef {
const fd = template;
// Check for runtime type dispatch: cast(runtime_type_var, any_val) as argument
if (self.current_match_tags) |match_tags| {
@@ -5904,11 +5879,11 @@ pub const CodeGen = struct {
/// For each type tag in match_tags, monomorphize the generic function and dispatch via switch.
fn genGenericCallWithRuntimeDispatch(
self: *CodeGen,
template: GenericTemplate,
template: ast.FnDecl,
call_node: ast.Call,
match_tags: []const u64,
) !c.LLVMValueRef {
const fd = template.fd;
const fd = template;
// Find the cast argument and extract the runtime type tag + any value source
var cast_arg_idx: usize = 0;
@@ -6166,11 +6141,9 @@ pub const CodeGen = struct {
try saved_named_values.put(entry.key_ptr.*, entry.value_ptr.*);
}
// Save scope_saves and defer_stack — generic body must not pollute caller's scope tracking
const saved_scope_saves = self.scope_saves;
const saved_defer_stack = self.defer_stack;
self.scope_saves = std.ArrayList(std.ArrayList(ScopeEntry)).empty;
self.defer_stack = std.ArrayList(std.ArrayList(*Node)).empty;
// Save scope stack — generic body must not pollute caller's scope tracking
const saved_scope_stack = self.scope_stack;
self.scope_stack = std.ArrayList(Scope).empty;
// Set type param bindings (save/restore to support nested generic instantiation)
const saved_bindings = self.type_param_bindings;
@@ -6279,9 +6252,8 @@ pub const CodeGen = struct {
}
saved_named_values.deinit();
// Restore scope_saves and defer_stack
self.scope_saves = saved_scope_saves;
self.defer_stack = saved_defer_stack;
// Restore scope stack
self.scope_stack = saved_scope_stack;
return function;
}
@@ -7114,7 +7086,7 @@ pub const CodeGen = struct {
break :blk null;
};
if (template) |tmpl| {
const gfd = tmpl.fd;
const gfd = tmpl;
// Build widened type bindings from all call args
var inferred_bindings = std.StringHashMap(Type).init(self.allocator);
for (gfd.params, 0..) |param, pi| {

View File

@@ -50,10 +50,6 @@ pub const Lexer = struct {
return self.lexString(start);
}
// Backtick (multi-line) string literals
if (c == '`') {
return self.lexBacktickString(start);
}
// Directives: #import, #insert, #run, #builtin, #foreign, #library, #string
if (c == '#') {
@@ -272,22 +268,6 @@ pub const Lexer = struct {
return self.makeToken(.invalid, start, self.index);
}
/// Lex a backtick-delimited (multi-line) string literal.
///
/// Called with `self.index` on the opening backtick; `start` is the offset
/// recorded for the beginning of the token. Scans forward to the closing
/// backtick, treating a backslash as escaping the byte that follows it, so
/// an escaped backtick does not end the literal.
///
/// Returns a `.string_literal` token covering both delimiters, or an
/// `.invalid` token ending at the end of the source when the literal is
/// unterminated.
fn lexBacktickString(self: *Lexer, start: u32) Token {
    self.index += 1; // skip opening `
    while (self.index < self.source.len) {
        const ch = self.source[self.index];
        if (ch == '`') {
            self.index += 1;
            return self.makeToken(.string_literal, start, self.index);
        }
        // Skip the escaped byte together with the backslash, but only when
        // such a byte exists: a backslash that is the last byte of the input
        // must not push the index past the end of the source, otherwise the
        // resulting invalid token would extend out of bounds.
        if (ch == '\\' and self.index + 1 < self.source.len) {
            self.index += 1;
        }
        self.index += 1;
    }
    // Unterminated string
    return self.makeToken(.invalid, start, self.index);
}
/// Lex a #string heredoc. Called after "#string" has been matched.
/// Syntax: #string DELIM\n...content...\nDELIM
@@ -491,28 +471,12 @@ test "lex string" {
try std.testing.expectEqualStrings("\"Hello\"", tok.slice("\"Hello\""));
}
test "lex backtick string" {
const source: [:0]const u8 = "`Hello`";
test "lex multiline string" {
const source: [:0]const u8 = "\"line1\nline2\nline3\"";
var lex = Lexer.init(source);
const tok = lex.next();
try std.testing.expectEqual(Tag.string_literal, tok.tag);
try std.testing.expectEqualStrings("`Hello`", tok.slice(source));
}
test "lex backtick multiline string" {
const source: [:0]const u8 = "`line1\nline2\nline3`";
var lex = Lexer.init(source);
const tok = lex.next();
try std.testing.expectEqual(Tag.string_literal, tok.tag);
try std.testing.expectEqualStrings("`line1\nline2\nline3`", tok.slice(source));
}
test "lex backtick string with escape" {
const source: [:0]const u8 = "`hello\\`world`";
var lex = Lexer.init(source);
const tok = lex.next();
try std.testing.expectEqual(Tag.string_literal, tok.tag);
try std.testing.expectEqualStrings("`hello\\`world`", tok.slice(source));
try std.testing.expectEqualStrings("\"line1\nline2\nline3\"", tok.slice(source));
}
test "lex #string heredoc" {

View File

@@ -375,6 +375,13 @@ pub const Type = union(enum) {
return false;
}
/// Format `fmt` with `args` and return the result as a slice allocated from
/// `allocator`.
///
/// The text is first rendered into a 128-byte stack buffer and copied out;
/// if it does not fit, the formatting is redone with `std.fmt.allocPrint`.
/// Allocation failures are propagated to the caller.
fn fmtAlloc(allocator: std.mem.Allocator, comptime fmt: []const u8, args: anytype) ![]const u8 {
    var stack_buf: [128]u8 = undefined;
    if (std.fmt.bufPrint(&stack_buf, fmt, args)) |rendered| {
        // Fits on the stack: hand back an owned copy of exactly that text.
        return try allocator.dupe(u8, rendered);
    } else |_| {
        // Too long for the stack buffer: let the allocator-backed formatter handle it.
        return try std.fmt.allocPrint(allocator, fmt, args);
    }
}
/// Format type name for mangling and display (e.g. "s32", "u8", "f64")
pub fn displayName(self: Type, allocator: std.mem.Allocator) ![]const u8 {
return switch (self) {
@@ -397,66 +404,11 @@ pub const Type = union(enum) {
.enum_type => |name| name,
.struct_type => |name| name,
.union_type => |name| name,
.slice_type => |info| {
var buf: [128]u8 = undefined;
const result = std.fmt.bufPrint(&buf, "[]{s}", .{info.element_name}) catch {
// Fall back to dynamic allocation for very long element names
var dyn = std.ArrayList(u8).empty;
try dyn.appendSlice(allocator, "[]");
try dyn.appendSlice(allocator, info.element_name);
return try dyn.toOwnedSlice(allocator);
};
return try allocator.dupe(u8, result);
},
.pointer_type => |info| {
var buf: [128]u8 = undefined;
const result = std.fmt.bufPrint(&buf, "*{s}", .{info.pointee_name}) catch {
var dyn = std.ArrayList(u8).empty;
try dyn.appendSlice(allocator, "*");
try dyn.appendSlice(allocator, info.pointee_name);
return try dyn.toOwnedSlice(allocator);
};
return try allocator.dupe(u8, result);
},
.many_pointer_type => |info| {
var buf: [128]u8 = undefined;
const result = std.fmt.bufPrint(&buf, "[*]{s}", .{info.element_name}) catch {
var dyn = std.ArrayList(u8).empty;
try dyn.appendSlice(allocator, "[*]");
try dyn.appendSlice(allocator, info.element_name);
return try dyn.toOwnedSlice(allocator);
};
return try allocator.dupe(u8, result);
},
.array_type => |info| {
var buf: [128]u8 = undefined;
const result = std.fmt.bufPrint(&buf, "[{d}]{s}", .{ info.length, info.element_name }) catch {
var dyn = std.ArrayList(u8).empty;
try dyn.appendSlice(allocator, "[");
var tmp: [10]u8 = undefined;
const len_str = std.fmt.bufPrint(&tmp, "{d}", .{info.length}) catch unreachable;
try dyn.appendSlice(allocator, len_str);
try dyn.appendSlice(allocator, "]");
try dyn.appendSlice(allocator, info.element_name);
return try dyn.toOwnedSlice(allocator);
};
return try allocator.dupe(u8, result);
},
.vector_type => |info| {
var buf: [128]u8 = undefined;
const result = std.fmt.bufPrint(&buf, "Vector({d},{s})", .{ info.length, info.element_name }) catch {
var dyn = std.ArrayList(u8).empty;
try dyn.appendSlice(allocator, "Vector(");
var tmp: [10]u8 = undefined;
const len_str = std.fmt.bufPrint(&tmp, "{d}", .{info.length}) catch unreachable;
try dyn.appendSlice(allocator, len_str);
try dyn.appendSlice(allocator, ",");
try dyn.appendSlice(allocator, info.element_name);
try dyn.appendSlice(allocator, ")");
return try dyn.toOwnedSlice(allocator);
};
return try allocator.dupe(u8, result);
},
.slice_type => |info| return fmtAlloc(allocator, "[]{s}", .{info.element_name}),
.pointer_type => |info| return fmtAlloc(allocator, "*{s}", .{info.pointee_name}),
.many_pointer_type => |info| return fmtAlloc(allocator, "[*]{s}", .{info.element_name}),
.array_type => |info| return fmtAlloc(allocator, "[{d}]{s}", .{ info.length, info.element_name }),
.vector_type => |info| return fmtAlloc(allocator, "Vector({d},{s})", .{ info.length, info.element_name }),
.function_type => |info| {
var buf = std.ArrayList(u8).empty;
try buf.append(allocator, '(');

View File

@@ -8,39 +8,20 @@ pub fn unescapeString(allocator: std.mem.Allocator, raw: []const u8) ![]u8 {
while (i < raw.len) {
if (raw[i] == '\\' and i + 1 < raw.len) {
i += 1;
switch (raw[i]) {
'n' => {
result[j] = '\n';
},
't' => {
result[j] = '\t';
},
'r' => {
result[j] = '\r';
},
'\\' => {
result[j] = '\\';
},
'"' => {
result[j] = '"';
},
'0' => {
result[j] = 0;
},
'`' => {
result[j] = '`';
},
else => {
result[j] = raw[i];
},
}
j += 1;
i += 1;
result[j] = switch (raw[i]) {
'n' => '\n',
't' => '\t',
'r' => '\r',
'\\' => '\\',
'"' => '"',
'0' => 0,
else => raw[i],
};
} else {
result[j] = raw[i];
j += 1;
i += 1;
}
j += 1;
i += 1;
}
return result[0..j];
}