This commit is contained in:
agra
2026-02-12 16:23:42 +02:00
parent 1087bd1977
commit dab162bfe4
13 changed files with 1190 additions and 775 deletions

View File

@@ -118,6 +118,7 @@ pub const BoolLiteral = struct {
/// AST payload for a string literal expression.
pub const StringLiteral = struct {
/// Literal text as stored by the parser. For quoted literals the parser
/// passes the token text with its first and last byte removed; for `#string`
/// heredocs it passes the token text unchanged.
raw: []const u8,
/// When true, code generation uses `raw` verbatim instead of passing it
/// through `unescapeString`. The parser sets this for `#string` heredocs.
is_raw: bool = false,
};
pub const Identifier = struct {

View File

@@ -2617,6 +2617,9 @@ pub const CodeGen = struct {
'0' => {
result[j] = 0;
},
'`' => {
result[j] = '`';
},
else => {
result[j] = raw[i];
},
@@ -2648,10 +2651,10 @@ pub const CodeGen = struct {
return c.LLVMConstInt(i1_type, if (lit.value) 1 else 0, 0);
},
.string_literal => |lit| {
const unescaped = try unescapeString(self.allocator, lit.raw);
const str_z = try self.allocator.dupeZ(u8, unescaped);
const content = if (lit.is_raw) lit.raw else try unescapeString(self.allocator, lit.raw);
const str_z = try self.allocator.dupeZ(u8, content);
const ptr = c.LLVMBuildGlobalStringPtr(self.builder, str_z.ptr, "str");
return self.buildStringSlice(ptr, @intCast(unescaped.len));
return self.buildStringSlice(ptr, @intCast(content.len));
},
.identifier => |ident| {
if (self.named_values.get(ident.name)) |entry| {
@@ -3251,8 +3254,9 @@ pub const CodeGen = struct {
// String literal → pointer context: produce raw pointer directly (no {ptr, len} wrapping)
if (node.data == .string_literal and target_ty.isPointer()) {
const unescaped = try unescapeString(self.allocator, node.data.string_literal.raw);
const str_z = try self.allocator.dupeZ(u8, unescaped);
const lit = node.data.string_literal;
const content = if (lit.is_raw) lit.raw else try unescapeString(self.allocator, lit.raw);
const str_z = try self.allocator.dupeZ(u8, content);
return c.LLVMBuildGlobalStringPtr(self.builder, str_z.ptr, "str");
}
@@ -5329,13 +5333,14 @@ pub const CodeGen = struct {
if (self.comptime_param_nodes) |cpn| {
if (cpn.get(param.name)) |node| {
if (node.data == .string_literal) {
const raw = node.data.string_literal.raw;
const inner = if (raw.len >= 2 and raw[0] == '"' and raw[raw.len - 1] == '"')
const slit = node.data.string_literal;
const raw = slit.raw;
const inner = if (!slit.is_raw and raw.len >= 2 and raw[0] == '"' and raw[raw.len - 1] == '"')
raw[1 .. raw.len - 1]
else
raw;
const unescaped = try unescapeString(self.allocator, inner);
const str_val = self.buildConstStr(unescaped);
const content = if (slit.is_raw) inner else try unescapeString(self.allocator, inner);
const str_val = self.buildConstStr(content);
const param_name_z = try self.allocator.dupeZ(u8, param.name);
const alloca = c.LLVMBuildAlloca(self.builder, self.getStringStructType(), param_name_z.ptr);
_ = c.LLVMBuildStore(self.builder, str_val, alloca);

View File

@@ -50,8 +50,24 @@ pub const Lexer = struct {
return self.lexString(start);
}
// Directives: #import, #insert, #run, #builtin, #foreign, #library
// Backtick (multi-line) string literals
if (c == '`') {
return self.lexBacktickString(start);
}
// Directives: #import, #insert, #run, #builtin, #foreign, #library, #string
if (c == '#') {
// #string needs special handling (heredoc)
const str_kw = "#string";
const str_len: u32 = str_kw.len;
if (self.source.len >= start + str_len and
std.mem.eql(u8, self.source[start .. start + str_len], str_kw) and
(start + str_len >= self.source.len or !isIdentContinue(self.source[start + str_len])))
{
self.index = start + str_len;
return self.lexHeredoc(start);
}
const directives = .{
.{ "#import", Tag.hash_import },
.{ "#insert", Tag.hash_insert },
@@ -254,6 +270,81 @@ pub const Lexer = struct {
return self.makeToken(.invalid, start, self.index);
}
/// Lex a backtick-delimited string literal, which may span multiple lines.
///
/// `start` is the offset used as the beginning of the produced token. On
/// success the token is a `.string_literal` that includes both backticks.
/// If the input ends before a closing backtick is found, an `.invalid`
/// token covering `start` up to the end of the scanned input is returned.
fn lexBacktickString(self: *Lexer, start: u32) Token {
    self.index += 1; // skip opening `
    while (self.index < self.source.len) {
        const ch = self.source[self.index];
        if (ch == '`') {
            self.index += 1;
            return self.makeToken(.string_literal, start, self.index);
        }
        // A backslash also consumes the byte after it, so "\`" does not end
        // the literal. Only skip that byte when it exists: a backslash as the
        // very last byte would otherwise leave the index one past the end of
        // the source and produce a token whose end is out of range.
        if (ch == '\\' and self.index + 1 < self.source.len) {
            self.index += 1;
        }
        self.index += 1;
    }
    // Unterminated string
    return self.makeToken(.invalid, start, self.index);
}
/// Lex the remainder of a `#string` heredoc; the caller has already consumed
/// the "#string" keyword itself.
///
/// Syntax: `#string DELIM`, a newline, the content lines, then a line that
/// begins with DELIM. Anything after DELIM on the header line is ignored.
///
/// On success the returned `.raw_string_literal` token spans only the content
/// (from the byte after the header's newline up to the start of the closing
/// delimiter line) and the lexer is positioned just past the closing
/// delimiter. An `.invalid` token starting at `directive_start` is returned
/// when the delimiter name is missing, the header line has no newline, or the
/// closing delimiter is never found.
fn lexHeredoc(self: *Lexer, directive_start: u32) Token {
    const src = self.source;

    // Horizontal whitespace between "#string" and the delimiter name.
    while (self.index < src.len) : (self.index += 1) {
        const ch = src[self.index];
        if (ch != ' ' and ch != '\t') break;
    }

    // The delimiter must be an identifier.
    const name_begin = self.index;
    const has_name = name_begin < src.len and isIdentStart(src[name_begin]);
    if (!has_name) {
        return self.makeToken(.invalid, directive_start, self.index);
    }
    while (self.index < src.len and isIdentContinue(src[self.index])) {
        self.index += 1;
    }
    const closing = src[name_begin..self.index];

    // Discard whatever follows the delimiter on the header line.
    while (self.index < src.len and src[self.index] != '\n') {
        self.index += 1;
    }
    if (self.index >= src.len) {
        return self.makeToken(.invalid, directive_start, self.index);
    }
    self.index += 1; // step over the header's newline

    const body_begin = self.index;

    // Walk line by line; a line that begins with the delimiter (and is not
    // merely a longer identifier sharing that prefix) closes the heredoc.
    while (self.index < src.len) {
        const line_begin = self.index;

        if (std.mem.startsWith(u8, src[line_begin..], closing)) {
            const after = line_begin + closing.len;
            if (after >= src.len or !isIdentContinue(src[after])) {
                self.index = line_begin + @as(u32, @intCast(closing.len));
                return self.makeToken(.raw_string_literal, body_begin, line_begin);
            }
        }

        // Consume the rest of this line, including its newline if present.
        while (self.index < src.len) {
            const ch = src[self.index];
            self.index += 1;
            if (ch == '\n') break;
        }
    }

    // Ran out of input without seeing the closing delimiter.
    return self.makeToken(.invalid, directive_start, self.index);
}
fn peek(self: *const Lexer) u8 {
if (self.index < self.source.len) {
return self.source[self.index];
@@ -398,6 +489,56 @@ test "lex string" {
try std.testing.expectEqualStrings("\"Hello\"", tok.slice("\"Hello\""));
}
test "lex backtick string" {
    // A single-line backtick literal lexes as one string_literal token whose
    // span keeps both backticks.
    const input: [:0]const u8 = "`Hello`";
    var lexer = Lexer.init(input);
    const token = lexer.next();
    try std.testing.expectEqual(Tag.string_literal, token.tag);
    try std.testing.expectEqualStrings("`Hello`", token.slice(input));
}
test "lex backtick multiline string" {
    // Newlines inside the backticks belong to the same token.
    const input: [:0]const u8 = "`line1\nline2\nline3`";
    var lexer = Lexer.init(input);
    const token = lexer.next();
    try std.testing.expectEqual(Tag.string_literal, token.tag);
    try std.testing.expectEqualStrings("`line1\nline2\nline3`", token.slice(input));
}
test "lex backtick string with escape" {
    // A backslash-escaped backtick must not terminate the literal.
    const input: [:0]const u8 = "`hello\\`world`";
    var lexer = Lexer.init(input);
    const token = lexer.next();
    try std.testing.expectEqual(Tag.string_literal, token.tag);
    try std.testing.expectEqualStrings("`hello\\`world`", token.slice(input));
}
test "lex #string heredoc" {
    // The token covers only the content lines, including the newline that
    // precedes the closing delimiter.
    const input: [:0]const u8 = "#string END\nhello world\nEND";
    var lexer = Lexer.init(input);
    const token = lexer.next();
    try std.testing.expectEqual(Tag.raw_string_literal, token.tag);
    try std.testing.expectEqualStrings("hello world\n", token.slice(input));
}
test "lex #string heredoc multiline" {
    // Content is taken verbatim: a line starting with '#' inside the heredoc
    // is not lexed as a directive.
    const input: [:0]const u8 = "#string GLSL\n#version 330\nvoid main() {}\nGLSL";
    var lexer = Lexer.init(input);
    const token = lexer.next();
    try std.testing.expectEqual(Tag.raw_string_literal, token.tag);
    try std.testing.expectEqualStrings("#version 330\nvoid main() {}\n", token.slice(input));
}
test "lex #string heredoc followed by semicolon" {
    // Lexing resumes right after the closing delimiter, so the ';' that
    // follows it comes out as its own token.
    const input: [:0]const u8 = "#string END\ncontent\nEND;";
    var lexer = Lexer.init(input);

    const heredoc = lexer.next();
    try std.testing.expectEqual(Tag.raw_string_literal, heredoc.tag);
    try std.testing.expectEqualStrings("content\n", heredoc.slice(input));

    const after = lexer.next();
    try std.testing.expectEqual(Tag.semicolon, after.tag);
}
test "lex hex literal" {
var lex = Lexer.init("0xFF 0X1A");
const tok1 = lex.next();

View File

@@ -1,48 +1,207 @@
const std = @import("std");
const sx = struct {
pub const ast = @import("../ast.zig");
pub const parser = @import("../parser.zig");
pub const sema = @import("../sema.zig");
pub const imports = @import("../imports.zig");
};
/// One import declaration extracted from a document's AST.
pub const Import = struct {
/// Namespace name. null for flat imports.
ns: ?[]const u8,
/// Path of the imported file as resolved by `analyzeDocument`: the import's
/// own path when the importing file's directory is ".", otherwise that
/// directory joined with the import path.
/// NOTE(review): nothing visible here makes the result absolute — confirm
/// before relying on it being an absolute path.
path: []const u8,
};
/// Per-file state tracked by the document store: the source text plus the
/// parse and sema results derived from it.
pub const Document = struct {
    /// File path of this document; the store also uses it as the lookup key.
    path: []const u8,
    /// Sentinel-terminated source text of this file.
    source: [:0]const u8,
    /// LSP version (from didOpen/didChange); -1 for documents loaded from disk.
    version: i64,
    /// AST root for this file only (not merged); null until parsed.
    root: ?*sx.ast.Node,
    /// Sema results for this file (references are relative to this source);
    /// null until analysis has succeeded.
    sema: ?sx.sema.SemaResult,
    /// Import declarations extracted from this file's AST.
    imports: []const Import,

    /// Return the symbols recorded by sema for this document, or an empty
    /// slice when no sema result is available.
    pub fn topLevelSymbols(self: *const Document) []const sx.sema.Symbol {
        if (self.sema) |result| return result.symbols;
        return &.{};
    }
};
pub const DocumentStore = struct {
documents: std.StringHashMap(Document),
allocator: std.mem.Allocator,
io: std.Io,
/// All loaded documents keyed by resolved file path.
by_path: std.StringHashMap(*Document),
pub const Document = struct {
uri: []const u8,
text: []const u8,
version: i64,
};
pub fn init(allocator: std.mem.Allocator) DocumentStore {
pub fn init(allocator: std.mem.Allocator, io: std.Io) DocumentStore {
return .{
.documents = std.StringHashMap(Document).init(allocator),
.allocator = allocator,
.io = io,
.by_path = std.StringHashMap(*Document).init(allocator),
};
}
pub fn open(self: *DocumentStore, uri: []const u8, text: []const u8, version: i64) !void {
const uri_copy = try self.allocator.dupe(u8, uri);
const text_copy = try self.allocator.dupe(u8, text);
try self.documents.put(uri_copy, .{
.uri = uri_copy,
.text = text_copy,
.version = version,
});
/// Get the document for `path`, loading it from disk if it is not in the store yet.
///
/// A newly loaded document gets version -1 and has no analysis results.
/// Files larger than 10 MiB are not loaded. Any failure to read the file is
/// reported as `error.FileNotFound`; allocation failures while copying the
/// contents or registering the document are propagated.
pub fn getOrLoad(self: *DocumentStore, path: []const u8) !*Document {
    if (self.by_path.get(path)) |doc| return doc;

    const bytes = std.Io.Dir.readFileAlloc(.cwd(), self.io, path, self.allocator, .limited(10 * 1024 * 1024)) catch {
        return error.FileNotFound;
    };
    // Only the sentinel-terminated copy below is kept by the document, so the
    // buffer returned by the read must be released here or it leaks.
    defer self.allocator.free(bytes);

    const source = try self.allocator.dupeZ(u8, bytes);
    // If the document cannot be registered, nothing else refers to `source`.
    errdefer self.allocator.free(source);

    return self.createDocument(path, source, -1);
}
pub fn update(self: *DocumentStore, uri: []const u8, text: []const u8, version: i64) !void {
if (self.documents.getPtr(uri)) |doc| {
self.allocator.free(doc.text);
doc.text = try self.allocator.dupe(u8, text);
/// Register editor-provided source for `path` (didOpen/didChange).
///
/// If the store has no document for `path`, a new one is created. Otherwise
/// the existing document's source and version are replaced and its cached
/// parse tree, sema result and import list are cleared so the next analysis
/// starts from scratch. Returns the document in either case.
pub fn openOrUpdate(self: *DocumentStore, path: []const u8, source: [:0]const u8, version: i64) !*Document {
    const existing = self.by_path.get(path) orelse
        return self.createDocument(path, source, version);

    existing.source = source;
    existing.version = version;

    // Everything derived from the previous source is now stale.
    existing.root = null;
    existing.sema = null;
    existing.imports = &.{};

    return existing;
}
pub fn close(self: *DocumentStore, uri: []const u8) void {
if (self.documents.fetchRemove(uri)) |kv| {
self.allocator.free(kv.value.text);
self.allocator.free(kv.key);
}
/// Allocate a new, unanalyzed document for `path` and register it in `by_path`.
///
/// The path is copied; the copy serves both as `Document.path` and as the map
/// key. `source` is stored as given, without copying. On failure nothing is
/// registered and the allocations made here are released.
fn createDocument(self: *DocumentStore, path: []const u8, source: [:0]const u8, version: i64) !*Document {
    const doc = try self.allocator.create(Document);
    errdefer self.allocator.destroy(doc);

    const path_owned = try self.allocator.dupe(u8, path);
    errdefer self.allocator.free(path_owned);

    doc.* = .{
        .path = path_owned,
        .source = source,
        .version = version,
        .root = null,
        .sema = null,
        .imports = &.{},
    };

    try self.by_path.put(path_owned, doc);
    return doc;
}
pub fn get(self: *const DocumentStore, uri: []const u8) ?*const Document {
return self.documents.getPtr(uri);
/// Analyze a document: parse, resolve imports, run sema with imported symbols pre-registered.
///
/// Imported documents that have no sema result yet are analyzed first
/// (recursively). Import cycles are broken by tracking every document whose
/// analysis is currently in progress; an import pointing back at such a
/// document is skipped. Parse and sema failures are not reported as errors:
/// the function returns normally and leaves `doc.root` / `doc.sema` null.
pub fn analyzeDocument(self: *DocumentStore, doc: *Document) !void {
    // One set shared by the whole recursion. A per-call set would only know
    // about the current document and could not detect A -> B -> A.
    var in_progress = std.StringHashMap(void).init(self.allocator);
    defer in_progress.deinit();
    try self.analyzeDocumentGuarded(doc, &in_progress);
}

/// Worker for `analyzeDocument`. `in_progress` holds the paths of all
/// documents currently being analyzed further up the call stack.
fn analyzeDocumentGuarded(self: *DocumentStore, doc: *Document, in_progress: *std.StringHashMap(void)) !void {
    try in_progress.put(doc.path, {});
    defer _ = in_progress.remove(doc.path);

    // Parse if needed
    if (doc.root == null) {
        var p = sx.parser.Parser.init(self.allocator, doc.source);
        doc.root = p.parse() catch return;
    }
    const root = doc.root orelse return;

    // Extract imports from AST
    var import_list = std.ArrayList(Import).empty;
    errdefer import_list.deinit(self.allocator);
    const base_dir = sx.imports.dirName(doc.path);
    if (root.data == .root) {
        for (root.data.root.decls) |decl| {
            if (decl.data != .import_decl) continue;
            const imp = decl.data.import_decl;
            // Import paths are taken relative to the importing file's directory.
            const resolved_path = if (std.mem.eql(u8, base_dir, "."))
                imp.path
            else
                try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ base_dir, imp.path });
            try import_list.append(self.allocator, .{
                .ns = imp.name,
                .path = resolved_path,
            });
        }
    }
    doc.imports = try import_list.toOwnedSlice(self.allocator);

    // Analyze imported documents and pre-register their symbols
    var analyzer = sx.sema.Analyzer.init(self.allocator);

    for (doc.imports) |imp| {
        // Imports that cannot be loaded are skipped.
        const imp_doc = self.getOrLoad(imp.path) catch continue;

        // Cycle detection: this import is already being analyzed by a caller.
        if (in_progress.contains(imp_doc.path)) continue;

        // Ensure imported doc is analyzed. Best effort: a failure here simply
        // leaves `imp_doc.sema` null and the import is skipped below.
        if (imp_doc.sema == null) {
            self.analyzeDocumentGuarded(imp_doc, in_progress) catch {};
        }

        const imp_sema = imp_doc.sema orelse continue;

        if (imp.ns) |ns_name| {
            // Namespaced import: register one namespace symbol
            try analyzer.preRegisterSymbol(.{
                .name = ns_name,
                .kind = .namespace,
                .ty = null,
                .def_span = .{ .start = 0, .end = 0 },
                .scope_depth = 0,
                .origin = imp.path,
            });

            // Copy fn_signatures with namespace prefix
            var sig_it = imp_sema.fn_signatures.iterator();
            while (sig_it.next()) |entry| {
                const prefixed = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns_name, entry.key_ptr.* });
                try analyzer.fn_signatures.put(prefixed, entry.value_ptr.*);
            }

            // Copy struct_types with namespace prefix
            var struct_it = imp_sema.struct_types.iterator();
            while (struct_it.next()) |entry| {
                const prefixed = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns_name, entry.key_ptr.* });
                try analyzer.struct_types.put(prefixed, entry.value_ptr.*);
            }

            // Copy enum_types with namespace prefix
            var enum_it = imp_sema.enum_types.iterator();
            while (enum_it.next()) |entry| {
                const prefixed = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns_name, entry.key_ptr.* });
                try analyzer.enum_types.put(prefixed, entry.value_ptr.*);
            }
        } else {
            // Flat import: pre-register all top-level symbols with origin set
            for (imp_sema.symbols) |sym| {
                if (sym.scope_depth == 0) {
                    try analyzer.preRegisterSymbol(.{
                        .name = sym.name,
                        .kind = sym.kind,
                        .ty = sym.ty,
                        .def_span = sym.def_span,
                        .scope_depth = 0,
                        .origin = imp.path,
                    });
                }
            }

            // Copy fn_signatures as-is
            var sig_it = imp_sema.fn_signatures.iterator();
            while (sig_it.next()) |entry| {
                try analyzer.fn_signatures.put(entry.key_ptr.*, entry.value_ptr.*);
            }

            // Copy struct_types
            var struct_it = imp_sema.struct_types.iterator();
            while (struct_it.next()) |entry| {
                try analyzer.struct_types.put(entry.key_ptr.*, entry.value_ptr.*);
            }

            // Copy enum_types
            var enum_it = imp_sema.enum_types.iterator();
            while (enum_it.next()) |entry| {
                try analyzer.enum_types.put(entry.key_ptr.*, entry.value_ptr.*);
            }
        }
    }

    // Run sema on this file's own AST
    doc.sema = analyzer.analyze(root) catch null;
}
/// Look up an already-loaded document by path. Returns null when the store
/// has no document for `path`; unlike `getOrLoad`, this never reads from disk.
pub fn get(self: *const DocumentStore, path: []const u8) ?*Document {
return self.by_path.get(path);
}
};

File diff suppressed because it is too large Load Diff

View File

@@ -1087,6 +1087,12 @@ pub const Parser = struct {
self.advance();
return try self.createNode(start, .{ .string_literal = .{ .raw = raw[1 .. raw.len - 1] } });
},
.raw_string_literal => {
// #string heredoc — token span is content only, no stripping needed
const raw = self.tokenSlice(self.current);
self.advance();
return try self.createNode(start, .{ .string_literal = .{ .raw = raw, .is_raw = true } });
},
.kw_true => {
self.advance();
return try self.createNode(start, .{ .bool_literal = .{ .value = true } });

View File

@@ -23,6 +23,8 @@ pub const Symbol = struct {
ty: ?Type,
def_span: Span,
scope_depth: u32,
/// null = defined in the current file. Non-null = absolute path of the origin file.
origin: ?[]const u8 = null,
};
pub const Reference = struct {
@@ -521,6 +523,11 @@ pub const Analyzer = struct {
});
}
/// Add an imported symbol to this analyzer's symbol list so that references
/// in the file being analyzed can resolve to it. Fails only if the append
/// cannot allocate.
pub fn preRegisterSymbol(self: *Analyzer, sym: Symbol) !void {
    return self.symbols.append(self.allocator, sym);
}
fn resolveIdentifier(self: *Analyzer, name: []const u8, span: Span) !void {
// Search backwards to find the most recent declaration with this name
// that is at or above the current scope depth.

View File

@@ -3,6 +3,7 @@ pub const Tag = enum {
int_literal,
float_literal,
string_literal,
raw_string_literal,
// Identifiers and keywords
identifier,