strings

2026-02-12 16:23:42 +02:00
parent 1087bd1977
commit dab162bfe4
13 changed files with 1190 additions and 775 deletions
--- a/src/ast.zig
+++ b/src/ast.zig
@@ -118,6 +118,7 @@ pub const BoolLiteral = struct {

 pub const StringLiteral = struct {
    raw: []const u8,
+    /// True when the literal came from a `#string` heredoc. `raw` is then the
+    /// heredoc content exactly as written and code generation emits it without
+    /// running escape processing over it.
+    is_raw: bool = false,
 };

 pub const Identifier = struct {
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -2617,6 +2617,9 @@ pub const CodeGen = struct {
                    '0' => {
                        result[j] = 0;
                    },
+                    '`' => {
+                        result[j] = '`';
+                    },
                    else => {
                        result[j] = raw[i];
                    },
@@ -2648,10 +2651,10 @@ pub const CodeGen = struct {
                return c.LLVMConstInt(i1_type, if (lit.value) 1 else 0, 0);
            },
            .string_literal => |lit| {
-                const unescaped = try unescapeString(self.allocator, lit.raw);
-                const str_z = try self.allocator.dupeZ(u8, unescaped);
+                const content = if (lit.is_raw) lit.raw else try unescapeString(self.allocator, lit.raw);
+                const str_z = try self.allocator.dupeZ(u8, content);
                const ptr = c.LLVMBuildGlobalStringPtr(self.builder, str_z.ptr, "str");
-                return self.buildStringSlice(ptr, @intCast(unescaped.len));
+                return self.buildStringSlice(ptr, @intCast(content.len));
            },
            .identifier => |ident| {
                if (self.named_values.get(ident.name)) |entry| {
@@ -3251,8 +3254,9 @@ pub const CodeGen = struct {

        // String literal → pointer context: produce raw pointer directly (no {ptr, len} wrapping)
        if (node.data == .string_literal and target_ty.isPointer()) {
-            const unescaped = try unescapeString(self.allocator, node.data.string_literal.raw);
-            const str_z = try self.allocator.dupeZ(u8, unescaped);
+            const lit = node.data.string_literal;
+            const content = if (lit.is_raw) lit.raw else try unescapeString(self.allocator, lit.raw);
+            const str_z = try self.allocator.dupeZ(u8, content);
            return c.LLVMBuildGlobalStringPtr(self.builder, str_z.ptr, "str");
        }

@@ -5329,13 +5333,14 @@ pub const CodeGen = struct {
                if (self.comptime_param_nodes) |cpn| {
                    if (cpn.get(param.name)) |node| {
                        if (node.data == .string_literal) {
-                            const raw = node.data.string_literal.raw;
-                            const inner = if (raw.len >= 2 and raw[0] == '"' and raw[raw.len - 1] == '"')
+                            const slit = node.data.string_literal;
+                            const raw = slit.raw;
+                            const inner = if (!slit.is_raw and raw.len >= 2 and raw[0] == '"' and raw[raw.len - 1] == '"')
                                raw[1 .. raw.len - 1]
                            else
                                raw;
-                            const unescaped = try unescapeString(self.allocator, inner);
-                            const str_val = self.buildConstStr(unescaped);
+                            const content = if (slit.is_raw) inner else try unescapeString(self.allocator, inner);
+                            const str_val = self.buildConstStr(content);
                            const param_name_z = try self.allocator.dupeZ(u8, param.name);
                            const alloca = c.LLVMBuildAlloca(self.builder, self.getStringStructType(), param_name_z.ptr);
                            _ = c.LLVMBuildStore(self.builder, str_val, alloca);
--- a/src/lexer.zig
+++ b/src/lexer.zig
@@ -50,8 +50,24 @@ pub const Lexer = struct {
            return self.lexString(start);
        }

-        // Directives: #import, #insert, #run, #builtin, #foreign, #library
+        // Backtick (multi-line) string literals
+        if (c == '`') {
+            return self.lexBacktickString(start);
+        }
+
+        // Directives: #import, #insert, #run, #builtin, #foreign, #library, #string
        if (c == '#') {
+            // #string needs special handling (heredoc)
+            const str_kw = "#string";
+            const str_len: u32 = str_kw.len;
+            if (self.source.len >= start + str_len and
+                std.mem.eql(u8, self.source[start .. start + str_len], str_kw) and
+                (start + str_len >= self.source.len or !isIdentContinue(self.source[start + str_len])))
+            {
+                self.index = start + str_len;
+                return self.lexHeredoc(start);
+            }
+
            const directives = .{
                .{ "#import", Tag.hash_import },
                .{ "#insert", Tag.hash_insert },
@@ -254,6 +270,81 @@ pub const Lexer = struct {
        return self.makeToken(.invalid, start, self.index);
    }

+    /// Lex a backtick-delimited string literal, which may span several lines.
+    /// `start` is the offset of the opening backtick and `self.index` must still
+    /// point at it. A backslash skips the byte that follows it, so an escaped
+    /// backtick does not close the literal. Returns a `.string_literal` token
+    /// that includes both backticks, or `.invalid` when the source ends first.
+    fn lexBacktickString(self: *Lexer, start: u32) Token {
+        self.index += 1; // skip opening `
+        while (self.index < self.source.len) {
+            const ch = self.source[self.index];
+            if (ch == '`') {
+                self.index += 1;
+                return self.makeToken(.string_literal, start, self.index);
+            }
+            // Only skip the escaped byte when there is one. A backslash at the
+            // very end of the source must not move the index past the end, or
+            // the `.invalid` token below would span out of bounds.
+            if (ch == '\\' and self.index + 1 < self.source.len) {
+                self.index += 1; // skip escape
+            }
+            self.index += 1;
+        }
+        // Unterminated string
+        return self.makeToken(.invalid, start, self.index);
+    }
+
+    /// Lex the body of a `#string` heredoc; the caller has already consumed the
+    /// "#string" keyword itself.
+    ///
+    /// Layout: `#string DELIM`, a newline, the content lines, then `DELIM` at
+    /// the start of a line. The resulting `.raw_string_literal` token spans the
+    /// content only (from the byte after the first newline up to the start of
+    /// the closing delimiter line). `.invalid` is returned, anchored at
+    /// `directive_start`, when the delimiter is missing or never reappears.
+    fn lexHeredoc(self: *Lexer, directive_start: u32) Token {
+        const src = self.source;
+
+        // Horizontal whitespace between the keyword and the delimiter.
+        while (self.index < src.len) : (self.index += 1) {
+            const b = src[self.index];
+            if (b != ' ' and b != '\t') break;
+        }
+
+        // The delimiter must be an identifier.
+        const delim_start = self.index;
+        if (self.index >= src.len or !isIdentStart(src[self.index])) {
+            return self.makeToken(.invalid, directive_start, self.index);
+        }
+        while (self.index < src.len and isIdentContinue(src[self.index])) : (self.index += 1) {}
+        const delimiter = src[delim_start..self.index];
+
+        // Anything else on the directive line is ignored.
+        while (self.index < src.len and src[self.index] != '\n') : (self.index += 1) {}
+        if (self.index >= src.len) {
+            return self.makeToken(.invalid, directive_start, self.index);
+        }
+        self.index += 1; // step over the newline
+
+        const content_start = self.index;
+
+        // Walk the content one line at a time, looking for a line that begins
+        // with the delimiter as a whole word.
+        while (self.index < src.len) {
+            const line_start = self.index;
+            const delim_end = line_start + delimiter.len;
+
+            const closes = delim_end <= src.len and
+                std.mem.eql(u8, src[line_start..delim_end], delimiter) and
+                (delim_end >= src.len or !isIdentContinue(src[delim_end]));
+            if (closes) {
+                self.index = line_start + @as(u32, @intCast(delimiter.len));
+                return self.makeToken(.raw_string_literal, content_start, line_start);
+            }
+
+            // Not the closing line: advance to the start of the next one.
+            while (self.index < src.len and src[self.index] != '\n') : (self.index += 1) {}
+            if (self.index < src.len) self.index += 1;
+        }
+
+        // Ran out of input before the closing delimiter.
+        return self.makeToken(.invalid, directive_start, self.index);
+    }
+
    fn peek(self: *const Lexer) u8 {
        if (self.index < self.source.len) {
            return self.source[self.index];
@@ -398,6 +489,56 @@ test "lex string" {
    try std.testing.expectEqualStrings("\"Hello\"", tok.slice("\"Hello\""));
 }

+test "lex backtick string" {
+    // A single-line backtick literal is one string token, delimiters included.
+    const input: [:0]const u8 = "`Hello`";
+    var lexer = Lexer.init(input);
+    const token = lexer.next();
+    try std.testing.expectEqual(Tag.string_literal, token.tag);
+    try std.testing.expectEqualStrings("`Hello`", token.slice(input));
+}
+
+test "lex backtick multiline string" {
+    // Newlines inside the backticks stay part of the same token.
+    const input: [:0]const u8 = "`line1\nline2\nline3`";
+    var lexer = Lexer.init(input);
+    const token = lexer.next();
+    try std.testing.expectEqual(Tag.string_literal, token.tag);
+    try std.testing.expectEqualStrings("`line1\nline2\nline3`", token.slice(input));
+}
+
+test "lex backtick string with escape" {
+    // A backslash-escaped backtick must not terminate the literal.
+    const input: [:0]const u8 = "`hello\\`world`";
+    var lexer = Lexer.init(input);
+    const token = lexer.next();
+    try std.testing.expectEqual(Tag.string_literal, token.tag);
+    try std.testing.expectEqualStrings("`hello\\`world`", token.slice(input));
+}
+
+test "lex #string heredoc" {
+    // The token covers the content only, including its trailing newline.
+    const input: [:0]const u8 = "#string END\nhello world\nEND";
+    var lexer = Lexer.init(input);
+    const token = lexer.next();
+    try std.testing.expectEqual(Tag.raw_string_literal, token.tag);
+    try std.testing.expectEqualStrings("hello world\n", token.slice(input));
+}
+
+test "lex #string heredoc multiline" {
+    // Content lines starting with '#' are not treated as directives.
+    const input: [:0]const u8 = "#string GLSL\n#version 330\nvoid main() {}\nGLSL";
+    var lexer = Lexer.init(input);
+    const token = lexer.next();
+    try std.testing.expectEqual(Tag.raw_string_literal, token.tag);
+    try std.testing.expectEqualStrings("#version 330\nvoid main() {}\n", token.slice(input));
+}
+
+test "lex #string heredoc followed by semicolon" {
+    // Lexing resumes right after the closing delimiter.
+    const input: [:0]const u8 = "#string END\ncontent\nEND;";
+    var lexer = Lexer.init(input);
+
+    const heredoc = lexer.next();
+    try std.testing.expectEqual(Tag.raw_string_literal, heredoc.tag);
+    try std.testing.expectEqualStrings("content\n", heredoc.slice(input));
+
+    const after = lexer.next();
+    try std.testing.expectEqual(Tag.semicolon, after.tag);
+}
+
 test "lex hex literal" {
    var lex = Lexer.init("0xFF 0X1A");
    const tok1 = lex.next();
--- a/src/lsp/document.zig
+++ b/src/lsp/document.zig
@@ -1,48 +1,207 @@
 const std = @import("std");
+const sx = struct {
+    pub const ast = @import("../ast.zig");
+    pub const parser = @import("../parser.zig");
+    pub const sema = @import("../sema.zig");
+    pub const imports = @import("../imports.zig");
+};
+
+/// One import declaration found in a document's AST.
+pub const Import = struct {
+    /// Namespace name. null for flat imports.
+    ns: ?[]const u8,
+    /// Path used to load the imported file: the path written in the import,
+    /// prefixed with the importing file's directory unless that directory is ".".
+    /// NOTE(review): this is only absolute if the importing document's path is.
+    path: []const u8,
+};
+
+/// Per-file state kept by `DocumentStore`.
+pub const Document = struct {
+    /// File path; also the key under which the store indexes this document.
+    path: []const u8,
+    /// NUL-terminated source text of this file.
+    source: [:0]const u8,
+    /// LSP version (from didOpen/didChange), -1 for disk-loaded imports.
+    version: i64,
+    /// AST root for this file alone (not merged with imports); null until parsed.
+    root: ?*sx.ast.Node,
+    /// Sema results for this file; null while no analysis result is available.
+    sema: ?sx.sema.SemaResult,
+    /// Imports declared by this file, filled in during analysis.
+    imports: []const Import,
+
+    /// Return the symbols recorded by sema for this file, or an empty slice
+    /// when there is no sema result.
+    pub fn topLevelSymbols(self: *const Document) []const sx.sema.Symbol {
+        if (self.sema) |result| return result.symbols;
+        return &.{};
+    }
+};

 pub const DocumentStore = struct {
-    documents: std.StringHashMap(Document),
    allocator: std.mem.Allocator,
+    io: std.Io,
+    /// All loaded documents keyed by resolved file path.
+    by_path: std.StringHashMap(*Document),

-    pub const Document = struct {
-        uri: []const u8,
-        text: []const u8,
-        version: i64,
-    };
-
-    pub fn init(allocator: std.mem.Allocator) DocumentStore {
+    /// Create an empty store. `allocator` backs the path map and is kept for
+    /// all later document allocations; `io` is kept for reading files from disk
+    /// in `getOrLoad`.
+    pub fn init(allocator: std.mem.Allocator, io: std.Io) DocumentStore {
        return .{
-            .documents = std.StringHashMap(Document).init(allocator),
            .allocator = allocator,
+            .io = io,
+            .by_path = std.StringHashMap(*Document).init(allocator),
        };
    }

-    pub fn open(self: *DocumentStore, uri: []const u8, text: []const u8, version: i64) !void {
-        const uri_copy = try self.allocator.dupe(u8, uri);
-        const text_copy = try self.allocator.dupe(u8, text);
-        try self.documents.put(uri_copy, .{
-            .uri = uri_copy,
-            .text = text_copy,
-            .version = version,
-        });
+    /// Get or create a document for the given file path. Reads from disk if not yet loaded.
+    ///
+    /// A freshly loaded document gets version -1 and a NUL-terminated copy of
+    /// the file contents. Any failure to read the file (files are limited to
+    /// 10 MiB) is reported as `error.FileNotFound`.
+    pub fn getOrLoad(self: *DocumentStore, path: []const u8) !*Document {
+        if (self.by_path.get(path)) |doc| return doc;
+
+        const bytes = std.Io.Dir.readFileAlloc(.cwd(), self.io, path, self.allocator, .limited(10 * 1024 * 1024)) catch {
+            return error.FileNotFound;
+        };
+        // `bytes` is only a staging buffer for the sentinel-terminated copy.
+        defer self.allocator.free(bytes);
+        const source = try self.allocator.dupeZ(u8, bytes);
+        // The document takes over `source` only if it is actually created.
+        errdefer self.allocator.free(source);
+        return self.createDocument(path, source, -1);
    }

-    pub fn update(self: *DocumentStore, uri: []const u8, text: []const u8, version: i64) !void {
-        if (self.documents.getPtr(uri)) |doc| {
-            self.allocator.free(doc.text);
-            doc.text = try self.allocator.dupe(u8, text);
+    /// Create or update a document with editor-provided source (for didOpen/didChange).
+    ///
+    /// `source` is stored by reference, not copied. For an existing document
+    /// the previous source, AST, sema result and import list are dropped here
+    /// without being freed.
+    /// NOTE(review): confirm who owns the replaced values.
+    pub fn openOrUpdate(self: *DocumentStore, path: []const u8, source: [:0]const u8, version: i64) !*Document {
+        if (self.by_path.get(path)) |doc| {
+            doc.source = source;
            doc.version = version;
+            // Invalidate analysis so the next analyzeDocument call re-parses
+            // and re-runs sema against the new text.
+            doc.root = null;
+            doc.sema = null;
+            doc.imports = &.{};
+            return doc;
        }
+        return self.createDocument(path, source, version);
    }

-    pub fn close(self: *DocumentStore, uri: []const u8) void {
-        if (self.documents.fetchRemove(uri)) |kv| {
-            self.allocator.free(kv.value.text);
-            self.allocator.free(kv.key);
-        }
+    /// Allocate a new, not-yet-analyzed document and register it under `path`.
+    ///
+    /// `path` is copied; the copy is both `Document.path` and the map key.
+    /// `source` is stored by reference. If any step fails, everything allocated
+    /// here is released again and the store is left unchanged.
+    fn createDocument(self: *DocumentStore, path: []const u8, source: [:0]const u8, version: i64) !*Document {
+        const doc = try self.allocator.create(Document);
+        errdefer self.allocator.destroy(doc);
+        const path_owned = try self.allocator.dupe(u8, path);
+        errdefer self.allocator.free(path_owned);
+        doc.* = .{
+            .path = path_owned,
+            .source = source,
+            .version = version,
+            .root = null,
+            .sema = null,
+            .imports = &.{},
+        };
+        try self.by_path.put(path_owned, doc);
+        return doc;
    }

-    pub fn get(self: *const DocumentStore, uri: []const u8) ?*const Document {
-        return self.documents.getPtr(uri);
+    /// Analyze a document: parse, resolve imports, run sema with imported symbols pre-registered.
+    ///
+    /// Imported documents are loaded and analyzed recursively. Import cycles
+    /// (A imports B, B imports A) are broken by skipping any import that is
+    /// already being analyzed further up the chain. A parse failure returns
+    /// without error and leaves `doc.sema` untouched; a sema failure stores null.
+    pub fn analyzeDocument(self: *DocumentStore, doc: *Document) !void {
+        // One guard is shared by the whole recursive walk. A guard created per
+        // call would only ever contain the current document and could not see
+        // cycles longer than a direct self-import.
+        var in_progress = std.StringHashMap(void).init(self.allocator);
+        defer in_progress.deinit();
+        try self.analyzeWithGuard(doc, &in_progress);
+    }
+
+    /// Worker for `analyzeDocument`. `in_progress` holds the paths of all
+    /// documents currently being analyzed on the active import chain.
+    fn analyzeWithGuard(self: *DocumentStore, doc: *Document, in_progress: *std.StringHashMap(void)) !void {
+        // Parse if needed
+        if (doc.root == null) {
+            var p = sx.parser.Parser.init(self.allocator, doc.source);
+            doc.root = p.parse() catch return;
+        }
+        const root = doc.root orelse return;
+
+        // Mark this document as in progress for the duration of its analysis.
+        try in_progress.put(doc.path, {});
+        defer _ = in_progress.remove(doc.path);
+
+        // Extract imports from AST
+        var import_list = std.ArrayList(Import).empty;
+        errdefer import_list.deinit(self.allocator);
+        const base_dir = sx.imports.dirName(doc.path);
+        if (root.data == .root) {
+            for (root.data.root.decls) |decl| {
+                if (decl.data != .import_decl) continue;
+                const imp = decl.data.import_decl;
+                const resolved_path = if (std.mem.eql(u8, base_dir, "."))
+                    imp.path
+                else
+                    try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ base_dir, imp.path });
+                try import_list.append(self.allocator, .{
+                    .ns = imp.name,
+                    .path = resolved_path,
+                });
+            }
+        }
+        doc.imports = try import_list.toOwnedSlice(self.allocator);
+
+        // Recursively analyze imported documents and pre-register their symbols
+        var analyzer = sx.sema.Analyzer.init(self.allocator);
+
+        for (doc.imports) |imp| {
+            const imp_doc = self.getOrLoad(imp.path) catch continue;
+
+            // Cycle detection: the import is already being analyzed by a caller.
+            if (in_progress.contains(imp.path)) continue;
+
+            // Ensure imported doc is analyzed. This is best effort: if it
+            // fails, `imp_doc.sema` stays null and the import is skipped below.
+            if (imp_doc.sema == null) {
+                self.analyzeWithGuard(imp_doc, in_progress) catch {};
+            }
+
+            const imp_sema = imp_doc.sema orelse continue;
+
+            if (imp.ns) |ns_name| {
+                // Namespaced import: register one namespace symbol
+                try analyzer.preRegisterSymbol(.{
+                    .name = ns_name,
+                    .kind = .namespace,
+                    .ty = null,
+                    .def_span = .{ .start = 0, .end = 0 },
+                    .scope_depth = 0,
+                    .origin = imp.path,
+                });
+                // Copy fn_signatures with namespace prefix
+                var sig_it = imp_sema.fn_signatures.iterator();
+                while (sig_it.next()) |entry| {
+                    const prefixed = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns_name, entry.key_ptr.* });
+                    try analyzer.fn_signatures.put(prefixed, entry.value_ptr.*);
+                }
+                // Copy struct_types with namespace prefix
+                var struct_it = imp_sema.struct_types.iterator();
+                while (struct_it.next()) |entry| {
+                    const prefixed = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns_name, entry.key_ptr.* });
+                    try analyzer.struct_types.put(prefixed, entry.value_ptr.*);
+                }
+                // Copy enum_types with namespace prefix
+                var enum_it = imp_sema.enum_types.iterator();
+                while (enum_it.next()) |entry| {
+                    const prefixed = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns_name, entry.key_ptr.* });
+                    try analyzer.enum_types.put(prefixed, entry.value_ptr.*);
+                }
+            } else {
+                // Flat import: pre-register all top-level symbols with origin set
+                for (imp_sema.symbols) |sym| {
+                    if (sym.scope_depth == 0) {
+                        try analyzer.preRegisterSymbol(.{
+                            .name = sym.name,
+                            .kind = sym.kind,
+                            .ty = sym.ty,
+                            .def_span = sym.def_span,
+                            .scope_depth = 0,
+                            .origin = imp.path,
+                        });
+                    }
+                }
+                // Copy fn_signatures as-is
+                var sig_it = imp_sema.fn_signatures.iterator();
+                while (sig_it.next()) |entry| {
+                    try analyzer.fn_signatures.put(entry.key_ptr.*, entry.value_ptr.*);
+                }
+                // Copy struct_types
+                var struct_it = imp_sema.struct_types.iterator();
+                while (struct_it.next()) |entry| {
+                    try analyzer.struct_types.put(entry.key_ptr.*, entry.value_ptr.*);
+                }
+                // Copy enum_types
+                var enum_it = imp_sema.enum_types.iterator();
+                while (enum_it.next()) |entry| {
+                    try analyzer.enum_types.put(entry.key_ptr.*, entry.value_ptr.*);
+                }
+            }
+        }
+
+        // Run sema on this file's own AST
+        doc.sema = analyzer.analyze(root) catch null;
+    }
+
+    /// Look up an already-loaded document by path. Returns null when the path
+    /// is not in the store; nothing is read from disk.
+    pub fn get(self: *const DocumentStore, path: []const u8) ?*Document {
+        return self.by_path.get(path);
    }
 };
--- a/src/lsp/server.zig
+++ b/src/lsp/server.zig
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -1087,6 +1087,12 @@ pub const Parser = struct {
                self.advance();
                return try self.createNode(start, .{ .string_literal = .{ .raw = raw[1 .. raw.len - 1] } });
            },
+            .raw_string_literal => {
+                // #string heredoc — token span is content only, no stripping needed
+                const raw = self.tokenSlice(self.current);
+                self.advance();
+                return try self.createNode(start, .{ .string_literal = .{ .raw = raw, .is_raw = true } });
+            },
            .kw_true => {
                self.advance();
                return try self.createNode(start, .{ .bool_literal = .{ .value = true } });
--- a/src/sema.zig
+++ b/src/sema.zig
@@ -23,6 +23,8 @@ pub const Symbol = struct {
    ty: ?Type,
    def_span: Span,
    scope_depth: u32,
+    /// null = defined in the current file. Non-null = absolute path of the origin file.
+    origin: ?[]const u8 = null,
 };

 pub const Reference = struct {
@@ -521,6 +523,11 @@ pub const Analyzer = struct {
        });
    }

+    /// Append `sym` unchanged to the analyzer's symbol list, so that a symbol
+    /// coming from an imported file is present when this file's references are
+    /// resolved. Fails only if the append fails.
+    pub fn preRegisterSymbol(self: *Analyzer, sym: Symbol) !void {
+        return self.symbols.append(self.allocator, sym);
+    }
+
    fn resolveIdentifier(self: *Analyzer, name: []const u8, span: Span) !void {
        // Search backwards to find the most recent declaration with this name
        // that is at or above the current scope depth.
--- a/src/token.zig
+++ b/src/token.zig
@@ -3,6 +3,7 @@ pub const Tag = enum {
    int_literal,
    float_literal,
    string_literal,
+    raw_string_literal,

    // Identifiers and keywords
    identifier,