strings

2026-02-12 16:23:42 +02:00
parent 1087bd1977
commit dab162bfe4
13 changed files with 1190 additions and 775 deletions
--- a/src/lexer.zig
+++ b/src/lexer.zig
@@ -50,8 +50,24 @@ pub const Lexer = struct {
            return self.lexString(start);
        }

-        // Directives: #import, #insert, #run, #builtin, #foreign, #library
+        // Backtick (multi-line) string literals
+        if (c == '`') {
+            return self.lexBacktickString(start);
+        }
+
+        // Directives: #import, #insert, #run, #builtin, #foreign, #library, #string
        if (c == '#') {
+            // #string needs special handling (heredoc)
+            const str_kw = "#string";
+            const str_len: u32 = str_kw.len;
+            if (self.source.len >= start + str_len and
+                std.mem.eql(u8, self.source[start .. start + str_len], str_kw) and
+                (start + str_len >= self.source.len or !isIdentContinue(self.source[start + str_len])))
+            {
+                self.index = start + str_len;
+                return self.lexHeredoc(start);
+            }
+
            const directives = .{
                .{ "#import", Tag.hash_import },
                .{ "#insert", Tag.hash_insert },
@@ -254,6 +270,81 @@ pub const Lexer = struct {
        return self.makeToken(.invalid, start, self.index);
    }

+    /// Lex a backtick-delimited (multi-line) string literal beginning at `start`,
+    /// the offset of the opening backtick. A backslash escapes the byte after it.
+    /// Yields `.string_literal` including both backticks, or `.invalid` at EOF.
+    fn lexBacktickString(self: *Lexer, start: u32) Token {
+        self.index += 1; // skip opening `
+        while (self.index < self.source.len) {
+            const ch = self.source[self.index];
+            self.index += 1;
+            if (ch == '`') return self.makeToken(.string_literal, start, self.index);
+            // Skip the escaped byte only when one exists: a backslash that is the
+            // last byte of the input must not advance `index` past `source.len`.
+            if (ch == '\\' and self.index < self.source.len) self.index += 1;
+        }
+        // Unterminated string
+        return self.makeToken(.invalid, start, self.index);
+    }
+
+    /// Lex the remainder of a `#string` heredoc. Must be called with
+    /// `self.index` just past the `#string` keyword that starts at
+    /// `directive_start`.
+    ///
+    /// Syntax: #string DELIM\n...content...\nDELIM
+    ///
+    /// Returns a `.raw_string_literal` token spanning only the content lines
+    /// and leaves `self.index` just past the closing delimiter. Returns an
+    /// `.invalid` token spanning `directive_start..self.index` when the
+    /// delimiter name, the first newline, or the closing delimiter is missing.
+    fn lexHeredoc(self: *Lexer, directive_start: u32) Token {
+        const src = self.source;
+
+        // Horizontal whitespace may separate `#string` from the delimiter name.
+        while (self.index < src.len) : (self.index += 1) {
+            switch (src[self.index]) {
+                ' ', '\t' => {},
+                else => break,
+            }
+        }
+
+        // The delimiter name must be an identifier.
+        const name_begin = self.index;
+        if (name_begin >= src.len or !isIdentStart(src[name_begin])) {
+            return self.makeToken(.invalid, directive_start, self.index);
+        }
+        while (self.index < src.len and isIdentContinue(src[self.index])) : (self.index += 1) {}
+        const name = src[name_begin..self.index];
+
+        // Whatever follows the name on the directive line is discarded.
+        while (self.index < src.len and src[self.index] != '\n') : (self.index += 1) {}
+        if (self.index >= src.len) {
+            return self.makeToken(.invalid, directive_start, self.index);
+        }
+        self.index += 1; // step over '\n'
+        const body_begin = self.index;
+
+        // Walk the body one line at a time, testing each line start for the name.
+        while (self.index < src.len) {
+            const bol = self.index;
+            const after = bol + name.len;
+            const closes = after <= src.len and
+                std.mem.eql(u8, src[bol..after], name) and
+                (after >= src.len or !isIdentContinue(src[after]));
+            if (closes) {
+                self.index = bol + @as(u32, @intCast(name.len));
+                return self.makeToken(.raw_string_literal, body_begin, bol);
+            }
+
+            // Not the closing line: advance past its newline, if it has one.
+            while (self.index < src.len and src[self.index] != '\n') : (self.index += 1) {}
+            if (self.index < src.len) self.index += 1;
+        }
+
+        // Ran out of input before the closing delimiter.
+        return self.makeToken(.invalid, directive_start, self.index);
+    }
+
    fn peek(self: *const Lexer) u8 {
        if (self.index < self.source.len) {
            return self.source[self.index];
@@ -398,6 +489,56 @@ test "lex string" {
    try std.testing.expectEqualStrings("\"Hello\"", tok.slice("\"Hello\""));
 }

+test "lex backtick string" {
+    const input: [:0]const u8 = "`Hello`"; // the whole input is one literal
+    var lexer = Lexer.init(input);
+    const token = lexer.next();
+    try std.testing.expectEqual(Tag.string_literal, token.tag);
+    try std.testing.expectEqualStrings(input, token.slice(input));
+}
+
+test "lex backtick multiline string" {
+    const input: [:0]const u8 = "`line1\nline2\nline3`"; // newlines stay inside the token
+    var lexer = Lexer.init(input);
+    const token = lexer.next();
+    try std.testing.expectEqual(Tag.string_literal, token.tag);
+    try std.testing.expectEqualStrings(input, token.slice(input));
+}
+
+test "lex backtick string with escape" {
+    const input: [:0]const u8 = "`hello\\`world`"; // the escaped backtick must not close it
+    var lexer = Lexer.init(input);
+    const token = lexer.next();
+    try std.testing.expectEqual(Tag.string_literal, token.tag);
+    try std.testing.expectEqualStrings(input, token.slice(input));
+}
+
+test "lex #string heredoc" {
+    const input: [:0]const u8 = "#string END\nhello world\nEND";
+    var lexer = Lexer.init(input);
+    const token = lexer.next(); // token covers the content only, not the directive line
+    try std.testing.expectEqual(Tag.raw_string_literal, token.tag);
+    try std.testing.expectEqualStrings("hello world\n", token.slice(input));
+}
+
+test "lex #string heredoc multiline" {
+    const input: [:0]const u8 = "#string GLSL\n#version 330\nvoid main() {}\nGLSL";
+    var lexer = Lexer.init(input);
+    const token = lexer.next(); // content lines are kept verbatim, '#' included
+    try std.testing.expectEqual(Tag.raw_string_literal, token.tag);
+    try std.testing.expectEqualStrings("#version 330\nvoid main() {}\n", token.slice(input));
+}
+
+test "lex #string heredoc followed by semicolon" {
+    const input: [:0]const u8 = "#string END\ncontent\nEND;";
+    var lexer = Lexer.init(input);
+    const heredoc = lexer.next();
+    try std.testing.expectEqual(Tag.raw_string_literal, heredoc.tag);
+    try std.testing.expectEqualStrings("content\n", heredoc.slice(input));
+    const after = lexer.next(); // lexing resumes right after the closing delimiter
+    try std.testing.expectEqual(Tag.semicolon, after.tag);
+}
+
 test "lex hex literal" {
    var lex = Lexer.init("0xFF 0X1A");
    const tok1 = lex.next();