This commit is contained in:
agra
2026-02-12 16:23:42 +02:00
parent 1087bd1977
commit dab162bfe4
13 changed files with 1190 additions and 775 deletions

View File

@@ -50,8 +50,24 @@ pub const Lexer = struct {
return self.lexString(start);
}
// Directives: #import, #insert, #run, #builtin, #foreign, #library
// Backtick (multi-line) string literals
if (c == '`') {
return self.lexBacktickString(start);
}
// Directives: #import, #insert, #run, #builtin, #foreign, #library, #string
if (c == '#') {
// #string needs special handling (heredoc)
const str_kw = "#string";
const str_len: u32 = str_kw.len;
if (self.source.len >= start + str_len and
std.mem.eql(u8, self.source[start .. start + str_len], str_kw) and
(start + str_len >= self.source.len or !isIdentContinue(self.source[start + str_len])))
{
self.index = start + str_len;
return self.lexHeredoc(start);
}
const directives = .{
.{ "#import", Tag.hash_import },
.{ "#insert", Tag.hash_insert },
@@ -254,6 +270,81 @@ pub const Lexer = struct {
return self.makeToken(.invalid, start, self.index);
}
/// Lex a backtick-delimited string literal, which may span multiple lines.
///
/// `start` is the offset of the opening backtick. The first byte at
/// `self.index` is skipped unconditionally, so the caller is assumed to have
/// left `self.index` on that opening backtick.
///
/// A backslash consumes the byte that follows it, so an escaped backtick does
/// not terminate the literal.
///
/// Returns a `.string_literal` token spanning both backticks, or an `.invalid`
/// token running from `start` to the end of the source when no closing
/// backtick is found.
fn lexBacktickString(self: *Lexer, start: u32) Token {
    self.index += 1; // skip opening `
    while (self.index < self.source.len) {
        const ch = self.source[self.index];
        if (ch == '`') {
            self.index += 1;
            return self.makeToken(.string_literal, start, self.index);
        }
        // A backslash escapes the next byte. Only skip it when such a byte
        // exists: a trailing backslash must not push the index past the end
        // of the source, otherwise the invalid token below would carry an
        // out-of-range end offset.
        if (ch == '\\' and self.index + 1 < self.source.len) {
            self.index += 1;
        }
        self.index += 1;
    }
    // Unterminated string
    return self.makeToken(.invalid, start, self.index);
}
/// Lex the remainder of a `#string` heredoc; the `#string` keyword itself has
/// already been consumed by the caller.
///
/// Form: `#string DELIM`, a newline, the content lines, then `DELIM` at the
/// very start of a line. Anything after `DELIM` on the opening line is
/// skipped.
///
/// On success the returned `.raw_string_literal` token spans only the content
/// (from the byte after the opening line's newline up to the start of the
/// closing delimiter line) and `self.index` is left just past the closing
/// delimiter. A missing delimiter name, a missing newline after the opening
/// line, or a missing closing delimiter yields an `.invalid` token that
/// starts at `directive_start`.
fn lexHeredoc(self: *Lexer, directive_start: u32) Token {
    const src = self.source;

    // Horizontal whitespace between the keyword and the delimiter name.
    while (self.index < src.len) : (self.index += 1) {
        const c = src[self.index];
        if (c != ' ' and c != '\t') break;
    }

    // The delimiter name must be an identifier.
    const name_begin = self.index;
    if (name_begin >= src.len or !isIdentStart(src[name_begin])) {
        return self.makeToken(.invalid, directive_start, self.index);
    }
    while (self.index < src.len and isIdentContinue(src[self.index])) : (self.index += 1) {}
    const terminator = src[name_begin..self.index];

    // Discard whatever follows the delimiter on the opening line.
    while (self.index < src.len and src[self.index] != '\n') : (self.index += 1) {}
    if (self.index >= src.len) {
        return self.makeToken(.invalid, directive_start, self.index);
    }
    self.index += 1; // step over the newline

    const body_begin = self.index;

    // Walk the content one line at a time; each iteration starts at column 0.
    while (self.index < src.len) {
        const line_begin = self.index;
        const after = line_begin + terminator.len;

        // The line closes the heredoc when it begins with the delimiter and
        // the delimiter is not merely a prefix of a longer identifier.
        const closes = after <= src.len and
            std.mem.eql(u8, src[line_begin..after], terminator) and
            (after >= src.len or !isIdentContinue(src[after]));
        if (closes) {
            self.index = line_begin + @as(u32, @intCast(terminator.len));
            return self.makeToken(.raw_string_literal, body_begin, line_begin);
        }

        // Not the closing line: advance to the start of the next one.
        while (self.index < src.len and src[self.index] != '\n') : (self.index += 1) {}
        if (self.index < src.len) {
            self.index += 1; // step over '\n'
        }
    }

    // Ran out of input before seeing the closing delimiter.
    return self.makeToken(.invalid, directive_start, self.index);
}
fn peek(self: *const Lexer) u8 {
if (self.index < self.source.len) {
return self.source[self.index];
@@ -398,6 +489,56 @@ test "lex string" {
try std.testing.expectEqualStrings("\"Hello\"", tok.slice("\"Hello\""));
}
test "lex backtick string" {
    // A single-line backtick literal lexes as one string token whose span
    // includes both delimiters, i.e. the whole input.
    const testing = std.testing;
    const input: [:0]const u8 = "`Hello`";

    var lexer = Lexer.init(input);
    const token = lexer.next();

    try testing.expectEqual(Tag.string_literal, token.tag);
    try testing.expectEqualStrings(input, token.slice(input));
}
test "lex backtick multiline string" {
    // Newlines inside a backtick literal do not end the token; the span is
    // the whole input.
    const testing = std.testing;
    const input: [:0]const u8 = "`line1\nline2\nline3`";

    var lexer = Lexer.init(input);
    const token = lexer.next();

    try testing.expectEqual(Tag.string_literal, token.tag);
    try testing.expectEqualStrings(input, token.slice(input));
}
test "lex backtick string with escape" {
    // A backslash-escaped backtick must not close the literal; the token
    // runs to the final, unescaped backtick.
    const testing = std.testing;
    const input: [:0]const u8 = "`hello\\`world`";

    var lexer = Lexer.init(input);
    const token = lexer.next();

    try testing.expectEqual(Tag.string_literal, token.tag);
    try testing.expectEqualStrings(input, token.slice(input));
}
test "lex #string heredoc" {
    // The token covers only the content between the opening line and the
    // closing delimiter, including the content's trailing newline.
    const testing = std.testing;
    const input: [:0]const u8 = "#string END\nhello world\nEND";

    var lexer = Lexer.init(input);
    const token = lexer.next();

    try testing.expectEqual(Tag.raw_string_literal, token.tag);
    try testing.expectEqualStrings("hello world\n", token.slice(input));
}
test "lex #string heredoc multiline" {
    // Content lines are taken verbatim, even when one starts with '#'.
    const testing = std.testing;
    const input: [:0]const u8 = "#string GLSL\n#version 330\nvoid main() {}\nGLSL";

    var lexer = Lexer.init(input);
    const token = lexer.next();

    try testing.expectEqual(Tag.raw_string_literal, token.tag);
    try testing.expectEqualStrings("#version 330\nvoid main() {}\n", token.slice(input));
}
test "lex #string heredoc followed by semicolon" {
    // Lexing resumes right after the closing delimiter, so the ';' that
    // follows it comes out as its own token.
    const testing = std.testing;
    const input: [:0]const u8 = "#string END\ncontent\nEND;";

    var lexer = Lexer.init(input);

    const heredoc = lexer.next();
    try testing.expectEqual(Tag.raw_string_literal, heredoc.tag);
    try testing.expectEqualStrings("content\n", heredoc.slice(input));

    const following = lexer.next();
    try testing.expectEqual(Tag.semicolon, following.tag);
}
test "lex hex literal" {
var lex = Lexer.init("0xFF 0X1A");
const tok1 = lex.next();