strings
This commit is contained in:
143
src/lexer.zig
143
src/lexer.zig
@@ -50,8 +50,24 @@ pub const Lexer = struct {
|
||||
return self.lexString(start);
|
||||
}
|
||||
|
||||
// Directives: #import, #insert, #run, #builtin, #foreign, #library
|
||||
// Backtick (multi-line) string literals
|
||||
if (c == '`') {
|
||||
return self.lexBacktickString(start);
|
||||
}
|
||||
|
||||
// Directives: #import, #insert, #run, #builtin, #foreign, #library, #string
|
||||
if (c == '#') {
|
||||
// #string needs special handling (heredoc)
|
||||
const str_kw = "#string";
|
||||
const str_len: u32 = str_kw.len;
|
||||
if (self.source.len >= start + str_len and
|
||||
std.mem.eql(u8, self.source[start .. start + str_len], str_kw) and
|
||||
(start + str_len >= self.source.len or !isIdentContinue(self.source[start + str_len])))
|
||||
{
|
||||
self.index = start + str_len;
|
||||
return self.lexHeredoc(start);
|
||||
}
|
||||
|
||||
const directives = .{
|
||||
.{ "#import", Tag.hash_import },
|
||||
.{ "#insert", Tag.hash_insert },
|
||||
@@ -254,6 +270,81 @@ pub const Lexer = struct {
|
||||
return self.makeToken(.invalid, start, self.index);
|
||||
}
|
||||
|
||||
/// Lex a backtick-delimited string literal, which may span multiple lines.
///
/// `start` is the offset recorded by the caller for the opening backtick;
/// `self.index` is assumed to be positioned on that backtick when this is
/// called (the first step below skips it).
///
/// A backslash causes the byte after it to be skipped without inspection, so
/// an escaped backtick does not close the literal.
///
/// Returns a `.string_literal` token from `start` through the closing
/// backtick. If the input ends first, returns an `.invalid` token from
/// `start` to the end of the input.
fn lexBacktickString(self: *Lexer, start: u32) Token {
    self.index += 1; // skip opening `
    while (self.index < self.source.len) {
        const ch = self.source[self.index];
        if (ch == '`') {
            self.index += 1; // consume closing `
            return self.makeToken(.string_literal, start, self.index);
        }
        // Only skip the escaped byte when one actually exists. A backslash
        // that is the very last byte of the input must not push `self.index`
        // past `self.source.len`, otherwise the `.invalid` token below would
        // end outside the buffer.
        if (ch == '\\' and self.index + 1 < self.source.len) {
            self.index += 1; // skip the backslash; the escaped byte is skipped below
        }
        self.index += 1;
    }
    // Unterminated string: `self.index` is exactly `self.source.len` here.
    return self.makeToken(.invalid, start, self.index);
}
|
||||
|
||||
/// Lex the remainder of a `#string` heredoc. The caller has already matched
/// "#string" and left `self.index` just past it.
///
/// Syntax: #string DELIM\n...content...\nDELIM
///
/// The delimiter is an identifier; anything else on the header line after it
/// is skipped. The heredoc ends at the first line that begins with the
/// delimiter and is not immediately followed by an identifier character.
///
/// On success the returned `.raw_string_literal` token is built from the
/// content range only (first byte after the header line up to the start of
/// the closing-delimiter line), and `self.index` is left just past the
/// closing delimiter.
///
/// An `.invalid` token starting at `directive_start` is returned when no
/// delimiter identifier follows `#string`, when the input ends before the
/// header line's newline, or when no closing delimiter line is found.
fn lexHeredoc(self: *Lexer, directive_start: u32) Token {
    const src = self.source;

    // Horizontal whitespace between "#string" and the delimiter name.
    while (self.index < src.len) : (self.index += 1) {
        switch (src[self.index]) {
            ' ', '\t' => {},
            else => break,
        }
    }

    // The delimiter must be an identifier.
    const delim_start = self.index;
    const has_delimiter = delim_start < src.len and isIdentStart(src[delim_start]);
    if (!has_delimiter) {
        return self.makeToken(.invalid, directive_start, self.index);
    }
    while (self.index < src.len and isIdentContinue(src[self.index])) : (self.index += 1) {}
    const delimiter = src[delim_start..self.index];

    // Discard whatever follows the delimiter on the header line.
    while (self.index < src.len and src[self.index] != '\n') : (self.index += 1) {}
    if (self.index >= src.len) {
        // Input ended before the header line was terminated.
        return self.makeToken(.invalid, directive_start, self.index);
    }
    self.index += 1; // step over the header's '\n'

    const content_start = self.index;

    // Walk the content one line at a time, testing each line start (column 0)
    // for the closing delimiter.
    while (self.index < src.len) {
        const line_start = self.index;
        const delim_end = line_start + delimiter.len;

        // `startsWith` also covers the "enough bytes remain" bounds check.
        const closes_here = std.mem.startsWith(u8, src[line_start..], delimiter) and
            (delim_end == src.len or !isIdentContinue(src[delim_end]));
        if (closes_here) {
            self.index = @intCast(delim_end);
            return self.makeToken(.raw_string_literal, content_start, line_start);
        }

        // Not the terminator: advance to the start of the next line.
        while (self.index < src.len and src[self.index] != '\n') : (self.index += 1) {}
        if (self.index < src.len) self.index += 1; // step over '\n'
    }

    // Ran out of input without seeing the closing delimiter.
    return self.makeToken(.invalid, directive_start, self.index);
}
|
||||
|
||||
fn peek(self: *const Lexer) u8 {
|
||||
if (self.index < self.source.len) {
|
||||
return self.source[self.index];
|
||||
@@ -398,6 +489,56 @@ test "lex string" {
|
||||
try std.testing.expectEqualStrings("\"Hello\"", tok.slice("\"Hello\""));
|
||||
}
|
||||
|
||||
// A single-line backtick literal lexes to one string_literal token whose
// slice includes both backticks.
test "lex backtick string" {
    const input: [:0]const u8 = "`Hello`";
    var lexer = Lexer.init(input);

    const token = lexer.next();

    try std.testing.expectEqual(Tag.string_literal, token.tag);
    // The token is expected to cover the entire input.
    try std.testing.expectEqualStrings(input, token.slice(input));
}
|
||||
|
||||
// Newlines inside a backtick literal do not end it; the whole three-line
// literal is a single string_literal token.
test "lex backtick multiline string" {
    const input: [:0]const u8 = "`line1\nline2\nline3`";
    var lexer = Lexer.init(input);

    const token = lexer.next();

    try std.testing.expectEqual(Tag.string_literal, token.tag);
    // The token is expected to cover the entire input, newlines included.
    try std.testing.expectEqualStrings(input, token.slice(input));
}
|
||||
|
||||
// A backslash-escaped backtick in the middle of the literal must not be
// treated as the closing delimiter.
test "lex backtick string with escape" {
    const input: [:0]const u8 = "`hello\\`world`";
    var lexer = Lexer.init(input);

    const token = lexer.next();

    try std.testing.expectEqual(Tag.string_literal, token.tag);
    // The token runs through the final backtick, not the escaped one.
    try std.testing.expectEqualStrings(input, token.slice(input));
}
|
||||
|
||||
// A one-line heredoc: the token slice is the content only (including its
// trailing newline), without the "#string END" header or the closing END.
test "lex #string heredoc" {
    const input: [:0]const u8 =
        \\#string END
        \\hello world
        \\END
    ;
    var lexer = Lexer.init(input);

    const body = lexer.next();

    try std.testing.expectEqual(Tag.raw_string_literal, body.tag);
    try std.testing.expectEqualStrings("hello world\n", body.slice(input));
}
|
||||
|
||||
// A multi-line heredoc whose content itself contains a '#' line; every
// content line up to the closing GLSL delimiter is part of the token slice.
test "lex #string heredoc multiline" {
    const input: [:0]const u8 =
        \\#string GLSL
        \\#version 330
        \\void main() {}
        \\GLSL
    ;
    var lexer = Lexer.init(input);

    const body = lexer.next();

    try std.testing.expectEqual(Tag.raw_string_literal, body.tag);
    try std.testing.expectEqualStrings("#version 330\nvoid main() {}\n", body.slice(input));
}
|
||||
|
||||
// The lexer must resume right after the closing delimiter, so a ';' that
// directly follows END comes out as the next token.
test "lex #string heredoc followed by semicolon" {
    const input: [:0]const u8 =
        \\#string END
        \\content
        \\END;
    ;
    var lexer = Lexer.init(input);

    // First token: the heredoc content.
    const body = lexer.next();
    try std.testing.expectEqual(Tag.raw_string_literal, body.tag);
    try std.testing.expectEqualStrings("content\n", body.slice(input));

    // Second token: the semicolon immediately after the closing delimiter.
    const terminator = lexer.next();
    try std.testing.expectEqual(Tag.semicolon, terminator.tag);
}
|
||||
|
||||
test "lex hex literal" {
|
||||
var lex = Lexer.init("0xFF 0X1A");
|
||||
const tok1 = lex.next();
|
||||
|
||||
Reference in New Issue
Block a user