Files
sx/src/lexer.zig
agra 8d1816018a ffi: define-by-default #jni_class + #foreign modifier + #jni_main token
Flip the surface semantics for type-introducer directives: bare
`Foo :: #jni_class("path") { ... }` now means "DEFINE a new Java class
at that path" (sx-side provides the implementations). The `#foreign`
prefix modifier flips it back to "REFERENCE an existing class on the
foreign runtime." Matches how `#foreign` already reads in sx for C
function declarations (`printf :: ... #foreign;`).

    Foo :: #foreign  #jni_class("path/to/Foo") { ... }  // reference
    Foo ::           #jni_class("path/to/Foo") { ... }  // define
    Foo :: #jni_main #jni_class("path/to/Foo") { ... }  // define + main Activity

Compiler-side changes:
- New `hash_jni_main` lexer token (the launchable-Activity marker).
  Existing `hash_foreign` is reused; no new modifier token there.
- `ForeignClassDecl` gains `is_foreign: bool` + `is_main: bool`.
  `ForeignMethodDecl` gains `body: ?*Node` so defined-class methods
  can carry sx-side implementations (foreign-class methods stay
  `;`-terminated).
- Parser learns `tryParseForeignClassPrefix` — peek-and-consume the
  modifier tokens, then dispatch to the unchanged
  `parseForeignClassDecl` with the flags threaded through.
- Sema rejects two illegal combinations: `#foreign + #jni_main`
  (can't be both an external reference and the app's main entry),
  and bodied methods on `#foreign` decls (foreign methods are
  runtime-provided).
- Lower's foreign-class dispatch errors on non-foreign decls with
  a pointer to the runtime-synthesis follow-up; defined-class
  codegen (Java class emission, RegisterNatives wiring, manifest
  entry generation) lands in a separate session.

Migration:
- `library/modules/platform/android_jni.sx`: all four foreign class
  decls (`Activity`, `Window`, `View`, `WindowInsets`) gain `#foreign`.
- `examples/ffi-jni-class-{01..08}*.sx`: every test's `#jni_class` /
  `#jni_interface` / `#objc_class` / `#objc_protocol` / `#swift_class`
  / `#swift_struct` / `#swift_protocol` usage gains `#foreign`. All
  9 files mechanical perl rename; snapshots unchanged.

Verified locally:
- `zig build test` clean.
- `bash tests/run_examples.sh` 129/129.
- `bash tests/cross_compile.sh` 3/3.
- Chess APK rebuilds, reinstalls, launches on Pixel; safe-area
  clearance preserved.
2026-05-20 12:46:40 +03:00

601 lines
23 KiB
Zig

const std = @import("std");
const Token = @import("token.zig").Token;
const Tag = @import("token.zig").Tag;
const getKeyword = @import("token.zig").getKeyword;
pub const Lexer = struct {
source: [:0]const u8,
index: u32,
pub fn init(source: [:0]const u8) Lexer {
return .{ .source = source, .index = 0 };
}
pub fn next(self: *Lexer) Token {
// Skip whitespace and comments
while (true) {
if (self.index >= self.source.len) {
return self.makeToken(.eof, self.index, self.index);
}
const c = self.source[self.index];
if (c == ' ' or c == '\t' or c == '\n' or c == '\r') {
self.index += 1;
continue;
}
// Line comments
if (c == '/' and self.index + 1 < self.source.len and self.source[self.index + 1] == '/') {
while (self.index < self.source.len and self.source[self.index] != '\n') {
self.index += 1;
}
continue;
}
break;
}
const start = self.index;
const c = self.source[start];
// Integer / float literals
if (isDigit(c)) {
return self.lexNumber(start);
}
// Identifiers and keywords
if (isIdentStart(c)) {
return self.lexIdentifier(start);
}
// String literals
if (c == '"') {
return self.lexString(start);
}
// Directives: #import, #insert, #run, #builtin, #foreign, #library, #string
if (c == '#') {
// #string needs special handling (heredoc)
const str_kw = "#string";
const str_len: u32 = str_kw.len;
if (self.source.len >= start + str_len and
std.mem.eql(u8, self.source[start .. start + str_len], str_kw) and
(start + str_len >= self.source.len or !isIdentContinue(self.source[start + str_len])))
{
self.index = start + str_len;
return self.lexHeredoc(start);
}
const directives = .{
.{ "#import", Tag.hash_import },
.{ "#insert", Tag.hash_insert },
.{ "#run", Tag.hash_run },
.{ "#builtin", Tag.hash_builtin },
.{ "#compiler", Tag.hash_compiler },
.{ "#foreign", Tag.hash_foreign },
.{ "#library", Tag.hash_library },
.{ "#framework", Tag.hash_framework },
.{ "#using", Tag.hash_using },
.{ "#include", Tag.hash_include },
.{ "#source", Tag.hash_source },
.{ "#define", Tag.hash_define },
.{ "#flags", Tag.hash_flags },
.{ "#inline", Tag.hash_inline },
.{ "#objc_call", Tag.hash_objc_call },
.{ "#jni_call", Tag.hash_jni_call },
.{ "#jni_static_call", Tag.hash_jni_static_call },
.{ "#jni_class", Tag.hash_jni_class },
.{ "#jni_interface", Tag.hash_jni_interface },
.{ "#objc_class", Tag.hash_objc_class },
.{ "#objc_protocol", Tag.hash_objc_protocol },
.{ "#swift_class", Tag.hash_swift_class },
.{ "#swift_struct", Tag.hash_swift_struct },
.{ "#swift_protocol", Tag.hash_swift_protocol },
.{ "#extends", Tag.hash_extends },
.{ "#implements", Tag.hash_implements },
.{ "#jni_method_descriptor", Tag.hash_jni_method_descriptor },
.{ "#jni_env", Tag.hash_jni_env },
.{ "#jni_main", Tag.hash_jni_main },
};
inline for (directives) |d| {
const keyword = d[0];
const tag = d[1];
const len: u32 = keyword.len;
if (self.source.len >= start + len and
std.mem.eql(u8, self.source[start .. start + len], keyword) and
(start + len >= self.source.len or !isIdentContinue(self.source[start + len])))
{
self.index = start + len;
return self.makeToken(tag, start, self.index);
}
}
self.index += 1;
return self.makeToken(.invalid, start, self.index);
}
// Punctuation and operators
self.index += 1;
switch (c) {
';' => return self.makeToken(.semicolon, start, self.index),
',' => return self.makeToken(.comma, start, self.index),
'(' => return self.makeToken(.l_paren, start, self.index),
')' => return self.makeToken(.r_paren, start, self.index),
'{' => return self.makeToken(.l_brace, start, self.index),
'}' => return self.makeToken(.r_brace, start, self.index),
'[' => return self.makeToken(.l_bracket, start, self.index),
']' => return self.makeToken(.r_bracket, start, self.index),
'.' => {
if (self.peek() == '.') {
self.index += 1;
return self.makeToken(.dot_dot, start, self.index);
}
return self.makeToken(.dot, start, self.index);
},
'$' => return self.makeToken(.dollar, start, self.index),
':' => {
if (self.peek() == ':') {
self.index += 1;
return self.makeToken(.colon_colon, start, self.index);
}
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.colon_equal, start, self.index);
}
return self.makeToken(.colon, start, self.index);
},
'=' => {
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.equal_equal, start, self.index);
}
if (self.peek() == '>') {
self.index += 1;
return self.makeToken(.fat_arrow, start, self.index);
}
return self.makeToken(.equal, start, self.index);
},
'+' => {
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.plus_equal, start, self.index);
}
return self.makeToken(.plus, start, self.index);
},
'-' => {
if (self.peek() == '-' and (self.index + 1) < self.source.len and self.source[self.index + 1] == '-') {
self.index += 2;
return self.makeToken(.triple_minus, start, self.index);
}
if (self.peek() == '>') {
self.index += 1;
return self.makeToken(.arrow, start, self.index);
}
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.minus_equal, start, self.index);
}
return self.makeToken(.minus, start, self.index);
},
'*' => {
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.star_equal, start, self.index);
}
return self.makeToken(.star, start, self.index);
},
'/' => {
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.slash_equal, start, self.index);
}
return self.makeToken(.slash, start, self.index);
},
'%' => {
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.percent_equal, start, self.index);
}
return self.makeToken(.percent, start, self.index);
},
'&' => {
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.ampersand_equal, start, self.index);
}
return self.makeToken(.ampersand, start, self.index);
},
'@' => return self.makeToken(.at, start, self.index),
'|' => {
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.pipe_equal, start, self.index);
}
if (self.peek() == '>') {
self.index += 1;
return self.makeToken(.pipe_arrow, start, self.index);
}
return self.makeToken(.pipe, start, self.index);
},
'^' => {
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.caret_equal, start, self.index);
}
return self.makeToken(.caret, start, self.index);
},
'~' => return self.makeToken(.tilde, start, self.index),
'?' => {
if (self.peek() == '?') {
self.index += 1;
return self.makeToken(.question_question, start, self.index);
}
if (self.peek() == '.') {
self.index += 1;
return self.makeToken(.question_dot, start, self.index);
}
return self.makeToken(.question, start, self.index);
},
'!' => {
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.bang_equal, start, self.index);
}
return self.makeToken(.bang, start, self.index);
},
'<' => {
if (self.peek() == '<') {
self.index += 1;
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.less_less_equal, start, self.index);
}
return self.makeToken(.less_less, start, self.index);
}
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.less_equal, start, self.index);
}
return self.makeToken(.less, start, self.index);
},
'>' => {
if (self.peek() == '>') {
self.index += 1;
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.greater_greater_equal, start, self.index);
}
return self.makeToken(.greater_greater, start, self.index);
}
if (self.peek() == '=') {
self.index += 1;
return self.makeToken(.greater_equal, start, self.index);
}
return self.makeToken(.greater, start, self.index);
},
else => return self.makeToken(.invalid, start, self.index),
}
}
fn lexNumber(self: *Lexer, start: u32) Token {
// Advance past the initial digit that was already matched
self.index += 1;
// Check for hex (0x/0X) or binary (0b/0B) prefix
if (self.source[start] == '0' and self.index < self.source.len) {
const prefix = self.source[self.index];
if (prefix == 'x' or prefix == 'X') {
self.index += 1; // skip 'x'/'X'
while (self.index < self.source.len and isHexDigit(self.source[self.index])) {
self.index += 1;
}
return self.makeToken(.int_literal, start, self.index);
}
if (prefix == 'b' or prefix == 'B') {
self.index += 1; // skip 'b'/'B'
while (self.index < self.source.len and (self.source[self.index] == '0' or self.source[self.index] == '1')) {
self.index += 1;
}
return self.makeToken(.int_literal, start, self.index);
}
}
while (self.index < self.source.len and isDigit(self.source[self.index])) {
self.index += 1;
}
// Check for float
if (self.index < self.source.len and self.source[self.index] == '.') {
// Look ahead: must be followed by a digit (not `.identifier`)
if (self.index + 1 < self.source.len and isDigit(self.source[self.index + 1])) {
self.index += 1; // skip '.'
while (self.index < self.source.len and isDigit(self.source[self.index])) {
self.index += 1;
}
return self.makeToken(.float_literal, start, self.index);
}
}
return self.makeToken(.int_literal, start, self.index);
}
fn lexIdentifier(self: *Lexer, start: u32) Token {
while (self.index < self.source.len and isIdentContinue(self.source[self.index])) {
self.index += 1;
}
const text = self.source[start..self.index];
if (getKeyword(text)) |kw| {
return self.makeToken(kw, start, self.index);
}
return self.makeToken(.identifier, start, self.index);
}
fn lexString(self: *Lexer, start: u32) Token {
self.index += 1; // skip opening "
while (self.index < self.source.len) {
const ch = self.source[self.index];
if (ch == '"') {
self.index += 1;
return self.makeToken(.string_literal, start, self.index);
}
if (ch == '\\') {
self.index += 1; // skip escape
}
self.index += 1;
}
// Unterminated string
return self.makeToken(.invalid, start, self.index);
}
/// Lex a #string heredoc. Called after "#string" has been matched.
/// Syntax: #string DELIM\n...content...\nDELIM
fn lexHeredoc(self: *Lexer, directive_start: u32) Token {
// Skip spaces/tabs to find delimiter identifier
while (self.index < self.source.len and (self.source[self.index] == ' ' or self.source[self.index] == '\t')) {
self.index += 1;
}
// Read delimiter identifier
const delim_start = self.index;
if (self.index >= self.source.len or !isIdentStart(self.source[self.index])) {
return self.makeToken(.invalid, directive_start, self.index);
}
while (self.index < self.source.len and isIdentContinue(self.source[self.index])) {
self.index += 1;
}
const delimiter = self.source[delim_start..self.index];
// Skip to newline (rest of line after delimiter is ignored)
while (self.index < self.source.len and self.source[self.index] != '\n') {
self.index += 1;
}
if (self.index >= self.source.len) {
return self.makeToken(.invalid, directive_start, self.index);
}
self.index += 1; // skip the newline
// Content starts here
const content_start = self.index;
// Scan lines until delimiter appears at column 0
while (self.index < self.source.len) {
const line_start = self.index;
// Check if this line starts with the delimiter
if (self.index + delimiter.len <= self.source.len and
std.mem.eql(u8, self.source[line_start .. line_start + delimiter.len], delimiter) and
(line_start + delimiter.len >= self.source.len or
!isIdentContinue(self.source[line_start + delimiter.len])))
{
const content_end = line_start;
self.index = line_start + @as(u32, @intCast(delimiter.len));
return self.makeToken(.raw_string_literal, content_start, content_end);
}
// Skip to next line
while (self.index < self.source.len and self.source[self.index] != '\n') {
self.index += 1;
}
if (self.index < self.source.len) {
self.index += 1; // skip '\n'
}
}
// Unterminated heredoc
return self.makeToken(.invalid, directive_start, self.index);
}
fn peek(self: *const Lexer) u8 {
if (self.index < self.source.len) {
return self.source[self.index];
}
return 0;
}
fn makeToken(_: *const Lexer, tag: Tag, start: u32, end: u32) Token {
return .{ .tag = tag, .loc = .{ .start = start, .end = end } };
}
fn isDigit(c: u8) bool {
return c >= '0' and c <= '9';
}
fn isIdentStart(c: u8) bool {
return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '_';
}
fn isHexDigit(c: u8) bool {
return isDigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F');
}
fn isIdentContinue(c: u8) bool {
return isIdentStart(c) or isDigit(c);
}
};
test "lex minimal main" {
var lex = Lexer.init("main :: () { 42; }");
const expected = [_]Tag{ .identifier, .colon_colon, .l_paren, .r_paren, .l_brace, .int_literal, .semicolon, .r_brace, .eof };
for (expected) |exp| {
const tok = lex.next();
try std.testing.expectEqual(exp, tok.tag);
}
}
test "lex with comments" {
var lex = Lexer.init("// comment\nmain :: () { 0; }");
try std.testing.expectEqual(Tag.identifier, lex.next().tag);
try std.testing.expectEqual(Tag.colon_colon, lex.next().tag);
}
test "lex operators" {
var lex = Lexer.init(":= : :: += -= *= /= -> => == != <= >=");
const expected = [_]Tag{
.colon_equal, .colon, .colon_colon, .plus_equal, .minus_equal,
.star_equal, .slash_equal, .arrow, .fat_arrow, .equal_equal,
.bang_equal, .less_equal, .greater_equal,
};
for (expected) |exp| {
try std.testing.expectEqual(exp, lex.next().tag);
}
}
test "lex float" {
var lex = Lexer.init("0.3 42 0.9");
try std.testing.expectEqual(Tag.float_literal, lex.next().tag);
try std.testing.expectEqual(Tag.int_literal, lex.next().tag);
try std.testing.expectEqual(Tag.float_literal, lex.next().tag);
}
test "lex keywords" {
var lex = Lexer.init("if else then true false enum case break return f32 f64 struct");
const expected = [_]Tag{
.kw_if, .kw_else, .kw_then, .kw_true, .kw_false,
.kw_enum, .kw_case, .kw_break, .kw_return, .kw_f32, .kw_f64, .kw_struct,
};
for (expected) |exp| {
try std.testing.expectEqual(exp, lex.next().tag);
}
}
test "lex type-like identifiers" {
// s32, u8, bool, string are identifiers, not keywords
var lex = Lexer.init("s32 u8 bool string");
for (0..4) |_| {
try std.testing.expectEqual(Tag.identifier, lex.next().tag);
}
}
test "lex hash_run" {
var lex = Lexer.init("#run");
try std.testing.expectEqual(Tag.hash_run, lex.next().tag);
try std.testing.expectEqual(Tag.eof, lex.next().tag);
// #run followed by identifier
var lex2 = Lexer.init("#run compute(5)");
try std.testing.expectEqual(Tag.hash_run, lex2.next().tag);
try std.testing.expectEqual(Tag.identifier, lex2.next().tag);
// #running should not match (identContinue after "run")
var lex3 = Lexer.init("#running");
try std.testing.expectEqual(Tag.invalid, lex3.next().tag);
}
test "lex hash_import" {
var lex = Lexer.init("#import \"foo.sx\"");
try std.testing.expectEqual(Tag.hash_import, lex.next().tag);
try std.testing.expectEqual(Tag.string_literal, lex.next().tag);
try std.testing.expectEqual(Tag.eof, lex.next().tag);
// #importing should not match
var lex2 = Lexer.init("#importing");
try std.testing.expectEqual(Tag.invalid, lex2.next().tag);
}
test "lex hash_insert" {
var lex = Lexer.init("#insert #run generate()");
try std.testing.expectEqual(Tag.hash_insert, lex.next().tag);
try std.testing.expectEqual(Tag.hash_run, lex.next().tag);
try std.testing.expectEqual(Tag.identifier, lex.next().tag);
// #inserting should not match
var lex2 = Lexer.init("#inserting");
try std.testing.expectEqual(Tag.invalid, lex2.next().tag);
}
test "lex hash_foreign" {
var lex = Lexer.init("#foreign");
try std.testing.expectEqual(Tag.hash_foreign, lex.next().tag);
try std.testing.expectEqual(Tag.eof, lex.next().tag);
var lex2 = Lexer.init("#foreignx");
try std.testing.expectEqual(Tag.invalid, lex2.next().tag);
}
test "lex hash_library" {
var lex = Lexer.init("#library \"raylib\"");
try std.testing.expectEqual(Tag.hash_library, lex.next().tag);
try std.testing.expectEqual(Tag.string_literal, lex.next().tag);
try std.testing.expectEqual(Tag.eof, lex.next().tag);
var lex2 = Lexer.init("#librarypath");
try std.testing.expectEqual(Tag.invalid, lex2.next().tag);
}
test "lex string" {
var lex = Lexer.init("\"Hello\"");
const tok = lex.next();
try std.testing.expectEqual(Tag.string_literal, tok.tag);
try std.testing.expectEqualStrings("\"Hello\"", tok.slice("\"Hello\""));
}
test "lex multiline string" {
const source: [:0]const u8 = "\"line1\nline2\nline3\"";
var lex = Lexer.init(source);
const tok = lex.next();
try std.testing.expectEqual(Tag.string_literal, tok.tag);
try std.testing.expectEqualStrings("\"line1\nline2\nline3\"", tok.slice(source));
}
test "lex #string heredoc" {
const source: [:0]const u8 = "#string END\nhello world\nEND";
var lex = Lexer.init(source);
const tok = lex.next();
try std.testing.expectEqual(Tag.raw_string_literal, tok.tag);
try std.testing.expectEqualStrings("hello world\n", tok.slice(source));
}
test "lex #string heredoc multiline" {
const source: [:0]const u8 = "#string GLSL\n#version 330\nvoid main() {}\nGLSL";
var lex = Lexer.init(source);
const tok = lex.next();
try std.testing.expectEqual(Tag.raw_string_literal, tok.tag);
try std.testing.expectEqualStrings("#version 330\nvoid main() {}\n", tok.slice(source));
}
test "lex #string heredoc followed by semicolon" {
const source: [:0]const u8 = "#string END\ncontent\nEND;";
var lex = Lexer.init(source);
const tok = lex.next();
try std.testing.expectEqual(Tag.raw_string_literal, tok.tag);
try std.testing.expectEqualStrings("content\n", tok.slice(source));
const semi = lex.next();
try std.testing.expectEqual(Tag.semicolon, semi.tag);
}
test "lex hex literal" {
var lex = Lexer.init("0xFF 0X1A");
const tok1 = lex.next();
try std.testing.expectEqual(Tag.int_literal, tok1.tag);
try std.testing.expectEqualStrings("0xFF", tok1.slice("0xFF 0X1A"));
const tok2 = lex.next();
try std.testing.expectEqual(Tag.int_literal, tok2.tag);
try std.testing.expectEqualStrings("0X1A", tok2.slice("0xFF 0X1A"));
}
test "lex binary literal" {
var lex = Lexer.init("0b1010 0B110");
const tok1 = lex.next();
try std.testing.expectEqual(Tag.int_literal, tok1.tag);
try std.testing.expectEqualStrings("0b1010", tok1.slice("0b1010 0B110"));
const tok2 = lex.next();
try std.testing.expectEqual(Tag.int_literal, tok2.tag);
try std.testing.expectEqualStrings("0B110", tok2.slice("0b1010 0B110"));
}