commit 55fc5790e43ac356787ffd1744201ec5c4152f05 Author: agra Date: Wed Feb 4 01:34:30 2026 +0200 so... jai :D diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ee4c0e0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.zig-cache +zig-out +.DS_Store +.vscode/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4585f62 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 agra + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..e95400d --- /dev/null +++ b/build.zig @@ -0,0 +1,90 @@ +const std = @import("std"); +const math = @import("math"); + +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + const static_llvm = b.option(bool, "static-llvm", "Statically link LLVM (self-contained binary, no LLVM needed at runtime)") orelse false; + const llvm_prefix = b.option([]const u8, "llvm-prefix", "Path to LLVM installation") orelse "/opt/homebrew/opt/llvm@18"; + + const include_dir = b.fmt("{s}/include", .{llvm_prefix}); + const lib_dir = b.fmt("{s}/lib", .{llvm_prefix}); + const llvm_config = b.fmt("{s}/bin/llvm-config", .{llvm_prefix}); + + const mod = b.addModule("sx", .{ + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + + mod.addSystemIncludePath(.{ .cwd_relative = include_dir }); + mod.addLibraryPath(.{ .cwd_relative = lib_dir }); + mod.link_libc = true; + mod.addCSourceFile(.{ + .file = b.path("llvm_shim.c"), + .flags = &.{b.fmt("-I{s}", .{include_dir})}, + }); + + if (static_llvm) { + // Query llvm-config for the static libraries needed + const libs_raw = std.mem.trim(u8, b.run(&.{ llvm_config, "--libs", "--link-static" }), " \t\n\r"); + var libs_it = std.mem.tokenizeAny(u8, libs_raw, " \t\n\r"); + while (libs_it.next()) |flag| { + if (flag.len > 2 and std.mem.startsWith(u8, flag, "-l")) { + mod.linkSystemLibrary(flag[2..], .{ .preferred_link_mode = .static }); + } + } + + // System libraries LLVM depends on (zlib, zstd, curses, etc.) 
+ const syslibs_raw = std.mem.trim(u8, b.run(&.{ llvm_config, "--system-libs", "--link-static" }), " \t\n\r"); + var syslibs_it = std.mem.tokenizeAny(u8, syslibs_raw, " \t\n\r"); + while (syslibs_it.next()) |flag| { + if (flag.len > 2 and std.mem.startsWith(u8, flag, "-l")) { + mod.linkSystemLibrary(flag[2..], .{}); + } + } + + // LLVM is C++ — link the C++ standard library + mod.link_libcpp = true; + } else { + mod.linkSystemLibrary("LLVM-18", .{}); + } + + const exe = b.addExecutable(.{ + .name = "sx", + .root_module = b.createModule(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + .imports = &.{ + .{ .name = "sx", .module = mod }, + }, + }), + }); + + b.installArtifact(exe); + + const run_step = b.step("run", "Run the app"); + const run_cmd = b.addRunArtifact(exe); + run_step.dependOn(&run_cmd.step); + run_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| { + run_cmd.addArgs(args); + } + + const mod_tests = b.addTest(.{ + .root_module = mod, + }); + const run_mod_tests = b.addRunArtifact(mod_tests); + + const exe_tests = b.addTest(.{ + .root_module = exe.root_module, + }); + const run_exe_tests = b.addRunArtifact(exe_tests); + + const test_step = b.step("test", "Run tests"); + test_step.dependOn(&run_mod_tests.step); + test_step.dependOn(&run_exe_tests.step); + +} diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..e6256b6 --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,81 @@ +.{ + // This is the default name used by packages depending on this one. For + // example, when a user runs `zig fetch --save <url>`, this field is used + // as the key in the `dependencies` table. Although the user can choose a + // different name, most users will stick with this provided value. + // + // It is redundant to include "zig" in this name because it is already + // within the Zig package namespace. + .name = .so, + // This is a [Semantic Version](https://semver.org/). 
+ // In a future version of Zig it will be used for package deduplication. + .version = "0.0.0", + // Together with name, this represents a globally unique package + // identifier. This field is generated by the Zig toolchain when the + // package is first created, and then *never changes*. This allows + // unambiguous detection of one package being an updated version of + // another. + // + // When forking a Zig project, this id should be regenerated (delete the + // field and run `zig build`) if the upstream project is still maintained. + // Otherwise, the fork is *hostile*, attempting to take control over the + // original project's identity. Thus it is recommended to leave the comment + // on the following line intact, so that it shows up in code reviews that + // modify the field. + .fingerprint = 0x98c64403d9494683, // Changing this has security and trust implications. + // Tracks the earliest Zig version that the package considers to be a + // supported use case. + .minimum_zig_version = "0.16.0-dev.2290+200fb7c2a", + // This field is optional. + // Each dependency must either provide a `url` and `hash`, or a `path`. + // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. + // Once all dependencies are fetched, `zig build` no longer requires + // internet connectivity. + .dependencies = .{ + // See `zig fetch --save <url>` for a command-line interface for adding dependencies. + //.example = .{ + // // When updating this field to a new URL, be sure to delete the corresponding + // // `hash`, otherwise you are communicating that you expect to find the old hash at + // // the new URL. If the contents of a URL change this will result in a hash mismatch + // // which will prevent zig from using it. + // .url = "https://example.com/foo.tar.gz", + // + // // This is computed from the file contents of the directory of files that is + // // obtained after fetching `url` and applying the inclusion rules given by + // // `paths`. 
+ // // + // // This field is the source of truth; packages do not come from a `url`; they + // // come from a `hash`. `url` is just one of many possible mirrors for how to + // // obtain a package matching this `hash`. + // // + // // Uses the [multihash](https://multiformats.io/multihash/) format. + // .hash = "...", + // + // // When this is provided, the package is found in a directory relative to the + // // build root. In this case the package's hash is irrelevant and therefore not + // // computed. This field and `url` are mutually exclusive. + // .path = "foo", + // + // // When this is set to `true`, a package is declared to be lazily + // // fetched. This makes the dependency only get fetched if it is + // // actually used. + // .lazy = false, + //}, + }, + // Specifies the set of files and directories that are included in this package. + // Only files and directories listed here are included in the `hash` that + // is computed for this package. Only files listed here will remain on disk + // when using the zig package manager. As a rule of thumb, one should list + // files required for compilation plus any license(s). + // Paths are relative to the build root. Use the empty string (`""`) to refer to + // the build root itself. + // A directory listed here means that all files within, recursively, are included. + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + // For example... 
+ //"LICENSE", + //"README.md", + }, +} diff --git a/editors/vscode/.gitignore b/editors/vscode/.gitignore new file mode 100644 index 0000000..72aae85 --- /dev/null +++ b/editors/vscode/.gitignore @@ -0,0 +1,2 @@ +node_modules/ +out/ diff --git a/editors/vscode/.vscodeignore b/editors/vscode/.vscodeignore new file mode 100644 index 0000000..6bcd6ab --- /dev/null +++ b/editors/vscode/.vscodeignore @@ -0,0 +1,3 @@ +src/ +tsconfig.json +.gitignore diff --git a/editors/vscode/language-configuration.json b/editors/vscode/language-configuration.json new file mode 100644 index 0000000..423ddfd --- /dev/null +++ b/editors/vscode/language-configuration.json @@ -0,0 +1,22 @@ +{ + "comments": { + "lineComment": "//" + }, + "brackets": [ + ["(", ")"] + ], + "autoClosingPairs": [ + { "open": "{", "close": "}" }, + { "open": "(", "close": ")" }, + { "open": "\"", "close": "\"", "notIn": ["string"] } + ], + "surroundingPairs": [ + { "open": "{", "close": "}" }, + { "open": "(", "close": ")" }, + { "open": "\"", "close": "\"" } + ], + "indentationRules": { + "increaseIndentPattern": "\\{\\s*$", + "decreaseIndentPattern": "^\\s*\\}" + } +} diff --git a/editors/vscode/package-lock.json b/editors/vscode/package-lock.json new file mode 100644 index 0000000..5b24002 --- /dev/null +++ b/editors/vscode/package-lock.json @@ -0,0 +1,121 @@ +{ + "name": "sx-lang", + "version": "0.0.1", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "sx-lang", + "version": "0.0.1", + "dependencies": { + "vscode-languageclient": "^9.0.1" + }, + "devDependencies": { + "@types/vscode": "^1.75.0", + "typescript": "^5.0.0" + }, + "engines": { + "vscode": "^1.75.0" + } + }, + "node_modules/@types/vscode": { + "version": "1.108.1", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.108.1.tgz", + "integrity": "sha512-DerV0BbSzt87TbrqmZ7lRDIYaMiqvP8tmJTzW2p49ZBVtGUnGAu2RGQd1Wv4XMzEVUpaHbsemVM5nfuQJj7H6w==", + "dev": true, + "license": "MIT" + }, + 
"node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "license": "MIT" + }, + "node_modules/brace-expansion": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/minimatch": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/vscode-jsonrpc": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==", + "license": "MIT", + "engines": 
{ + "node": ">=14.0.0" + } + }, + "node_modules/vscode-languageclient": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/vscode-languageclient/-/vscode-languageclient-9.0.1.tgz", + "integrity": "sha512-JZiimVdvimEuHh5olxhxkht09m3JzUGwggb5eRUkzzJhZ2KjCN0nh55VfiED9oez9DyF8/fz1g1iBV3h+0Z2EA==", + "license": "MIT", + "dependencies": { + "minimatch": "^5.1.0", + "semver": "^7.3.7", + "vscode-languageserver-protocol": "3.17.5" + }, + "engines": { + "vscode": "^1.82.0" + } + }, + "node_modules/vscode-languageserver-protocol": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", + "license": "MIT", + "dependencies": { + "vscode-jsonrpc": "8.2.0", + "vscode-languageserver-types": "3.17.5" + } + }, + "node_modules/vscode-languageserver-types": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==", + "license": "MIT" + } + } +} diff --git a/editors/vscode/package.json b/editors/vscode/package.json new file mode 100644 index 0000000..c80051f --- /dev/null +++ b/editors/vscode/package.json @@ -0,0 +1,68 @@ +{ + "name": "sx-lang", + "displayName": "sx", + "description": "Language support for the sx programming language", + "version": "0.0.1", + "publisher": "swipelab", + "engines": { + "vscode": "^1.75.0" + }, + "categories": [ + "Programming Languages" + ], + "main": "./out/extension.js", + "contributes": { + "languages": [ + { + "id": "sx", + "aliases": [ + "sx" + ], + "extensions": [ + ".sx" + ], + "configuration": "./language-configuration.json" + } + ], + "grammars": [ + { + "language": "sx", + "scopeName": "source.sx", + "path": "./syntaxes/sx.tmLanguage.json" + 
} + ], + "configuration": { + "title": "sx", + "properties": { + "sx.lspPath": { + "type": "string", + "default": "sx-lsp", + "description": "Path to the sx-lsp binary" + } + } + }, + "configurationDefaults": { + "editor.tokenColorCustomizations": { + "textMateRules": [ + { + "scope": "punctuation.definition.template-expression", + "settings": { + "foreground": "#E5C07B" + } + } + ] + } + } + }, + "scripts": { + "build": "tsc -p .", + "watch": "tsc -watch -p ." + }, + "dependencies": { + "vscode-languageclient": "^9.0.1" + }, + "devDependencies": { + "@types/vscode": "^1.75.0", + "typescript": "^5.0.0" + } +} diff --git a/editors/vscode/src/extension.ts b/editors/vscode/src/extension.ts new file mode 100644 index 0000000..ff63a48 --- /dev/null +++ b/editors/vscode/src/extension.ts @@ -0,0 +1,41 @@ +import { + workspace, + ExtensionContext, +} from "vscode"; +import { + LanguageClient, + LanguageClientOptions, + ServerOptions, +} from "vscode-languageclient/node"; + +let client: LanguageClient; + +export function activate(context: ExtensionContext) { + const config = workspace.getConfiguration("sx"); + const lspPath = config.get("lspPath", "sx-lsp"); + + const serverOptions: ServerOptions = { + command: lspPath, + args: ["lsp"], + }; + + const clientOptions: LanguageClientOptions = { + documentSelector: [{ scheme: "file", language: "sx" }], + }; + + client = new LanguageClient( + "sx-lsp", + "sx Language Server", + serverOptions, + clientOptions + ); + + client.start(); +} + +export function deactivate(): Thenable<void> | undefined { + if (!client) { + return undefined; + } + return client.stop(); +} diff --git a/editors/vscode/sx-lang-0.0.1.vsix b/editors/vscode/sx-lang-0.0.1.vsix new file mode 100644 index 0000000..9a01694 Binary files /dev/null and b/editors/vscode/sx-lang-0.0.1.vsix differ diff --git a/editors/vscode/syntaxes/sx.tmLanguage.json b/editors/vscode/syntaxes/sx.tmLanguage.json new file mode 100644 index 0000000..e3b915c --- /dev/null +++ 
b/editors/vscode/syntaxes/sx.tmLanguage.json @@ -0,0 +1,208 @@ +{ + "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", + "name": "sx", + "scopeName": "source.sx", + "patterns": [ + { "include": "#comments" }, + { "include": "#strings" }, + { "include": "#directives" }, + { "include": "#keywords" }, + { "include": "#types" }, + { "include": "#type-declaration" }, + { "include": "#type-annotation" }, + { "include": "#constants" }, + { "include": "#numbers" }, + { "include": "#operators" }, + { "include": "#function-declaration" }, + { "include": "#enum-literal" }, + { "include": "#identifiers" } + ], + "repository": { + "comments": { + "patterns": [ + { + "name": "comment.line.double-slash.sx", + "match": "//.*$" + } + ] + }, + "strings": { + "patterns": [ + { + "begin": "\"", + "end": "\"", + "beginCaptures": { + "0": { "name": "punctuation.definition.string.begin.sx" } + }, + "endCaptures": { + "0": { "name": "punctuation.definition.string.end.sx" } + }, + "patterns": [ + { + "name": "constant.character.escape.sx", + "match": "\\\\[ntr\"\\\\{}]" + }, + { + "begin": "\\{", + "end": "\\}", + "beginCaptures": { + "0": { "name": "punctuation.definition.template-expression.begin.sx" } + }, + "endCaptures": { + "0": { "name": "punctuation.definition.template-expression.end.sx" } + }, + "patterns": [ + { "include": "$self" } + ] + }, + { + "name": "string.quoted.double.sx", + "match": "[^\"\\\\{}]+" + } + ] + } + ] + }, + "directives": { + "patterns": [ + { + "name": "keyword.other.directive.sx", + "match": "#run" + } + ] + }, + "keywords": { + "patterns": [ + { + "name": "keyword.control.sx", + "match": "\\b(if|else|then|return|case|break|defer)\\b" + }, + { + "name": "keyword.other.sx", + "match": "\\b(enum|struct)\\b" + }, + { + "name": "keyword.operator.cast.sx", + "match": "\\bxx\\b" + } + ] + }, + "types": { + "patterns": [ + { + "name": "storage.type.sx", + "match": "\\b(s[0-9]+|u[0-9]+|f32|f64|bool|string)\\b" + } + ] + }, 
+ "type-declaration": { + "patterns": [ + { + "match": "([A-Z][a-zA-Z0-9_]*)\\s*(::)\\s*(?=struct\\b|enum\\b)", + "captures": { + "1": { "name": "entity.name.type.sx" }, + "2": { "name": "keyword.operator.declaration.sx" } + } + } + ] + }, + "type-annotation": { + "patterns": [ + { + "match": "(?|=>" + }, + { + "name": "keyword.operator.comparison.sx", + "match": "==|!=|<=|>=" + }, + { + "name": "keyword.operator.assignment.sx", + "match": "[+\\-*/]=" + }, + { + "name": "keyword.operator.sx", + "match": "[+\\-*/=<>!]" + } + ] + }, + "function-declaration": { + "patterns": [ + { + "match": "([a-zA-Z_][a-zA-Z0-9_]*)\\s*(::)\\s*(?=\\(|\\{)", + "captures": { + "1": { "name": "entity.name.function.sx" }, + "2": { "name": "keyword.operator.declaration.sx" } + } + } + ] + }, + "enum-literal": { + "patterns": [ + { + "match": "\\.([a-zA-Z_][a-zA-Z0-9_]*)", + "captures": { + "1": { "name": "variable.other.enummember.sx" } + } + } + ] + }, + "identifiers": { + "patterns": [ + { + "name": "variable.other.generic-type.sx", + "match": "\\$([a-zA-Z_][a-zA-Z0-9_]*)", + "captures": { + "1": { "name": "entity.name.type.parameter.sx" } + } + }, + { + "match": "\\b(io)\\b", + "name": "support.module.sx" + } + ] + } + } +} diff --git a/editors/vscode/tsconfig.json b/editors/vscode/tsconfig.json new file mode 100644 index 0000000..e89a992 --- /dev/null +++ b/editors/vscode/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "ES2020", + "outDir": "out", + "rootDir": "src", + "lib": ["ES2020"], + "sourceMap": true, + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true + }, + "include": ["src"], + "exclude": ["node_modules"] +} diff --git a/examples/01-basic.sx b/examples/01-basic.sx new file mode 100644 index 0000000..69dc250 --- /dev/null +++ b/examples/01-basic.sx @@ -0,0 +1,5 @@ +#import "modules/std.sx"; + +main :: () -> s32 { + if false then 40 else 42; +} \ No newline at end of file diff --git a/examples/02-stdout.sx 
b/examples/02-stdout.sx new file mode 100644 index 0000000..f484c9f --- /dev/null +++ b/examples/02-stdout.sx @@ -0,0 +1,4 @@ +#import "modules/std.sx"; +main :: () { + print("Hello\n"); +} \ No newline at end of file diff --git a/examples/03-structs.sx b/examples/03-structs.sx new file mode 100644 index 0000000..7acca06 --- /dev/null +++ b/examples/03-structs.sx @@ -0,0 +1,22 @@ +#import "modules/std.sx"; +Vec4 :: struct { + x, y, z, w: f32; +} + +main :: () { + v1 : Vec4 = .{ 1, 2, 3, 0}; + v2 := Vec4.{ 4, 1, 1, 3}; + v3 := Vec4.{ w=0, x=2, y=3, z=4}; + z := 5.0; // z is f32 + w := 6.0; // w is f32 + v4 := Vec4.{ y=3, x=9, w, z}; + + v4.y = 0; + print("v1: {}\nv2: {}\nv3: {}\nv4: {}\n", v1, v2, v3, v4); +} + +// ** stdout ** +//v1: Vec4{x:1.0, y:2.0, z:3.0, w:0.0} +//v2: Vec4{x:4.0, y:1.0, z:1.0, w:3.0} +//v3: Vec4{x:2.0, y:3.0, z:4.0, w:0.0} +//v4: Vec4{x:9.0, y:0.0, z:5.0, w:6.0} diff --git a/examples/04-shadow.sx b/examples/04-shadow.sx new file mode 100644 index 0000000..fd5a2e7 --- /dev/null +++ b/examples/04-shadow.sx @@ -0,0 +1,20 @@ +#import "modules/std.sx"; +main :: () -> s32 { + x := 42; + { + print("scope opened\n"); + defer print("scope closed\n"); + + // define an inner variable x shadowing the one defined in the outer scope(s) + x:= 6; + print("scoped x: {}\n", x); //expect 6 + } + print("main x: {}\n", x); //expect 42 +} + +// ** stdout ** +// scope opened +// scoped x: 6 +// scope closed +// main x: 42 +// diff --git a/examples/05-run.sx b/examples/05-run.sx new file mode 100644 index 0000000..2ef6afc --- /dev/null +++ b/examples/05-run.sx @@ -0,0 +1,20 @@ +#import "modules/std.sx"; +// this will bake x to be 7 as a global constant +x :: #run compute(5); + +compute :: (v: s32) -> s32 => v + 2; + +main :: () { + //test + y :: #run compute(7); + c :: 2; + print("hello {}\n", x + y * c); +} + +#run main(); + +// ** stdout after build ** +// hello 25 + +// ** stdout after run ** +// hello 25 diff --git a/examples/06-generic.sx b/examples/06-generic.sx 
new file mode 100644 index 0000000..20e9d53 --- /dev/null +++ b/examples/06-generic.sx @@ -0,0 +1,17 @@ +#import "modules/std.sx"; +sum :: (a:$T, b:T) -> T { + return a + b; +} + +main :: () { + x:=sum(2,3); + + print("sum: {}\n", x); + print("sum: {}\n", sum(40,2)); + print("sum: {}\n", sum(40,2.5)); +} + +// ** stdout ** +// sum: 42 +// sum: 42.500000 +// diff --git a/examples/07-defer.sx b/examples/07-defer.sx new file mode 100644 index 0000000..4502c8c --- /dev/null +++ b/examples/07-defer.sx @@ -0,0 +1,11 @@ +#import "modules/std.sx"; +main :: () -> s32 { + defer print("still here\n"); + return 42; +} + +// ** exit code ** +// 42 +// ** stdout ** +// still here +// \ No newline at end of file diff --git a/examples/08-types.sx b/examples/08-types.sx new file mode 100644 index 0000000..8d420c0 --- /dev/null +++ b/examples/08-types.sx @@ -0,0 +1,43 @@ +#import "modules/std.sx"; +SPECIAL_VALUE :u8: 42; + +resolve :: (x: u8) -> s32 { + return 12 + x; +} + +Foo :: struct { + a : u2; // this will have 0 as default + b : u8 = SPECIAL_VALUE; + c : u8 = ---; // default for c is undefined + d : u8 = #run xx resolve(5); // converts s32 to u8 +} + +main :: () { + a : Foo; // default value of 0 + print("a 0 : {}\n", a); + a.a = 1; + // a.c is still undefined at this point + a.c = 8; + print("a 1 : {}\n", a); + + large: f64 = 5989.5; + b : Foo = ---; // undefined + b.a = 1; + b.c = xx large; // converts f64 to u8 + // expect stdout : "b: Foo{a:1, b: 42, c: 7, d: 12}" + print("b: {}", b); + print("\n"); + + f := Pack.{1,0,3,5,9,100,3.5}; + print("{}\n", f); +} + +Pack :: struct { + a: u1; + b: u2; + c: u8; + d: u32; + f: u64; + v: s32; + x: f32; +} diff --git a/examples/09-import.sx b/examples/09-import.sx new file mode 100644 index 0000000..7ca61db --- /dev/null +++ b/examples/09-import.sx @@ -0,0 +1,16 @@ +std :: #import "modules/std.sx"; + +//flat +#import "modules/math.sx"; + +main :: () -> s32 { + { + defer std.print("after hello"); + //expect stdout : hello there + 
std.print("hello there"); + } + + v:= std.Vector(3,f32).[1,2,3]; + + std.print("\n{}\n", v); +} diff --git a/examples/10-generic-struct.sx b/examples/10-generic-struct.sx new file mode 100644 index 0000000..fc22a80 --- /dev/null +++ b/examples/10-generic-struct.sx @@ -0,0 +1,91 @@ +#import "modules/std.sx"; + +Vec :: struct($N: u32, $T:Type) { + // (LLVM Vector) + // Vector is a Builtin Type + data: Vector(N,T); +} + +Complex :: ($T:Type) -> Type { + return struct { + value: T; + //..inject + count: u32; + }; +} + +Vec3 :: Vec(3, f32); + +vec3 :: (x:f32, y:f32, z:f32) -> Vector(3,f32) { + .[x, y, z]; +} + +Foo :: Complex(u32); + +main :: () { + v1 := Vec3.{data = .[1,3,2]}; + print("v1: {}\n", v1); + //stdout: Vec(3,f32){data: [1.0, 3.0, 2.0]} + // + + v2 := vec3(1,3,2); + print("v2: {}\n", v2); + //stdout: [1.0, 3.0, 2.0] + // + + // [N x T] (LLVM Array) + buffer : [5]f32 = .[0, 2, 3.5, 4, 0]; + print("buff: {}\n", buffer); + //stdout: [0.0, 2.0, 3.5, 4.0, 0.0] + // + + comp : Foo = .{value = 42, count = 1}; + print("comp: {}\n", comp); + //stdout: Foo{value: 42, count: 1} + // + + // Vector arithmetic + v3 := vec3(3,2,1); + add := v2 + v3; + print("add: {}\n", add); + + // Element access + v2x := v2.x; + print("v2.x: {}\n", v2x); + + // Index access + v2i := v2[1]; + print("v2[1]: {}\n", v2i); + + // Scalar broadcast + scaled := v2 * 2.0; + print("scaled: {}\n", scaled); + + // Negation + neg := -v2; + print("neg: {}\n", neg); + + // sqrt + s := sqrt(9.0); + print("sqrt(9): {}\n", s); + + // inline generic type + Sx :: (user: $T) -> Type { + return union { + counter: s32; + user: T; + }; + } + + sx := Sx(f32).user(0.5); + print("{}\n", sx); + + print("{}\n", size_of(f32)); + print("{}\n", size_of(Sx(f32))); + print("{}\n", size_of(Foo)); + print("{}\n", size_of(Complex)); + + + size:= size_of(Sx); + print("{}\n", size); +} diff --git a/examples/11-vector-math.sx b/examples/11-vector-math.sx new file mode 100644 index 0000000..19da380 --- /dev/null +++ 
b/examples/11-vector-math.sx @@ -0,0 +1,28 @@ +#import "modules/std.sx"; +math :: #import "modules/std/math.sx"; + +vec3 :: (x:f32, y:f32, z:f32) -> Vector(3,f32) { + .[x, y, z]; +} + +main :: () { + a := vec3(1, 0, 0); + b := vec3(0, 1, 0); + + // dot product + d := math.dot(a, b); + print("dot: {}\n", d); + + // cross product + cr := math.cross(a, b); + print("cross: {}\n", cr); + + // length + v := vec3(3, 4, 0); + len := math.length(v); + print("length: {}\n", len); + + // normalize + n := math.normalize(v); + print("norm: {}\n", n); +} diff --git a/examples/12-meta.sx b/examples/12-meta.sx new file mode 100644 index 0000000..b8601c6 --- /dev/null +++ b/examples/12-meta.sx @@ -0,0 +1,12 @@ +#import "modules/std.sx"; +#import "modules/math.sx"; + +main :: () { + x:Type = f64; + v:f64 = 3.2; + print("{}\n", x); + print("{}\n", v); + + x= Vec4; + print("{}\n", x); +} diff --git a/examples/13-code.sx b/examples/13-code.sx new file mode 100644 index 0000000..7488446 --- /dev/null +++ b/examples/13-code.sx @@ -0,0 +1,9 @@ +#import "modules/std.sx"; + +generate::() -> string { + return "print(\"hello from the other side\n\");"; +} + +main :: () { + #insert #run generate(); +} \ No newline at end of file diff --git a/examples/14-demo.sx b/examples/14-demo.sx new file mode 100644 index 0000000..e1f1814 --- /dev/null +++ b/examples/14-demo.sx @@ -0,0 +1,15 @@ +std :: #import "modules/std.sx"; + +vec3 :: (x:f32, y:f32, z:f32) -> std.Vector(3, f32) { + .[x,y,z]; +} + +main :: () { + v1 := vec3(1,0,0); + v2 := vec3(0,0,1); + s := 0.5; + + sum := (v1 - v2);// math.cross(v1, v2); + + std.print("{}\n", sum); +} diff --git a/examples/15-while.sx b/examples/15-while.sx new file mode 100644 index 0000000..cdf9049 --- /dev/null +++ b/examples/15-while.sx @@ -0,0 +1,50 @@ +#import "modules/std.sx"; + +sumOf10 :: () -> s32 { + i:= 1; + s:=0; + while i <= 10 { + s+=i; + i+=1; + } + s; +} + +someSum :: #run sumOf10(); + +main :: { + // Basic while loop: count to 5 + i := 0; + while i 
< 5 { + i += 1; + } + print("count: {}\n", i); + + // While with break + x := 1; + while x < 100 { + if x == 12 { + break; + } + x += 1; + } + print("break at: {}\n", x); + + // While with continue: sum odd numbers 1-9 + sum := 0; + j := 0; + while j < 10 { + j += 1; + // Skip even numbers + if j == 2 { continue; } + if j == 4 { continue; } + if j == 6 { continue; } + if j == 8 { continue; } + if j == 10 { continue; } + sum += j; + } + print("sum of odd 1-9: {}\n", sum); + + print("sum {}", someSum); + +} diff --git a/examples/16-union.sx b/examples/16-union.sx new file mode 100644 index 0000000..bb14a86 --- /dev/null +++ b/examples/16-union.sx @@ -0,0 +1,48 @@ +#import "modules/std.sx"; + +Shape :: union { + circle: f32; + rect: s32; + none; +} + +main :: () { + // Construction with .variant(payload) + s :Shape = .circle(3.14); + print("circle: {}\n", s); + + // Payload access + r := s.circle; + print("radius: {}\n", r); + + // Void variant via enum literal + s = .none; + print("none: {}\n", s); + + // Reassign with payload + s = .rect(42); + print("rect: {}\n", s); + + // Explicit prefix construction + sh :Shape = Shape.circle(2.71); + print("sh: {}\n", sh); + + // Field access on second union variable + sh2 :Shape = .rect(10); + val := sh2.rect; + print("rect val: {}\n", val); + + // Match on union + if sh2 == { + case .circle: print("matched circle\n"); + case .rect: print("matched rect\n"); + case .none: print("matched none\n"); + } + + cs := if sh2 == { + case .circle: 1; + case .rect: 2; + case .none: 3; + } + print("case : {}", cs); +} diff --git a/examples/17-lambda.sx b/examples/17-lambda.sx new file mode 100644 index 0000000..ab10547 --- /dev/null +++ b/examples/17-lambda.sx @@ -0,0 +1,9 @@ +#import "modules/std.sx"; + +main :: () { + fx :: (s:s3) -> s3 { + s; + } + + print("{}\n", fx(133)); +} diff --git a/examples/18-conditions.sx b/examples/18-conditions.sx new file mode 100644 index 0000000..f6bdfd2 --- /dev/null +++ b/examples/18-conditions.sx @@ 
-0,0 +1,23 @@ +#import "modules/std.sx"; + +main :: () { + x:= 32; + y:= 40; + + if 0 <= x <= 100 and 0 <= y <= 100 { + print("contained"); + } + + if 0 <= x <= 100 and 0 <= y <= 100 { + print("contained"); + } + + + if 1000 > x > -100 and 0 <= y <= 100 { + print("contained"); + } + + if 1000 > x >= -100 and 0 <= y <= 100 { + print("contained"); + } +} diff --git a/examples/19-varargs.sx b/examples/19-varargs.sx new file mode 100644 index 0000000..ccac756 --- /dev/null +++ b/examples/19-varargs.sx @@ -0,0 +1,35 @@ +#import "modules/std.sx"; + +sum :: (args: ..s32) -> s32 { + result := 0; + for args { + result = result + it; + } + result; +} + +print_all :: (args: ..s32) { + for args { + write(int_to_string(it)); + write(" "); + } + write("\n"); +} + +main :: () -> s32 { + write(int_to_string(sum(10, 20, 30))); + write("\n"); + + print_all(1, 2, 3, 4, 5); + + arr : [3]s32 = .[10, 20, 30]; + write(int_to_string(sum(..arr))); + write("\n"); + + for arr { + write(int_to_string(it)); + write(" "); + } + write("\n"); + 0; +} diff --git a/examples/20-any-varargs.sx b/examples/20-any-varargs.sx new file mode 100644 index 0000000..41d278c --- /dev/null +++ b/examples/20-any-varargs.sx @@ -0,0 +1,47 @@ +#import "modules/std.sx"; + +Point :: struct { + x: s32; + y: s32; +} + +// Print all arguments — accepts any type, dispatches via type-switch +print_any :: (args: ..Any) { + for args { + type := type_of(it); + if type == { + case int: write(int_to_string(cast(s32) it)); + case string: write(cast(string) it); + case bool: write(bool_to_string(cast(bool) it)); + case float: write(float_to_string(cast(f64) it)); + case Point: { + p := cast(Point) it; + write("("); + write(int_to_string(p.x)); + write(","); + write(int_to_string(p.y)); + write(")"); + } + } + write(" "); + } + write("\n"); +} + +count :: (args: ..Any) -> s32 { + args.len; +} + +main :: () -> s32 { + print_any(42, "hello", true, 3.14); + + // Test with struct + p := Point.{ x=10, y=20 }; + print_any("point:", p, 
99); + + // Test count + write(int_to_string(count(1, 2, 3))); + write("\n"); + + 0; +} diff --git a/examples/21-categories.sx b/examples/21-categories.sx new file mode 100644 index 0000000..c2a5672 --- /dev/null +++ b/examples/21-categories.sx @@ -0,0 +1,19 @@ +#import "modules/std.sx"; + +Point :: struct { + x, y: s32; +} + +Color :: struct { + r, g, b: s32; +} + +main :: () { + p := Point.{10, 20}; + c := Color.{255, 128, 0}; + print("p: {}\n", p); + print("c: {}\n", c); + print("n: {}\n", 42); + print("s: {}\n", "hello"); + print("b: {}\n", true); +} diff --git a/examples/22-anytype.sx b/examples/22-anytype.sx new file mode 100644 index 0000000..55cd709 --- /dev/null +++ b/examples/22-anytype.sx @@ -0,0 +1,11 @@ +#import "modules/std.sx"; + +main :: { + i := 0; + while i < 10 { + i+=1; + if i == 2 then continue; + if i == 5 then break; + } + print("{}\n", i); +} \ No newline at end of file diff --git a/examples/modules/math.sx b/examples/modules/math.sx new file mode 100644 index 0000000..8266786 --- /dev/null +++ b/examples/modules/math.sx @@ -0,0 +1,17 @@ +#import "std.sx"; + +dot :: (a: Vector(3,f32), b: Vector(3,f32)) -> f32 { + return a.x*b.x + a.y*b.y + a.z*b.z; +} + +cross :: (a: Vector(3,f32), b: Vector(3,f32)) -> Vector(3,f32) { + .[a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x]; +} + +length :: (v: Vector(3,f32)) -> f32 { + return sqrt(dot(v, v)); +} + +normalize :: (v: Vector(3,f32)) -> Vector(3,f32) { + return v / length(v); +} \ No newline at end of file diff --git a/examples/modules/std.sx b/examples/modules/std.sx new file mode 100644 index 0000000..79e729c --- /dev/null +++ b/examples/modules/std.sx @@ -0,0 +1,220 @@ +Vector :: ($N: int, $T: Type) -> Type #builtin; +write :: (str: string) -> void #builtin; +sqrt :: (x: $T) -> T #builtin; +size_of :: ($T: Type) -> s32 #builtin; +alloc :: (size: s32) -> string #builtin; +type_of :: (val: $T) -> Type #builtin; +type_name :: ($T: Type) -> string #builtin; +field_count :: ($T: Type) -> s32 
#builtin; +field_name :: ($T: Type, idx: s32) -> string #builtin; +field_value :: (s: $T, idx: s32) -> Any #builtin; + +int_to_string :: (n: s32) -> string { + if n == 0 { return "0"; } + neg := n < 0; + v := if neg then 0 - n else n; + tmp := v; + len := 0; + while tmp > 0 { len += 1; tmp = tmp / 10; } + total := if neg then len + 1 else len; + buf := alloc(total); + i := total - 1; + while v > 0 { + buf[i] = (v % 10) + 48; + v = v / 10; + i -= 1; + } + if neg { buf[0] = 45; } + buf; +} + +bool_to_string :: (b: bool) -> string { + if b then "true" else "false"; +} + +float_to_string :: (f: f64) -> string { + neg := f < 0.0; + v := if neg then 0.0 - f else f; + int_part := cast(s32) v; + frac := cast(s32) ((v - cast(f64) int_part) * 1000000.0); + if frac < 0 { frac = 0 - frac; } + istr := int_to_string(int_part); + fstr := int_to_string(frac); + il := istr.len; + fl := fstr.len; + prefix := if neg then 1 else 0; + total := prefix + il + 1 + 6; + buf := alloc(total); + pos := 0; + if neg { buf[0] = 45; pos = 1; } + i := 0; + while i < il { buf[pos] = istr[i]; pos += 1; i += 1; } + buf[pos] = 46; + pos += 1; + pad := 6 - fl; + j := 0; + while j < pad { buf[pos] = 48; pos += 1; j += 1; } + k := 0; + while k < fl { buf[pos] = fstr[k]; pos += 1; k += 1; } + buf; +} + +concat :: (a: string, b: string) -> string { + al := a.len; + bl := b.len; + buf := alloc(al + bl); + i := 0; + while i < al { buf[i] = a[i]; i += 1; } + j := 0; + while j < bl { buf[al + j] = b[j]; j += 1; } + buf; +} + +substr :: (s: string, start: s32, len: s32) -> string { + buf := alloc(len); + i := 0; + while i < len { + buf[i] = s[start + i]; + i += 1; + } + buf; +} + +struct_to_string :: (s: $T) -> string { + result := concat(type_name(T), "{"); + i := 0; + while i < field_count(T) { + if i > 0 { result = concat(result, ", "); } + result = concat(result, field_name(T, i)); + result = concat(result, ": "); + result = concat(result, any_to_string(field_value(s, i))); + i += 1; + } + concat(result, 
"}"); +} + +enum_to_string :: (e: $T) -> string { + concat(".", field_name(T, cast(s32) e)); +} + +vector_to_string :: (v: $T) -> string { + result := "["; + i := 0; + while i < field_count(T) { + if i > 0 { result = concat(result, ", "); } + result = concat(result, any_to_string(field_value(v, i))); + i += 1; + } + concat(result, "]"); +} + +array_to_string :: (a: $T) -> string { + result := "["; + i := 0; + while i < field_count(T) { + if i > 0 { result = concat(result, ", "); } + result = concat(result, any_to_string(field_value(a, i))); + i += 1; + } + concat(result, "]"); +} + +union_to_string :: (u: $T) -> string { + tag := cast(s32) u; + result := concat(".", field_name(T, tag)); + payload := field_value(u, tag); + pstr := any_to_string(payload); + if pstr.len > 0 { + result = concat(result, concat("(", concat(pstr, ")"))); + } + result; +} + +any_to_string :: (val: Any) -> string { + result := ""; + type := type_of(val); + if type == { + case void: result = ""; + case int: result = int_to_string(xx val); + case string: { s : string = xx val; result = s; } + case bool: result = bool_to_string(xx val); + case float: result = float_to_string(xx val); + case struct: result = struct_to_string(cast(type) val); + case enum: result = enum_to_string(cast(type) val); + case vector: result = vector_to_string(cast(type) val); + case array: result = array_to_string(cast(type) val); + case union: result = union_to_string(cast(type) val); + } + result; +} + +build_print :: (fmt: string) -> string { + code := "result := \"\"; "; + seg_start := 0; + i := 0; + arg_idx := 0; + while i < fmt.len { + if fmt[i] == 123 { + if i + 1 < fmt.len { + if fmt[i + 1] == 125 { + if i > seg_start { + code = concat(code, "result = concat(result, substr(fmt, "); + code = concat(code, int_to_string(seg_start)); + code = concat(code, ", "); + code = concat(code, int_to_string(i - seg_start)); + code = concat(code, ")); "); + } + code = concat(code, "result = concat(result, 
any_to_string(args["); + code = concat(code, int_to_string(arg_idx)); + code = concat(code, "])); "); + arg_idx += 1; + i += 2; + seg_start = i; + } else if fmt[i + 1] == 123 { + code = concat(code, "result = concat(result, substr(fmt, "); + code = concat(code, int_to_string(seg_start)); + code = concat(code, ", "); + code = concat(code, int_to_string(i - seg_start + 1)); + code = concat(code, ")); "); + i += 2; + seg_start = i; + } else { + i += 1; + } + } else { + i += 1; + } + } else if fmt[i] == 125 { + if i + 1 < fmt.len { + if fmt[i + 1] == 125 { + code = concat(code, "result = concat(result, substr(fmt, "); + code = concat(code, int_to_string(seg_start)); + code = concat(code, ", "); + code = concat(code, int_to_string(i - seg_start + 1)); + code = concat(code, ")); "); + i += 2; + seg_start = i; + } else { + i += 1; + } + } else { + i += 1; + } + } else { + i += 1; + } + } + if seg_start < fmt.len { + code = concat(code, "result = concat(result, substr(fmt, "); + code = concat(code, int_to_string(seg_start)); + code = concat(code, ", "); + code = concat(code, int_to_string(fmt.len - seg_start)); + code = concat(code, ")); "); + } + code = concat(code, "write(result);"); + code; +} + +print :: ($fmt: string, args: ..Any) { + #insert build_print(fmt); +} diff --git a/examples/vision.sx b/examples/vision.sx new file mode 100644 index 0000000..71b2031 --- /dev/null +++ b/examples/vision.sx @@ -0,0 +1,98 @@ +main :: { + // imagine a game loop + while(running) { + render_ui(build_menu); + } +} + +build_menu :: (ctx: ViewContext) -> View { + // use ctx to allocate some state at the for Menu ViewContext + state : MenuState = ctx.state(MenuState); + + // named args + HStack(ctx, + children = .[ + Button(ctx, + label = "Up", + onTap = ctx.callback(goUp, state), + ), + ScrollView(ctx, + LazyVStack(ctx, + builder = ctx.callback(build_menu_entry, state), + ), + ) + ], + ); +} + +build_menu_entry :: (ctx: *ViewContext, index: s32, state: *MenuState) -> View { + entry := 
state.entries[index]; + is_selected := index == state.selected_index; + icon := if entry.is_dir then "[D]" else " "; + Button(ctx, + label = concat(icon, " ", entry.name), + on_tap = ctx.callback(menu_go, state, index), + ); +} + +ViewContext :: struct { + //TBD +} + +MenuState :: struct { + current_path: string; + entries: List(MenuEntry); + error_message: string; +} + +MenuEntry :: struct { + name: string; + is_dir: bool; +} + +menu_go_up :: (state: *MenuState) { + parent := fs.path.dirname(state.current_path) else return; + // this frees the current path & copies parent to be owned by MenuState + state.current_path = parent; + menu_refresh(state); +} + +menu_go :: (state: *MenuState, s32 index) { + entry := state.entries[index] else return; + state.current_path := concat(state.current_path, "/", entry.name); + menu_refresh(state); +} + +menu_refresh :: (state: *MenuState) { + // this could retain the capacity + state.entries.clear(); + // this would basically create a copy of the empty string :( + state.error_message = ""; + + // ... multi return params vs Generic Result to deal with exceptions + // ... 
nullable + dir := io.Dir.open(state.current_path); + if !dir { + state.error_message = "failed to open"; + return; + }; + defer dir.close(); + + for iter.iterate() { + entries.append(.{it.name, it.kind == .Directory}); + } +} + +HStackState :: struct { + spacing: f32 = 8; + alignment: VerticalAlignment = .center; + padding: f32 = 0; + background: ?Color; + corner_radius: f32 = 0, +} + +HStack :: (ctx: ViewContext, children: []View) -> View { + data := ctx.alloc(HStackState); + data.* = .{}; +} + diff --git a/llvm_shim.c b/llvm_shim.c new file mode 100644 index 0000000..c0fd238 --- /dev/null +++ b/llvm_shim.c @@ -0,0 +1,17 @@ +#include +#include +#include +#include + +void sx_llvm_init_all_targets(void) { + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargets(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllAsmPrinters(); + LLVMInitializeAllAsmParsers(); +} + +void sx_llvm_init_native_target(void) { + LLVMInitializeNativeTarget(); + LLVMInitializeNativeAsmPrinter(); +} diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..63e74d8 --- /dev/null +++ b/readme.md @@ -0,0 +1,40 @@ +# sx + +*** HIGHLY EXPERIMENTAL *** DON'T USE *** + +This experiment is trying to answer a few questions: + +Q: Can we have a systems language to build declarative UI? + + +NOTE: +> i hope you have memory... currently it doesn't free anything :D + +## Building + +Requires **Zig 0.16+** and **LLVM 19**. 
+ +```sh +zig build +``` + +## Usage + +```sh +# compile to binary +sx build examples/06-generic.sx + +# compile and run +sx run examples/06-generic.sx + +# emit LLVM IR +sx ir examples/06-generic.sx + +# start the language server +sx lsp +``` + +## Acknowledgments + +- [Jonathan Blow](https://en.wikipedia.org/wiki/Jonathan_Blow) — for Jai, the language that inspired this one +- [Andrew Kelley](https://andrewkelley.me) — for Zig, which made this compiler a joy to write diff --git a/specs.md b/specs.md new file mode 100644 index 0000000..9a7bfc6 --- /dev/null +++ b/specs.md @@ -0,0 +1,853 @@ +# sx language specification + +## 1. Lexical Structure + +### Comments +Line comments start with `//` and extend to end of line. +```sx +// this is a comment +``` + +### Identifiers +- Lowercase or mixed-case for variables, functions: `x`, `compute`, `main` +- UPPER_SNAKE_CASE for constants: `SOME_INT`, `SOME_STR` +- PascalCase for types: `Foo` + +### Literals + +| Kind | Examples | Type | +|-----------|---------------------|---------| +| Integer | `0`, `42`, `0xFF`, `0b1010` | `s32` | +| Float | `0.3`, `0.9` | `f32` | +| String | `"Hello"`, `"z: {z}"` | `string` | +| Boolean | `true`, `false` | `bool` | +| Enum | `.variant1` | inferred from context | +| Undefined | `---` | context-dependent | + +### Keywords +`if`, `else`, `then`, `while`, `for`, `break`, `continue`, `true`, `false`, `enum`, `struct`, `union`, `case`, `return`, `defer`, `xx`, `cast`, `and`, `or` + +### Operators + +| Operator | Meaning | +|----------|------------------| +| `+` | addition | +| `-` | subtraction / negation | +| `*` | multiplication | +| `/` | division | +| `==` | equality | +| `!=` | inequality | +| `<` | less than | +| `>` | greater than | +| `<=` | less or equal | +| `>=` | greater or equal | +| `and` | logical AND (short-circuit) | +| `or` | logical OR (short-circuit) | +| `+=` | add-assign | +| `-=` | sub-assign | +| `*=` | mul-assign | +| `/=` | div-assign | + +### Delimiters and Punctuation + +| Token | 
Meaning | +|--------|--------------------------------------| +| `::` | constant binding / definition | +| `:=` | variable binding (mutable, inferred) | +| `:` | type annotation | +| `=` | assignment (in typed var decl) | +| `;` | statement terminator | +| `,` | separator | +| `.` | field access / enum literal prefix | +| `->` | return type annotation | +| `=>` | lambda arrow | +| `$` | generic type parameter introduction | +| `---` | undefined value | +| `()` | grouping / params | +| `{}` | blocks / bodies | + +--- + +## 2. Type System + +### Primitive Types +- `s1`..`s64` — signed integers (1 to 64 bits). `s32` is the default for integer literals. +- `u1`..`u64` — unsigned integers (1 to 64 bits). +- `f32` — 32-bit floating point +- `f64` — 64-bit floating point +- `bool` — boolean (`true` / `false`) +- `string` — string of characters +- `Any` — type-erased value, represented as `{ i32, i64 }` (type tag + payload). Used for variadic arguments and runtime type dispatch. +- `Type` — compile-time type value. At runtime, represented as an `i32` type tag (same tag space as `Any`). + +### Enum Types +User-defined sum types with named variants. +```sx +Foo :: enum { + variant1; + variant2; +} +``` +Variants are referenced with dot-prefix syntax: `.variant1` + +### Struct Types +User-defined product types with named fields. +```sx +Vec4 :: struct { + x, y, z, w: f32; +} +``` +Fields are declared as `name1, name2: type;` (comma-separated names sharing a type, semicolon-terminated). + +#### Field Defaults +Fields may have default values. Fields without an explicit default have a zero-value default. `---` marks a field as explicitly undefined. 
+```sx +Foo :: struct { + a : u2; // default is 0 + b : u8 = 42; // default is 42 + c : u8 = ---; // default is undefined +} +``` + +#### Struct Literals +```sx +// Positional (with type annotation — type inferred from annotation) +v1 : Vec4 = .{ 1, 2, 3, 0 }; + +// Positional (with type prefix) +v2 := Vec4.{ 4, 1, 1, 3 }; + +// Named fields (any order) +v3 := Vec4.{ w=0, x=2, y=3, z=4 }; + +// Mixed named + shorthand (bare identifier = field name matches variable name) +z := 5.0; +w := 6.0; +v4 := Vec4.{ y=3, x=9, w, z }; +``` + +#### Field Access and Assignment +```sx +v1.x // read field x of struct v1 +v1.x = 3.0; // assign to field x of struct v1 +``` + +#### Struct Interpolation +Struct values in string interpolation print as `TypeName{field:value, ...}`: +```sx +print("{v1}"); // Vec4{x:1.0, y:2.0, z:3.0, w:0.0} +``` + +### Union Types (Tagged Unions) +Sum types where each variant can carry typed data or be void. Internally represented as `{ i32, [max_payload_size x i8] }`. + +#### Declaration +```sx +Shape :: union { + circle: f32; // typed variant + rect: s32; // typed variant + none; // void variant +} +``` + +#### Construction +```sx +s :Shape = .circle(3.14); // inferred from context +s = .none; // void variant (enum literal syntax) +s = Shape.rect(42); // explicit prefix +``` + +#### Payload Access +```sx +r := s.circle; // load payload as f32 (undefined behavior if wrong variant active) +``` + +#### Pattern Matching +```sx +if s == { + case .circle: print("circle\n"); + case .rect: print("rect\n"); + case .none: print("none\n"); +} +``` + +#### Union Interpolation +Union values in string interpolation print as `.variant(payload)` (just `.variant` for a void variant): +```sx +print("{s}"); // .rect(42) +``` + +### Array Types +Fixed-size arrays with element type and length. +```sx +buffer : [5]f32 = .[0, 2, 3.5, 4, 0]; +val := buffer[2]; // 3.5 +``` + +### Vector Types (SIMD) +LLVM SIMD vectors, parameterized by length and element type. 
+```sx +v := vec3(1, 3, 2); // Vector(3, f32) +``` + +**Arithmetic**: Element-wise `+`, `-`, `*`, `/` on vectors of same dimensions. +```sx +add := v1 + v2; // element-wise addition +``` + +**Scalar broadcast**: Scalar operands are broadcast to match the vector. +```sx +scaled := v * 2.0; // [2.0, 6.0, 4.0] +``` + +**Negation**: Unary `-` negates each element. +```sx +neg := -v; // [-1.0, -3.0, -2.0] +``` + +**Element access**: `.x`, `.y`, `.z`, `.w` (aliases `.r`, `.g`, `.b`, `.a`) extract single components. +```sx +v.x // first element +v.z // third element +``` + +**Index access**: `v[i]` extracts by index. +```sx +v[0] // first element +``` + +**Built-in `sqrt`**: Calls LLVM `llvm.sqrt.f32`/`.f64` intrinsic. +```sx +s := sqrt(9.0); // 3.0 +``` + +### Function Types +Expressed as `(param_types) -> return_type`. +A function with no return type annotation returns void. +```sx +// type is (s32) -> s32 +compute :: (x: s32) -> s32 { x * x; } + +// type is () -> void +main :: () { } +``` + +### Type Aliases +A name bound to an existing type. +```sx +SOME_TYPE :: f64; +``` + +### Generic Functions (Monomorphization) +Functions can be parameterized over types using `$T` syntax. The `$` prefix introduces a type parameter; subsequent uses of the name reference it. +```sx +sum :: (a: $T, b: T) -> T { + return a + b; +} +``` +- `$T` in a parameter type **introduces** type parameter `T` +- Bare `T` (without `$`) **references** the introduced type parameter +- At call sites, type arguments are **inferred** from actual argument types: + ```sx + sum(40, 2) // T = s32 + sum(1.5, 2.5) // T = f32 + ``` +- Each unique set of concrete types produces a **separate specialized function** (monomorphization) +- Multiple type parameters are supported: `(a: $T, b: $U) -> T` + +### Variadic Functions +Functions can accept a variable number of arguments using `..Type` syntax: +```sx +print :: (fmt: string, args: ..Any) { ... 
} +``` +- `..Any` means zero or more arguments, each boxed into `Any` (type tag + payload) +- The variadic parameter must be the last parameter +- At call sites, variadic arguments are automatically boxed: `print("x={}, y={}\n", x, y)` +- Inside the function body, `args` is accessed as a slice-like sequence + +### Type Inference +- `::` bindings infer type from the right-hand side +- `:=` bindings infer type from the right-hand side +- Explicit annotation overrides inference: `NAME : f64 : 0.9;` +- Integer literals default to `s32` +- Float literals default to `f32` +- Enum literals (`.variant`) infer their enum type from context (expected type) + +### Type Conversions + +**Implicit (widening)** — allowed without annotation: +- Integer to wider integer of same signedness (`u8` → `u16`, `s8` → `s32`) +- Unsigned to strictly wider signed (`u8` → `s16`) +- Any integer to any float (`u8` → `f32`, `s32` → `f64`) +- Float to wider float (`f32` → `f64`) +- Integer and float literals can convert to any numeric type implicitly + +**Explicit (narrowing)** — requires `xx` prefix: +- Integer to narrower integer (`s32` → `u8`) +- Signed to unsigned (`s32` → `u32`) +- Float to narrower float (`f64` → `f32`) +- Float to any integer (`f64` → `u16`) +- Unsigned to signed of same or narrower width (`u8` → `s8`) + +The `xx` prefix operator marks an expression for auto-conversion to the expected type from context (assignment, declaration, argument, return): +```sx +large: f64 = 5999.5; +x : u16 = xx large; // f64 → u16 +d : u8 = #run xx resolve(5); // s32 → u8 at compile time +``` + +Using `xx` outside a typed context (where the target type is known) is a compile error. + +--- + +## 3. Declarations + +### Constant Binding (immutable) + +```sx +// inferred type +NAME :: value; + +// explicit type +NAME : type : value; +``` + +The `::` operator creates an immutable binding. The value is evaluated at compile time when possible. 
+ +Examples: +```sx +SOME_INT :: 0; // s32 +SOME_STR :: "Hello"; // string +SOME_FLOAT :: 0.3; // f32 +SOME_DOUBLE : f64 : 0.9; // f64 (explicit) +SOME_FUNC :: () => 42; // () -> s32 +SOME_TYPE :: f64; // type alias +``` + +### Variable Binding (mutable) + +```sx +// inferred type +name := value; + +// explicit type +name : type = value; + +// default-initialized (type required) +name : type; + +// undefined (type required) +name : type = ---; +``` + +The `:=` operator creates a mutable binding. The type is inferred unless explicitly annotated. + +`name : type;` initializes using the type's defaults: zero for primitives, per-field defaults for structs (see Field Defaults). + +`name : type = ---;` leaves the value undefined (uninitialized memory). Reading before writing is undefined behavior. + +Examples: +```sx +x := 42; // s32, mutable +x := if true then 1 else 2; +z : Foo = .variant2; // Foo, mutable, explicit type +a : Foo; // Foo, default-initialized (a=0, b=42, c=undef) +b : Foo = ---; // Foo, entirely undefined +``` + +### Function Definition + +```sx +name :: (params) -> return_type { + body +} +``` + +- Parameters: `name: type` separated by commas +- Return type: `-> type` (omit for void) +- Body: block of statements; last expression is the implicit return value +- No `return` keyword needed (last expression = return value) + +Examples: +```sx +compute :: (x: s32) -> s32 { + x * x; +} + +main :: () { + // void return, no -> annotation +} + +// Bare-block shorthand (equivalent to no-arg void function): +main :: { + // same as main :: () { ... } +} +``` + +### Enum Definition + +```sx +Name :: enum { + variant1; + variant2; +} +``` + +Defines a new enum type with the given variants. Trailing comma is allowed. + +--- + +## 4. Expressions + +Everything in `sx` is expression-oriented where possible. 
+ +### Operator Precedence + +| Prec | Operators | Notes | +|------|-----------|-------| +| 6 (highest) | `*`, `/` | multiplication, division | +| 5 | `+`, `-` | addition, subtraction | +| 4 | `<`, `<=`, `>`, `>=`, `==`, `!=` | comparisons (chainable) | +| 2 | `and` | logical AND (short-circuit) | +| 1 (lowest) | `or` | logical OR (short-circuit) | + +### Arithmetic +Standard infix: `+`, `-`, `*`, `/` with usual precedence (`*`/`/` before `+`/`-`). +```sx +x * x +x + 2 +``` + +### Chained Comparisons +Comparison operators can be chained. Each operand is evaluated exactly once. +```sx +0 <= x <= 100 // equivalent to: 0 <= x and x <= 100 +1000 > x >= -100 // equivalent to: 1000 > x and x >= -100 +a == b == c // equivalent to: a == b and b == c +``` +Mixed operators are allowed: `a < b <= c > d` means `a < b and b <= c and c > d`. + +### Logical Operators +`and` and `or` are short-circuit boolean operators. The right operand is not evaluated if the left operand determines the result. +```sx +if 0 <= x <= 100 and 0 <= y <= 100 { + print("contained"); +} +``` + +### If Expression (inline form) +```sx +if condition then consequent else alternate +``` +Both branches are single expressions. The whole form produces a value. +```sx +x := if true then 1 else 2; +``` +The `else` branch is optional. Without it, the form is a statement (no value): +```sx +if i == 2 then continue; +if done then break; +if err then return; +``` + +### If Expression (block form) +```sx +if condition { + stmts +} else { + stmts +} +``` +Each branch is a block. The last expression in each block is the branch's value. Can be used inline within other expressions: +```sx +y := x + if false { + 7; +} else { + 12; +}; +``` + +### Pattern Matching +```sx +if subject == { + case pattern: body + case pattern: body + else: body // optional default arm +} +``` +Matches `subject` against each `case`. Patterns can be: +- **Enum literals**: `.variant` — matches a specific enum variant. 
+- **Integer/bool literals**: `42`, `true` — matches a specific value. +- **Type categories**: `struct`, `enum`, `union` — matches all types in that category (used with `type_of` values). + +`break` exits a case arm without producing a value. The optional `else:` arm matches when no `case` pattern matches. +```sx +if z == { + case .variant1: break; + case .variant2: + print("z: {z}"); + else: + print("unknown"); +} +``` + +#### Type Category Matching +When switching on a `Type` value (from `type_of`), category keywords match all registered types of that category: +```sx +type := type_of(val); +if type == { + case int: result = int_to_string(xx val); + case struct: result = struct_to_string(cast(type) val); + case enum: result = enum_to_string(cast(type) val); +} +``` +Available categories: `int`, `float`, `bool`, `string`, `struct`, `enum`, `union`. + +Inside a category arm, `cast(type) val` performs **runtime generic dispatch**: the compiler generates a switch over all types in the category, monomorphizing the callee for each concrete type. + +### While Loop +```sx +while condition { + body +} +``` +Repeats `body` as long as `condition` is true. `break;` exits the loop. `continue;` skips to the next iteration. +```sx +i := 0; +while i < 10 { + i += 1; + if i == 5 { continue; } + if i == 8 { break; } + print("{i}\n"); +} +``` + +### For Loop +```sx +for iterable { + // `it` is the current element + // `it_index` is the current index (s32) + print("{it}\n"); +} +``` +Iterates over arrays and slices. The loop body has two implicit variables: +- `it` — the current element value +- `it_index` — the current index (s32, starting at 0) + +`break;` exits the loop. `continue;` skips to the next iteration. +```sx +arr : [5]s32 = .[1, 2, 3, 4, 5]; +for arr { + if it_index == 2 { continue; } + print("{it}\n"); +} +``` + +### Lambda +```sx +(params) => expr +(params) -> return_type => expr +``` +Anonymous function. Produces a function value. 
Supports the same parameter features as named functions: `$` generic type params, `..` variadic params, and optional return type annotation. +```sx +SOME_FUNC :: () => 42; // () -> s32 +double :: (x: $T) -> T => x + x; // generic lambda with return type +``` + +### Function Call +```sx +callee(args) +``` +```sx +compute(6) +print("hello") +``` + +### Field Access +```sx +object.field +``` +Used for module access (`std.print`) and struct member access. + +### Enum Literal +```sx +.variant_name +``` +The enum type is inferred from context (expected type from declaration or parameter). + +### String Interpolation +Curly braces inside string literals interpolate expressions: +```sx +"z: {z}" +``` +The expression inside `{}` is evaluated and formatted according to its type: +- `s32` — decimal integer +- `f64` — decimal float +- `string` — as-is + +--- + +## 5. Statements + +Statements are terminated by `;`. + +- **Declaration**: `name :: value;` / `name := value;` +- **Assignment**: `name = value;` / `name += value;` (and other compound assignments). Also supports field targets: `obj.field = value;` +- **Expression statement**: `expr;` — evaluates the expression (last in a block = return value) +- **Return**: `return expr;` — returns from the enclosing function with the given value. `return;` returns void. +- **Break**: `break;` — exits a match arm, or a `while` or `for` loop +- **Continue**: `continue;` — skips to the next iteration of a `while` or `for` loop +- **Defer**: `defer expr;` — defers execution of `expr` until the enclosing block exits (LIFO order) + +--- + +## 6. Blocks, Scoping, and Implicit Returns + +A block `{ ... }` contains zero or more statements. The last expression in a block is its value (implicit return). + +In function bodies, the last expression becomes the return value: +```sx +compute :: (x: s32) -> s32 { + x * x; // this is returned +} +``` + +### Scope Blocks + +Bare blocks can be used as statements to introduce a new lexical scope. 
Variables declared inside a scope block are local to that block. No trailing `;` is required. + +```sx +main :: { + x := 42; + { + x := 6; // shadows outer x + print("inner: {x}"); // prints 6 + } + print("outer: {x}"); // prints 42 +} +``` + +### Variable Shadowing + +A variable declaration (`name :=`) inside an inner scope shadows any variable with the same name from outer scopes. The outer variable is restored when the inner scope exits. + +### Defer + +`defer expr;` schedules `expr` to execute when the enclosing scope block exits. Multiple defers in the same scope execute in reverse order (LIFO). + +```sx +{ + defer print("second"); + defer print("first"); +} +// prints: first, then second +``` + +--- + +## 7. Built-in Functions + +Built-in functions are declared in `std.sx` with the `#builtin` suffix, which tells the compiler to generate the implementation internally rather than looking for a function body. + +### I/O +- `write(str: string) -> void` — write a string to standard output +- `print(fmt: string, args: ..Any)` — formatted print. Parses `{}` placeholders in the format string and substitutes arguments. When all argument types are statically known, the compiler specializes the call at compile time (no `Any` boxing). 
+ +### Math +- `sqrt(x: $T) -> T` — square root (maps to LLVM intrinsic) + +### Memory +- `alloc(size: s32) -> string` — allocate `size` bytes of memory, returned as a string slice +- `size_of($T: Type) -> s32` — size of type `T` in bytes + +### Type Introspection +- `type_of(val: $T) -> Type` — returns the runtime type tag of a value +- `type_name($T: Type) -> string` — returns the name of type `T` as a string (e.g., `"Point"`) +- `field_count($T: Type) -> s32` — returns the number of fields (struct), variants (enum), or elements (vector) in type `T` +- `field_name($T: Type, idx: s32) -> string` — returns the name of the `idx`-th field (struct) or variant (enum) of type `T` +- `field_value(s: $T, idx: s32) -> Any` — returns the `idx`-th field (struct) or element (vector) of `s`, boxed as `Any` + +### Type Conversion +- `cast(Type) expr` — prefix operator that converts `expr` to `Type`. Examples: `cast(s32) 3.14`, `cast(f64) n`. When `Type` is a runtime `Type` value inside a type-category match arm, the compiler generates a dispatch switch over all types in the category, monomorphizing the callee for each concrete type. + +### Vectors +- `Vector($N: int, $T: Type) -> Type` — returns an LLVM vector type of `N` elements of type `T` + +--- + +## 8. Compile-time Evaluation + +### `#run` Directive + +`#run expr` evaluates `expr` at compile time using lazy JIT execution. It can appear in two contexts: + +**Compile-time constants** — bind a compile-time value to a name: +```sx +compute :: (x: s32) -> s32 { x * x; } +x :: #run compute(5); // x = 25, evaluated at compile time +``` + +Comptime globals are resolved lazily: the JIT executes only when the value is first referenced during code generation. Chained dependencies are resolved automatically. 
+ +**Side effects** — execute code at compile time for its side effects: +```sx +#run print("compiling..."); +``` + +### `#insert` Directive + +`#insert expr;` evaluates `expr` at compile time to obtain a string, then parses and compiles that string as inline code at the insertion point. + +```sx +generate :: () -> string { + return "print(\"hello from the other side\");"; +} + +main :: () { + #insert #run generate(); + // equivalent to: print("hello from the other side"); +} +``` + +The inserted string must contain valid `sx` statements (including semicolons). The statements are parsed and compiled in the same scope as the `#insert` site. + +--- + +## 9. Modules / Imports + +### `#import` Directive + +The `#import` directive brings declarations from another `.sx` file into the current file. Paths are resolved relative to the importing file's directory. + +**Flat import** — splices all declarations from the imported file into the current scope: +```sx +#import "modules/std/math.sx"; +``` + +**Namespaced import** — wraps all declarations under a namespace name: +```sx +std :: #import "modules/std.sx"; +``` + +Namespaced declarations are accessed with dot notation: +```sx +std.print("hello"); +``` + +### Import Resolution + +- Imports are resolved after parsing and before code generation. +- Paths are relative to the directory of the file containing the `#import`. +- Nested imports are supported (imported files may themselves contain `#import`). +- Circular imports are detected and silently skipped (each file is imported at most once). +- Generic functions in namespaced imports are supported (e.g., `std.mul(5, 2)` where `mul` is generic). + +### Intra-module References + +Functions within a namespaced import can call each other without the namespace prefix. When generating code for a namespaced module, unresolved function names are automatically tried with the namespace prefix. 
+ +### Example + +```sx +// modules/std/math.sx +mul :: (base: $T, exp: T) -> T { base * exp; } + +// modules/std/std.sx +print :: (str: string) -> void #builtin; + +// main.sx +std :: #import "modules/std.sx"; +#import "modules/std/math.sx"; + +main :: () -> s32 { + std.print("hello there"); + mul(5, 2); +} +``` + +--- + +## 10. Program Structure + +A program is a sequence of top-level declarations and `#import` directives. Execution begins at `main`. + +```sx +main :: () { + // entry point +} +``` + +`main` takes no arguments and returns void. The process exit code is 0 unless otherwise specified. + +--- + +## 11. Grammar (informal) + +``` +program = top_level* +top_level = decl | import_decl +import_decl = '#import' STRING ';' + | IDENT '::' '#import' STRING ';' +decl = const_decl | var_decl | fn_decl | enum_decl | struct_decl +const_decl = IDENT '::' expr ';' + | IDENT ':' type ':' expr ';' +var_decl = IDENT ':=' expr ';' + | IDENT ':' type '=' expr ';' + | IDENT ':' type ';' +fn_decl = IDENT '::' '(' params? ')' ('->' type)? block + | IDENT '::' block +enum_decl = IDENT '::' 'enum' '{' (IDENT ';')* '}' +struct_decl = IDENT '::' 'struct' '{' field_group* '}' +field_group = IDENT (',' IDENT)* ':' type ('=' expr)? ';' +params = param (',' param)* +param = IDENT ':' type +block = '{' stmt* '}' +stmt = decl | assignment ';' | return_stmt | defer_stmt | insert_stmt + | break_stmt | continue_stmt | expr ';' +return_stmt = 'return' expr? ';' +break_stmt = 'break' ';' +continue_stmt = 'continue' ';' +defer_stmt = 'defer' expr ';' +insert_stmt = '#insert' expr ';' +assignment = lvalue ('=' | '+=' | '-=' | '*=' | '/=') expr +lvalue = IDENT | postfix '.' IDENT +expr = if_expr | match_expr | while_expr | for_expr | lambda | binary +while_expr = 'while' expr block +for_expr = 'for' expr block +binary = unary (binop unary)* +unary = ('-' | '!' | 'xx' | 'cast' '(' type ')') postfix + | postfix +postfix = primary ('(' args? ')' | '.' 
IDENT | '.{' field_init_list '}')* +primary = INT | HEX_INT | BIN_INT | FLOAT | STRING | BOOL | IDENT | '---' + | '.' IDENT | '.' '{' field_init_list '}' + | '(' expr ')' | block | '#run' expr +field_init_list = field_init (',' field_init)* +field_init = IDENT '=' expr | IDENT | expr +if_expr = 'if' expr 'then' expr ('else' expr)? + | 'if' expr block ('else' block)? +match_expr = 'if' expr '==' '{' case_arm* else_arm? '}' +case_arm = 'case' pattern ':' (stmt* | 'break' ';') +else_arm = 'else' ':' stmt* +pattern = '.' IDENT | INT | BOOL | IDENT +lambda = '(' params? ')' ('->' type)? '=>' expr +args = expr (',' expr)* +type = '$' IDENT | 's32' | 'f32' | 'f64' | 'bool' | 'string' + | 'Any' | 'Type' | '..' type | '[' expr ']' type | IDENT +``` + +--- + +## 12. Open Questions + +These are inferred gaps — things not shown in the readme that need decisions: + +- **`return`**: Both `return expr;` and implicit return (last expression) are supported. +- **Else in match**: Is there a default/else arm in pattern matching? +- **Nested functions**: Can functions be defined inside other functions? +- **Mutability of params**: Are function parameters immutable by default? +- **Array/list types**: Not shown — deferred. +- **Struct types**: Implemented — named struct types with positional/named/shorthand literals. +- **Imports/modules**: `#import` directive supports flat and namespaced imports (see Section 9). +- **Operator overloading**: Not shown — presumably no. +- **Semicolons**: Required on all statements? What about the last expression in a block? +- **Top-level expressions**: Are bare expressions allowed at the top level or only declarations? 
diff --git a/src/ast.zig b/src/ast.zig new file mode 100644 index 0000000..b1ee246 --- /dev/null +++ b/src/ast.zig @@ -0,0 +1,326 @@ +const std = @import("std"); + +pub const Span = struct { + start: u32, + end: u32, +}; + +pub const Node = struct { + span: Span, + data: Data, + + pub const Data = union(enum) { + root: Root, + fn_decl: FnDecl, + block: Block, + int_literal: IntLiteral, + float_literal: FloatLiteral, + bool_literal: BoolLiteral, + string_literal: StringLiteral, + identifier: Identifier, + enum_literal: EnumLiteral, + binary_op: BinaryOp, + chained_comparison: ChainedComparison, + unary_op: UnaryOp, + call: Call, + field_access: FieldAccess, + if_expr: IfExpr, + match_expr: MatchExpr, + match_arm: MatchArm, + const_decl: ConstDecl, + var_decl: VarDecl, + assignment: Assignment, + enum_decl: EnumDecl, + struct_decl: StructDecl, + struct_literal: StructLiteral, + union_decl: UnionDecl, + union_literal: UnionLiteral, + lambda: Lambda, + type_expr: TypeExpr, + param: Param, + defer_stmt: DeferStmt, + comptime_expr: ComptimeExpr, + insert_expr: InsertExpr, + return_stmt: ReturnStmt, + import_decl: ImportDecl, + namespace_decl: NamespaceDecl, + array_type_expr: ArrayTypeExpr, + array_literal: ArrayLiteral, + parameterized_type_expr: ParameterizedTypeExpr, + index_expr: IndexExpr, + while_expr: WhileExpr, + for_expr: ForExpr, + spread_expr: SpreadExpr, + break_expr: void, + continue_expr: void, + undef_literal: void, + builtin_expr: void, + + pub fn declName(self: Data) ?[]const u8 { + return switch (self) { + .fn_decl => |d| d.name, + .const_decl => |d| d.name, + .var_decl => |d| d.name, + .enum_decl => |d| d.name, + .struct_decl => |d| d.name, + .union_decl => |d| d.name, + .namespace_decl => |d| d.name, + else => null, + }; + } + }; +}; + +pub const Root = struct { + decls: []const *Node, +}; + +pub const FnDecl = struct { + name: []const u8, + params: []const Param, + return_type: ?*Node, + body: *Node, + type_params: []const StructTypeParam = &.{}, +}; 
+ +pub const Param = struct { + name: []const u8, + name_span: Span, + type_expr: *Node, + is_variadic: bool = false, + is_comptime: bool = false, +}; + +pub const Block = struct { + stmts: []const *Node, +}; + +pub const IntLiteral = struct { + value: i64, +}; + +pub const FloatLiteral = struct { + value: f64, +}; + +pub const BoolLiteral = struct { + value: bool, +}; + +pub const StringLiteral = struct { + raw: []const u8, +}; + +pub const Identifier = struct { + name: []const u8, +}; + +pub const EnumLiteral = struct { + name: []const u8, // without the leading dot +}; + +pub const BinaryOp = struct { + op: Op, + lhs: *Node, + rhs: *Node, + + pub const Op = enum { + add, + sub, + mul, + div, + mod, + eq, + neq, + lt, + lte, + gt, + gte, + and_op, + or_op, + }; +}; + +pub const ChainedComparison = struct { + operands: []const *Node, + ops: []const BinaryOp.Op, +}; + +pub const UnaryOp = struct { + op: Op, + operand: *Node, + + pub const Op = enum { + negate, + not, + xx, + }; +}; + +pub const Call = struct { + callee: *Node, + args: []const *Node, +}; + +pub const FieldAccess = struct { + object: *Node, + field: []const u8, +}; + +pub const IfExpr = struct { + condition: *Node, + then_branch: *Node, + else_branch: ?*Node, + is_inline: bool, // true for `if cond then a else b` +}; + +pub const MatchExpr = struct { + subject: *Node, + arms: []const MatchArm, +}; + +pub const MatchArm = struct { + pattern: ?*Node, // null = else (default) arm + body: *Node, + is_break: bool, +}; + +pub const ConstDecl = struct { + name: []const u8, + type_annotation: ?*Node, + value: *Node, +}; + +pub const VarDecl = struct { + name: []const u8, + type_annotation: ?*Node, + value: ?*Node, +}; + +pub const Assignment = struct { + target: *Node, + op: Op, + value: *Node, + + pub const Op = enum { + assign, + add_assign, + sub_assign, + mul_assign, + div_assign, + mod_assign, + }; +}; + +pub const EnumDecl = struct { + name: []const u8, + variants: []const []const u8, +}; + +pub const 
StructTypeParam = struct { + name: []const u8, // e.g. "N" or "T" (without $) + constraint: *Node, // type_expr: "u32" for value param, "Type" for type param +}; + +pub const StructDecl = struct { + name: []const u8, + field_names: []const []const u8, + field_types: []const *Node, // type_expr nodes + field_defaults: []const ?*Node, // default value per field, null if none + type_params: []const StructTypeParam = &.{}, +}; + +pub const StructFieldInit = struct { + name: ?[]const u8, // null for positional, non-null for named/shorthand + value: *Node, +}; + +pub const StructLiteral = struct { + struct_name: ?[]const u8, // null for anonymous `.{ ... }` + type_expr: ?*Node = null, // for GenericType(args).{ ... } + field_inits: []const StructFieldInit, +}; + +pub const Lambda = struct { + params: []const Param, + return_type: ?*Node, + body: *Node, + type_params: []const StructTypeParam = &.{}, +}; + +pub const TypeExpr = struct { + name: []const u8, + is_generic: bool = false, +}; + +pub const DeferStmt = struct { + expr: *Node, +}; + +pub const ComptimeExpr = struct { + expr: *Node, +}; + +pub const InsertExpr = struct { + expr: *Node, +}; + +pub const ReturnStmt = struct { + value: ?*Node, +}; + +pub const ImportDecl = struct { + path: []const u8, + name: ?[]const u8, +}; + +pub const ArrayTypeExpr = struct { + length: *Node, // int_literal for the size + element_type: *Node, // type_expr for the element type +}; + +pub const ArrayLiteral = struct { + elements: []const *Node, + type_expr: ?*Node = null, +}; + +pub const ParameterizedTypeExpr = struct { + name: []const u8, // e.g. "Vector", or later generic struct names + args: []const *Node, // e.g. 
[int_literal(3), type_expr("f32")] +}; + +pub const IndexExpr = struct { + object: *Node, + index: *Node, +}; + +pub const WhileExpr = struct { + condition: *Node, + body: *Node, +}; + +pub const ForExpr = struct { + iterable: *Node, + body: *Node, +}; + +pub const SpreadExpr = struct { + operand: *Node, +}; + +pub const UnionDecl = struct { + name: []const u8, + variant_names: []const []const u8, + variant_types: []const ?*Node, // null for void variants +}; + +pub const UnionLiteral = struct { + union_name: ?[]const u8, // null for anonymous `.variant(expr)` + variant_name: []const u8, + payload: ?*Node, // null for void variants +}; + +pub const NamespaceDecl = struct { + name: []const u8, + decls: []const *Node, +}; diff --git a/src/builtins.zig b/src/builtins.zig new file mode 100644 index 0000000..f6028a8 --- /dev/null +++ b/src/builtins.zig @@ -0,0 +1,25 @@ +const llvm = @import("llvm_api.zig"); +const c = llvm.c; + +pub const Builtins = struct { + printf_fn: c.LLVMValueRef, + calloc_fn: c.LLVMValueRef, + + pub fn init(module: c.LLVMModuleRef, ctx: c.LLVMContextRef) Builtins { + const ptr_type = c.LLVMPointerTypeInContext(ctx, 0); + const i64_type = c.LLVMInt64TypeInContext(ctx); + const i32_type = c.LLVMInt32TypeInContext(ctx); + + // Declare: int printf(const char*, ...) 
+ var printf_params = [_]c.LLVMTypeRef{ptr_type}; + const printf_type = c.LLVMFunctionType(i32_type, &printf_params, 1, 1); + const printf_fn = c.LLVMAddFunction(module, "printf", printf_type); + + // Declare: void* calloc(size_t count, size_t size) + var calloc_params = [_]c.LLVMTypeRef{ i64_type, i64_type }; + const calloc_type = c.LLVMFunctionType(ptr_type, &calloc_params, 2, 0); + const calloc_fn = c.LLVMAddFunction(module, "calloc", calloc_type); + + return .{ .printf_fn = printf_fn, .calloc_fn = calloc_fn }; + } +}; diff --git a/src/codegen.zig b/src/codegen.zig new file mode 100644 index 0000000..d9c1381 --- /dev/null +++ b/src/codegen.zig @@ -0,0 +1,4999 @@ +const std = @import("std"); +const ast = @import("ast.zig"); +const Node = ast.Node; +const Span = ast.Span; +const llvm = @import("llvm_api.zig"); +const c = llvm.c; +const types = @import("types.zig"); +const Type = types.Type; +const Builtins = @import("builtins.zig").Builtins; +const Parser = @import("parser.zig").Parser; +const errors = @import("errors.zig"); +const sema = @import("sema.zig"); +const comptime_mod = @import("comptime.zig"); + +pub const CodeGen = struct { + context: c.LLVMContextRef, + module: c.LLVMModuleRef, + builder: c.LLVMBuilderRef, + allocator: std.mem.Allocator, + + // Symbol table: maps variable names to their alloca pointers + named_values: std.StringHashMap(NamedValue), + // Enum type registry: maps enum name to variant list + enum_types: std.StringHashMap([]const []const u8), + // Type alias registry: maps alias name to target type name + type_aliases: std.StringHashMap([]const u8), + // Struct type registry: maps struct name to field info + LLVM type + struct_types: std.StringHashMap(StructInfo), + // Union type registry: maps union name to variant info + LLVM type + union_types: std.StringHashMap(UnionInfo), + // Built-in functions (printf, etc.) 
+ builtins: ?Builtins, + // Current function being generated (for alloca insertion) + current_function: c.LLVMValueRef, + // Return type of the current function being generated + current_return_type: Type = .void_type, + // Scope save stack: each entry records shadowed names to restore on scope exit + scope_saves: std.ArrayList(std.ArrayList(ScopeEntry)), + // Defer stack: parallel to scope_saves, each entry holds deferred expressions + defer_stack: std.ArrayList(std.ArrayList(*Node)), + // Compile-time globals: maps name to global variable info for #run results + comptime_globals: std.StringHashMap(ComptimeGlobal), + // Top-level #run expressions for side effects only + comptime_side_effects: std.ArrayList(*Node), + // Generic function templates: maps name to AST for deferred monomorphization + generic_templates: std.StringHashMap(GenericTemplate), + // Instantiated generic functions: maps mangled name to LLVM function + generic_instances: std.StringHashMap(c.LLVMValueRef), + // Active type parameter bindings during generic instantiation (null when not instantiating) + type_param_bindings: ?std.StringHashMap(Type) = null, + // Active value parameter bindings during generic struct instantiation + value_param_bindings: ?std.StringHashMap(i64) = null, + // Active comptime param AST nodes during generic function instantiation (for #insert substitution) + comptime_param_nodes: ?std.StringHashMap(*Node) = null, + // Generic struct templates: maps name to AST for deferred instantiation + generic_struct_templates: std.StringHashMap(GenericStructTemplate), + // Known namespace names (for import resolution) + namespaces: std.StringHashMap(void), + // Functions declared with #builtin (only available when imported) + builtin_functions: std.StringHashMap(void), + // Active namespace during body generation of imported modules + current_namespace: ?[]const u8 = null, + // Diagnostics list (optional, for structured error reporting) + diagnostics: ?*errors.DiagnosticList = null, + 
// Current source span (set at genExpr/genStmt/genExprAsType entry) + current_span: Span = .{ .start = 0, .end = 0 }, + // Loop context: break/continue target basic blocks (null when not in a loop) + loop_break_bb: c.LLVMBasicBlockRef = null, + loop_continue_bb: c.LLVMBasicBlockRef = null, + // Sema result (optional, for type-aware comptime evaluation) + sema_result: ?*const sema.SemaResult = null, + // Root declarations from the AST (for VM on-demand function compilation) + root_decls: []const *Node = &.{}, + // Cached LLVM struct type for string slices {ptr, i32} + string_struct_type: c.LLVMTypeRef = null, + // Cached LLVM struct type for Any {i32 tag, i64 value} + any_struct_type: c.LLVMTypeRef = null, + // Dynamic type ID assignment for Any tags (named types get unique IDs starting from 7) + any_type_id_map: std.StringHashMap(u64), + next_any_type_id: u64 = 7, + // Cache of auto-generated to_string functions for complex types + // Variadic function info: maps function name to variadic metadata + variadic_functions: std.StringHashMap(VariadicInfo), + // Enriched Any type entries: maps type name to tag + category + sx type + any_type_entries: std.StringHashMap(AnyTypeEntry), + // Current match arm type entries (set during category match arm body generation) + current_match_tags: ?[]const u64 = null, + + const TypeCategory = enum { + struct_cat, + enum_cat, + union_cat, + vector_cat, + array_cat, + slice_cat, + }; + + const AnyTypeEntry = struct { + tag_id: u64, + category: TypeCategory, + sx_type: Type, + }; + + const VariadicInfo = struct { + fixed_param_count: u32, // number of non-variadic params + element_type_name: []const u8, // element type of the variadic slice (e.g. 
"s32") + }; + + const GenericTemplate = struct { + fd: ast.FnDecl, + }; + + const GenericStructTemplate = struct { + sd: ast.StructDecl, + }; + + const ComptimeGlobal = struct { + global: c.LLVMValueRef, // LLVM global variable + ty: Type, // sx type + expr: *Node, // the inner expression to JIT-evaluate + is_resolved: bool = false, // true if initializer already set (no JIT needed) + }; + + const StructInfo = struct { + field_names: []const []const u8, + field_types: []const Type, + field_defaults: []const ?*Node, + llvm_type: c.LLVMTypeRef, + display_name: ?[]const u8 = null, // pretty name for generic instances + }; + + const UnionInfo = struct { + variant_names: []const []const u8, + variant_types: []const Type, // void_type for void variants + llvm_type: c.LLVMTypeRef, // { i32, [max_payload_size x i8] } + max_payload_size: u64, + }; + + // Scope stack entry: records what a name mapped to before being shadowed + const ScopeEntry = struct { + name: []const u8, + prev: ?NamedValue, // null = name didn't exist before this scope + }; + + const NamedValue = struct { + ptr: c.LLVMValueRef, // alloca pointer + ty: Type, // sx type + }; + + pub fn init(allocator: std.mem.Allocator, module_name: [*:0]const u8) CodeGen { + const ctx = c.LLVMContextCreate(); + const module = c.LLVMModuleCreateWithNameInContext(module_name, ctx); + const builder = c.LLVMCreateBuilderInContext(ctx); + return .{ + .context = ctx, + .module = module, + .builder = builder, + .allocator = allocator, + .named_values = std.StringHashMap(NamedValue).init(allocator), + .enum_types = std.StringHashMap([]const []const u8).init(allocator), + .type_aliases = std.StringHashMap([]const u8).init(allocator), + .struct_types = std.StringHashMap(StructInfo).init(allocator), + .union_types = std.StringHashMap(UnionInfo).init(allocator), + .builtins = null, + .current_function = null, + .scope_saves = std.ArrayList(std.ArrayList(ScopeEntry)).empty, + .defer_stack = std.ArrayList(std.ArrayList(*Node)).empty, + 
.comptime_globals = std.StringHashMap(ComptimeGlobal).init(allocator), + .comptime_side_effects = std.ArrayList(*Node).empty, + .generic_templates = std.StringHashMap(GenericTemplate).init(allocator), + .generic_instances = std.StringHashMap(c.LLVMValueRef).init(allocator), + .generic_struct_templates = std.StringHashMap(GenericStructTemplate).init(allocator), + .namespaces = std.StringHashMap(void).init(allocator), + .builtin_functions = std.StringHashMap(void).init(allocator), + .variadic_functions = std.StringHashMap(VariadicInfo).init(allocator), + .any_type_id_map = std.StringHashMap(u64).init(allocator), + .any_type_entries = std.StringHashMap(AnyTypeEntry).init(allocator), + }; + } + + pub fn deinit(self: *CodeGen) void { + self.named_values.deinit(); + self.enum_types.deinit(); + self.type_aliases.deinit(); + self.struct_types.deinit(); + self.union_types.deinit(); + self.comptime_globals.deinit(); + self.generic_templates.deinit(); + self.generic_instances.deinit(); + self.generic_struct_templates.deinit(); + self.namespaces.deinit(); + self.builtin_functions.deinit(); + self.variadic_functions.deinit(); + self.any_type_id_map.deinit(); + self.any_type_entries.deinit(); + c.LLVMDisposeBuilder(self.builder); + c.LLVMDisposeModule(self.module); + c.LLVMContextDispose(self.context); + } + + fn emitError(self: *CodeGen, msg: []const u8) error{CodeGenError} { + if (self.diagnostics) |diags| diags.add(.err, msg, self.current_span); + return error.CodeGenError; + } + + fn emitErrorFmt(self: *CodeGen, comptime fmt: []const u8, args: anytype) error{CodeGenError} { + if (self.diagnostics) |diags| diags.addFmt(.err, self.current_span, fmt, args); + return error.CodeGenError; + } + + pub fn typeToLLVM(self: *CodeGen, ty: Type) c.LLVMTypeRef { + return switch (ty) { + .signed => |w| c.LLVMIntTypeInContext(self.context, w), + .unsigned => |w| c.LLVMIntTypeInContext(self.context, w), + .f32 => c.LLVMFloatTypeInContext(self.context), + .f64 => 
c.LLVMDoubleTypeInContext(self.context), + .void_type => c.LLVMVoidTypeInContext(self.context), + .boolean => c.LLVMInt1TypeInContext(self.context), + .string_type, .slice_type => self.getStringStructType(), // slices use same {ptr, i32} layout + .enum_type => c.LLVMInt32TypeInContext(self.context), + .struct_type => |name| if (self.struct_types.get(name)) |info| info.llvm_type else unreachable, + .union_type => |name| if (self.union_types.get(name)) |info| info.llvm_type else unreachable, + .array_type => |info| { + const elem_ty = Type.fromName(info.element_name) orelse unreachable; + return c.LLVMArrayType2(self.typeToLLVM(elem_ty), info.length); + }, + .vector_type => |info| { + const elem_ty = Type.fromName(info.element_name) orelse unreachable; + return c.LLVMVectorType(self.typeToLLVM(elem_ty), info.length); + }, + .any_type => self.getAnyStructType(), + .meta_type => c.LLVMPointerTypeInContext(self.context, 0), + }; + } + + fn getAnyStructType(self: *CodeGen) c.LLVMTypeRef { + if (self.any_struct_type) |t| return t; + var field_types = [_]c.LLVMTypeRef{ + c.LLVMInt32TypeInContext(self.context), // type tag + c.LLVMInt64TypeInContext(self.context), // value (fits all primitives) + }; + self.any_struct_type = c.LLVMStructTypeInContext(self.context, &field_types, 2, 0); + return self.any_struct_type.?; + } + + /// Type tag constants for Any type (builtins: 0-6, Type: 10, named types: 7+ dynamic) + const ANY_TAG_VOID: u64 = 0; + const ANY_TAG_BOOL: u64 = 1; + const ANY_TAG_S32: u64 = 2; + const ANY_TAG_S64: u64 = 3; + const ANY_TAG_F32: u64 = 4; + const ANY_TAG_F64: u64 = 5; + const ANY_TAG_STRING: u64 = 6; + const ANY_TAG_TYPE: u64 = 10; + + /// Get or assign a unique type ID for a named type (struct, enum, union, vector, array). + /// IDs start at 7 and are assigned dynamically per compilation. + /// Also populates `any_type_entries` with category and type info. 
+ fn getAnyTypeId(self: *CodeGen, name: []const u8, sx_type: Type) !u64 { + const gop = try self.any_type_id_map.getOrPut(name); + if (!gop.found_existing) { + gop.value_ptr.* = self.next_any_type_id; + self.next_any_type_id += 1; + // Skip over reserved slot 10 (ANY_TAG_TYPE) + if (self.next_any_type_id == ANY_TAG_TYPE) self.next_any_type_id += 1; + + // Determine category from the sx type + const category: TypeCategory = switch (sx_type) { + .struct_type => .struct_cat, + .enum_type => .enum_cat, + .union_type => .union_cat, + .vector_type => .vector_cat, + .array_type => .array_cat, + .slice_type => .slice_cat, + else => .struct_cat, // fallback + }; + try self.any_type_entries.put(name, .{ + .tag_id = gop.value_ptr.*, + .category = category, + .sx_type = sx_type, + }); + } + return gop.value_ptr.*; + } + + /// Build an Any value { tag: i32, value: i64 } from a typed LLVM value. + /// Small values (ints, floats, bools, enums) are stored inline in the i64. + /// Complex values (strings, structs, unions) are stored via pointer (alloca + ptr-to-int). 
+ fn buildAnyValue(self: *CodeGen, val: c.LLVMValueRef, ty: Type) !c.LLVMValueRef { + const any_ty = self.getAnyStructType(); + const i32_ty = c.LLVMInt32TypeInContext(self.context); + const i64_ty = c.LLVMInt64TypeInContext(self.context); + const undef = c.LLVMGetUndef(any_ty); + + // Determine tag + const tag: u64 = switch (ty) { + .void_type => ANY_TAG_VOID, + .boolean => ANY_TAG_BOOL, + .signed => |w| if (w <= 32) ANY_TAG_S32 else ANY_TAG_S64, + .unsigned => |w| if (w <= 32) ANY_TAG_S32 else ANY_TAG_S64, + .f32 => ANY_TAG_F32, + .f64 => ANY_TAG_F64, + .string_type => ANY_TAG_STRING, + .struct_type => |name| try self.getAnyTypeId(name, ty), + .enum_type => |name| try self.getAnyTypeId(name, ty), + .union_type => |name| try self.getAnyTypeId(name, ty), + .vector_type => |info| try self.getAnyTypeId(try std.fmt.allocPrint(self.allocator, "vec[{d}]{s}", .{ info.length, info.element_name }), ty), + .array_type => |info| try self.getAnyTypeId(try std.fmt.allocPrint(self.allocator, "[{d}]{s}", .{ info.length, info.element_name }), ty), + .meta_type => ANY_TAG_TYPE, + else => ANY_TAG_S32, + }; + const tag_val = c.LLVMConstInt(i32_ty, tag, 0); + const with_tag = c.LLVMBuildInsertValue(self.builder, undef, tag_val, 0, "any_tag"); + + // Convert value to i64 + const val_as_i64 = switch (ty) { + .boolean => c.LLVMBuildZExt(self.builder, val, i64_ty, "any_bool"), + .signed => |w| if (w <= 32) + c.LLVMBuildSExt(self.builder, val, i64_ty, "any_int") + else + val, + .unsigned => |w| if (w <= 32) + c.LLVMBuildZExt(self.builder, val, i64_ty, "any_uint") + else + val, + .f32 => blk: { + // f32 -> f64 -> bitcast to i64 + const as_f64 = c.LLVMBuildFPExt(self.builder, val, c.LLVMDoubleTypeInContext(self.context), "f32_to_f64"); + break :blk c.LLVMBuildBitCast(self.builder, as_f64, i64_ty, "any_f32"); + }, + .f64 => c.LLVMBuildBitCast(self.builder, val, i64_ty, "any_f64"), + .string_type => blk: { + // String is {ptr, i32} — store to alloca, pass alloca as i64 + const str_alloca = 
c.LLVMBuildAlloca(self.builder, self.getStringStructType(), "any_str_tmp"); + _ = c.LLVMBuildStore(self.builder, val, str_alloca); + break :blk c.LLVMBuildPtrToInt(self.builder, str_alloca, i64_ty, "any_str"); + }, + .struct_type => |sname| blk: { + // Struct — store to alloca, pass pointer as i64 + const info = self.struct_types.get(sname) orelse + return c.LLVMGetUndef(any_ty); + const alloca = c.LLVMBuildAlloca(self.builder, info.llvm_type, "any_struct_tmp"); + _ = c.LLVMBuildStore(self.builder, val, alloca); + break :blk c.LLVMBuildPtrToInt(self.builder, alloca, i64_ty, "any_struct"); + }, + .enum_type => blk: { + // Enum is i32 tag — extend to i64 + break :blk c.LLVMBuildZExt(self.builder, val, i64_ty, "any_enum"); + }, + .union_type => |uname| blk: { + // Union — store to alloca, pass pointer as i64 + const info = self.union_types.get(uname) orelse + return c.LLVMGetUndef(any_ty); + const alloca = c.LLVMBuildAlloca(self.builder, info.llvm_type, "any_union_tmp"); + _ = c.LLVMBuildStore(self.builder, val, alloca); + break :blk c.LLVMBuildPtrToInt(self.builder, alloca, i64_ty, "any_union"); + }, + .vector_type, .array_type => blk: { + // Vector/Array — store to alloca, pass pointer as i64 + const llvm_ty = self.typeToLLVM(ty); + const alloca = c.LLVMBuildAlloca(self.builder, llvm_ty, "any_vec_tmp"); + _ = c.LLVMBuildStore(self.builder, val, alloca); + break :blk c.LLVMBuildPtrToInt(self.builder, alloca, i64_ty, "any_vec"); + }, + .meta_type => blk: { + // Meta type is a pointer (global string) — convert via ptrtoint + break :blk c.LLVMBuildPtrToInt(self.builder, val, i64_ty, "any_type"); + }, + else => c.LLVMBuildSExt(self.builder, val, i64_ty, "any_val"), + }; + return c.LLVMBuildInsertValue(self.builder, with_tag, val_as_i64, 1, "any_value"); + } + + fn getStringStructType(self: *CodeGen) c.LLVMTypeRef { + if (self.string_struct_type) |t| return t; + var field_types = [_]c.LLVMTypeRef{ + c.LLVMPointerTypeInContext(self.context, 0), // ptr + 
c.LLVMInt32TypeInContext(self.context), // len + }; + self.string_struct_type = c.LLVMStructTypeInContext(self.context, &field_types, 2, 0); + return self.string_struct_type.?; + } + + /// Build a string slice {ptr, len} from a raw pointer and a constant length. + fn buildStringSlice(self: *CodeGen, ptr: c.LLVMValueRef, len: u32) c.LLVMValueRef { + const str_ty = self.getStringStructType(); + const undef = c.LLVMGetUndef(str_ty); + const with_ptr = c.LLVMBuildInsertValue(self.builder, undef, ptr, 0, "str_ptr"); + const len_val = c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), len, 0); + return c.LLVMBuildInsertValue(self.builder, with_ptr, len_val, 1, "str_slice"); + } + + /// Build a string slice {ptr, len} from a raw pointer and a runtime length value. + fn buildStringSliceRT(self: *CodeGen, ptr: c.LLVMValueRef, len_val: c.LLVMValueRef) c.LLVMValueRef { + const str_ty = self.getStringStructType(); + const undef = c.LLVMGetUndef(str_ty); + const with_ptr = c.LLVMBuildInsertValue(self.builder, undef, ptr, 0, "str_ptr"); + return c.LLVMBuildInsertValue(self.builder, with_ptr, len_val, 1, "str_slice"); + } + + fn pushScope(self: *CodeGen) !void { + try self.scope_saves.append(self.allocator, std.ArrayList(ScopeEntry).empty); + try self.defer_stack.append(self.allocator, std.ArrayList(*Node).empty); + } + + fn popScope(self: *CodeGen) !void { + // 1. Execute deferred expressions in LIFO order + if (self.defer_stack.items.len > 0) { + const defers = self.defer_stack.items[self.defer_stack.items.len - 1]; + var i: usize = defers.items.len; + while (i > 0) { + i -= 1; + _ = try self.genExpr(defers.items[i]); + } + _ = self.defer_stack.pop(); + } + + // 2. 
Restore shadowed variables + if (self.scope_saves.items.len > 0) { + const saves = self.scope_saves.items[self.scope_saves.items.len - 1]; + // Restore in reverse order + var i: usize = saves.items.len; + while (i > 0) { + i -= 1; + const entry = saves.items[i]; + if (entry.prev) |prev| { + self.named_values.putAssumeCapacity(entry.name, prev); + } else { + _ = self.named_values.remove(entry.name); + } + } + _ = self.scope_saves.pop(); + } + } + + /// Emit all pending deferred expressions from all active scopes (LIFO order, + /// innermost scope first). Does NOT pop the stacks — used before `return` + /// so that popScope() can still clean up the data structures later. + fn emitAllDefers(self: *CodeGen) !void { + var i: usize = self.defer_stack.items.len; + while (i > 0) { + i -= 1; + const defers = self.defer_stack.items[i]; + var j: usize = defers.items.len; + while (j > 0) { + j -= 1; + _ = try self.genExpr(defers.items[j]); + } + } + } + + fn saveShadowed(self: *CodeGen, name: []const u8) !void { + if (self.scope_saves.items.len == 0) return; + const top = &self.scope_saves.items[self.scope_saves.items.len - 1]; + const prev = self.named_values.get(name); + try top.append(self.allocator, .{ .name = name, .prev = prev }); + } + + pub fn generate(self: *CodeGen, root: *Node) !void { + if (root.data != .root) return self.emitError("expected root node for code generation"); + // Store root decls for VM on-demand function compilation + self.root_decls = root.data.root.decls; + // Initialize built-in function declarations (printf, etc.) 
+ self.builtins = Builtins.init(self.module, self.context); + + // Pass 1: Register all declarations (signatures only, no bodies) + for (root.data.root.decls) |decl| { + switch (decl.data) { + .fn_decl => |fd| { + if (fd.body.data == .builtin_expr) { + try self.builtin_functions.put(fd.name, {}); + } else if (fd.type_params.len > 0) { + try self.generic_templates.put(fd.name, .{ .fd = fd }); + } else { + try self.registerFnDecl(fd); + } + }, + .enum_decl => |ed| { + try self.enum_types.put(ed.name, ed.variants); + _ = try self.getAnyTypeId(ed.name, .{ .enum_type = ed.name }); + }, + .struct_decl => |sd| try self.registerStructType(sd), + .union_decl => |ud| try self.registerUnionType(ud), + .const_decl => |cd| { + if (cd.value.data == .builtin_expr) { + // #builtin constant — skip codegen + } else if (cd.value.data == .lambda) { + try self.registerLambdaAsFunction(cd.name, cd.value.data.lambda); + } else if (cd.value.data == .type_expr) { + try self.type_aliases.put(cd.name, cd.value.data.type_expr.name); + } else if (cd.value.data == .call) { + // Check if this is a generic struct or type function instantiation + const callee_name = if (cd.value.data.call.callee.data == .identifier) + cd.value.data.call.callee.data.identifier.name + else + null; + if (callee_name) |cn| { + if (self.generic_struct_templates.get(cn)) |tmpl| { + // Generic struct instantiation: Vec3 :: Vec(3, f32); + const result_ty = try self.instantiateGenericStruct(cn, tmpl, cd.value.data.call.args); + if (result_ty.isStruct()) { + try self.type_aliases.put(cd.name, result_ty.struct_type); + } + } else if (self.generic_templates.get(cn)) |tmpl| { + // Type-returning function: Foo :: Complex(u32); + const result_ty = try self.instantiateTypeFunction(cd.name, cn, tmpl, cd.value.data.call.args); + if (result_ty.isStruct()) { + try self.type_aliases.put(cd.name, result_ty.struct_type); + } else if (result_ty.isUnion()) { + try self.type_aliases.put(cd.name, result_ty.union_type); + } + } else { + try 
self.registerTopLevelConstant(cd); + } + } else { + try self.registerTopLevelConstant(cd); + } + } else if (cd.value.data == .comptime_expr) { + // Use explicit type annotation if available + const ct_type_override: ?Type = if (cd.type_annotation) |te| Type.fromTypeExpr(te) else null; + try self.registerComptimeGlobal(cd.name, cd.value.data.comptime_expr.expr, ct_type_override); + } else { + // Top-level value constant (e.g., SPECIAL_VALUE :u8: 42;) + try self.registerTopLevelConstant(cd); + } + }, + .comptime_expr => |ct| { + try self.comptime_side_effects.append(self.allocator, ct.expr); + }, + .namespace_decl => |ns| { + try self.registerNamespace(ns); + }, + else => {}, + } + } + + // Pre-register all known types for Any type ID assignment + { + var it = self.struct_types.iterator(); + while (it.next()) |entry| { + _ = try self.getAnyTypeId(entry.key_ptr.*, .{ .struct_type = entry.key_ptr.* }); + } + } + { + var it = self.enum_types.iterator(); + while (it.next()) |entry| { + _ = try self.getAnyTypeId(entry.key_ptr.*, .{ .enum_type = entry.key_ptr.* }); + } + } + { + var it = self.union_types.iterator(); + while (it.next()) |entry| { + _ = try self.getAnyTypeId(entry.key_ptr.*, .{ .union_type = entry.key_ptr.* }); + } + } + + // Pass 2: Generate all function bodies + for (root.data.root.decls) |decl| { + switch (decl.data) { + .fn_decl => |fd| { + if (fd.body.data == .builtin_expr) { + // skip + } else if (fd.type_params.len == 0) { + try self.genFnBody(fd); + } + }, + .const_decl => |cd| { + if (cd.value.data == .lambda) { + try self.genLambdaBody(cd.name, cd.value.data.lambda); + } + }, + .namespace_decl => |ns| { + try self.genNamespaceBodies(ns); + }, + else => {}, + } + } + + // Execute comptime side effects via bytecode VM (e.g., #run main();) + for (self.comptime_side_effects.items) |expr| { + _ = try self.comptimeEval(expr, .void_type); + } + } + + /// Evaluate a comptime expression using the bytecode VM. 
/// No LLVM state save/restore is needed — the VM operates independently.
///
/// `expr` is compiled to a bytecode chunk and executed immediately. A failure in
/// either phase is reported through `emitErrorFmt` with the name of the
/// underlying error. `expected_type` is not consulted here; the caller uses it
/// when converting the resulting value to LLVM.
fn comptimeEval(self: *CodeGen, expr: *Node, expected_type: Type) !comptime_mod.Value {
    _ = expected_type; // VM infers types from values; expected_type used by caller for LLVM conversion

    const sema = self.sema_result;

    var compiler = comptime_mod.Compiler.init(self.allocator, sema, self.root_decls, self);
    const chunk = compiler.compile(expr) catch |err| {
        return self.emitErrorFmt("comptime compilation failed: {s}", .{@errorName(err)});
    };

    var vm = comptime_mod.VM.init(self.allocator, sema, self.root_decls, self);
    return vm.execute(&chunk) catch |err| {
        return self.emitErrorFmt("comptime execution failed: {s}", .{@errorName(err)});
    };
}

/// Substitute comptime param identifiers in an AST expression with their literal nodes.
/// Used before comptimeEval in #insert to resolve comptime function params.
///
/// Only identifiers, call arguments and binary-operator operands are visited;
/// every other node kind is returned untouched. The input tree is never
/// mutated: a fresh node is allocated only for a call/binary op in which at
/// least one child was actually replaced, otherwise `node` itself is returned.
fn substituteComptimeNodes(self: *CodeGen, node: *Node) !*Node {
    const cpn = self.comptime_param_nodes orelse return node;

    switch (node.data) {
        // Direct identifier match
        .identifier => |ident| {
            if (cpn.get(ident.name)) |replacement| return replacement;
        },

        // Recurse into call arguments (the callee is kept as-is)
        .call => |call| {
            // The replacement argument array is created lazily, on the first
            // argument that really changes, so an untouched call allocates nothing.
            var new_args: ?[]*Node = null;
            errdefer if (new_args) |buf| self.allocator.free(buf);

            for (call.args, 0..) |arg, i| {
                const substituted = try self.substituteComptimeNodes(arg);
                if (new_args == null) {
                    if (substituted == arg) continue;
                    const buf = try self.allocator.alloc(*Node, call.args.len);
                    // Arguments before `i` were unchanged; carry them over.
                    @memcpy(buf[0..i], call.args[0..i]);
                    new_args = buf;
                }
                new_args.?[i] = substituted;
            }

            if (new_args) |buf| {
                const new_node = try self.allocator.create(Node);
                new_node.* = .{
                    .span = node.span,
                    .data = .{ .call = .{
                        .callee = call.callee,
                        .args = buf,
                    } },
                };
                return new_node;
            }
        },

        // Recurse into binary ops
        .binary_op => |bin| {
            const new_lhs = try self.substituteComptimeNodes(bin.lhs);
            const new_rhs = try self.substituteComptimeNodes(bin.rhs);
            if (new_lhs != bin.lhs or new_rhs != bin.rhs) {
                const new_node = try self.allocator.create(Node);
                new_node.* = .{
                    .span = node.span,
                    .data = .{ .binary_op = .{
                        .op = bin.op,
                        .lhs = new_lhs,
                        .rhs = new_rhs,
                    } },
                };
                return new_node;
            }
        },

        else => {},
    }

    return node;
}

/// Pick the LLVM type for a comptime float constant: the LLVM type of `ty` when
/// that is a floating-point type, otherwise `fallback`.
fn comptimeFloatType(self: *CodeGen, ty: Type, fallback: c.LLVMTypeRef) c.LLVMTypeRef {
    const target = self.typeToLLVM(ty);
    const kind = c.LLVMGetTypeKind(target);
    const is_float = kind == c.LLVMHalfTypeKind or
        kind == c.LLVMFloatTypeKind or
        kind == c.LLVMDoubleTypeKind;
    return if (is_float) target else fallback;
}

/// Convert a comptime VM Value to an LLVM constant value.
///
/// Integers take the LLVM type of `ty`. Floats take the LLVM type of `ty` when
/// it is a float type (so the constant matches a global declared with that
/// type) and fall back to the value's own width otherwise. Strings are emitted
/// through the builder as a global string plus a slice. Struct, array, type and
/// function values are not supported and are treated as unreachable.
fn comptimeValueToLLVM(self: *CodeGen, value: comptime_mod.Value, ty: Type) c.LLVMValueRef {
    return switch (value) {
        .int_val => |v| c.LLVMConstInt(self.typeToLLVM(ty), @bitCast(v), 0),
        .float_val => |v| c.LLVMConstReal(
            self.comptimeFloatType(ty, c.LLVMDoubleTypeInContext(self.context)),
            v,
        ),
        .float32_val => |v| c.LLVMConstReal(
            self.comptimeFloatType(ty, c.LLVMFloatTypeInContext(self.context)),
            @as(f64, v),
        ),
        .bool_val => |v| c.LLVMConstInt(c.LLVMInt1TypeInContext(self.context), if (v) 1 else 0, 0),
        .string_val => |v| blk: {
            // This function cannot return an error, so allocation failure is a
            // hard stop rather than silently-undefined behaviour.
            const z = self.allocator.dupeZ(u8, v) catch @panic("out of memory while emitting comptime string");
            const ptr = c.LLVMBuildGlobalStringPtr(self.builder, z.ptr, "comptime_str");
            break :blk self.buildStringSlice(ptr, @intCast(v.len));
        },
        .void_val => c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0),
        .struct_val, .array_val, .type_val, .function_val => unreachable,
    };
}

/// Lazily resolve a comptime global by evaluating its expression via bytecode VM.
fn resolveComptimeGlobal(self: *CodeGen, ct: *ComptimeGlobal) !void {
    // Run the stored expression, then replace the global's initializer with the
    // resulting constant and mark the global constant.
    const result = try self.comptimeEval(ct.expr, ct.ty);
    const const_val = self.comptimeValueToLLVM(result, ct.ty);
    c.LLVMSetInitializer(ct.global, const_val);
    c.LLVMSetGlobalConstant(ct.global, 1);
    ct.is_resolved = true;
}

/// Resolve a type-position AST node to a sx `Type`.
///
/// Returns `.void_type` for a null node and for anything that cannot be
/// resolved. Handles, in order: whatever `Type.fromTypeExpr` recognises,
/// array types `[N]T`, parameterized types, call expressions used as types,
/// and plain names (primitives, active type-parameter bindings, aliases,
/// enums, structs, unions). Instantiation failures are swallowed and reported
/// as `.void_type`.
fn resolveType(self: *CodeGen, type_node: ?*Node) Type {
    const tn = type_node orelse return .void_type;

    if (Type.fromTypeExpr(tn)) |t| return t;

    // Array type: [N]T
    if (tn.data == .array_type_expr) {
        const ate = tn.data.array_type_expr;
        // The length may be a literal or an identifier bound as a value
        // parameter during generic instantiation; resolveValueArg covers both.
        const raw_length = self.resolveValueArg(ate.length);
        const length = std.math.cast(u32, raw_length) orelse {
            if (self.diagnostics) |diags| diags.addFmt(.err, tn.span, "invalid array length {d}", .{raw_length});
            return .void_type;
        };
        const elem_type = self.resolveType(ate.element_type);
        const elem_name = elem_type.displayName(self.allocator) catch return .void_type;
        return .{ .array_type = .{ .element_name = elem_name, .length = length } };
    }

    // Parameterized type: Vector(N, T) or generic struct instantiation
    if (tn.data == .parameterized_type_expr) {
        const pte = tn.data.parameterized_type_expr;
        // Direct lookup (unqualified names from flat imports)
        if (self.builtin_functions.contains(pte.name)) {
            if (self.resolveBuiltinType(pte.name, pte.args)) |ty| return ty;
        }
        if (self.generic_struct_templates.get(pte.name)) |tmpl| {
            return self.instantiateGenericStruct(pte.name, tmpl, pte.args) catch .void_type;
        }
        // Progressive namespace resolution for dotted names (e.g. "std.Vector")
        if (std.mem.indexOfScalar(u8, pte.name, '.')) |dot| {
            const ns = pte.name[0..dot];
            if (self.namespaces.contains(ns)) {
                // Namespace verified — look up qualified name in registries.
                // NOTE(review): these lookups use the same key as the direct
                // lookup above — confirm whether the member name was intended.
                if (self.builtin_functions.contains(pte.name)) {
                    if (self.resolveBuiltinType(pte.name, pte.args)) |ty| return ty;
                }
                if (self.generic_struct_templates.get(pte.name)) |tmpl| {
                    return self.instantiateGenericStruct(pte.name, tmpl, pte.args) catch .void_type;
                }
            }
        }
        if (self.diagnostics) |diags| diags.addFmt(.err, tn.span, "unresolved type '{s}'", .{pte.name});
        return .void_type;
    }

    // Call expression as type: Vec(3, f32) → generic struct/type function instantiation
    if (tn.data == .call) {
        if (tn.data.call.callee.data == .identifier) {
            const name = tn.data.call.callee.data.identifier.name;
            if (self.generic_struct_templates.get(name)) |tmpl| {
                return self.instantiateGenericStruct(name, tmpl, tn.data.call.args) catch .void_type;
            }
            if (self.generic_templates.get(name)) |tmpl| {
                return self.instantiateTypeFunction(name, name, tmpl, tn.data.call.args) catch .void_type;
            }
        }
        return .void_type;
    }

    // Check type parameter bindings (during generic instantiation)
    if (tn.data == .type_expr or tn.data == .identifier) {
        const name = if (tn.data == .type_expr) tn.data.type_expr.name else tn.data.identifier.name;
        // Try primitive type name first
        if (Type.fromName(name)) |t| return t;
        if (self.type_param_bindings) |bindings| {
            if (bindings.get(name)) |t| return t;
        }
        // Check type aliases; an alias whose target is unknown falls through
        // to the direct name lookups below.
        if (self.type_aliases.get(name)) |target| {
            if (Type.fromName(target)) |t| return t;
            if (self.struct_types.contains(target)) return .{ .struct_type = target };
            if (self.union_types.contains(target)) return .{ .union_type = target };
        }
        // Check enum types
        if (self.enum_types.contains(name)) return .{ .enum_type = name };
        // Check struct types
        if (self.struct_types.contains(name)) return .{ .struct_type = name };
        // Check union types
        if (self.union_types.contains(name)) return .{ .union_type = name };
    }

    return .void_type;
}

/// Resolve a value argument to an integer — handles int_literal and identifier referencing value_param_bindings.
/// Anything else, including an unbound identifier, yields 0.
fn resolveValueArg(self: *CodeGen, node: *Node) i64 {
    if (node.data == .int_literal) return node.data.int_literal.value;
    if (node.data == .identifier or node.data == .type_expr) {
        const name = if (node.data == .identifier) node.data.identifier.name else node.data.type_expr.name;
        if (self.value_param_bindings) |bindings| {
            if (bindings.get(name)) |val| return val;
        }
    }
    return 0;
}

/// Instantiate a generic struct template with concrete arguments.
/// Returns the struct_type for the instantiated struct (possibly cached).
///
/// Parameters whose constraint is the type expression `Type` are bound as
/// types; all other parameters are bound as integer values. The instantiation
/// is registered in `struct_types` under a mangled name (`Vec__3_f32`) with a
/// display name of the form `Vec(3,f32)`. Arguments beyond the template's
/// parameter count are ignored.
fn instantiateGenericStruct(self: *CodeGen, template_name: []const u8, tmpl: GenericStructTemplate, args: []const *Node) !Type {
    const sd = tmpl.sd;

    if (args.len < sd.type_params.len) {
        return self.emitErrorFmt("generic struct '{s}' expects {d} type arguments, got {d}", .{ template_name, sd.type_params.len, args.len });
    }

    // Build bindings from template params + args
    var type_bindings = std.StringHashMap(Type).init(self.allocator);
    var val_bindings = std.StringHashMap(i64).init(self.allocator);

    for (sd.type_params, 0..) |tp, i| {
        const constraint_name = if (tp.constraint.data == .type_expr) tp.constraint.data.type_expr.name else "";
        if (std.mem.eql(u8, constraint_name, "Type")) {
            // Type parameter: resolve arg as type
            try type_bindings.put(tp.name, self.resolveType(args[i]));
        } else {
            // Value parameter: resolve arg as integer
            try val_bindings.put(tp.name, self.resolveValueArg(args[i]));
        }
    }

    // Build mangled name: Vec__3_f32
    var mangle_buf = std.ArrayList(u8).empty;
    try mangle_buf.appendSlice(self.allocator, template_name);
    try mangle_buf.appendSlice(self.allocator, "__");
    for (sd.type_params, 0..) |tp, i| {
        if (i > 0) try mangle_buf.append(self.allocator, '_');
        const constraint_name = if (tp.constraint.data == .type_expr) tp.constraint.data.type_expr.name else "";
        if (std.mem.eql(u8, constraint_name, "Type")) {
            if (type_bindings.get(tp.name)) |ty| {
                // The mangled name is the cache key, so a failure here must
                // propagate rather than produce a placeholder that could
                // collide with another instantiation.
                const dn = try ty.displayName(self.allocator);
                try mangle_buf.appendSlice(self.allocator, dn);
            }
        } else {
            if (val_bindings.get(tp.name)) |val| {
                var tmp: [20]u8 = undefined; // fits any i64 in decimal, sign included
                const s = std.fmt.bufPrint(&tmp, "{d}", .{val}) catch "0";
                try mangle_buf.appendSlice(self.allocator, s);
            }
        }
    }
    const mangled_name = try mangle_buf.toOwnedSlice(self.allocator);

    // Check if already instantiated
    if (self.struct_types.contains(mangled_name)) {
        return .{ .struct_type = mangled_name };
    }

    // Instantiate: resolve field types with bindings active
    const saved_type_bindings = self.type_param_bindings;
    const saved_value_bindings = self.value_param_bindings;
    self.type_param_bindings = type_bindings;
    self.value_param_bindings = val_bindings;
    defer {
        self.type_param_bindings = saved_type_bindings;
        self.value_param_bindings = saved_value_bindings;
    }

    var field_sx_types = std.ArrayList(Type).empty;
    var field_llvm_types = std.ArrayList(c.LLVMTypeRef).empty;

    for (sd.field_types) |ft| {
        const sx_ty = self.resolveType(ft);
        try field_sx_types.append(self.allocator, sx_ty);
        try field_llvm_types.append(self.allocator, self.typeToLLVM(sx_ty));
    }

    const llvm_types_slice = try field_llvm_types.toOwnedSlice(self.allocator);
    const name_z = try self.allocator.dupeZ(u8, mangled_name);
    const struct_ty = c.LLVMStructCreateNamed(self.context, name_z.ptr);
    c.LLVMStructSetBody(struct_ty, if (llvm_types_slice.len > 0) llvm_types_slice.ptr else null, @intCast(llvm_types_slice.len), 0);

    // Field defaults are shared with the template (the nodes are not cloned).
    const resolved_defaults = try self.allocator.alloc(?*Node, sd.field_defaults.len);
    for (sd.field_defaults, 0..) |fd, i| {
        resolved_defaults[i] = fd;
    }

    // Build pretty display name: Vec(3,f32)
    var display_buf = std.ArrayList(u8).empty;
    try display_buf.appendSlice(self.allocator, template_name);
    try display_buf.append(self.allocator, '(');
    for (sd.type_params, 0..) |tp, i| {
        if (i > 0) try display_buf.appendSlice(self.allocator, ",");
        const constraint_name = if (tp.constraint.data == .type_expr) tp.constraint.data.type_expr.name else "";
        if (std.mem.eql(u8, constraint_name, "Type")) {
            if (type_bindings.get(tp.name)) |ty| {
                const dn = try ty.displayName(self.allocator);
                try display_buf.appendSlice(self.allocator, dn);
            }
        } else {
            if (val_bindings.get(tp.name)) |val| {
                var tmp: [20]u8 = undefined;
                const s = std.fmt.bufPrint(&tmp, "{d}", .{val}) catch "0";
                try display_buf.appendSlice(self.allocator, s);
            }
        }
    }
    try display_buf.append(self.allocator, ')');
    const display_name = try display_buf.toOwnedSlice(self.allocator);

    try self.struct_types.put(mangled_name, .{
        .field_names = sd.field_names,
        .field_types = try field_sx_types.toOwnedSlice(self.allocator),
        .field_defaults = resolved_defaults,
        .llvm_type = struct_ty,
        .display_name = display_name,
    });
    _ = try self.getAnyTypeId(mangled_name, .{ .struct_type = mangled_name });

    return .{ .struct_type = mangled_name };
}

/// Instantiate a type-returning function (e.g. Complex(u32)) by walking the body AST
/// to find `return struct { ... }` or `return union { ... }` and registering with bindings active.
///
/// Every parameter is bound as a type. The result is registered under
/// `template_name` mangled with the argument display names; `alias_name` is
/// used only as the display name of a newly registered struct. An existing
/// registration with the same mangled name is reused.
fn instantiateTypeFunction(self: *CodeGen, alias_name: []const u8, template_name: []const u8, tmpl: GenericTemplate, args: []const *Node) !Type {
    const fd = tmpl.fd;

    if (args.len < fd.type_params.len) {
        return self.emitErrorFmt("type function '{s}' expects {d} type arguments, got {d}", .{ template_name, fd.type_params.len, args.len });
    }

    // Build type bindings from params + args
    var type_bindings = std.StringHashMap(Type).init(self.allocator);
    for (fd.type_params, 0..) |tp, i| {
        try type_bindings.put(tp.name, self.resolveType(args[i]));
    }

    // Activate bindings
    const saved_type_bindings = self.type_param_bindings;
    self.type_param_bindings = type_bindings;
    defer self.type_param_bindings = saved_type_bindings;

    // Build mangled name from template + args
    var mangle_buf = std.ArrayList(u8).empty;
    try mangle_buf.appendSlice(self.allocator, template_name);
    try mangle_buf.appendSlice(self.allocator, "__");
    for (fd.type_params, 0..) |tp, i| {
        if (i > 0) try mangle_buf.append(self.allocator, '_');
        if (type_bindings.get(tp.name)) |ty| {
            // Cache key: propagate failures instead of writing a placeholder.
            const dn = try ty.displayName(self.allocator);
            try mangle_buf.appendSlice(self.allocator, dn);
        }
    }
    const mangled_name = try mangle_buf.toOwnedSlice(self.allocator);

    // Try struct first
    if (self.findStructInBody(fd.body)) |struct_decl| {
        if (self.struct_types.contains(mangled_name)) {
            return .{ .struct_type = mangled_name };
        }
        return self.registerInstantiatedStruct(mangled_name, alias_name, struct_decl);
    }

    // Try union
    if (self.findUnionInBody(fd.body)) |union_decl| {
        if (self.union_types.contains(mangled_name)) {
            return .{ .union_type = mangled_name };
        }
        return self.registerInstantiatedUnion(mangled_name, union_decl);
    }

    return self.emitErrorFmt("type function '{s}' does not return a struct or union", .{template_name});
}

/// Register the struct produced by a type function under `mangled_name`.
/// Field types are resolved with whatever type-parameter bindings are active
/// on `self` at the time of the call; `alias_name` becomes the display name.
fn registerInstantiatedStruct(self: *CodeGen, mangled_name: []const u8, alias_name: []const u8, struct_decl: ast.StructDecl) !Type {
    var field_sx_types = std.ArrayList(Type).empty;
    var field_llvm_types = std.ArrayList(c.LLVMTypeRef).empty;

    for (struct_decl.field_types) |ft| {
        const sx_ty = self.resolveType(ft);
        try field_sx_types.append(self.allocator, sx_ty);
        try field_llvm_types.append(self.allocator, self.typeToLLVM(sx_ty));
    }

    const llvm_types_slice = try field_llvm_types.toOwnedSlice(self.allocator);
    const name_z = try self.allocator.dupeZ(u8, mangled_name);
    const struct_ty = c.LLVMStructCreateNamed(self.context, name_z.ptr);
    c.LLVMStructSetBody(struct_ty, if (llvm_types_slice.len > 0) llvm_types_slice.ptr else null, @intCast(llvm_types_slice.len), 0);

    const resolved_defaults = try self.allocator.alloc(?*Node, struct_decl.field_defaults.len);
    for (struct_decl.field_defaults, 0..) |fd_def, i| {
        resolved_defaults[i] = fd_def;
    }

    // The registry keeps its own copy of the display name.
    const display_name = try self.allocator.dupe(u8, alias_name);

    try self.struct_types.put(mangled_name, .{
        .field_names = struct_decl.field_names,
        .field_types = try field_sx_types.toOwnedSlice(self.allocator),
        .field_defaults = resolved_defaults,
        .llvm_type = struct_ty,
        .display_name = display_name,
    });
    _ = try self.getAnyTypeId(mangled_name, .{ .struct_type = mangled_name });

    return .{ .struct_type = mangled_name };
}

/// Register the union produced by a type function under `mangled_name`.
/// The LLVM representation is `{ i32, [max_payload_size x i8] }`, where the
/// payload size is the largest store size among the variants that carry a
/// type; variants without a type are recorded as `.void_type`.
fn registerInstantiatedUnion(self: *CodeGen, mangled_name: []const u8, union_decl: ast.UnionDecl) !Type {
    var variant_sx_types = std.ArrayList(Type).empty;
    var max_payload_size: u64 = 0;
    const data_layout = c.LLVMGetModuleDataLayout(self.module);

    for (union_decl.variant_types) |vt| {
        const type_node = vt orelse {
            try variant_sx_types.append(self.allocator, .void_type);
            continue;
        };
        const sx_ty = self.resolveType(type_node);
        try variant_sx_types.append(self.allocator, sx_ty);
        const size = c.LLVMStoreSizeOfType(data_layout, self.typeToLLVM(sx_ty));
        if (size > max_payload_size) max_payload_size = size;
    }

    const name_z = try self.allocator.dupeZ(u8, mangled_name);
    const union_ty = c.LLVMStructCreateNamed(self.context, name_z.ptr);
    const i32_ty = c.LLVMInt32TypeInContext(self.context);
    const i8_ty = c.LLVMInt8TypeInContext(self.context);
    const payload_array_ty = c.LLVMArrayType2(i8_ty, max_payload_size);
    var fields = [2]c.LLVMTypeRef{ i32_ty, payload_array_ty };
    c.LLVMStructSetBody(union_ty, &fields, 2, 0);

    try self.union_types.put(mangled_name, .{
        .variant_names = union_decl.variant_names,
        .variant_types = try variant_sx_types.toOwnedSlice(self.allocator),
        .llvm_type = union_ty,
        .max_payload_size = max_payload_size,
    });
    _ = try self.getAnyTypeId(mangled_name, .{ .union_type = mangled_name });

    return .{ .union_type = mangled_name };
}

/// Walk an AST body to find a struct declaration (from `return struct { ... }` or bare struct expr).
/// Only the body itself and the top-level statements of a block body are
/// inspected; nested blocks are not searched. Returns the first match or null.
fn findStructInBody(_: *CodeGen, body: *Node) ?ast.StructDecl {
    if (body.data == .struct_decl) return body.data.struct_decl;
    if (body.data != .block) return null;

    for (body.data.block.stmts) |stmt| {
        if (stmt.data == .return_stmt) {
            if (stmt.data.return_stmt.value) |val| {
                if (val.data == .struct_decl) return val.data.struct_decl;
            }
        }
        if (stmt.data == .struct_decl) return stmt.data.struct_decl;
    }
    return null;
}

/// Walk an AST body to find a union declaration (from `return union { ... }` or bare union expr).
fn findUnionInBody(_: *CodeGen, body: *Node) ?ast.UnionDecl {
    // Only the body itself and the top-level statements of a block body are
    // inspected; nested blocks are not searched.
    if (body.data == .union_decl) return body.data.union_decl;
    if (body.data != .block) return null;

    for (body.data.block.stmts) |stmt| {
        if (stmt.data == .return_stmt) {
            if (stmt.data.return_stmt.value) |val| {
                if (val.data == .union_decl) return val.data.union_decl;
            }
        }
        if (stmt.data == .union_decl) return stmt.data.union_decl;
    }
    return null;
}

/// Build the LLVM function type for a declaration.
///
/// `name` decides the `main` special case: a function named exactly "main"
/// always returns i32, whatever its declared return type. Comptime parameters
/// are omitted from the LLVM signature, a variadic parameter is passed as a
/// single slice value, and a non-variadic parameter whose type resolves to
/// void is an error.
fn buildFnType(self: *CodeGen, params: []const ast.Param, return_type: ?*Node, name: []const u8) !c.LLVMTypeRef {
    const ret_sx_type = self.resolveType(return_type);
    const is_main = std.mem.eql(u8, name, "main");
    const ret_llvm_type = if (is_main)
        c.LLVMInt32TypeInContext(self.context)
    else
        self.typeToLLVM(ret_sx_type);

    var param_llvm_types = std.ArrayList(c.LLVMTypeRef).empty;
    for (params) |param| {
        if (param.is_comptime) continue;
        if (param.is_variadic) {
            // Variadic param becomes a slice {ptr, i32} in the LLVM signature
            try param_llvm_types.append(self.allocator, self.getStringStructType());
            continue;
        }
        const sx_ty = self.resolveType(param.type_expr);
        if (sx_ty == .void_type) return self.emitErrorFmt("parameter '{s}' in function '{s}' has unresolved type", .{ param.name, name });
        try param_llvm_types.append(self.allocator, self.typeToLLVM(sx_ty));
    }
    const params_slice = try param_llvm_types.toOwnedSlice(self.allocator);

    return c.LLVMFunctionType(
        ret_llvm_type,
        if (params_slice.len > 0) params_slice.ptr else null,
        @intCast(params_slice.len),
        0,
    );
}

/// Declare `fd` in the LLVM module under its own name.
fn registerFnDecl(self: *CodeGen, fd: ast.FnDecl) !void {
    return self.registerFnDeclAs(fd, fd.name);
}

/// Declare `fd` in the LLVM module under `llvm_name` (signature only, no body)
/// and, if it has a variadic parameter, record where the variadic tail starts
/// and its element type name so call sites can pack the arguments.
fn registerFnDeclAs(self: *CodeGen, fd: ast.FnDecl, llvm_name: []const u8) !void {
    // The signature is built from the LLVM name, not the source name, so the
    // `main` special case is decided the same way here as when the body is
    // generated (which checks the LLVM name). Otherwise a namespaced `ns.main`
    // would be declared as returning i32 but generated as an ordinary function.
    const fn_type = try self.buildFnType(fd.params, fd.return_type, llvm_name);
    const name_z = try self.allocator.dupeZ(u8, llvm_name);
    _ = c.LLVMAddFunction(self.module, name_z.ptr, fn_type);

    // Track variadic function info for call site packing
    for (fd.params, 0..) |param, i| {
        if (!param.is_variadic) continue;
        const elem_name = if (param.type_expr.data == .type_expr) param.type_expr.data.type_expr.name else "s32";
        try self.variadic_functions.put(llvm_name, .{
            .fixed_param_count = @intCast(i),
            .element_type_name = elem_name,
        });
        break;
    }
}

/// Register the declarations of a namespace (signatures only).
///
/// Functions, enums, lambdas and type aliases are registered under the
/// qualified name `ns.member`. Structs and unions are handed to
/// `registerStructType` / `registerUnionType` unchanged. Other constants and
/// declaration kinds are ignored here.
fn registerNamespace(self: *CodeGen, ns: ast.NamespaceDecl) !void {
    try self.namespaces.put(ns.name, {});
    for (ns.decls) |decl| {
        switch (decl.data) {
            .fn_decl => |fd| {
                const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, fd.name });
                if (fd.body.data == .builtin_expr) {
                    try self.builtin_functions.put(qualified, {});
                } else if (fd.type_params.len > 0) {
                    try self.generic_templates.put(qualified, .{ .fd = fd });
                } else {
                    try self.registerFnDeclAs(fd, qualified);
                }
            },
            .enum_decl => |ed| {
                const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, ed.name });
                try self.enum_types.put(qualified, ed.variants);
            },
            .struct_decl => |sd| try self.registerStructType(sd),
            .union_decl => |ud| try self.registerUnionType(ud),
            .const_decl => |cd| {
                if (cd.value.data == .builtin_expr) {
                    // #builtin constant in namespace — skip codegen
                } else if (cd.value.data == .lambda) {
                    const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, cd.name });
                    try self.registerLambdaAsFunction(qualified, cd.value.data.lambda);
                } else if (cd.value.data == .type_expr) {
                    const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, cd.name });
                    try self.type_aliases.put(qualified, cd.value.data.type_expr.name);
                }
            },
            else => {},
        }
    }
}

/// Generate bodies for the non-generic, non-builtin functions and the lambda
/// constants of a namespace, under their qualified names. `current_namespace`
/// is set to the namespace for the duration of the call and restored on exit.
fn genNamespaceBodies(self: *CodeGen, ns: ast.NamespaceDecl) !void {
    const saved_ns = self.current_namespace;
    self.current_namespace = ns.name;
    defer self.current_namespace = saved_ns;

    for (ns.decls) |decl| {
        switch (decl.data) {
            .fn_decl => |fd| {
                // Builtins have no body to generate; generic templates are skipped here.
                if (fd.body.data == .builtin_expr or fd.type_params.len != 0) continue;
                const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, fd.name });
                try self.genFnBodyAs(fd, qualified);
            },
            .const_decl => |cd| {
                if (cd.value.data != .lambda) continue;
                const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, cd.name });
                try self.genLambdaBody(qualified, cd.value.data.lambda);
            },
            else => {},
        }
    }
}

/// Infer the type a comptime expression will produce.
///
/// An `xx` unary operator is looked through. For a call whose callee name
/// resolves, the return type of the LLVM function registered under that name
/// is mapped back to a sx type, falling back to s32 when the function is not
/// registered (or the name cannot be copied). Everything else goes through
/// `inferType`.
fn inferComptimeReturnType(self: *CodeGen, expr: *Node) Type {
    // xx: see through to inner expression
    if (expr.data == .unary_op and expr.data.unary_op.op == .xx) {
        return self.inferComptimeReturnType(expr.data.unary_op.operand);
    }
    // For function calls, look up the registered function's return type
    if (expr.data == .call) {
        if (self.resolveCalleeName(expr.data.call)) |callee_name| {
            const callee_name_z = self.allocator.dupeZ(u8, callee_name) catch return Type.s(32);
            const callee_fn = c.LLVMGetNamedFunction(self.module, callee_name_z.ptr) orelse return Type.s(32);
            const fn_type = c.LLVMGlobalGetValueType(callee_fn);
            return self.llvmTypeToSxType(c.LLVMGetReturnType(fn_type));
        }
    }
    return self.inferType(expr);
}

/// Map an LLVM type back to a sx Type
///
/// Exact matches on the common primitive types are tried first, then the
/// string and Any struct types, then a generic pointer (reported as
/// `.string_type`), then integer widths up to 64 bits, registered named
/// structs/unions, arrays and vectors. Anything unrecognised maps to s32.
/// Every integer comes back as signed.
fn llvmTypeToSxType(self: *CodeGen, llvm_ty: c.LLVMTypeRef) Type {
    if (llvm_ty == c.LLVMInt1TypeInContext(self.context)) return .boolean;
    if (llvm_ty == c.LLVMInt8TypeInContext(self.context)) return Type.s(8);
    if (llvm_ty == c.LLVMInt16TypeInContext(self.context)) return Type.s(16);
    if (llvm_ty == c.LLVMInt32TypeInContext(self.context)) return Type.s(32);
    if (llvm_ty == c.LLVMInt64TypeInContext(self.context)) return Type.s(64);
    if (llvm_ty == c.LLVMFloatTypeInContext(self.context)) return .f32;
    if (llvm_ty == c.LLVMDoubleTypeInContext(self.context)) return .f64;
    if (llvm_ty == c.LLVMVoidTypeInContext(self.context)) return .void_type;
    if (llvm_ty == self.getStringStructType()) return .string_type;
    if (self.any_struct_type) |any_ty| {
        if (llvm_ty == any_ty) return .any_type;
    }
    if (llvm_ty == c.LLVMPointerTypeInContext(self.context, 0)) return .string_type; // raw ptr fallback (meta_type)

    const kind = c.LLVMGetTypeKind(llvm_ty);

    // Handle arbitrary-width integer types (e.g. i3, i7, i12)
    if (kind == c.LLVMIntegerTypeKind) {
        const width = c.LLVMGetIntTypeWidth(llvm_ty);
        if (width > 0 and width <= 64) return Type.s(@intCast(width));
    }
    // Check for named struct types
    if (kind == c.LLVMStructTypeKind) {
        const name_ptr = c.LLVMGetStructName(llvm_ty);
        if (name_ptr != null) {
            const name = std.mem.span(name_ptr);
            if (self.struct_types.contains(name)) return .{ .struct_type = name };
            if (self.union_types.contains(name)) return .{ .union_type = name };
        }
    }
    // Check for array types
    if (kind == c.LLVMArrayTypeKind) {
        const elem_llvm = c.LLVMGetElementType(llvm_ty);
        const length: u32 = @intCast(c.LLVMGetArrayLength2(llvm_ty));
        const elem_ty = self.llvmTypeToSxType(elem_llvm);
        const elem_name = elem_ty.displayName(self.allocator) catch return Type.s(32);
        return .{ .array_type = .{ .element_name = elem_name, .length = length } };
    }
    // Check for vector types
    if (kind == c.LLVMVectorTypeKind) {
        const elem_llvm = c.LLVMGetElementType(llvm_ty);
        const length = c.LLVMGetVectorSize(llvm_ty);
        const elem_ty = self.llvmTypeToSxType(elem_llvm);
        const elem_name = elem_ty.displayName(self.allocator) catch return Type.s(32);
        return .{ .vector_type = .{ .element_name = elem_name, .length = length } };
    }
    return Type.s(32);
}

/// Declare the LLVM global for a comptime-evaluated constant and remember the
/// expression in `comptime_globals` for later evaluation.
///
/// The type is `type_override` when given, otherwise inferred from `expr`; a
/// void result is an error. Until the expression is evaluated the global
/// carries a zero placeholder initializer.
fn registerComptimeGlobal(self: *CodeGen, name: []const u8, expr: *Node, type_override: ?Type) !void {
    const ty = type_override orelse self.inferComptimeReturnType(expr);
    if (ty == .void_type) return self.emitErrorFmt("cannot infer type for comptime global '{s}'", .{name});

    const llvm_ty = self.typeToLLVM(ty);
    const name_z = try self.allocator.dupeZ(u8, name);
    const global = c.LLVMAddGlobal(self.module, llvm_ty, name_z.ptr);
    // LLVMConstNull yields the zero value of any type (identical to integer 0
    // for integer types), whereas LLVMConstInt is only valid for integer types.
    c.LLVMSetInitializer(global, c.LLVMConstNull(llvm_ty));

    try self.comptime_globals.put(name, .{ .global = global, .ty = ty, .expr = expr });
}

/// Evaluate a simple constant expression to an LLVM constant value.
/// Returns null for expressions that can't be constant-folded at registration time.
///
/// Only integer, float and bool literals are folded, and only when the literal
/// fits the kind of the target LLVM type: integer and bool literals need an
/// integer type (an integer literal is also accepted for a float type and
/// converted), float literals need a float type. A mismatch returns null
/// rather than building an ill-typed constant.
fn evalConstant(self: *CodeGen, node: *Node, target_ty: Type) ?c.LLVMValueRef {
    const llvm_ty = self.typeToLLVM(target_ty);
    const kind = c.LLVMGetTypeKind(llvm_ty);
    const is_int = kind == c.LLVMIntegerTypeKind;
    const is_float = kind == c.LLVMHalfTypeKind or
        kind == c.LLVMFloatTypeKind or
        kind == c.LLVMDoubleTypeKind;

    switch (node.data) {
        .int_literal => |lit| {
            if (is_int) return c.LLVMConstInt(llvm_ty, @bitCast(@as(i64, lit.value)), 0);
            if (is_float) return c.LLVMConstReal(llvm_ty, @as(f64, @floatFromInt(lit.value)));
            return null;
        },
        .float_literal => |lit| {
            if (is_float) return c.LLVMConstReal(llvm_ty, lit.value);
            return null;
        },
        .bool_literal => |lit| {
            if (is_int) return c.LLVMConstInt(llvm_ty, if (lit.value) 1 else 0, 0);
            return null;
        },
        else => return null,
    }
}

/// Register a top-level value constant (e.g., `SPECIAL_VALUE :u8: 42;`) as an LLVM global.
+ fn registerTopLevelConstant(self: *CodeGen, cd: ast.ConstDecl) !void { + const ta = cd.type_annotation orelse return; // need explicit type for top-level constants + const sx_ty = self.resolveType(ta); + if (sx_ty == .void_type) return; + + const const_val = self.evalConstant(cd.value, sx_ty) orelse return; + + const name_z = try self.allocator.dupeZ(u8, cd.name); + const global = c.LLVMAddGlobal(self.module, self.typeToLLVM(sx_ty), name_z.ptr); + c.LLVMSetInitializer(global, const_val); + c.LLVMSetGlobalConstant(global, 1); + + try self.comptime_globals.put(cd.name, .{ + .global = global, + .ty = sx_ty, + .expr = cd.value, + .is_resolved = true, + }); + } + + fn genFnBody(self: *CodeGen, fd: ast.FnDecl) !void { + return self.genFnBodyAs(fd, fd.name); + } + + fn genFnBodyAs(self: *CodeGen, fd: ast.FnDecl, llvm_name: []const u8) !void { + self.named_values.clearRetainingCapacity(); + + const ret_sx_type = self.resolveType(fd.return_type); + const is_main = std.mem.eql(u8, llvm_name, "main"); + const ret_llvm_type = if (is_main) + c.LLVMInt32TypeInContext(self.context) + else + self.typeToLLVM(ret_sx_type); + + self.current_return_type = if (is_main) Type.s(32) else ret_sx_type; + + const name_z = try self.allocator.dupeZ(u8, llvm_name); + const function = c.LLVMGetNamedFunction(self.module, name_z.ptr) orelse return self.emitErrorFmt("function '{s}' not found in LLVM module", .{llvm_name}); + self.current_function = function; + + const entry = c.LLVMAppendBasicBlockInContext(self.context, function, "entry"); + c.LLVMPositionBuilderAtEnd(self.builder, entry); + + // Create allocas for parameters and store incoming values + for (fd.params, 0..) 
|param, i| { + const sx_ty = if (param.is_variadic) blk: { + const elem_name = if (param.type_expr.data == .type_expr) param.type_expr.data.type_expr.name else "s32"; + break :blk Type{ .slice_type = .{ .element_name = elem_name } }; + } else self.resolveType(param.type_expr); + if (sx_ty == .void_type) return self.emitErrorFmt("parameter '{s}' has unresolved type", .{param.name}); + const llvm_ty = self.typeToLLVM(sx_ty); + const param_name_z = try self.allocator.dupeZ(u8, param.name); + const alloca = c.LLVMBuildAlloca(self.builder, llvm_ty, param_name_z.ptr); + const param_val = c.LLVMGetParam(function, @intCast(i)); + _ = c.LLVMBuildStore(self.builder, param_val, alloca); + try self.named_values.put(param.name, .{ .ptr = alloca, .ty = sx_ty }); + } + + // Push function-level scope so that function-body defers are tracked + try self.pushScope(); + + // Generate body + const body = fd.body; + if (body.data != .block) return self.emitError("function body must be a block"); + + var last_val: c.LLVMValueRef = null; + for (body.data.block.stmts) |stmt| { + last_val = try self.genStmt(stmt); + } + + // Return — skip if current block already has a terminator (from explicit return) + const current_bb = c.LLVMGetInsertBlock(self.builder); + if (c.LLVMGetBasicBlockTerminator(current_bb) == null) { + // Implicit return path: pop scope (executes defers) then return + try self.popScope(); + // Check if last_val is void-typed (e.g. 
call to void-returning function) + const effective_last_val: ?c.LLVMValueRef = if (last_val) |val| + (if (c.LLVMTypeOf(val) == c.LLVMVoidTypeInContext(self.context)) null else val) + else + null; + + if (ret_sx_type == .void_type and !is_main) { + _ = c.LLVMBuildRetVoid(self.builder); + } else if (effective_last_val) |val| { + if (ret_sx_type.isStruct()) { + // Struct implicit return: val is an alloca pointer, load the value + const sname = ret_sx_type.struct_type; + const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname}); + const loaded = c.LLVMBuildLoad2(self.builder, info.llvm_type, val, "retval"); + _ = c.LLVMBuildRet(self.builder, loaded); + } else { + const src_ty = self.llvmTypeToSxType(c.LLVMTypeOf(val)); + const ret_val = self.convertValue(val, src_ty, self.current_return_type); + _ = c.LLVMBuildRet(self.builder, ret_val); + } + } else if (is_main) { + _ = c.LLVMBuildRet(self.builder, c.LLVMConstInt(ret_llvm_type, 0, 0)); + } else if (ret_sx_type != .void_type) { + _ = c.LLVMBuildUnreachable(self.builder); + } else { + _ = c.LLVMBuildRetVoid(self.builder); + } + } else { + // Explicit return already emitted defers; just clean up scope stacks + if (self.defer_stack.items.len > 0) _ = self.defer_stack.pop(); + if (self.scope_saves.items.len > 0) _ = self.scope_saves.pop(); + } + } + + fn registerLambdaAsFunction(self: *CodeGen, name: []const u8, lambda: ast.Lambda) !void { + const ret_sx_type = self.inferType(lambda.body); + const ret_llvm_type = self.typeToLLVM(ret_sx_type); + + var param_llvm_types = std.ArrayList(c.LLVMTypeRef).empty; + for (lambda.params) |param| { + const sx_ty = self.resolveType(param.type_expr); + try param_llvm_types.append(self.allocator, self.typeToLLVM(sx_ty)); + } + const params_slice = try param_llvm_types.toOwnedSlice(self.allocator); + + const fn_type = c.LLVMFunctionType( + ret_llvm_type, + if (params_slice.len > 0) params_slice.ptr else null, + 
@intCast(params_slice.len),
+            0,
+        );
+
+        const name_z = try self.allocator.dupeZ(u8, name);
+        _ = c.LLVMAddFunction(self.module, name_z.ptr, fn_type);
+    }
+
+    /// Emit the body of the lambda whose LLVM function is named `name`.
+    ///
+    /// The function must already exist in the LLVM module (see
+    /// `registerLambdaAsFunction`); otherwise an error is reported. The method
+    /// clears `named_values`, spills each parameter into its own alloca, then
+    /// evaluates `lambda.body` and emits the return:
+    ///   * inferred return type is void          -> `ret void`, the value is ignored
+    ///   * body produced a non-void value        -> value converted to the return type
+    ///   * non-void return type but no value     -> `unreachable`
+    /// These are the same rules the regular function-body path applies to its
+    /// implicit return.
+    fn genLambdaBody(self: *CodeGen, name: []const u8, lambda: ast.Lambda) !void {
+        self.named_values.clearRetainingCapacity();
+
+        const ret_sx_type = self.inferType(lambda.body);
+        self.current_return_type = ret_sx_type;
+
+        const name_z = try self.allocator.dupeZ(u8, name);
+        const function = c.LLVMGetNamedFunction(self.module, name_z.ptr) orelse return self.emitErrorFmt("lambda '{s}' not found in LLVM module", .{name});
+        self.current_function = function;
+
+        const entry = c.LLVMAppendBasicBlockInContext(self.context, function, "entry");
+        c.LLVMPositionBuilderAtEnd(self.builder, entry);
+
+        for (lambda.params, 0..) |param, i| {
+            const sx_ty = self.resolveType(param.type_expr);
+            // A void-typed slot cannot be allocated; report it instead of handing it to LLVM.
+            if (sx_ty == .void_type) return self.emitErrorFmt("parameter '{s}' has unresolved type", .{param.name});
+            const llvm_ty = self.typeToLLVM(sx_ty);
+            const param_name_z = try self.allocator.dupeZ(u8, param.name);
+            const alloca = c.LLVMBuildAlloca(self.builder, llvm_ty, param_name_z.ptr);
+            const param_val = c.LLVMGetParam(function, @intCast(i));
+            _ = c.LLVMBuildStore(self.builder, param_val, alloca);
+            try self.named_values.put(param.name, .{ .ptr = alloca, .ty = sx_ty });
+        }
+
+        const ret_val = try self.genExpr(lambda.body);
+
+        // Nothing to add if the body already terminated the current block.
+        const current_bb = c.LLVMGetInsertBlock(self.builder);
+        if (c.LLVMGetBasicBlockTerminator(current_bb) != null) return;
+
+        // The LLVM function was declared with the inferred return type, so a
+        // void-inferred lambda must return void even if the body yielded a value.
+        if (ret_sx_type == .void_type) {
+            _ = c.LLVMBuildRetVoid(self.builder);
+            return;
+        }
+
+        if (ret_val) |val| {
+            // A call to a void function yields a void-typed value that cannot be returned.
+            if (c.LLVMTypeOf(val) != c.LLVMVoidTypeInContext(self.context)) {
+                const src_ty = self.llvmTypeToSxType(c.LLVMTypeOf(val));
+                const converted = self.convertValue(val, src_ty, ret_sx_type);
+                _ = c.LLVMBuildRet(self.builder, converted);
+                return;
+            }
+        }
+
+        // Non-void lambda without a usable value: `ret void` would not match the signature.
+        _ = c.LLVMBuildUnreachable(self.builder);
+    }
+
+    fn genStmt(self: *CodeGen, node: *Node) !c.LLVMValueRef {
+        self.current_span = node.span;
+        switch (node.data) {
+            .var_decl => |vd| {
+                return self.genVarDecl(vd);
+            },
+            .const_decl => |cd| {
+                return self.genConstDecl(cd);
+            },
+            .fn_decl => |fd| {
+                // Local declaration inside a function body
+                if (fd.type_params.len > 0) {
+                    // Generic template / type function: register for lazy instantiation
+                    try self.generic_templates.put(fd.name, .{ .fd = fd });
+                }
else { + // Non-generic local function + // Save outer function state + const saved_fn = self.current_function; + const saved_bb = c.LLVMGetInsertBlock(self.builder); + const saved_ret = self.current_return_type; + const saved_named = self.named_values; + self.named_values = std.StringHashMap(NamedValue).init(self.allocator); + + // Register with correct types (null return_type = void) + try self.registerFnDeclAs(fd, fd.name); + + // Generate body inline + const ret_sx_type = self.resolveType(fd.return_type); + self.current_return_type = ret_sx_type; + const name_z = try self.allocator.dupeZ(u8, fd.name); + const function = c.LLVMGetNamedFunction(self.module, name_z.ptr) orelse + return self.emitErrorFmt("local function '{s}' not found", .{fd.name}); + self.current_function = function; + const entry = c.LLVMAppendBasicBlockInContext(self.context, function, "entry"); + c.LLVMPositionBuilderAtEnd(self.builder, entry); + + for (fd.params, 0..) |param, i| { + const sx_ty = self.resolveType(param.type_expr); + const llvm_ty = self.typeToLLVM(sx_ty); + const param_name_z = try self.allocator.dupeZ(u8, param.name); + const alloca = c.LLVMBuildAlloca(self.builder, llvm_ty, param_name_z.ptr); + const param_val = c.LLVMGetParam(function, @intCast(i)); + _ = c.LLVMBuildStore(self.builder, param_val, alloca); + try self.named_values.put(param.name, .{ .ptr = alloca, .ty = sx_ty }); + } + + var last_val: c.LLVMValueRef = null; + if (fd.body.data == .block) { + for (fd.body.data.block.stmts) |stmt| { + last_val = try self.genStmt(stmt); + } + } else { + last_val = try self.genExpr(fd.body); + } + + const current_bb2 = c.LLVMGetInsertBlock(self.builder); + if (c.LLVMGetBasicBlockTerminator(current_bb2) == null) { + if (ret_sx_type == .void_type) { + _ = c.LLVMBuildRetVoid(self.builder); + } else if (last_val) |val| { + const src_ty = self.llvmTypeToSxType(c.LLVMTypeOf(val)); + const converted = self.convertValue(val, src_ty, ret_sx_type); + _ = c.LLVMBuildRet(self.builder, 
converted); + } else { + _ = c.LLVMBuildRetVoid(self.builder); + } + } + + // Restore outer function state + self.named_values = saved_named; + self.current_return_type = saved_ret; + self.current_function = saved_fn; + c.LLVMPositionBuilderAtEnd(self.builder, saved_bb); + } + return null; + }, + .struct_decl => |sd| { + try self.registerStructType(sd); + return null; + }, + .union_decl => |ud| { + try self.registerUnionType(ud); + return null; + }, + .assignment => |asgn| { + return self.genAssignment(asgn); + }, + .return_stmt => |rs| { + // Evaluate return value first, then emit all defers, then return + if (rs.value) |val_node| { + const raw_val = try self.genExpr(val_node); + if (self.current_return_type.isStruct()) { + // Struct return: raw_val is an alloca pointer, load the value + const sname = self.current_return_type.struct_type; + const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname}); + const loaded = c.LLVMBuildLoad2(self.builder, info.llvm_type, raw_val, "retval"); + try self.emitAllDefers(); + _ = c.LLVMBuildRet(self.builder, loaded); + } else { + const src_ty = self.llvmTypeToSxType(c.LLVMTypeOf(raw_val)); + const val = self.convertValue(raw_val, src_ty, self.current_return_type); + try self.emitAllDefers(); + _ = c.LLVMBuildRet(self.builder, val); + } + } else { + try self.emitAllDefers(); + _ = c.LLVMBuildRetVoid(self.builder); + } + // Create a dead basic block for any subsequent instructions + const dead_bb = c.LLVMAppendBasicBlockInContext(self.context, self.current_function, "after_ret"); + c.LLVMPositionBuilderAtEnd(self.builder, dead_bb); + return null; + }, + .defer_stmt => |ds| { + // Don't generate now — push onto current defer list for later execution + if (self.defer_stack.items.len > 0) { + const top = &self.defer_stack.items[self.defer_stack.items.len - 1]; + try top.append(self.allocator, ds.expr); + } + return null; + }, + .insert_expr => |ins| { + // Substitute comptime 
param nodes before evaluation (e.g., replace $fmt identifier with literal) + const expr = if (self.comptime_param_nodes != null) + try self.substituteComptimeNodes(ins.expr) + else + ins.expr; + // Evaluate the inner expression via bytecode VM to get a string, parse it, generate inline + const result = try self.comptimeEval(expr, .string_type); + const code_z = try self.allocator.dupeZ(u8, result.string_val); + var parser = Parser.init(self.allocator, code_z); + var last_val: c.LLVMValueRef = null; + while (parser.current.tag != .eof) { + const stmt = try parser.parseStmt(); + last_val = try self.genStmt(stmt); + } + return last_val; + }, + else => { + return self.genExpr(node); + }, + } + } + + fn genVarDecl(self: *CodeGen, vd: ast.VarDecl) !c.LLVMValueRef { + // Meta type variable: x := f64 or x := Vec4 → runtime string holding the type name + if (vd.value) |val| { + const meta_name = self.asTypeName(val); + if (meta_name) |raw_name| { + const type_name = self.resolveDisplayName(raw_name); + const name_z = try self.allocator.dupeZ(u8, vd.name); + const ptr_ty = c.LLVMPointerTypeInContext(self.context, 0); + const alloca = c.LLVMBuildAlloca(self.builder, ptr_ty, name_z.ptr); + const str_val = c.LLVMBuildGlobalStringPtr(self.builder, type_name.ptr, "type_name"); + _ = c.LLVMBuildStore(self.builder, str_val, alloca); + try self.saveShadowed(vd.name); + try self.named_values.put(vd.name, .{ .ptr = alloca, .ty = .{ .meta_type = .{ .name = raw_name } } }); + return null; + } + } + + var sx_ty: Type = Type.s(32); + + if (vd.type_annotation) |ta| { + sx_ty = self.resolveType(ta); + } else if (vd.value) |val| { + // Infer type from value + if (val.data == .struct_literal) { + const sl = val.data.struct_literal; + if (sl.struct_name) |name| { + sx_ty = .{ .struct_type = name }; + } else if (sl.type_expr) |te| { + sx_ty = self.resolveType(te); + } else { + return self.emitError("cannot infer struct type from untyped struct literal"); + } + } else { + sx_ty = 
self.inferType(val); + } + } else { + return self.emitErrorFmt("variable '{s}' has no type annotation and no initializer", .{vd.name}); + } + + // Struct-typed variable + if (sx_ty.isStruct()) { + // Resolve type aliases (e.g. Vec3 -> Vec__3_f32) + const sname = self.type_aliases.get(sx_ty.struct_type) orelse sx_ty.struct_type; + sx_ty = .{ .struct_type = sname }; + const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname}); + const name_z = try self.allocator.dupeZ(u8, vd.name); + const alloca = c.LLVMBuildAlloca(self.builder, info.llvm_type, name_z.ptr); + + if (vd.value == null) { + // Default-init: per-field defaults or zero + try self.genStructDefaultInit(alloca, info); + } else if (vd.value.?.data == .undef_literal) { + // Undef-init: entire struct is undefined + _ = c.LLVMBuildStore(self.builder, c.LLVMGetUndef(info.llvm_type), alloca); + } else if (vd.value.?.data == .struct_literal) { + // Struct literal codegen returns an alloca — use it directly instead + const lit_alloca = try self.genStructLiteral(vd.value.?.data.struct_literal, sname); + try self.saveShadowed(vd.name); + try self.named_values.put(vd.name, .{ .ptr = lit_alloca, .ty = sx_ty }); + return null; + } else if (vd.value.?.data == .call) { + // Function call returning a struct — result is a value, store to alloca + const val = try self.genExpr(vd.value.?); + _ = c.LLVMBuildStore(self.builder, val, alloca); + } else { + // General expression (xx cast, identifier, etc.) 
— evaluate as target type + const val = try self.genExprAsType(vd.value.?, sx_ty); + _ = c.LLVMBuildStore(self.builder, val, alloca); + } + + try self.saveShadowed(vd.name); + try self.named_values.put(vd.name, .{ .ptr = alloca, .ty = sx_ty }); + return null; + } + + // Union-typed variable + if (sx_ty.isUnion()) { + const uname = self.type_aliases.get(sx_ty.union_type) orelse sx_ty.union_type; + sx_ty = .{ .union_type = uname }; + const info = self.union_types.get(uname) orelse return self.emitErrorFmt("unknown union type '{s}'", .{uname}); + const name_z = try self.allocator.dupeZ(u8, vd.name); + const alloca = c.LLVMBuildAlloca(self.builder, info.llvm_type, name_z.ptr); + + if (vd.value == null) { + // Zero-init: tag=0, payload zeroed + _ = c.LLVMBuildStore(self.builder, c.LLVMConstNull(info.llvm_type), alloca); + } else if (vd.value.?.data == .undef_literal) { + _ = c.LLVMBuildStore(self.builder, c.LLVMGetUndef(info.llvm_type), alloca); + } else if (vd.value.?.data == .union_literal) { + const lit_alloca = try self.genUnionLiteral(vd.value.?.data.union_literal, uname); + try self.saveShadowed(vd.name); + try self.named_values.put(vd.name, .{ .ptr = lit_alloca, .ty = sx_ty }); + return null; + } else if (vd.value.?.data == .enum_literal) { + // Void variant: .none assigned to union variable + const ul = ast.UnionLiteral{ + .union_name = uname, + .variant_name = vd.value.?.data.enum_literal.name, + .payload = null, + }; + const lit_alloca = try self.genUnionLiteral(ul, uname); + try self.saveShadowed(vd.name); + try self.named_values.put(vd.name, .{ .ptr = lit_alloca, .ty = sx_ty }); + return null; + } else if (vd.value.?.data == .call) { + // Call returning a union (e.g., Shape.circle(3.14)) — genExpr returns alloca + const result_alloca = try self.genExpr(vd.value.?); + const loaded = c.LLVMBuildLoad2(self.builder, info.llvm_type, result_alloca, "union_load"); + _ = c.LLVMBuildStore(self.builder, loaded, alloca); + } else { + // Other expression — try 
genExprAsType + const result_alloca = try self.genExprAsType(vd.value.?, sx_ty); + const loaded = c.LLVMBuildLoad2(self.builder, info.llvm_type, result_alloca, "union_load"); + _ = c.LLVMBuildStore(self.builder, loaded, alloca); + } + + try self.saveShadowed(vd.name); + try self.named_values.put(vd.name, .{ .ptr = alloca, .ty = sx_ty }); + return null; + } + + // Array-typed variable + if (sx_ty.isArray()) { + const arr_info = sx_ty.array_type; + const llvm_arr_ty = self.typeToLLVM(sx_ty); + const arr_name_z = try self.allocator.dupeZ(u8, vd.name); + const arr_alloca = c.LLVMBuildAlloca(self.builder, llvm_arr_ty, arr_name_z.ptr); + + if (vd.value == null) { + _ = c.LLVMBuildStore(self.builder, c.LLVMConstNull(llvm_arr_ty), arr_alloca); + } else if (vd.value.?.data == .undef_literal) { + _ = c.LLVMBuildStore(self.builder, c.LLVMGetUndef(llvm_arr_ty), arr_alloca); + } else if (vd.value.?.data == .array_literal) { + const al = vd.value.?.data.array_literal; + const elem_sx_ty = Type.fromName(arr_info.element_name) orelse return self.emitErrorFmt("unknown array element type '{s}'", .{arr_info.element_name}); + const elem_llvm_ty = self.typeToLLVM(elem_sx_ty); + const len = @min(al.elements.len, arr_info.length); + for (0..len) |i| { + const val = try self.genExprAsType(al.elements[i], elem_sx_ty); + var indices = [_]c.LLVMValueRef{ + c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0), + c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), @intCast(i), 0), + }; + const gep = c.LLVMBuildGEP2(self.builder, llvm_arr_ty, arr_alloca, &indices, 2, "arr_elem"); + _ = c.LLVMBuildStore(self.builder, val, gep); + } + // Zero-init remaining elements + for (len..arr_info.length) |i| { + var indices = [_]c.LLVMValueRef{ + c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0), + c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), @intCast(i), 0), + }; + const gep = c.LLVMBuildGEP2(self.builder, llvm_arr_ty, arr_alloca, &indices, 2, "arr_elem"); + _ = 
c.LLVMBuildStore(self.builder, c.LLVMConstNull(elem_llvm_ty), gep); + } + } else { + return self.emitErrorFmt("unsupported initializer for array variable '{s}'", .{vd.name}); + } + + try self.saveShadowed(vd.name); + try self.named_values.put(vd.name, .{ .ptr = arr_alloca, .ty = sx_ty }); + return null; + } + + // Vector-typed variable + if (sx_ty.isVector()) { + const llvm_vec_ty = self.typeToLLVM(sx_ty); + const vec_name_z = try self.allocator.dupeZ(u8, vd.name); + const vec_alloca = c.LLVMBuildAlloca(self.builder, llvm_vec_ty, vec_name_z.ptr); + + if (vd.value == null) { + _ = c.LLVMBuildStore(self.builder, c.LLVMConstNull(llvm_vec_ty), vec_alloca); + } else if (vd.value.?.data == .undef_literal) { + _ = c.LLVMBuildStore(self.builder, c.LLVMGetUndef(llvm_vec_ty), vec_alloca); + } else if (vd.value.?.data == .array_literal) { + const vec_val = try self.genVectorLiteral(vd.value.?.data.array_literal, sx_ty); + _ = c.LLVMBuildStore(self.builder, vec_val, vec_alloca); + } else { + // Expression (e.g. 
function call) returning a vector + const val = try self.genExpr(vd.value.?); + _ = c.LLVMBuildStore(self.builder, val, vec_alloca); + } + + try self.saveShadowed(vd.name); + try self.named_values.put(vd.name, .{ .ptr = vec_alloca, .ty = sx_ty }); + return null; + } + + // Guard: void type cannot be allocated (would crash LLVM) + if (sx_ty == .void_type) { + return self.emitErrorFmt("cannot declare variable '{s}' with void type", .{vd.name}); + } + + // Non-struct types + const llvm_ty = self.typeToLLVM(sx_ty); + const name_z = try self.allocator.dupeZ(u8, vd.name); + const alloca = c.LLVMBuildAlloca(self.builder, llvm_ty, name_z.ptr); + + if (vd.value == null) { + // Default-init: zero + _ = c.LLVMBuildStore(self.builder, c.LLVMConstNull(llvm_ty), alloca); + } else if (vd.value.?.data == .undef_literal) { + // Undef-init + _ = c.LLVMBuildStore(self.builder, c.LLVMGetUndef(llvm_ty), alloca); + } else { + const val = vd.value.?; + const enum_name: ?[]const u8 = if (sx_ty.isEnum()) sx_ty.enum_type else null; + const init_val = if (val.data == .enum_literal and enum_name != null) + self.genEnumLiteral(val.data.enum_literal.name, enum_name.?) + else if (vd.type_annotation != null) + try self.genExprAsType(val, sx_ty) + else + try self.genExpr(val); + _ = c.LLVMBuildStore(self.builder, init_val, alloca); + } + + try self.saveShadowed(vd.name); + try self.named_values.put(vd.name, .{ .ptr = alloca, .ty = sx_ty }); + return null; + } + + fn genStructDefaultInit(self: *CodeGen, alloca: c.LLVMValueRef, info: StructInfo) !void { + for (info.field_names, 0..) 
|_, fi| {
+            const ft = info.field_types[fi];
+            const ft_llvm = self.typeToLLVM(ft);
+            const gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, alloca, @intCast(fi), "dinit");
+
+            if (info.field_defaults.len > fi and info.field_defaults[fi] != null) {
+                const default_node = info.field_defaults[fi].?;
+                if (default_node.data == .undef_literal) {
+                    // Field default is --- → store undef
+                    _ = c.LLVMBuildStore(self.builder, c.LLVMGetUndef(ft_llvm), gep);
+                } else {
+                    // Field has expression default → evaluate and convert
+                    const val = try self.genExprAsType(default_node, ft);
+                    _ = c.LLVMBuildStore(self.builder, val, gep);
+                }
+            } else {
+                // No default → zero
+                _ = c.LLVMBuildStore(self.builder, c.LLVMConstNull(ft_llvm), gep);
+            }
+        }
+    }
+
+    /// Generate code for a constant declaration and bind `cd.name` in `named_values`.
+    ///
+    /// * A `comptime_expr` initializer is registered as a comptime global and
+    ///   emits no code here.
+    /// * The constant's type is the resolved annotation if present, otherwise
+    ///   the type inferred from the initializer.
+    /// * Union-typed constants bind the value returned by `genExprAsType` directly.
+    /// * Every other constant gets an alloca holding the initializer. When an
+    ///   annotation is present the initializer is evaluated *as* the annotated
+    ///   type (same rule as `genVarDecl`), so the stored value matches the slot.
+    ///
+    /// Always returns null on success; reports an error for a void-typed constant.
+    fn genConstDecl(self: *CodeGen, cd: ast.ConstDecl) !c.LLVMValueRef {
+        // Compile-time evaluation: register as comptime global for JIT
+        if (cd.value.data == .comptime_expr) {
+            const ct_type_override: ?Type = if (cd.type_annotation) |te| Type.fromTypeExpr(te) else null;
+            try self.registerComptimeGlobal(cd.name, cd.value.data.comptime_expr.expr, ct_type_override);
+            return null;
+        }
+
+        const sx_ty: Type = if (cd.type_annotation) |ta|
+            self.resolveType(ta)
+        else
+            self.inferType(cd.value);
+
+        // Union-typed constant: delegate to genExprAsType which handles enum_literal + union_literal
+        if (sx_ty.isUnion()) {
+            const val = try self.genExprAsType(cd.value, sx_ty);
+            try self.saveShadowed(cd.name);
+            try self.named_values.put(cd.name, .{ .ptr = val, .ty = sx_ty });
+            return null;
+        }
+
+        // Guard: void type cannot be allocated (same guard as genVarDecl)
+        if (sx_ty == .void_type) {
+            return self.emitErrorFmt("cannot declare constant '{s}' with void type", .{cd.name});
+        }
+
+        const init_val = if (cd.value.data == .enum_literal and sx_ty.isEnum())
+            self.genEnumLiteral(cd.value.data.enum_literal.name, sx_ty.enum_type)
+        else if (cd.type_annotation != null)
+            // Annotated: evaluate as the declared type so the store matches the alloca.
+            try self.genExprAsType(cd.value, sx_ty)
+        else
+            try self.genExpr(cd.value);
+
+        const llvm_ty = self.typeToLLVM(sx_ty);
+        const name_z = try self.allocator.dupeZ(u8, cd.name);
+        const alloca = c.LLVMBuildAlloca(self.builder, llvm_ty, name_z.ptr);
+        _ = c.LLVMBuildStore(self.builder, init_val, alloca);
+        try self.saveShadowed(cd.name);
+        try self.named_values.put(cd.name, .{ .ptr = alloca, .ty = sx_ty });
+        return null;
+    }
+
+    /// Generate code for `target op value`.
+    ///
+    /// Field and index targets are forwarded to `genFieldAssignment` and
+    /// `genIndexAssignment`. Otherwise the target must be an identifier found
+    /// in `named_values`:
+    ///   * meta-type variables assigned a type name store the display name string;
+    ///   * union variables (plain `=`) copy the freshly built union value;
+    ///   * everything else evaluates the right-hand side as the variable's own
+    ///     type, applies the compound operator (if any) to the current value,
+    ///     and stores the result.
+    ///
+    /// Always returns null on success.
+    fn genAssignment(self: *CodeGen, asgn: ast.Assignment) !c.LLVMValueRef {
+        // Field assignment: expr.field = value;
+        if (asgn.target.data == .field_access) {
+            return self.genFieldAssignment(asgn);
+        }
+
+        // Index assignment: expr[i] = value;
+        if (asgn.target.data == .index_expr) {
+            return self.genIndexAssignment(asgn);
+        }
+
+        // Target must be an identifier
+        if (asgn.target.data != .identifier) return self.emitError("assignment target must be a variable");
+        const name = asgn.target.data.identifier.name;
+        const entry = self.named_values.get(name) orelse {
+            return self.emitErrorFmt("undefined variable '{s}'", .{name});
+        };
+
+        // Meta type reassignment: x = Vec4 or x = f64
+        if (entry.ty == .meta_type and asgn.op == .assign) {
+            if (self.asTypeName(asgn.value)) |raw_name| {
+                const type_name = self.resolveDisplayName(raw_name);
+                const str_val = c.LLVMBuildGlobalStringPtr(self.builder, type_name.ptr, "type_name");
+                _ = c.LLVMBuildStore(self.builder, str_val, entry.ptr);
+                if (self.named_values.getPtr(name)) |entry_ptr| {
+                    entry_ptr.ty = .{ .meta_type = .{ .name = raw_name } };
+                }
+                return null;
+            }
+        }
+
+        // Union reassignment: s = .circle(3.14) or s = .none
+        if (entry.ty.isUnion() and asgn.op == .assign) {
+            const new_alloca = try self.genExprAsType(asgn.value, entry.ty);
+            // Copy from new alloca to existing alloca
+            const info = self.union_types.get(entry.ty.union_type) orelse
+                return self.emitErrorFmt("unknown union type '{s}'", .{entry.ty.union_type});
+            const loaded = c.LLVMBuildLoad2(self.builder, info.llvm_type, new_alloca, "union_load");
+            _ = c.LLVMBuildStore(self.builder, loaded, entry.ptr);
+            return null;
+        }
+
+        // Evaluate the right-hand side as the variable's type so the operands of
+        // the compound operators and the final store all agree with the slot
+        // (this is also the target-type context an `xx` cast needs).
+        const new_val = if (asgn.value.data == .enum_literal and entry.ty.isEnum())
+            self.genEnumLiteral(asgn.value.data.enum_literal.name, entry.ty.enum_type)
+        else
+            try self.genExprAsType(asgn.value, entry.ty);
+        const llvm_ty = self.typeToLLVM(entry.ty);
+
+        const store_val = switch (asgn.op) {
+            .assign => new_val,
+            .add_assign => blk: {
+                const cur = c.LLVMBuildLoad2(self.builder, llvm_ty, entry.ptr, "cur");
+                break :blk if (entry.ty.isFloat())
+                    c.LLVMBuildFAdd(self.builder, cur, new_val, "addtmp")
+                else
+                    c.LLVMBuildAdd(self.builder, cur, new_val, "addtmp");
+            },
+            .sub_assign => blk: {
+                const cur = c.LLVMBuildLoad2(self.builder, llvm_ty, entry.ptr, "cur");
+                break :blk if (entry.ty.isFloat())
+                    c.LLVMBuildFSub(self.builder, cur, new_val, "subtmp")
+                else
+                    c.LLVMBuildSub(self.builder, cur, new_val, "subtmp");
+            },
+            .mul_assign => blk: {
+                const cur = c.LLVMBuildLoad2(self.builder, llvm_ty, entry.ptr, "cur");
+                break :blk if (entry.ty.isFloat())
+                    c.LLVMBuildFMul(self.builder, cur, new_val, "multmp")
+                else
+                    c.LLVMBuildMul(self.builder, cur, new_val, "multmp");
+            },
+            .div_assign => blk: {
+                const cur = c.LLVMBuildLoad2(self.builder, llvm_ty, entry.ptr, "cur");
+                break :blk if (entry.ty.isFloat())
+                    c.LLVMBuildFDiv(self.builder, cur, new_val, "divtmp")
+                else if (entry.ty.isUnsigned())
+                    c.LLVMBuildUDiv(self.builder, cur, new_val, "divtmp")
+                else
+                    c.LLVMBuildSDiv(self.builder, cur, new_val, "divtmp");
+            },
+            .mod_assign => blk: {
+                const cur = c.LLVMBuildLoad2(self.builder, llvm_ty, entry.ptr, "cur");
+                break :blk if (entry.ty.isFloat())
+                    c.LLVMBuildFRem(self.builder, cur, new_val, "modtmp")
+                else if (entry.ty.isUnsigned())
+                    c.LLVMBuildURem(self.builder, cur, new_val, "modtmp")
+                else
+                    c.LLVMBuildSRem(self.builder, cur, new_val, "modtmp");
+            },
+        };
+
+        _ = c.LLVMBuildStore(self.builder, store_val, entry.ptr);
+        return null;
+    }
+
+    fn genFieldAssignment(self: *CodeGen, asgn: ast.Assignment) !c.LLVMValueRef {
+        const fa = asgn.target.data.field_access;
+        // Object must be an identifier for now
+        if (fa.object.data != .identifier) return self.emitError("field assignment target must be a variable");
+        const obj_name =
fa.object.data.identifier.name; + const entry = self.named_values.get(obj_name) orelse return self.emitErrorFmt("undefined variable '{s}'", .{obj_name}); + if (!entry.ty.isStruct()) return self.emitErrorFmt("field access on non-struct variable '{s}'", .{obj_name}); + + const sname = entry.ty.struct_type; + const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname}); + const fi = self.findFieldIndex(info, fa.field) orelse return self.emitErrorFmt("no field '{s}' in struct '{s}'", .{ fa.field, sname }); + const field_ty = info.field_types[fi]; + + const gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, entry.ptr, @intCast(fi), "fassign"); + + // Generate RHS and convert to field type + const rhs = try self.genExprAsType(asgn.value, field_ty); + + if (asgn.op == .assign) { + _ = c.LLVMBuildStore(self.builder, rhs, gep); + } else { + // Compound assignment on struct field + const field_llvm_ty = self.typeToLLVM(field_ty); + const cur = c.LLVMBuildLoad2(self.builder, field_llvm_ty, gep, "fcur"); + const store_val = switch (asgn.op) { + .add_assign => if (field_ty.isFloat()) + c.LLVMBuildFAdd(self.builder, cur, rhs, "faddtmp") + else + c.LLVMBuildAdd(self.builder, cur, rhs, "faddtmp"), + .sub_assign => if (field_ty.isFloat()) + c.LLVMBuildFSub(self.builder, cur, rhs, "fsubtmp") + else + c.LLVMBuildSub(self.builder, cur, rhs, "fsubtmp"), + .mul_assign => if (field_ty.isFloat()) + c.LLVMBuildFMul(self.builder, cur, rhs, "fmultmp") + else + c.LLVMBuildMul(self.builder, cur, rhs, "fmultmp"), + .div_assign => if (field_ty.isFloat()) + c.LLVMBuildFDiv(self.builder, cur, rhs, "fdivtmp") + else if (field_ty.isUnsigned()) + c.LLVMBuildUDiv(self.builder, cur, rhs, "fdivtmp") + else + c.LLVMBuildSDiv(self.builder, cur, rhs, "fdivtmp"), + .mod_assign => if (field_ty.isFloat()) + c.LLVMBuildFRem(self.builder, cur, rhs, "fmodtmp") + else if (field_ty.isUnsigned()) + c.LLVMBuildURem(self.builder, cur, rhs, "fmodtmp") + else + 
c.LLVMBuildSRem(self.builder, cur, rhs, "fmodtmp"),
+                .assign => unreachable,
+            };
+            _ = c.LLVMBuildStore(self.builder, store_val, gep);
+        }
+        return null;
+    }
+
+    /// Generate code for an assignment whose target is `object[index]`.
+    ///
+    /// * String target: the value is truncated to one byte and stored at
+    ///   `ptr + index`, where `ptr` is field 0 of the string value. Only plain
+    ///   `=` is accepted.
+    /// * Array target (must be a named variable): the right-hand side is
+    ///   evaluated as the array's element type; compound operators load the
+    ///   current element, combine, and store back.
+    ///
+    /// Any other target reports an error. Always returns null on success.
+    fn genIndexAssignment(self: *CodeGen, asgn: ast.Assignment) !c.LLVMValueRef {
+        const ie = asgn.target.data.index_expr;
+        const obj_ty = self.inferType(ie.object);
+        if (obj_ty == .string_type) {
+            // Compound operators used to be silently treated as `=`; reject them instead.
+            if (asgn.op != .assign) return self.emitError("compound assignment to a string element is not supported");
+            // String index assignment: s[i] = c
+            const str_val = try self.genExpr(ie.object);
+            const ptr = c.LLVMBuildExtractValue(self.builder, str_val, 0, "str_ptr");
+            const idx = try self.genExpr(ie.index);
+            const val = try self.genExpr(asgn.value);
+            const i8_type = c.LLVMInt8TypeInContext(self.context);
+            var gep_indices = [_]c.LLVMValueRef{idx};
+            const gep_ptr = c.LLVMBuildGEP2(self.builder, i8_type, ptr, &gep_indices, 1, "stridx");
+            const byte_val = c.LLVMBuildTrunc(self.builder, val, i8_type, "trunc_byte");
+            _ = c.LLVMBuildStore(self.builder, byte_val, gep_ptr);
+            return null;
+        }
+        if (obj_ty.isArray()) {
+            if (ie.object.data == .identifier) {
+                if (self.named_values.get(ie.object.data.identifier.name)) |entry| {
+                    const arr_info = obj_ty.array_type;
+                    const elem_ty = Type.fromName(arr_info.element_name) orelse return self.emitError("unknown array element type");
+                    const idx = try self.genExpr(ie.index);
+                    // Evaluate as the element type so the store matches the slot.
+                    const rhs = try self.genExprAsType(asgn.value, elem_ty);
+                    const zero = c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0);
+                    var indices = [_]c.LLVMValueRef{ zero, idx };
+                    const gep_ptr = c.LLVMBuildGEP2(self.builder, self.typeToLLVM(obj_ty), entry.ptr, &indices, 2, "arridx");
+
+                    const store_val = if (asgn.op == .assign) rhs else blk: {
+                        // Compound assignment: combine with the element's current value.
+                        const cur = c.LLVMBuildLoad2(self.builder, self.typeToLLVM(elem_ty), gep_ptr, "acur");
+                        break :blk switch (asgn.op) {
+                            .add_assign => if (elem_ty.isFloat())
+                                c.LLVMBuildFAdd(self.builder, cur, rhs, "aaddtmp")
+                            else
+                                c.LLVMBuildAdd(self.builder, cur, rhs, "aaddtmp"),
+                            .sub_assign => if (elem_ty.isFloat())
+                                c.LLVMBuildFSub(self.builder, cur, rhs, "asubtmp")
+                            else
+                                c.LLVMBuildSub(self.builder, cur, rhs, "asubtmp"),
+                            .mul_assign => if (elem_ty.isFloat())
+                                c.LLVMBuildFMul(self.builder, cur, rhs, "amultmp")
+                            else
+                                c.LLVMBuildMul(self.builder, cur, rhs, "amultmp"),
+                            .div_assign => if (elem_ty.isFloat())
+                                c.LLVMBuildFDiv(self.builder, cur, rhs, "adivtmp")
+                            else if (elem_ty.isUnsigned())
+                                c.LLVMBuildUDiv(self.builder, cur, rhs, "adivtmp")
+                            else
+                                c.LLVMBuildSDiv(self.builder, cur, rhs, "adivtmp"),
+                            .mod_assign => if (elem_ty.isFloat())
+                                c.LLVMBuildFRem(self.builder, cur, rhs, "amodtmp")
+                            else if (elem_ty.isUnsigned())
+                                c.LLVMBuildURem(self.builder, cur, rhs, "amodtmp")
+                            else
+                                c.LLVMBuildSRem(self.builder, cur, rhs, "amodtmp"),
+                            .assign => unreachable,
+                        };
+                    };
+                    _ = c.LLVMBuildStore(self.builder, store_val, gep_ptr);
+                    return null;
+                }
+            }
+        }
+        return self.emitError("index assignment requires a string or array target");
+    }
+
+    /// Decode backslash escapes in `raw` into a newly allocated byte slice.
+    ///
+    /// Recognised escapes are `\n`, `\t`, `\r`, `\\`, `\"` and `\0`. Any other
+    /// escaped byte is copied without its backslash, and a lone trailing
+    /// backslash is kept as-is.
+    ///
+    /// The returned slice is exactly as long as its allocation, so the caller
+    /// can release it with `allocator.free`. Fails only if allocation fails.
+    fn unescapeString(allocator: std.mem.Allocator, raw: []const u8) ![]u8 {
+        // Pass 1: measure the decoded length so the allocation is exact.
+        var out_len: usize = 0;
+        var i: usize = 0;
+        while (i < raw.len) : (out_len += 1) {
+            i += if (raw[i] == '\\' and i + 1 < raw.len) @as(usize, 2) else 1;
+        }
+
+        const result = try allocator.alloc(u8, out_len);
+
+        // Pass 2: decode. The escape test is the same as in pass 1, so `i`
+        // stays in bounds for every output byte.
+        i = 0;
+        for (result) |*out| {
+            if (raw[i] == '\\' and i + 1 < raw.len) {
+                out.* = switch (raw[i + 1]) {
+                    'n' => '\n',
+                    't' => '\t',
+                    'r' => '\r',
+                    '0' => 0,
+                    // `\\`, `\"` and unknown escapes all yield the escaped byte itself
+                    else => |ch| ch,
+                };
+                i += 2;
+            } else {
+                out.* = raw[i];
+                i += 1;
+            }
+        }
+        return result;
+    }
+
+    fn genExpr(self: *CodeGen, node: *Node) anyerror!c.LLVMValueRef {
+        self.current_span = node.span;
+        switch (node.data) {
+            .int_literal => |lit| {
+                const i32_type = c.LLVMInt32TypeInContext(self.context);
+                return c.LLVMConstInt(i32_type, @bitCast(@as(i64, lit.value)), 0);
+            },
+            .float_literal => |lit| {
+                const f32_type = c.LLVMFloatTypeInContext(self.context);
+                return c.LLVMConstReal(f32_type, lit.value);
+            },
+            .bool_literal => |lit| {
+                const i1_type = c.LLVMInt1TypeInContext(self.context);
+                return c.LLVMConstInt(i1_type, if (lit.value) 1 else 0, 0);
+            },
+            .string_literal => |lit| {
+                const unescaped = try unescapeString(self.allocator, lit.raw);
+                const str_z = try self.allocator.dupeZ(u8, unescaped);
+                const ptr = c.LLVMBuildGlobalStringPtr(self.builder, str_z.ptr, "str");
+                return self.buildStringSlice(ptr, @intCast(unescaped.len));
+            },
+            .identifier => |ident| {
+                if (self.named_values.get(ident.name)) |entry| {
+                    const llvm_ty = self.typeToLLVM(entry.ty);
+                    return c.LLVMBuildLoad2(self.builder, llvm_ty, entry.ptr, "loadtmp");
+                }
+                // Fall back to comptime globals (lazy resolution)
+                if (self.comptime_globals.getPtr(ident.name)) |ct| {
+                    if (!ct.is_resolved) {
+                        try self.resolveComptimeGlobal(ct);
+                    }
+                    const llvm_ty = self.typeToLLVM(ct.ty);
+                    return c.LLVMBuildLoad2(self.builder, llvm_ty, ct.global, "ct_load");
+                }
+                return self.emitErrorFmt("undefined identifier '{s}'", .{ident.name});
+            },
+            .binary_op => |binop| {
+                if (binop.op == .and_op) return self.genAndOp(binop);
+                if (binop.op == .or_op) return self.genOrOp(binop);
+                const lhs_ty = self.inferType(binop.lhs);
+                const
rhs_ty = self.inferType(binop.rhs); + const result_type = Type.widen(lhs_ty, rhs_ty); + const lhs = try self.genExprAsType(binop.lhs, result_type); + const rhs = try self.genExprAsType(binop.rhs, result_type); + return self.genBinaryOp(binop.op, lhs, rhs, result_type); + }, + .chained_comparison => |chain| { + return self.genChainedComparison(chain); + }, + .unary_op => |unop| { + if (unop.op == .xx) { + // xx requires a target type context (assignment, declaration, argument, return) + return self.emitError("'xx' cast requires a target type context"); + } + const operand = try self.genExpr(unop.operand); + return switch (unop.op) { + .negate => blk: { + const operand_ty = self.inferType(unop.operand); + if (operand_ty.isVector()) { + const elem_ty = operand_ty.vectorElementType(); + break :blk if (elem_ty != null and elem_ty.?.isFloat()) + c.LLVMBuildFNeg(self.builder, operand, "vnegtmp") + else + c.LLVMBuildNeg(self.builder, operand, "vnegtmp"); + } + break :blk if (self.exprIsFloat(unop.operand)) + c.LLVMBuildFNeg(self.builder, operand, "negtmp") + else + c.LLVMBuildNeg(self.builder, operand, "negtmp"); + }, + .not => c.LLVMBuildNot(self.builder, operand, "nottmp"), + .xx => unreachable, + }; + }, + .struct_literal => |sl| { + const ctx_name: ?[]const u8 = if (self.current_return_type.isStruct()) self.current_return_type.struct_type else null; + return self.genStructLiteral(sl, ctx_name); + }, + .union_literal => |ul| { + return self.genUnionLiteral(ul, null); + }, + .array_literal => |al| { + // If current return type is vector, build as vector SSA value + if (self.current_return_type.isVector()) { + return self.genVectorLiteral(al, self.current_return_type); + } + return self.genArrayLiteral(al, null); + }, + .field_access => |fa| { + return self.genFieldAccess(fa); + }, + .index_expr => |ie| { + return self.genIndexExpr(ie); + }, + .call => |call_node| { + return self.genCall(call_node); + }, + .if_expr => |ie| { + return self.genIfExpr(ie); + }, + .match_expr 
=> |me| {
            return self.genMatchExpr(me);
        },
        .while_expr => |we| {
            return self.genWhileExpr(we);
        },
        .for_expr => |fe| {
            return self.genForExpr(fe);
        },
        .break_expr => {
            if (self.loop_break_bb) |break_bb| {
                _ = c.LLVMBuildBr(self.builder, break_bb);
                const dead_bb = c.LLVMAppendBasicBlockInContext(self.context, self.current_function, "after_break");
                c.LLVMPositionBuilderAtEnd(self.builder, dead_bb);
                return null;
            }
            return self.emitError("'break' outside of loop");
        },
        .continue_expr => {
            if (self.loop_continue_bb) |continue_bb| {
                _ = c.LLVMBuildBr(self.builder, continue_bb);
                const dead_bb = c.LLVMAppendBasicBlockInContext(self.context, self.current_function, "after_continue");
                c.LLVMPositionBuilderAtEnd(self.builder, dead_bb);
                return null;
            }
            return self.emitError("'continue' outside of loop");
        },
        .block => |blk| {
            try self.pushScope();
            var last_val: c.LLVMValueRef = null;
            for (blk.stmts) |stmt| {
                last_val = try self.genStmt(stmt);
            }
            try self.popScope();
            return last_val;
        },
        .var_decl => |vd| {
            return self.genVarDecl(vd);
        },
        .const_decl => |cd| {
            return self.genConstDecl(cd);
        },
        .assignment => |asgn| {
            return self.genAssignment(asgn);
        },
        .return_stmt => |rs| {
            if (rs.value) |val_node| {
                const raw_val = try self.genExpr(val_node);
                const src_ty = self.llvmTypeToSxType(c.LLVMTypeOf(raw_val));
                const val = self.convertValue(raw_val, src_ty, self.current_return_type);
                try self.emitAllDefers();
                _ = c.LLVMBuildRet(self.builder, val);
            } else {
                try self.emitAllDefers();
                _ = c.LLVMBuildRetVoid(self.builder);
            }
            const dead_bb = c.LLVMAppendBasicBlockInContext(self.context, self.current_function, "after_ret");
            c.LLVMPositionBuilderAtEnd(self.builder, dead_bb);
            return null;
        },
        .comptime_expr => |ct| {
            return self.genExpr(ct.expr);
        },
        else => return self.emitError("unsupported expression"),
    }
}

/// Register a struct declaration.
///
/// A declaration with type parameters is only stored in
/// `generic_struct_templates`. Otherwise a named LLVM struct type is created
/// from the resolved field types, `#run` field defaults are replaced by
/// references to synthetic comptime globals, and the result is recorded in
/// `struct_types` and given an Any type id.
fn registerStructType(self: *CodeGen, sd: ast.StructDecl) !void {
    // Generic struct: store as template instead of registering now
    if (sd.type_params.len > 0) {
        try self.generic_struct_templates.put(sd.name, .{ .sd = sd });
        return;
    }

    var field_sx_types = std.ArrayList(Type).empty;
    var field_llvm_types = std.ArrayList(c.LLVMTypeRef).empty;

    for (sd.field_types) |ft| {
        const sx_ty = self.resolveType(ft);
        try field_sx_types.append(self.allocator, sx_ty);
        try field_llvm_types.append(self.allocator, self.typeToLLVM(sx_ty));
    }

    const llvm_types_slice = try field_llvm_types.toOwnedSlice(self.allocator);
    const name_z = try self.allocator.dupeZ(u8, sd.name);
    const struct_ty = c.LLVMStructCreateNamed(self.context, name_z.ptr);
    // An empty struct passes a null element pointer together with a count of 0.
    c.LLVMStructSetBody(struct_ty, if (llvm_types_slice.len > 0) llvm_types_slice.ptr else null, @intCast(llvm_types_slice.len), 0);

    // Process field defaults: replace #run expressions with comptime global references
    const resolved_defaults = try self.allocator.alloc(?*Node, sd.field_defaults.len);
    for (sd.field_defaults, 0..) |fd, i| {
        if (fd != null and fd.?.data == .comptime_expr) {
            // Register as anonymous comptime global for JIT evaluation
            const synthetic_name = try std.fmt.allocPrint(self.allocator, "__struct_{s}_field_{d}", .{ sd.name, i });
            const field_type_override: ?Type = if (i < field_sx_types.items.len) field_sx_types.items[i] else null;
            try self.registerComptimeGlobal(synthetic_name, fd.?.data.comptime_expr.expr, field_type_override);
            // Replace with identifier node referencing the comptime global
            const id_node = try self.allocator.create(Node);
            id_node.* = .{ .span = .{ .start = 0, .end = 0 }, .data = .{ .identifier = .{ .name = synthetic_name } } };
            resolved_defaults[i] = id_node;
        } else {
            resolved_defaults[i] = fd;
        }
    }

    try self.struct_types.put(sd.name, .{
        .field_names = sd.field_names,
        .field_types = try field_sx_types.toOwnedSlice(self.allocator),
        .field_defaults = resolved_defaults,
        .llvm_type = struct_ty,
    });
    _ = try self.getAnyTypeId(sd.name, .{ .struct_type = sd.name });
}

/// Register a union declaration.
///
/// The LLVM representation is `{ i32 tag, [max_payload_size x i8] payload }`,
/// where `max_payload_size` is the largest store size among the variants that
/// carry a type. Variants without a type are recorded as `.void_type`. The
/// union is recorded in `union_types` and given an Any type id.
fn registerUnionType(self: *CodeGen, ud: ast.UnionDecl) !void {
    var variant_sx_types = std.ArrayList(Type).empty;
    var max_payload_size: u64 = 0;
    const data_layout = c.LLVMGetModuleDataLayout(self.module);

    for (ud.variant_types) |vt| {
        if (vt) |type_node| {
            const sx_ty = self.resolveType(type_node);
            try variant_sx_types.append(self.allocator, sx_ty);
            const llvm_ty = self.typeToLLVM(sx_ty);
            const size = c.LLVMStoreSizeOfType(data_layout, llvm_ty);
            if (size > max_payload_size) max_payload_size = size;
        } else {
            try variant_sx_types.append(self.allocator, .void_type);
        }
    }

    // Union LLVM type: { i32, [max_payload_size x i8] }
    const name_z = try self.allocator.dupeZ(u8, ud.name);
    const union_ty = c.LLVMStructCreateNamed(self.context, name_z.ptr);
    const i32_ty = c.LLVMInt32TypeInContext(self.context);
    const i8_ty = c.LLVMInt8TypeInContext(self.context);
    const payload_array_ty = c.LLVMArrayType2(i8_ty, max_payload_size);
    var fields = [2]c.LLVMTypeRef{ i32_ty, payload_array_ty };
    c.LLVMStructSetBody(union_ty, &fields, 2, 0);

    try self.union_types.put(ud.name, .{
        .variant_names = ud.variant_names,
        .variant_types = try variant_sx_types.toOwnedSlice(self.allocator),
        .llvm_type = union_ty,
        .max_payload_size = max_payload_size,
    });
    _ = try self.getAnyTypeId(ud.name, .{ .union_type = ud.name });
}

/// Build a union value in a fresh alloca and return the alloca.
///
/// The union type comes from, in order: the literal's own name,
/// `expected_union_name`, then the current function's return type if that is
/// a union. Type aliases are resolved before lookup. The tag is the index of
/// the variant in the declaration; a payload is stored only when the literal
/// has one and the variant is not void.
fn genUnionLiteral(self: *CodeGen, ul: ast.UnionLiteral, expected_union_name: ?[]const u8) !c.LLVMValueRef {
    const uname = ul.union_name orelse expected_union_name orelse
        (if (self.current_return_type.isUnion()) self.current_return_type.union_type else null) orelse
        return self.emitError("cannot infer union type for literal");
    const resolved_name = self.type_aliases.get(uname) orelse uname;
    const info = self.union_types.get(resolved_name) orelse return self.emitErrorFmt("unknown union type '{s}'", .{resolved_name});

    // Find variant index
    var variant_idx: ?u32 = null;
    for (info.variant_names, 0..) |vn, i| {
        if (std.mem.eql(u8, vn, ul.variant_name)) {
            variant_idx = @intCast(i);
            break;
        }
    }
    const idx = variant_idx orelse return self.emitErrorFmt("no variant '{s}' in union '{s}'", .{ ul.variant_name, resolved_name });

    // Alloca union
    const alloca = c.LLVMBuildAlloca(self.builder, info.llvm_type, "union_tmp");
    const i32_ty = c.LLVMInt32TypeInContext(self.context);

    // Store tag (field 0)
    const tag_gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, alloca, 0, "tag");
    _ = c.LLVMBuildStore(self.builder, c.LLVMConstInt(i32_ty, idx, 0), tag_gep);

    // Store payload (field 1) if not void
    if (ul.payload) |payload_node| {
        const variant_ty = info.variant_types[idx];
        if (variant_ty != .void_type) {
            const payload_val = try self.genExprAsType(payload_node, variant_ty);
            const payload_gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, alloca, 1, "payload");
            const payload_llvm_ty = self.typeToLLVM(variant_ty);
            // Struct payloads are loaded through `payload_val` first; every
            // other payload is stored as the value itself.
            const to_store = if (variant_ty.isStruct())
                c.LLVMBuildLoad2(self.builder, payload_llvm_ty, payload_val, "struct_load")
            else
                payload_val;
            const store = c.LLVMBuildStore(self.builder, to_store, payload_gep);
            // The payload field is an [N x i8] array, so its type only
            // guarantees byte alignment. Without this the store would claim
            // the payload type's ABI alignment.
            c.LLVMSetAlignment(store, 1);
        }
    }

    return alloca;
}

/// Build a struct value in a fresh alloca and return the alloca.
///
/// The struct type comes from the literal's name, then its type expression,
/// then `expected_struct_name`; aliases are resolved before lookup. All fields
/// are default-initialised first, then overwritten from the literal. If any
/// initialiser is named, the literal is in named mode, where an unnamed
/// identifier is shorthand for `name = name`. Otherwise initialisers are
/// assigned positionally.
fn genStructLiteral(self: *CodeGen, sl: ast.StructLiteral, expected_struct_name: ?[]const u8) !c.LLVMValueRef {
    const raw_name = sl.struct_name orelse blk: {
        if (sl.type_expr) |te| {
            const ty = self.resolveType(te);
            if (ty.isStruct()) break :blk ty.struct_type;
        }
        break :blk expected_struct_name orelse return self.emitError("cannot infer struct type for literal");
    };
    // Resolve type aliases (e.g. Vec3 -> Vec__3_f32)
    const sname = self.type_aliases.get(raw_name) orelse raw_name;
    const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname});

    // Alloca the struct and default-init all fields (zero or declared defaults)
    const name_z = try self.allocator.dupeZ(u8, sname);
    const alloca = c.LLVMBuildAlloca(self.builder, info.llvm_type, name_z.ptr);
    try self.genStructDefaultInit(alloca, info);

    // Determine if this is named or positional mode
    var has_named = false;
    for (sl.field_inits) |fi| {
        if (fi.name != null) {
            has_named = true;
            break;
        }
    }

    if (has_named) {
        // Named/shorthand mode: map by field name
        for (sl.field_inits) |fi| {
            const fname = fi.name orelse shorthand: {
                // Positional field mixed with named — treat as identifier shorthand
                if (fi.value.data != .identifier) {
                    return self.emitError("mixed positional and named fields in struct literal");
                }
                break :shorthand fi.value.data.identifier.name;
            };
            const idx = self.findFieldIndex(info, fname) orelse return self.emitErrorFmt("no field '{s}' in struct '{s}'", .{ fname, sname });
            const val = try self.genExprAsType(fi.value, info.field_types[idx]);
            const gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, alloca, @intCast(idx), "field");
            _ = c.LLVMBuildStore(self.builder, val, gep);
        }
    } else {
        // Positional mode: assign in order
        for (sl.field_inits, 0..) |fi, i| {
            if (i >= info.field_names.len) return self.emitErrorFmt("too many fields in struct literal (expected {d})", .{info.field_names.len});
            const val = try self.genExprAsType(fi.value, info.field_types[i]);
            const gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, alloca, @intCast(i), "field");
            _ = c.LLVMBuildStore(self.builder, val, gep);
        }
    }

    return alloca;
}

/// Generate an array literal as an alloca with elements stored via GEP.
/// If target_ty is provided, elements are converted to the array's element type.
/// Otherwise, element type is inferred from the first element.
///
/// A literal with more elements than the array is a compile error. A literal
/// with fewer elements leaves the remaining slots zero-initialised.
fn genArrayLiteral(self: *CodeGen, al: ast.ArrayLiteral, target_ty_opt: ?Type) !c.LLVMValueRef {
    const arr_ty: Type = target_ty_opt orelse blk: {
        // Infer from first element
        if (al.elements.len == 0) return self.emitError("cannot infer type of empty array literal");
        const elem_ty = self.inferType(al.elements[0]);
        const elem_name = try elem_ty.displayName(self.allocator);
        break :blk .{ .array_type = .{ .element_name = elem_name, .length = @intCast(al.elements.len) } };
    };
    const arr_info = arr_ty.array_type;
    const elem_sx_ty = Type.fromName(arr_info.element_name) orelse return self.emitErrorFmt("unknown array element type '{s}'", .{arr_info.element_name});

    // Reject extra elements instead of dropping them, as struct literals do.
    if (al.elements.len > arr_info.length) {
        return self.emitErrorFmt("too many elements in array literal (expected {d})", .{arr_info.length});
    }

    const llvm_arr_ty = self.typeToLLVM(arr_ty);
    const alloca = c.LLVMBuildAlloca(self.builder, llvm_arr_ty, "arr");

    // A short literal would otherwise leave the tail of the alloca uninitialised.
    if (al.elements.len < arr_info.length) {
        _ = c.LLVMBuildStore(self.builder, c.LLVMConstNull(llvm_arr_ty), alloca);
    }

    const i32_ty = c.LLVMInt32TypeInContext(self.context);
    for (al.elements, 0..) |elem_node, i| {
        const val = try self.genExprAsType(elem_node, elem_sx_ty);
        var indices = [_]c.LLVMValueRef{
            c.LLVMConstInt(i32_ty, 0, 0),
            c.LLVMConstInt(i32_ty, @intCast(i), 0),
        };
        const gep = c.LLVMBuildGEP2(self.builder, llvm_arr_ty, alloca, &indices, 2, "arr_elem");
        _ = c.LLVMBuildStore(self.builder, val, gep);
    }
    return alloca;
}

/// Build a vector value from an array literal by inserting each element,
/// converted to the vector's element type, into successive lanes.
///
/// A literal with more elements than the vector has lanes is a compile error.
/// A literal with fewer elements leaves the remaining lanes zero.
fn genVectorLiteral(self: *CodeGen, al: ast.ArrayLiteral, vec_ty: Type) !c.LLVMValueRef {
    const vec_info = vec_ty.vector_type;
    const elem_sx_ty = Type.fromName(vec_info.element_name) orelse return self.emitErrorFmt("unknown vector element type '{s}'", .{vec_info.element_name});

    if (al.elements.len > vec_info.length) {
        return self.emitErrorFmt("too many elements in vector literal (expected {d})", .{vec_info.length});
    }

    const llvm_vec_ty = self.typeToLLVM(vec_ty);
    // Every lane is overwritten for a full literal, so undef is fine there.
    // A short literal starts from zero so the untouched lanes are defined.
    var vec_val = if (al.elements.len < vec_info.length)
        c.LLVMConstNull(llvm_vec_ty)
    else
        c.LLVMGetUndef(llvm_vec_ty);

    const i32_ty = c.LLVMInt32TypeInContext(self.context);
    for (al.elements, 0..) |elem_node, i| {
        const elem_val = try self.genExprAsType(elem_node, elem_sx_ty);
        const idx = c.LLVMConstInt(i32_ty, @intCast(i), 0);
        vec_val = c.LLVMBuildInsertElement(self.builder, vec_val, elem_val, idx, "vec_ins");
    }
    return vec_val;
}

/// Broadcast `scalar` to every lane of `vec_ty`.
///
/// `scalar` is inserted as-is, so the caller must already have converted it
/// to the vector's element type.
fn broadcastScalar(self: *CodeGen, scalar: c.LLVMValueRef, vec_ty: Type) c.LLVMValueRef {
    const vec_info = vec_ty.vector_type;
    const llvm_vec_ty = self.typeToLLVM(vec_ty);
    // Insert scalar at index 0 of undef vector
    var vec = c.LLVMGetUndef(llvm_vec_ty);
    const zero = c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0);
    vec = c.LLVMBuildInsertElement(self.builder, vec, scalar, zero, "splat_ins");
    // Shuffle with zeroinitializer mask to broadcast element 0 to all lanes
    const mask_ty = c.LLVMVectorType(c.LLVMInt32TypeInContext(self.context), vec_info.length);
    const mask = c.LLVMConstNull(mask_ty);
    return c.LLVMBuildShuffleVector(self.builder, vec, c.LLVMGetUndef(llvm_vec_ty), mask, "splat");
}

/// Generate `node` and convert the result to `target_ty`.
///
/// Special forms are handled before the general path: an `xx` prefix converts
/// without the narrowing check; enum/union literals targeting a union and
/// array literals targeting an array or vector are built directly against the
/// target type. On the general path a scalar targeting a vector is converted
/// to the element type and broadcast, int and float literals convert freely,
/// and any other conversion that is not implicitly allowed is a compile error.
fn genExprAsType(self: *CodeGen, node: *Node, target_ty: Type) !c.LLVMValueRef {
    self.current_span = node.span;
    // xx prefix: unwrap and convert freely (explicit cast)
    if (node.data == .unary_op and node.data.unary_op.op == .xx) {
        const inner = node.data.unary_op.operand;
        const val = try self.genExpr(inner);
        const src_ty = self.inferType(inner);
        return self.convertValue(val, src_ty, target_ty);
    }

    // Enum literal assigned to union type: construct tag-only (void variant) union
    if (node.data == .enum_literal and target_ty.isUnion()) {
        const ul = ast.UnionLiteral{
            .union_name = null,
            .variant_name = node.data.enum_literal.name,
            .payload = null,
        };
        return self.genUnionLiteral(ul, target_ty.union_type);
    }

    // Union literal with target union type: pass context
    if (node.data == .union_literal and target_ty.isUnion()) {
        return self.genUnionLiteral(node.data.union_literal, target_ty.union_type);
    }

    // Array literal with target array type: generate with element conversion
    if (node.data == .array_literal and target_ty.isArray()) {
        return self.genArrayLiteral(node.data.array_literal, target_ty);
    }

    // Array literal with target vector type: build via undef + InsertElement
    if (node.data == .array_literal and target_ty.isVector()) {
        return self.genVectorLiteral(node.data.array_literal, target_ty);
    }

    const val = try self.genExpr(node);
    const src_ty = self.inferType(node);

    // Scalar to vector broadcast
    if (target_ty.isVector() and !src_ty.isVector()) {
        const elem_ty = target_ty.vectorElementType() orelse return self.emitError("cannot determine vector element type");
        const converted = self.convertValue(val, src_ty, elem_ty);
        return self.broadcastScalar(converted, target_ty);
    }

    // Literals are exempt from narrowing checks
    if (node.data == .int_literal or node.data == .float_literal) {
        return self.convertValue(val, src_ty, target_ty);
    }

    // Check for narrowing conversion
    if (!src_ty.isImplicitlyConvertibleTo(target_ty)) {
        // Narrowing without xx — compile error
        return self.emitErrorFmt("narrowing conversion from '{s}' to '{s}' requires explicit 'xx' cast", .{
            src_ty.displayName(self.allocator) catch "?",
            target_ty.displayName(self.allocator) catch "?",
        });
    }

    return self.convertValue(val, src_ty, target_ty);
}

/// Convert an LLVM value from src_ty to target_ty, emitting appropriate casts.
///
/// Rules are tried in order: identical types return `val` unchanged; an Any
/// source unpacks its 64-bit value field; then float/float, int/float,
/// float/int, union-to-int (tag extraction) and int/int. Any pair not covered
/// returns `val` unchanged.
fn convertValue(self: *CodeGen, val: c.LLVMValueRef, src_ty: Type, target_ty: Type) c.LLVMValueRef {
    // Same type → return as-is
    if (std.meta.eql(src_ty, target_ty)) return val;

    const target_llvm = self.typeToLLVM(target_ty);

    // Any → concrete type: extract the i64 value and convert
    if (src_ty.isAny()) {
        const i64_val = c.LLVMBuildExtractValue(self.builder, val, 1, "any_extract");
        if (target_ty.isInt()) {
            if (target_ty.bitWidth() < 64) {
                return c.LLVMBuildTrunc(self.builder, i64_val, target_llvm, "any_to_int");
            }
            return i64_val;
        }
        if (target_ty == .boolean) {
            return c.LLVMBuildTrunc(self.builder, i64_val, c.LLVMInt1TypeInContext(self.context), "any_to_bool");
        }
        if (target_ty == .f64) {
            return c.LLVMBuildBitCast(self.builder, i64_val, c.LLVMDoubleTypeInContext(self.context), "any_to_f64");
        }
        if (target_ty == .f32) {
            const as_f64 = c.LLVMBuildBitCast(self.builder, i64_val, c.LLVMDoubleTypeInContext(self.context), "any_f64_tmp");
            return c.LLVMBuildFPTrunc(self.builder, as_f64, c.LLVMFloatTypeInContext(self.context), "any_to_f32");
        }
        if (target_ty == .string_type) {
            // i64 is a pointer to {ptr, i32} on the stack
            const ptr = c.LLVMBuildIntToPtr(self.builder, i64_val, c.LLVMPointerTypeInContext(self.context, 0), "any_str_ptr");
            return c.LLVMBuildLoad2(self.builder, self.getStringStructType(), ptr, "any_to_str");
        }
        if (target_ty.isStruct()) {
            const sname = target_ty.struct_type;
            if (self.struct_types.get(sname)) |info| {
                const ptr = c.LLVMBuildIntToPtr(self.builder, i64_val, c.LLVMPointerTypeInContext(self.context, 0), "any_struct_ptr");
                return c.LLVMBuildLoad2(self.builder, info.llvm_type, ptr, "any_to_struct");
            }
        }
        if (target_ty.isEnum()) {
            return c.LLVMBuildTrunc(self.builder, i64_val, c.LLVMInt32TypeInContext(self.context), "any_to_enum");
        }
        if (target_ty.isUnion()) {
            const uname = target_ty.union_type;
            if (self.union_types.get(uname)) |info| {
                const ptr = c.LLVMBuildIntToPtr(self.builder, i64_val, c.LLVMPointerTypeInContext(self.context, 0), "any_union_ptr");
                return c.LLVMBuildLoad2(self.builder, info.llvm_type, ptr, "any_to_union");
            }
        }
        // Unknown struct/union names and all other targets get the raw i64.
        return i64_val;
    }

    // Float → float conversions
    if (src_ty.isFloat() and target_ty.isFloat()) {
        if (target_ty.bitWidth() > src_ty.bitWidth()) {
            return c.LLVMBuildFPExt(self.builder, val, target_llvm, "fext");
        } else {
            return c.LLVMBuildFPTrunc(self.builder, val, target_llvm, "ftrunc");
        }
    }

    // Int → float conversions
    if (src_ty.isInt() and target_ty.isFloat()) {
        if (src_ty.isSigned()) {
            return c.LLVMBuildSIToFP(self.builder, val, target_llvm, "sitofp");
        } else {
            return c.LLVMBuildUIToFP(self.builder, val, target_llvm, "uitofp");
        }
    }

    // Float → int conversions
    if (src_ty.isFloat() and target_ty.isInt()) {
        if (target_ty.isSigned()) {
            return c.LLVMBuildFPToSI(self.builder, val, target_llvm, "fptosi");
        } else {
            return c.LLVMBuildFPToUI(self.builder, val, target_llvm, "fptoui");
        }
    }

    // Union → int: extract the tag field (index 0)
    if (src_ty.isUnion() and target_ty.isInt()) {
        const uname = src_ty.union_type;
        if (self.union_types.get(uname)) |info| {
            // Spill the value so the tag can be addressed with a struct GEP.
            const tmp = c.LLVMBuildAlloca(self.builder, info.llvm_type, "union_cast");
            _ = c.LLVMBuildStore(self.builder, val, tmp);
            const tag_ptr = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, tmp, 0, "tag_ptr");
            const tag_val = c.LLVMBuildLoad2(self.builder, c.LLVMInt32TypeInContext(self.context), tag_ptr, "tag_val");
            if (target_ty.bitWidth() == 32) return tag_val;
            if (target_ty.bitWidth() > 32) return c.LLVMBuildSExt(self.builder, tag_val, target_llvm, "tag_ext");
            return c.LLVMBuildTrunc(self.builder, tag_val, target_llvm, "tag_trunc");
        }
    }

    // Int → int conversions
    if (src_ty.isInt() and target_ty.isInt()) {
        const sw = src_ty.bitWidth();
        const tw = target_ty.bitWidth();
        if (tw > sw) {
            // Extend — use SExt if source is signed, ZExt if unsigned
            if (src_ty.isSigned()) {
                return c.LLVMBuildSExt(self.builder, val, target_llvm, "sext");
            } else {
                return c.LLVMBuildZExt(self.builder, val, target_llvm, "zext");
            }
        } else if (tw < sw) {
            // Truncate
            return c.LLVMBuildTrunc(self.builder, val, target_llvm, "trunc");
        }
        // Same width, different signedness — no-op (bit pattern is the same)
        return val;
    }

    return val;
}

/// Return the index of the field called `name` in `info`, or null if the
/// struct has no such field.
fn findFieldIndex(_: *CodeGen, info: StructInfo, name: []const u8) ?usize {
    for (info.field_names, 0..) |fn_name, i| {
        if (std.mem.eql(u8, fn_name, name)) return i;
    }
    return null;
}

/// Map a vector component letter to its lane index: `x`/`r`/`u` -> 0,
/// `y`/`g`/`v` -> 1, `z`/`b` -> 2, `w`/`a` -> 3. Any other byte yields null.
fn componentToIndex(ch: u8) ?u32 {
    return switch (ch) {
        'x', 'r', 'u' => 0,
        'y', 'g', 'v' => 1,
        'z', 'b' => 2,
        'w', 'a' => 3,
        else => null,
    };
}

/// Emit a call to the LLVM square-root intrinsic for the single argument.
///
/// `llvm.sqrt.f64` is used when the argument's inferred type is f64;
/// every other inferred type uses `llvm.sqrt.f32`.
fn genSqrt(self: *CodeGen, call_node: ast.Call) !c.LLVMValueRef {
    if (call_node.args.len != 1) return self.emitError("sqrt expects exactly 1 argument");
    const arg_val = try self.genExpr(call_node.args[0]);
    const arg_ty = self.inferType(call_node.args[0]);

    // Pick the right LLVM intrinsic based on float type
    const is_f64 = std.meta.eql(arg_ty, Type.f64);
    const intrinsic_name: [*c]const u8 = if (is_f64) "llvm.sqrt.f64" else "llvm.sqrt.f32";
    const llvm_float_ty = if (is_f64)
        c.LLVMDoubleTypeInContext(self.context)
    else
        c.LLVMFloatTypeInContext(self.context);

    // Get or declare the intrinsic
    var intrinsic_fn = c.LLVMGetNamedFunction(self.module, intrinsic_name);
    if (intrinsic_fn == null) {
        var param_types = [_]c.LLVMTypeRef{llvm_float_ty};
        const fn_type = c.LLVMFunctionType(llvm_float_ty, &param_types, 1, 0);
        intrinsic_fn = c.LLVMAddFunction(self.module, intrinsic_name, fn_type);
    }

    var args = [_]c.LLVMValueRef{arg_val};
    return c.LLVMBuildCall2(
        self.builder,
        c.LLVMGlobalGetValueType(intrinsic_fn.?),
        intrinsic_fn.?,
        &args,
        1,
        "sqrt",
    );
}

/// Emit an i32 constant holding the store size of the type named by the
/// single argument; the void type yields 0.
fn genSizeOf(self: *CodeGen, call_node: ast.Call) !c.LLVMValueRef {
    if (call_node.args.len != 1) return self.emitError("size_of expects exactly 1 argument");
    const ty = self.resolveType(call_node.args[0]);
    if (std.meta.eql(ty, Type.void_type)) {
        return c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0);
    }
    const llvm_ty = self.typeToLLVM(ty);
    const data_layout = c.LLVMGetModuleDataLayout(self.module);
    const size = c.LLVMStoreSizeOfType(data_layout, llvm_ty);
    return c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), size, 0);
}

/// Emit the Any type tag of the single argument as an i32.
///
/// An Any-typed argument yields its runtime tag (field 0). Every other
/// argument yields a constant chosen from its inferred type, and the
/// argument expression itself is not evaluated.
fn genTypeOf(self: *CodeGen, call_node: ast.Call) !c.LLVMValueRef {
    if (call_node.args.len != 1) return self.emitError("type_of expects exactly 1 argument");
    const arg = call_node.args[0];
    const arg_ty = self.inferType(arg);
    const i32_ty = c.LLVMInt32TypeInContext(self.context);

    // For Any values: extract the runtime tag (field 0)
    if (arg_ty.isAny()) {
        const val = try self.genExpr(arg);
        return c.LLVMBuildExtractValue(self.builder, val, 0, "type_of");
    }

    // For known types: return the constant tag value
    const tag: u64 = switch (arg_ty) {
        .void_type => ANY_TAG_VOID,
        .boolean => ANY_TAG_BOOL,
        .signed => |w| if (w <= 32) ANY_TAG_S32 else ANY_TAG_S64,
        // NOTE(review): unsigned widths reuse the signed tags — confirm this is intended.
        .unsigned => |w| if (w <= 32) ANY_TAG_S32 else ANY_TAG_S64,
        .f32 => ANY_TAG_F32,
        .f64 => ANY_TAG_F64,
        .string_type => ANY_TAG_STRING,
        .struct_type => |name| try self.getAnyTypeId(name, arg_ty),
        .enum_type => |name| try self.getAnyTypeId(name, arg_ty),
        .union_type => |name| try self.getAnyTypeId(name, arg_ty),
        .meta_type => ANY_TAG_TYPE,
        else => ANY_TAG_S32,
    };
    return c.LLVMConstInt(i32_ty, tag, 0);
}

/// Emit a constant string holding the display name of the type named by the
/// single argument.
fn genTypeName(self: *CodeGen, call_node: ast.Call) !c.LLVMValueRef {
    if (call_node.args.len != 1) return self.emitError("type_name expects exactly 1 argument");
    const ty = self.resolveType(call_node.args[0]);
    const name = try ty.displayName(self.allocator);
    return self.buildConstStr(name);
}

/// Emit an i32 constant: the number of fields of a struct, variants of an
/// enum or union, or the length of a vector or array type.
fn genFieldCount(self: *CodeGen, call_node: ast.Call) !c.LLVMValueRef {
    if (call_node.args.len != 1) return self.emitError("field_count expects exactly 1 argument");
    const ty = self.resolveType(call_node.args[0]);
    const i32_ty = c.LLVMInt32TypeInContext(self.context);
    if (ty.isStruct()) {
        const info = self.struct_types.get(ty.struct_type) orelse
            return self.emitErrorFmt("unknown struct type '{s}'", .{ty.struct_type});
        return c.LLVMConstInt(i32_ty, info.field_names.len, 0);
    }
    if (ty.isEnum()) {
        const variants = self.enum_types.get(ty.enum_type) orelse
            return self.emitErrorFmt("unknown enum type '{s}'", .{ty.enum_type});
        return c.LLVMConstInt(i32_ty, variants.len, 0);
    }
    if (ty.isVector()) {
        return c.LLVMConstInt(i32_ty, ty.vector_type.length, 0);
    }
    if (ty.isUnion()) {
        const info = self.union_types.get(ty.union_type) orelse
            return self.emitErrorFmt("unknown union type '{s}'", .{ty.union_type});
        return c.LLVMConstInt(i32_ty, info.variant_names.len, 0);
    }
    if (ty.isArray()) {
        return c.LLVMConstInt(i32_ty, ty.array_type.length, 0);
    }
    return self.emitError("field_count requires a struct, enum, vector, union, or array type");
}

/// Emit a lookup of the name of field/variant `idx` of a struct, enum or
/// union type: `field_name(T, idx)`.
///
/// The names live in a private constant global called `field_names.<type>`,
/// created on first use and reused afterwards. The index is a runtime value
/// and no bounds check is emitted for it.
fn genFieldName(self: *CodeGen, call_node: ast.Call) !c.LLVMValueRef {
    if (call_node.args.len != 2) return self.emitError("field_name expects 2 arguments: field_name(T, idx)");
    const ty = self.resolveType(call_node.args[0]);

    // Get the name list and type key
    const names: []const []const u8, const type_key: []const u8 = if (ty.isStruct()) blk: {
        const info = self.struct_types.get(ty.struct_type) orelse
            return self.emitErrorFmt("unknown struct type '{s}'", .{ty.struct_type});
        break :blk .{ info.field_names, ty.struct_type };
    } else if (ty.isEnum()) blk: {
        const variants = self.enum_types.get(ty.enum_type) orelse
            return self.emitErrorFmt("unknown enum type '{s}'", .{ty.enum_type});
        break :blk .{ variants, ty.enum_type };
    } else if (ty.isUnion()) blk: {
        const info = self.union_types.get(ty.union_type) orelse
            return self.emitErrorFmt("unknown union type '{s}'", .{ty.union_type});
        break :blk .{ info.variant_names, ty.union_type };
    } else return self.emitError("field_name requires a struct, enum, or union type");

    const n = names.len;
    const str_ty = self.getStringStructType();
    const arr_ty = c.LLVMArrayType2(str_ty, n);

    const global_name = try self.allocator.dupeZ(u8, try std.fmt.allocPrint(self.allocator, "field_names.{s}", .{type_key}));
    var global = c.LLVMGetNamedGlobal(self.module, global_name.ptr);
    if (global == null) {
        // Build the string table only when it does not exist yet, so repeated
        // calls for the same type do not rebuild the name constants.
        const vals = try self.allocator.alloc(c.LLVMValueRef, n);
        for (names, 0..) |name, i| {
            vals[i] = self.buildConstStrGlobal(name);
        }
        const arr_init = c.LLVMConstArray2(str_ty, vals.ptr, @intCast(n));
        global = c.LLVMAddGlobal(self.module, arr_ty, global_name.ptr);
        c.LLVMSetInitializer(global, arr_init);
        c.LLVMSetGlobalConstant(global, 1);
        c.LLVMSetLinkage(global, c.LLVMPrivateLinkage);
    }

    // GEP into the array with runtime index
    const idx = try self.genExpr(call_node.args[1]);
    const zero = c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0);
    var indices = [_]c.LLVMValueRef{ zero, idx };
    const elem_ptr = c.LLVMBuildGEP2(self.builder, arr_ty, global, &indices, 2, "field_name_ptr");
    return c.LLVMBuildLoad2(self.builder, str_ty, elem_ptr, "field_name");
}

/// Emit `field_value(s, idx)`: box the selected element as an Any.
///
/// - vector: extract lane `idx`.
/// - union: `idx` is not evaluated; a switch on the tag boxes the payload
///   with the active variant's type, and void variants give a void-tagged Any.
/// - array: spill to an alloca, index with `idx`, load the element.
/// - struct: spill to an alloca and switch on `idx`, one case per field.
///
/// For unions and structs a value with no matching case yields an undef Any.
fn genFieldValue(self: *CodeGen, call_node: ast.Call) !c.LLVMValueRef {
    if (call_node.args.len != 2) return self.emitError("field_value expects 2 arguments: field_value(s, idx)");

    const val = try self.genExpr(call_node.args[0]);
    const val_ty = self.inferType(call_node.args[0]);

    // Vector: extractelement + box as Any
    if (val_ty.isVector()) {
        const info = val_ty.vector_type;
        const elem_ty = Type.fromName(info.element_name) orelse
            return self.emitErrorFmt("unknown vector element type '{s}'", .{info.element_name});
        const idx = try self.genExpr(call_node.args[1]);
        const elem = c.LLVMBuildExtractElement(self.builder, val, idx, "vec_elem");
        return self.buildAnyValue(elem, elem_ty);
    }

    // Union: switch over tag, extract payload with correct type
    if (val_ty.isUnion()) {
        const uinfo = self.union_types.get(val_ty.union_type) orelse
            return self.emitErrorFmt("unknown union type '{s}'", .{val_ty.union_type});

        const union_alloca = c.LLVMBuildAlloca(self.builder, uinfo.llvm_type, "fv_union");
        _ = c.LLVMBuildStore(self.builder, val, union_alloca);

        // Read tag (field 0)
        const tag_ptr = c.LLVMBuildStructGEP2(self.builder, uinfo.llvm_type, union_alloca, 0, "fv_tag_ptr");
        const tag_val = c.LLVMBuildLoad2(self.builder, c.LLVMInt32TypeInContext(self.context), tag_ptr, "fv_tag");
        const payload_ptr = c.LLVMBuildStructGEP2(self.builder, uinfo.llvm_type, union_alloca, 1, "fv_payload_ptr");

        const n = uinfo.variant_names.len;
        const function = self.current_function;
        const any_ty = self.getAnyStructType();
        const merge_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "fv_merge");
        const default_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "fv_default");
        const sw = c.LLVMBuildSwitch(self.builder, tag_val, default_bb, @intCast(n));

        var phi_vals = std.ArrayList(c.LLVMValueRef).empty;
        var phi_bbs = std.ArrayList(c.LLVMBasicBlockRef).empty;

        for (uinfo.variant_types, 0..) |vty, vi| {
            const case_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "fv_ucase");
            c.LLVMAddCase(sw, c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), @intCast(vi), 0), case_bb);
            c.LLVMPositionBuilderAtEnd(self.builder, case_bb);

            const any_val = if (vty == .void_type) blk: {
                // Void variant: return Any with void tag
                const undef = c.LLVMGetUndef(any_ty);
                const void_tag = c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), ANY_TAG_VOID, 0);
                const with_tag = c.LLVMBuildInsertValue(self.builder, undef, void_tag, 0, "void_tag");
                const zero_val = c.LLVMConstInt(c.LLVMInt64TypeInContext(self.context), 0, 0);
                break :blk c.LLVMBuildInsertValue(self.builder, with_tag, zero_val, 1, "void_any");
            } else blk: {
                const payload = c.LLVMBuildLoad2(self.builder, self.typeToLLVM(vty), payload_ptr, "fv_payload");
                // The payload field is an [N x i8] array, so its type only
                // guarantees byte alignment.
                c.LLVMSetAlignment(payload, 1);
                break :blk try self.buildAnyValue(payload, vty);
            };
            try phi_vals.append(self.allocator, any_val);
            // Use the current insert block as the phi predecessor.
            try phi_bbs.append(self.allocator, c.LLVMGetInsertBlock(self.builder));
            _ = c.LLVMBuildBr(self.builder, merge_bb);
        }

        // Default: undef
        c.LLVMPositionBuilderAtEnd(self.builder, default_bb);
        try phi_vals.append(self.allocator, c.LLVMGetUndef(any_ty));
        try phi_bbs.append(self.allocator, default_bb);
        _ = c.LLVMBuildBr(self.builder, merge_bb);

        c.LLVMPositionBuilderAtEnd(self.builder, merge_bb);
        const vals_slice = try phi_vals.toOwnedSlice(self.allocator);
        const bbs_slice = try phi_bbs.toOwnedSlice(self.allocator);
        const phi = c.LLVMBuildPhi(self.builder, any_ty, "fv_uresult");
        c.LLVMAddIncoming(phi, vals_slice.ptr, bbs_slice.ptr, @intCast(vals_slice.len));
        return phi;
    }

    // Array: GEP + load + box as Any
    if (val_ty.isArray()) {
        const ainfo = val_ty.array_type;
        const elem_ty = Type.fromName(ainfo.element_name) orelse
            return self.emitErrorFmt("unknown array element type '{s}'", .{ainfo.element_name});
        const arr_llvm_ty = self.typeToLLVM(val_ty);
        const elem_llvm_ty = self.typeToLLVM(elem_ty);
        const arr_alloca = c.LLVMBuildAlloca(self.builder, arr_llvm_ty, "fv_arr");
        _ = c.LLVMBuildStore(self.builder, val, arr_alloca);
        const idx = try self.genExpr(call_node.args[1]);
        var gep_indices = [_]c.LLVMValueRef{
            c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0),
            idx,
        };
        const elem_ptr = c.LLVMBuildGEP2(self.builder, arr_llvm_ty, arr_alloca, &gep_indices, 2, "fv_aelem");
        const elem = c.LLVMBuildLoad2(self.builder, elem_llvm_ty, elem_ptr, "fv_aeval");
        return self.buildAnyValue(elem, elem_ty);
    }

    // Struct: switch over field indices
    const struct_val = val;
    const struct_ty = val_ty;
    if (!struct_ty.isStruct()) return self.emitError("field_value requires a struct, vector, union, or array value");

    const info = self.struct_types.get(struct_ty.struct_type) orelse
        return self.emitErrorFmt("unknown struct type '{s}'", .{struct_ty.struct_type});

    const idx = try self.genExpr(call_node.args[1]);
    const n = info.field_names.len;

    // Store struct to alloca BEFORE the switch (switch is a terminator)
    const struct_alloca = c.LLVMBuildAlloca(self.builder, info.llvm_type, "fv_struct");
    _ = c.LLVMBuildStore(self.builder, struct_val, struct_alloca);

    // Generate switch on idx with N cases
    const function = self.current_function;
    const merge_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "fv_merge");
    const default_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "fv_default");
    const sw = c.LLVMBuildSwitch(self.builder, idx, default_bb, @intCast(n));

    const any_ty = self.getAnyStructType();
    var phi_vals = std.ArrayList(c.LLVMValueRef).empty;
    var phi_bbs = std.ArrayList(c.LLVMBasicBlockRef).empty;

    // Switch case constants must have the same type as the switched value,
    // so take the type from the index instead of assuming i32.
    const idx_llvm_ty = c.LLVMTypeOf(idx);

    for (0..n) |i| {
        const case_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "fv_case");
        const case_val = c.LLVMConstInt(idx_llvm_ty, i, 0);
        c.LLVMAddCase(sw, case_val, case_bb);

        c.LLVMPositionBuilderAtEnd(self.builder, case_bb);
        // Extract field i via GEP + load
        const field_ptr = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, struct_alloca, @intCast(i), "fv_field_ptr");
        const field_llvm_ty = c.LLVMStructGetTypeAtIndex(info.llvm_type, @intCast(i));
        const field_val = c.LLVMBuildLoad2(self.builder, field_llvm_ty, field_ptr, "fv_field");
        const any_val = try self.buildAnyValue(field_val, info.field_types[i]);
        try phi_vals.append(self.allocator, any_val);
        try phi_bbs.append(self.allocator, c.LLVMGetInsertBlock(self.builder));
        _ = c.LLVMBuildBr(self.builder, merge_bb);
    }

    // Default: return undef Any
    c.LLVMPositionBuilderAtEnd(self.builder, default_bb);
    try phi_vals.append(self.allocator, c.LLVMGetUndef(any_ty));
    try phi_bbs.append(self.allocator, default_bb);
    _ = c.LLVMBuildBr(self.builder, merge_bb);

    c.LLVMPositionBuilderAtEnd(self.builder, merge_bb);
    const vals_slice = try phi_vals.toOwnedSlice(self.allocator);
    const bbs_slice = try phi_bbs.toOwnedSlice(self.allocator);
    const phi = c.LLVMBuildPhi(self.builder, any_ty, "fv_result");
    c.LLVMAddIncoming(phi, vals_slice.ptr, bbs_slice.ptr, @intCast(vals_slice.len));
    return phi;
}

/// Emit `cast(Type) expr`: args[0] names the target type, args[1] is the
/// value. The conversion goes through `convertValue` with no narrowing check.
fn genCast(self: *CodeGen, call_node: ast.Call) !c.LLVMValueRef {
    if (call_node.args.len != 2) return self.emitError("cast expects: cast(Type) expr");
    const target_ty = self.resolveType(call_node.args[0]);
    const src_ty = self.inferType(call_node.args[1]);
    const val = try self.genExpr(call_node.args[1]);
    return self.convertValue(val, src_ty, target_ty);
}

/// Emit `alloc(size)`: call the builtin calloc for `size + 1` bytes and
/// return a string slice built from the pointer and the original `size`.
///
/// Fails with a compile error when the builtins are not available.
fn genAlloc(self: *CodeGen, args: []const *Node) !c.LLVMValueRef {
    if (args.len != 1) return self.emitError("alloc expects exactly 1 argument: alloc(size)");
    const builtins = self.builtins orelse return self.emitError("builtins not available (missing #builtin import)");
    const size_val = try self.genExpr(args[0]);
    const i64_type = c.LLVMInt64TypeInContext(self.context);
    // Bring size to i64 for calloc. IntCast sign-extends narrower values like
    // the old SExt, and stays valid when the size is already i64 (or wider),
    // where a plain SExt would be invalid IR.
    const size_i64 = c.LLVMBuildIntCast2(self.builder, size_val, i64_type, 1, "size64");
    // calloc(size + 1, 1) — extra byte for null terminator
    const one_i64 = c.LLVMConstInt(i64_type, 1, 0);
    const size_plus_one = c.LLVMBuildAdd(self.builder, size_i64, one_i64, "szp1");
    const calloc_fn = builtins.calloc_fn;
    const calloc_ty = c.LLVMGlobalGetValueType(calloc_fn);
    var calloc_args = [_]c.LLVMValueRef{ size_plus_one, one_i64 };
    const ptr = c.LLVMBuildCall2(self.builder, calloc_ty, calloc_fn, &calloc_args, 2, "alloc_ptr");
    // Build string slice: {ptr, size}
    return self.buildStringSliceRT(ptr, size_val);
}

/// Extract a single named component (`x`, `g`, `w`, ...) from a vector value.
///
/// Multi-letter swizzles are rejected, as is a component whose lane index is
/// not below the vector's lane count (for example `.w` on a 3-lane vector).
fn genVectorExtract(self: *CodeGen, vec_val: c.LLVMValueRef, field: []const u8) !c.LLVMValueRef {
    if (field.len != 1) return self.emitErrorFmt("unsupported vector swizzle '{s}'", .{field});

    const idx_val = componentToIndex(field[0]) orelse return self.emitErrorFmt("invalid vector component '{c}'", .{field[0]});

    // Only query the lane count when the value really has a fixed vector type.
    const vec_llvm_ty = c.LLVMTypeOf(vec_val);
    if (c.LLVMGetTypeKind(vec_llvm_ty) == c.LLVMVectorTypeKind) {
        const lane_count = c.LLVMGetVectorSize(vec_llvm_ty);
        if (idx_val >= lane_count) {
            return self.emitErrorFmt("vector component '{c}' is out of range for a vector of length {d}", .{ field[0], lane_count });
        }
    }

    const idx = c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), idx_val, 0);
    return c.LLVMBuildExtractElement(self.builder, vec_val, idx, "comp");
}

/// Generate the value of `object.field`.
///
/// When the object is an identifier found in `named_values`, the access goes
/// through the variable's storage: struct fields are loaded by GEP; a union
/// "field" names a variant and loads the payload as that variant's type;
/// vectors extract a component; strings and slices expose `.len`/`.ptr`; Any
/// exposes `.tag`/`.value`. Any other object expression is evaluated as a
/// value, and only vector components and string `.len`/`.ptr` are supported.
fn genFieldAccess(self: *CodeGen, fa: ast.FieldAccess) !c.LLVMValueRef {
    // Check if the object is a struct or vector variable
    if (fa.object.data == .identifier) {
        if (self.named_values.get(fa.object.data.identifier.name)) |entry| {
            if (entry.ty.isStruct()) {
                const sname = entry.ty.struct_type;
                const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname});
                const idx = self.findFieldIndex(info, fa.field) orelse return self.emitErrorFmt("no field '{s}' in struct '{s}'", .{ fa.field, sname });
                const gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, entry.ptr, @intCast(idx), "field");
                return c.LLVMBuildLoad2(self.builder, self.typeToLLVM(info.field_types[idx]), gep, "fieldval");
            }
            if (entry.ty.isUnion()) {
                const uname = entry.ty.union_type;
                const info = self.union_types.get(uname) orelse return self.emitErrorFmt("unknown union type '{s}'", .{uname});
                // Find variant by name to determine payload type
                var vidx: ?usize = null;
                for (info.variant_names, 0..) |vn, i| {
                    if (std.mem.eql(u8, vn, fa.field)) {
                        vidx = i;
                        break;
                    }
                }
                const idx = vidx orelse return self.emitErrorFmt("no variant '{s}' in union '{s}'", .{ fa.field, uname });
                const variant_ty = info.variant_types[idx];
                if (variant_ty == .void_type) return self.emitErrorFmt("cannot access payload of void variant '{s}'", .{fa.field});
                // GEP to field 1 (payload area), load as variant type
                const payload_gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, entry.ptr, 1, "payload");
                const payload = c.LLVMBuildLoad2(self.builder, self.typeToLLVM(variant_ty), payload_gep, "union_payload");
                // The payload field is an [N x i8] array, so its type only
                // guarantees byte alignment.
                c.LLVMSetAlignment(payload, 1);
                return payload;
            }
            if (entry.ty.isVector()) {
                const vec_val = c.LLVMBuildLoad2(self.builder, self.typeToLLVM(entry.ty), entry.ptr, "vec_load");
                return self.genVectorExtract(vec_val, fa.field);
            }
            if (entry.ty == .string_type) {
                const str_val = c.LLVMBuildLoad2(self.builder, self.getStringStructType(), entry.ptr, "str_load");
                if (std.mem.eql(u8, fa.field, "len")) {
                    return c.LLVMBuildExtractValue(self.builder, str_val, 1, "str_len");
                }
                if (std.mem.eql(u8, fa.field, "ptr")) {
                    return c.LLVMBuildExtractValue(self.builder, str_val, 0, "str_ptr");
                }
                return self.emitErrorFmt("no field '{s}' on string (available: .len, .ptr)", .{fa.field});
            }
            if (entry.ty.isSlice()) {
                // Slices are loaded with the same struct type as strings.
                const slice_val = c.LLVMBuildLoad2(self.builder, self.getStringStructType(), entry.ptr, "slice_load");
                if (std.mem.eql(u8, fa.field, "len")) {
                    return c.LLVMBuildExtractValue(self.builder, slice_val, 1, "slice_len");
                }
                if (std.mem.eql(u8, fa.field, "ptr")) {
                    return c.LLVMBuildExtractValue(self.builder, slice_val, 0, "slice_ptr");
                }
                return self.emitErrorFmt("no field '{s}' on slice (available: .len, .ptr)", .{fa.field});
            }
            if (entry.ty.isAny()) {
                const any_val = c.LLVMBuildLoad2(self.builder, self.getAnyStructType(), entry.ptr, "any_load");
                if (std.mem.eql(u8, fa.field, "tag")) {
                    return c.LLVMBuildExtractValue(self.builder, any_val, 0, "any_tag");
                }
                if (std.mem.eql(u8, fa.field, "value")) {
                    return c.LLVMBuildExtractValue(self.builder, any_val, 1, "any_value");
                }
                return self.emitErrorFmt("no field '{s}' on Any (available: .tag, .value)", .{fa.field});
            }
        }
    }
    // Non-identifier object: evaluate expression and check type
    const obj_val = try self.genExpr(fa.object);
    const obj_ty = self.inferType(fa.object);
    if (obj_ty.isVector()) {
        return self.genVectorExtract(obj_val, fa.field);
    }
    if (obj_ty == .string_type) {
        if (std.mem.eql(u8, fa.field, "len")) {
            return c.LLVMBuildExtractValue(self.builder, obj_val, 1, "str_len");
        }
        if (std.mem.eql(u8, fa.field, "ptr")) {
            return c.LLVMBuildExtractValue(self.builder, obj_val, 0, "str_ptr");
        }
        return self.emitErrorFmt("no field '{s}' on string (available: .len, .ptr)", .{fa.field});
    }
    return self.emitError("field access on non-struct/non-vector expression");
}

fn genVectorComparison(self: *CodeGen, op: ast.BinaryOp.Op, lhs: c.LLVMValueRef, rhs: c.LLVMValueRef, vec_ty: Type, elem_ty: Type) c.LLVMValueRef {
    const vec_info = vec_ty.vector_type;
    const cmp = if (elem_ty.isFloat())
        (if (op == .eq) c.LLVMBuildFCmp(self.builder, c.LLVMRealOEQ, lhs, rhs, "vcmp") else c.LLVMBuildFCmp(self.builder, c.LLVMRealONE, lhs, rhs, "vcmp"))
    else
        (if (op == .eq) c.LLVMBuildICmp(self.builder, c.LLVMIntEQ, lhs, rhs, "vcmp") else c.LLVMBuildICmp(self.builder, c.LLVMIntNE, lhs, rhs, "vcmp"));
    // Reduce: extract each i1 and AND (eq) or OR (neq)
    var result = c.LLVMBuildExtractElement(self.builder, cmp, c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0), "cmp0");
    for (1..vec_info.length) |i| {
        const elem = c.LLVMBuildExtractElement(self.builder, cmp, c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), @intCast(i), 0), "cmpi");
        result = if (op == .eq)
c.LLVMBuildAnd(self.builder, result, elem, "andcmp")
            else
                c.LLVMBuildOr(self.builder, result, elem, "orcmp");
        }
        return result;
    }

    /// Lower an index expression `object[index]` to the loaded element value.
    ///
    /// The object type reported by `inferType` selects the lowering:
    /// - vector: `extractelement` on the generated vector value;
    /// - array:  the object must be an identifier present in `named_values`;
    ///           a two-index GEP into its storage followed by a load;
    /// - string: byte load through field 0 (the pointer) of the string value,
    ///           zero-extended to i32;
    /// - slice:  element load through field 0 (the pointer) of the slice value.
    ///
    /// No bounds checks are emitted on any path. Any other object type, or an
    /// array that is not a named variable, is reported through `emitError`.
    fn genIndexExpr(self: *CodeGen, ie: ast.IndexExpr) !c.LLVMValueRef {
        const obj_ty = self.inferType(ie.object);

        if (obj_ty.isVector()) {
            const vec_val = try self.genExpr(ie.object);
            const idx = try self.genExpr(ie.index);
            return c.LLVMBuildExtractElement(self.builder, vec_val, idx, "vidx");
        }

        if (obj_ty.isArray()) {
            // Arrays are indexed in place through the variable's storage pointer, so
            // only named variables are supported. Report that precisely instead of
            // falling through to the generic "requires an array, ..." message.
            if (ie.object.data != .identifier) {
                return self.emitError("array indexing requires a named array variable");
            }
            const arr_name = ie.object.data.identifier.name;
            const entry = self.named_values.get(arr_name) orelse
                return self.emitErrorFmt("undefined array variable '{s}'", .{arr_name});

            const arr_info = obj_ty.array_type;
            const elem_ty = Type.fromName(arr_info.element_name) orelse
                return self.emitErrorFmt("unknown array element type '{s}'", .{arr_info.element_name});
            const idx = try self.genExpr(ie.index);
            // First index steps through the pointer, second selects the element.
            const zero = c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0);
            var indices = [_]c.LLVMValueRef{ zero, idx };
            const gep = c.LLVMBuildGEP2(self.builder, self.typeToLLVM(obj_ty), entry.ptr, &indices, 2, "arridx");
            return c.LLVMBuildLoad2(self.builder, self.typeToLLVM(elem_ty), gep, "arrval");
        }

        if (obj_ty == .string_type) {
            // String indexing: extract ptr from slice, GEP + load i8 + zext to i32
            const str_val = try self.genExpr(ie.object);
            const ptr = c.LLVMBuildExtractValue(self.builder, str_val, 0, "str_ptr");
            const idx = try self.genExpr(ie.index);
            const i8_type = c.LLVMInt8TypeInContext(self.context);
            var gep_indices = [_]c.LLVMValueRef{idx};
            const gep = c.LLVMBuildGEP2(self.builder, i8_type, ptr, &gep_indices, 1, "stridx");
            const byte = c.LLVMBuildLoad2(self.builder, i8_type, gep, "byte");
            return c.LLVMBuildZExt(self.builder, byte, c.LLVMInt32TypeInContext(self.context), "char");
        }

        if (obj_ty.isSlice()) {
            const slice_info = obj_ty.slice_type;
            const elem_ty = Type.fromName(slice_info.element_name) orelse
                return self.emitErrorFmt("unknown slice element type '{s}'", .{slice_info.element_name});
            const elem_llvm_ty = self.typeToLLVM(elem_ty);

            // A named slice is loaded straight from its storage (using the string
            // struct layout, as elsewhere in this file); anything else is evaluated
            // as an expression. Everything after that is shared between both cases.
            const slice_val = blk: {
                if (ie.object.data == .identifier) {
                    if (self.named_values.get(ie.object.data.identifier.name)) |entry| {
                        break :blk c.LLVMBuildLoad2(self.builder, self.getStringStructType(), entry.ptr, "slice_load");
                    }
                }
                break :blk try self.genExpr(ie.object);
            };
            const ptr = c.LLVMBuildExtractValue(self.builder, slice_val, 0, "slice_ptr");
            const idx = try self.genExpr(ie.index);
            var gep_indices = [_]c.LLVMValueRef{idx};
            const gep = c.LLVMBuildGEP2(self.builder, elem_llvm_ty, ptr, &gep_indices, 1, "sliceidx");
            return c.LLVMBuildLoad2(self.builder, elem_llvm_ty, gep, "sliceval");
        }

        return self.emitError("index expression requires an array, vector, string, or slice");
    }

    fn genBinaryOp(self: *CodeGen, op: ast.BinaryOp.Op, lhs: c.LLVMValueRef, rhs: c.LLVMValueRef, result_type: Type) c.LLVMValueRef {
        // Vector types: dispatch based on element type (LLVM does element-wise automatically)
        if (result_type.isVector()) {
            const elem_ty = result_type.vectorElementType() orelse return lhs;
            if (op == .eq or op == .neq) {
                return self.genVectorComparison(op, lhs, rhs, result_type, elem_ty);
            }
            if (elem_ty.isFloat()) {
                return switch (op) {
                    .add => c.LLVMBuildFAdd(self.builder, lhs, rhs, "vaddtmp"),
                    .sub => c.LLVMBuildFSub(self.builder, lhs, rhs, "vsubtmp"),
                    .mul => c.LLVMBuildFMul(self.builder, lhs, rhs, "vmultmp"),
                    .div => c.LLVMBuildFDiv(self.builder, lhs, rhs, "vdivtmp"),
+ .mod => c.LLVMBuildFRem(self.builder, lhs, rhs, "vmodtmp"), + else => lhs, + }; + } else if (elem_ty.isUnsigned()) { + return switch (op) { + .add => c.LLVMBuildAdd(self.builder, lhs, rhs, "vaddtmp"), + .sub => c.LLVMBuildSub(self.builder, lhs, rhs, "vsubtmp"), + .mul => c.LLVMBuildMul(self.builder, lhs, rhs, "vmultmp"), + .div => c.LLVMBuildUDiv(self.builder, lhs, rhs, "vdivtmp"), + .mod => c.LLVMBuildURem(self.builder, lhs, rhs, "vmodtmp"), + else => lhs, + }; + } else { + return switch (op) { + .add => c.LLVMBuildAdd(self.builder, lhs, rhs, "vaddtmp"), + .sub => c.LLVMBuildSub(self.builder, lhs, rhs, "vsubtmp"), + .mul => c.LLVMBuildMul(self.builder, lhs, rhs, "vmultmp"), + .div => c.LLVMBuildSDiv(self.builder, lhs, rhs, "vdivtmp"), + .mod => c.LLVMBuildSRem(self.builder, lhs, rhs, "vmodtmp"), + else => lhs, + }; + } + } + if (result_type.isFloat()) { + return switch (op) { + .add => c.LLVMBuildFAdd(self.builder, lhs, rhs, "addtmp"), + .sub => c.LLVMBuildFSub(self.builder, lhs, rhs, "subtmp"), + .mul => c.LLVMBuildFMul(self.builder, lhs, rhs, "multmp"), + .div => c.LLVMBuildFDiv(self.builder, lhs, rhs, "divtmp"), + .mod => c.LLVMBuildFRem(self.builder, lhs, rhs, "modtmp"), + .eq => c.LLVMBuildFCmp(self.builder, c.LLVMRealOEQ, lhs, rhs, "eqtmp"), + .neq => c.LLVMBuildFCmp(self.builder, c.LLVMRealONE, lhs, rhs, "neqtmp"), + .lt => c.LLVMBuildFCmp(self.builder, c.LLVMRealOLT, lhs, rhs, "lttmp"), + .lte => c.LLVMBuildFCmp(self.builder, c.LLVMRealOLE, lhs, rhs, "letmp"), + .gt => c.LLVMBuildFCmp(self.builder, c.LLVMRealOGT, lhs, rhs, "gttmp"), + .gte => c.LLVMBuildFCmp(self.builder, c.LLVMRealOGE, lhs, rhs, "getmp"), + .and_op, .or_op => unreachable, + }; + } else if (result_type.isUnsigned()) { + return switch (op) { + .add => c.LLVMBuildAdd(self.builder, lhs, rhs, "addtmp"), + .sub => c.LLVMBuildSub(self.builder, lhs, rhs, "subtmp"), + .mul => c.LLVMBuildMul(self.builder, lhs, rhs, "multmp"), + .div => c.LLVMBuildUDiv(self.builder, lhs, rhs, "divtmp"), + .mod => 
c.LLVMBuildURem(self.builder, lhs, rhs, "modtmp"), + .eq => c.LLVMBuildICmp(self.builder, c.LLVMIntEQ, lhs, rhs, "eqtmp"), + .neq => c.LLVMBuildICmp(self.builder, c.LLVMIntNE, lhs, rhs, "neqtmp"), + .lt => c.LLVMBuildICmp(self.builder, c.LLVMIntULT, lhs, rhs, "lttmp"), + .lte => c.LLVMBuildICmp(self.builder, c.LLVMIntULE, lhs, rhs, "letmp"), + .gt => c.LLVMBuildICmp(self.builder, c.LLVMIntUGT, lhs, rhs, "gttmp"), + .gte => c.LLVMBuildICmp(self.builder, c.LLVMIntUGE, lhs, rhs, "getmp"), + .and_op, .or_op => unreachable, + }; + } else { + // signed int (default) + return switch (op) { + .add => c.LLVMBuildAdd(self.builder, lhs, rhs, "addtmp"), + .sub => c.LLVMBuildSub(self.builder, lhs, rhs, "subtmp"), + .mul => c.LLVMBuildMul(self.builder, lhs, rhs, "multmp"), + .div => c.LLVMBuildSDiv(self.builder, lhs, rhs, "divtmp"), + .mod => c.LLVMBuildSRem(self.builder, lhs, rhs, "modtmp"), + .eq => c.LLVMBuildICmp(self.builder, c.LLVMIntEQ, lhs, rhs, "eqtmp"), + .neq => c.LLVMBuildICmp(self.builder, c.LLVMIntNE, lhs, rhs, "neqtmp"), + .lt => c.LLVMBuildICmp(self.builder, c.LLVMIntSLT, lhs, rhs, "lttmp"), + .lte => c.LLVMBuildICmp(self.builder, c.LLVMIntSLE, lhs, rhs, "letmp"), + .gt => c.LLVMBuildICmp(self.builder, c.LLVMIntSGT, lhs, rhs, "gttmp"), + .gte => c.LLVMBuildICmp(self.builder, c.LLVMIntSGE, lhs, rhs, "getmp"), + .and_op, .or_op => unreachable, + }; + } + } + + fn genAndOp(self: *CodeGen, binop: ast.BinaryOp) !c.LLVMValueRef { + const function = self.current_function; + const i1_type = c.LLVMInt1TypeInContext(self.context); + + var lhs_val = try self.genExpr(binop.lhs); + if (c.LLVMTypeOf(lhs_val) != i1_type) { + lhs_val = c.LLVMBuildICmp(self.builder, c.LLVMIntNE, lhs_val, c.LLVMConstInt(c.LLVMTypeOf(lhs_val), 0, 0), "tobool"); + } + const lhs_bb = c.LLVMGetInsertBlock(self.builder); + + const rhs_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "and.rhs"); + const merge_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "and.merge"); + + _ = 
c.LLVMBuildCondBr(self.builder, lhs_val, rhs_bb, merge_bb); + + c.LLVMPositionBuilderAtEnd(self.builder, rhs_bb); + var rhs_val = try self.genExpr(binop.rhs); + if (c.LLVMTypeOf(rhs_val) != i1_type) { + rhs_val = c.LLVMBuildICmp(self.builder, c.LLVMIntNE, rhs_val, c.LLVMConstInt(c.LLVMTypeOf(rhs_val), 0, 0), "tobool"); + } + const rhs_end_bb = c.LLVMGetInsertBlock(self.builder); + _ = c.LLVMBuildBr(self.builder, merge_bb); + + c.LLVMPositionBuilderAtEnd(self.builder, merge_bb); + const phi = c.LLVMBuildPhi(self.builder, i1_type, "and.result"); + var vals = [2]c.LLVMValueRef{ c.LLVMConstInt(i1_type, 0, 0), rhs_val }; + var blocks = [2]c.LLVMBasicBlockRef{ lhs_bb, rhs_end_bb }; + c.LLVMAddIncoming(phi, &vals, &blocks, 2); + + return phi; + } + + fn genOrOp(self: *CodeGen, binop: ast.BinaryOp) !c.LLVMValueRef { + const function = self.current_function; + const i1_type = c.LLVMInt1TypeInContext(self.context); + + var lhs_val = try self.genExpr(binop.lhs); + if (c.LLVMTypeOf(lhs_val) != i1_type) { + lhs_val = c.LLVMBuildICmp(self.builder, c.LLVMIntNE, lhs_val, c.LLVMConstInt(c.LLVMTypeOf(lhs_val), 0, 0), "tobool"); + } + const lhs_bb = c.LLVMGetInsertBlock(self.builder); + + const rhs_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "or.rhs"); + const merge_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "or.merge"); + + _ = c.LLVMBuildCondBr(self.builder, lhs_val, merge_bb, rhs_bb); + + c.LLVMPositionBuilderAtEnd(self.builder, rhs_bb); + var rhs_val = try self.genExpr(binop.rhs); + if (c.LLVMTypeOf(rhs_val) != i1_type) { + rhs_val = c.LLVMBuildICmp(self.builder, c.LLVMIntNE, rhs_val, c.LLVMConstInt(c.LLVMTypeOf(rhs_val), 0, 0), "tobool"); + } + const rhs_end_bb = c.LLVMGetInsertBlock(self.builder); + _ = c.LLVMBuildBr(self.builder, merge_bb); + + c.LLVMPositionBuilderAtEnd(self.builder, merge_bb); + const phi = c.LLVMBuildPhi(self.builder, i1_type, "or.result"); + var vals = [2]c.LLVMValueRef{ c.LLVMConstInt(i1_type, 1, 0), rhs_val }; + var 
blocks = [2]c.LLVMBasicBlockRef{ lhs_bb, rhs_end_bb };
        c.LLVMAddIncoming(phi, &vals, &blocks, 2);

        return phi;
    }

    /// Lower a chained comparison (operands joined by comparison operators, e.g.
    /// `a < b <= c`) to a single value.
    ///
    /// Every operand is generated exactly once, in source order. Each adjacent
    /// pair is converted to its widened common type (`Type.widen`), compared with
    /// `genBinaryOp`, and the pairwise results are combined with a plain `and`
    /// instruction, so all operands are always evaluated (no short-circuit).
    ///
    /// A malformed node (no operators, or an operand count different from
    /// operator count + 1) is reported through `emitError` instead of indexing
    /// out of bounds or returning an uninitialized value.
    fn genChainedComparison(self: *CodeGen, chain: ast.ChainedComparison) !c.LLVMValueRef {
        if (chain.ops.len == 0 or chain.operands.len != chain.ops.len + 1) {
            return self.emitError("malformed chained comparison");
        }

        // Evaluate all operands exactly once. The list is scratch storage only:
        // the LLVM values are copied out of it before it is released.
        var operand_vals = std.ArrayList(c.LLVMValueRef).empty;
        defer operand_vals.deinit(self.allocator);
        try operand_vals.ensureTotalCapacity(self.allocator, chain.operands.len);
        for (chain.operands) |operand| {
            operand_vals.appendAssumeCapacity(try self.genExpr(operand));
        }

        // Compare pairwise and AND the results together. `result` is always
        // assigned on the first iteration because ops.len >= 1 was checked above.
        var result: c.LLVMValueRef = null;
        for (chain.ops, 0..) |op, i| {
            const lhs_ty = self.inferType(chain.operands[i]);
            const rhs_ty = self.inferType(chain.operands[i + 1]);
            const cmp_type = Type.widen(lhs_ty, rhs_ty);

            const lhs_conv = self.convertValue(operand_vals.items[i], lhs_ty, cmp_type);
            const rhs_conv = self.convertValue(operand_vals.items[i + 1], rhs_ty, cmp_type);
            const cmp = self.genBinaryOp(op, lhs_conv, rhs_conv, cmp_type);

            result = if (i == 0)
                cmp
            else
                c.LLVMBuildAnd(self.builder, result, cmp, "chain.and");
        }

        return result;
    }

    fn genCall(self: *CodeGen, call_node: ast.Call) !c.LLVMValueRef {
        // Handle union construction: Shape.variant(payload)
        if (call_node.callee.data == .field_access) {
            const fa = call_node.callee.data.field_access;
            // Resolve the object to a type name (identifier, call, or field_access chain)
            const resolved_type: ?Type = blk: {
                if (fa.object.data == .identifier) {
                    const name = self.type_aliases.get(fa.object.data.identifier.name) orelse fa.object.data.identifier.name;
                    if (self.union_types.contains(name)) break :blk .{ .union_type = name };
                    if (self.struct_types.contains(name)) break :blk .{ .struct_type = name };
                } else {
                    const ty = self.resolveType(fa.object);
                    if (ty.isUnion() or ty.isStruct()) break :blk ty;
                }
                break :blk null;
            };
            if (resolved_type) |rty| {
                if (rty.isUnion()) {
                    const type_name = rty.union_type;
                    const payload_node: ?*Node = if
(call_node.args.len > 0) call_node.args[0] else null;
                    const ul = ast.UnionLiteral{
                        .union_name = type_name,
                        .variant_name = fa.field,
                        .payload = payload_node,
                    };
                    return self.genUnionLiteral(ul, type_name);
                }
            }
        }

        // Handle namespaced calls: namespace.func(args)
        if (call_node.callee.data == .field_access) {
            const fa = call_node.callee.data.field_access;
            if (fa.object.data == .identifier) {
                const ns_name = fa.object.data.identifier.name;
                if (self.namespaces.contains(ns_name)) {
                    const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns_name, fa.field });
                    return self.genCallByName(qualified, call_node);
                }
            }
        }

        // Resolve callee — must be an identifier
        if (call_node.callee.data != .identifier) return self.emitError("callee must be an identifier");
        const callee_name = call_node.callee.data.identifier.name;
        return self.genCallByName(callee_name, call_node);
    }

    /// Generate a call to the function known as `callee_name`.
    ///
    /// Resolution order (each step tries the bare name, then
    /// `<current_namespace>.<callee_name>` when a namespace is active):
    ///   1. generic templates          -> `genGenericCall`
    ///   2. `#builtin` functions       -> `dispatchBuiltin`
    ///   3. intrinsics `sqrt` / `cast` (bare name only)
    ///   4. a function already declared in the LLVM module
    ///
    /// For step 4 the arguments are converted to the callee's parameter types.
    /// If the callee is registered in `variadic_functions`, the arguments after
    /// the fixed ones are packed into a single `{ptr, len}` slice argument: a
    /// lone spread argument passes an existing array/slice, otherwise the values
    /// are stored into a stack array, and with no extra arguments an empty
    /// `{null, 0}` slice is passed.
    ///
    /// Reports "undefined function" through `emitErrorFmt` when nothing matches.
    fn genCallByName(self: *CodeGen, callee_name: []const u8, call_node: ast.Call) !c.LLVMValueRef {
        // Namespace-qualified spelling of the callee, built once and shared by all
        // fallbacks below. Deliberately not freed: it is handed to genGenericCall /
        // dispatchBuiltin, and whether they retain it is not visible from here.
        const qualified_name: ?[]const u8 = if (self.current_namespace) |ns|
            try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns, callee_name })
        else
            null;

        // Check if this is a generic function call
        if (self.generic_templates.get(callee_name)) |template| {
            return self.genGenericCall(callee_name, template, call_node);
        }
        // Intra-namespace fallback for generic templates
        if (qualified_name) |qualified| {
            if (self.generic_templates.get(qualified)) |template| {
                return self.genGenericCall(qualified, template, call_node);
            }
        }

        // Check for #builtin function (only available when imported)
        if (self.builtin_functions.contains(callee_name)) {
            return self.dispatchBuiltin(callee_name, call_node);
        }
        // Intra-namespace fallback for builtins
        if (qualified_name) |qualified| {
            if (self.builtin_functions.contains(qualified)) {
                return self.dispatchBuiltin(qualified, call_node);
            }
        }

        // Compiler intrinsics (always available, no #builtin declaration needed)
        if (std.mem.eql(u8, callee_name, "sqrt")) {
            return self.genSqrt(call_node);
        }
        if (std.mem.eql(u8, callee_name, "cast")) {
            return self.genCast(call_node);
        }

        // Look the function up in the LLVM module. The NUL-terminated copies are
        // only needed for the lookup itself, so they are released right away.
        const name_z = try self.allocator.dupeZ(u8, callee_name);
        defer self.allocator.free(name_z);
        var callee_fn = c.LLVMGetNamedFunction(self.module, name_z.ptr);
        // Intra-namespace fallback: try qualified name
        if (callee_fn == null) {
            if (qualified_name) |qualified| {
                const qualified_z = try self.allocator.dupeZ(u8, qualified);
                defer self.allocator.free(qualified_z);
                callee_fn = c.LLVMGetNamedFunction(self.module, qualified_z.ptr);
            }
        }
        if (callee_fn == null) return self.emitErrorFmt("undefined function '{s}'", .{callee_name});

        // Get function type (opaque pointers: use LLVMGlobalGetValueType)
        const fn_type = c.LLVMGlobalGetValueType(callee_fn.?);

        // Check if this is a variadic function call (bare name, then qualified name)
        const var_info = self.variadic_functions.get(callee_name) orelse
            (if (qualified_name) |qualified| self.variadic_functions.get(qualified) else null);

        // Fetch the parameter types into a buffer sized for this callee.
        // LLVMGetParamTypes writes exactly LLVMCountParamTypes entries, so a
        // fixed-size stack array would overflow for callees with many parameters.
        const num_params = c.LLVMCountParamTypes(fn_type);
        const param_llvm_types = try self.allocator.alloc(c.LLVMTypeRef, num_params);
        defer self.allocator.free(param_llvm_types);
        if (num_params > 0) {
            c.LLVMGetParamTypes(fn_type, param_llvm_types.ptr);
        }

        // Scratch list of argument values; LLVMBuildCall2 copies the operands into
        // the call instruction, so the list can be released when we return.
        var arg_vals = std.ArrayList(c.LLVMValueRef).empty;
        defer arg_vals.deinit(self.allocator);

        const i32_type = c.LLVMInt32TypeInContext(self.context);
        const zero = c.LLVMConstInt(i32_type, 0, 0);

        if (var_info) |vi| {
            // Variadic call: generate fixed args, then pack remaining into slice
            const fixed_count = vi.fixed_param_count;
            for (0..fixed_count) |i| {
                if (i < call_node.args.len) {
                    const arg = call_node.args[i];
                    // Guard the type lookup in case the registered fixed count
                    // disagrees with the LLVM signature.
                    const val = if (i < num_params)
                        try self.genExprAsType(arg, self.llvmTypeToSxType(param_llvm_types[i]))
                    else
                        try self.genExpr(arg);
                    try arg_vals.append(self.allocator, val);
                }
            }

            // Pack variadic args into a slice {ptr, len}
            const elem_ty = Type.fromName(vi.element_type_name) orelse Type.s(32);
            const elem_llvm_ty = self.typeToLLVM(elem_ty);
            const var_arg_count = if (call_node.args.len > fixed_count) call_node.args.len - fixed_count else 0;

            // Check for spread operator: fn(..array) — single spread arg
            if (var_arg_count == 1 and call_node.args[fixed_count].data == .spread_expr) {
                const spread_operand = call_node.args[fixed_count].data.spread_expr.operand;
                const spread_ty = self.inferType(spread_operand);
                if (spread_ty.isArray()) {
                    // Spread an array: construct slice from array pointer + known length
                    const arr_info = spread_ty.array_type;
                    if (spread_operand.data != .identifier) {
                        return self.emitError("spread operator requires a named variable");
                    }
                    const entry = self.named_values.get(spread_operand.data.identifier.name) orelse
                        return self.emitError("spread operand not found");
                    const arr_llvm_ty = self.typeToLLVM(spread_ty);
                    var ptr_indices = [_]c.LLVMValueRef{ zero, zero };
                    const arr_ptr = c.LLVMBuildGEP2(self.builder, arr_llvm_ty, entry.ptr, &ptr_indices, 2, "spread_ptr");
                    const len_val = c.LLVMConstInt(i32_type, arr_info.length, 0);
                    try arg_vals.append(self.allocator, self.buildStringSliceRT(arr_ptr, len_val));
                } else if (spread_ty.isSlice()) {
                    // Spread a slice: pass through as-is
                    try arg_vals.append(self.allocator, try self.genExpr(spread_operand));
                } else {
                    return self.emitError("spread operator requires an array or slice");
                }
            } else if (var_arg_count > 0) {
                // Allocate array on stack: [N x elem_type]
                const arr_ty = c.LLVMArrayType2(elem_llvm_ty, @intCast(var_arg_count));
                const arr_alloca = c.LLVMBuildAlloca(self.builder, arr_ty, "varargs_arr");
                // Store each variadic arg
                for (0..var_arg_count) |vi_idx| {
                    const arg_node = call_node.args[fixed_count + vi_idx];
                    const arg_val = if (elem_ty.isAny()) blk: {
                        // ..Any: wrap each arg in Any{tag, value}
                        const raw_val = try self.genExpr(arg_node);
                        const arg_ty = self.inferType(arg_node);
                        break :blk try self.buildAnyValue(raw_val, arg_ty);
                    } else try self.genExprAsType(arg_node, elem_ty);
                    const idx_val = c.LLVMConstInt(i32_type, @intCast(vi_idx), 0);
                    var indices = [_]c.LLVMValueRef{ zero, idx_val };
                    const gep = c.LLVMBuildGEP2(self.builder, arr_ty, arr_alloca, &indices, 2, "vararg_elem");
                    _ = c.LLVMBuildStore(self.builder, arg_val, gep);
                }
                // Build slice: {ptr, len}
                var ptr_indices = [_]c.LLVMValueRef{ zero, zero };
                const arr_ptr = c.LLVMBuildGEP2(self.builder, arr_ty, arr_alloca, &ptr_indices, 2, "varargs_ptr");
                const len_val = c.LLVMConstInt(i32_type, @intCast(var_arg_count), 0);
                try arg_vals.append(self.allocator, self.buildStringSliceRT(arr_ptr, len_val));
            } else {
                // Zero variadic args: pass empty slice {null, 0}
                const null_ptr = c.LLVMConstNull(c.LLVMPointerTypeInContext(self.context, 0));
                try arg_vals.append(self.allocator, self.buildStringSliceRT(null_ptr, zero));
            }
        } else {
            // Normal (non-variadic) call: convert to the declared parameter type
            // where one exists, pass any surplus arguments through unconverted.
            for (call_node.args, 0..) |arg, i| {
                const val = if (i < num_params)
                    try self.genExprAsType(arg, self.llvmTypeToSxType(param_llvm_types[i]))
                else
                    try self.genExpr(arg);
                try arg_vals.append(self.allocator, val);
            }
        }

        // A void call must not be given a result name.
        const ret_ty = c.LLVMGetReturnType(fn_type);
        const call_name: [*c]const u8 = if (ret_ty == c.LLVMVoidTypeInContext(self.context)) "" else "calltmp";
        return c.LLVMBuildCall2(
            self.builder,
            fn_type,
            callee_fn.?,
            if (arg_vals.items.len > 0) arg_vals.items.ptr else null,
            @intCast(arg_vals.items.len),
            call_name,
        );
    }

    /// Generate a call to a generic function described by `template`.
    ///
    /// Steps, in order:
    ///   1. If match tags are active and some argument is `cast(x, v)` /
    ///      `std.cast(x, v)` whose first argument names a runtime variable (not a
    ///      type), delegate to `genGenericCallWithRuntimeDispatch`.
    ///   2. Collect call arguments for type parameters whose constraint is not
    ///      `Type` (comptime value parameters).
    ///   3. Infer a binding for each type parameter from the argument types,
    ///      widening when several arguments bind the same parameter.
    ///   4. With comptime value parameters present, delegate to `genComptimeCall`.
    ///   5. Otherwise fetch or create the monomorphized instance and emit the
    ///      call, converting each argument to the resolved parameter type.
    ///
    /// `qualified_name` is only forwarded to `genComptimeCall`.
    fn genGenericCall(self: *CodeGen, qualified_name: []const u8, template: GenericTemplate, call_node: ast.Call) !c.LLVMValueRef {
        const fd = template.fd;

        // Check for runtime type dispatch: cast(runtime_type_var, any_val) as argument
        if (self.current_match_tags) |match_tags| {
            if (match_tags.len > 0) {
                for (call_node.args) |arg| {
                    if (arg.data != .call) continue;
                    const inner = arg.data.call;
                    if (inner.callee.data != .identifier) continue;
                    const cast_name = inner.callee.data.identifier.name;
                    const is_cast = std.mem.eql(u8, cast_name, "cast") or std.mem.eql(u8, cast_name, "std.cast");
                    if (!is_cast or inner.args.len != 2) continue;

                    // Check if first arg of cast is a runtime variable (not a type expression)
                    const type_arg = inner.args[0];
                    if (type_arg.data != .identifier) continue;
                    const name = type_arg.data.identifier.name;
                    // It's a runtime type if it's a named_value, not a type name
                    const is_runtime_var = self.named_values.contains(name) and
                        Type.fromName(name) == null and
                        !self.struct_types.contains(name) and
                        !self.enum_types.contains(name) and
                        !self.union_types.contains(name) and
                        !self.type_aliases.contains(name);
                    if (is_runtime_var) {
                        return self.genGenericCallWithRuntimeDispatch(template, call_node, match_tags);
                    }
                }
            }
        }

        // Check for comptime value params
        var has_comptime_values = false;
        var comptime_nodes = std.StringHashMap(*Node).init(self.allocator);
        for (fd.type_params) |tp| {
            const constraint_name = if (tp.constraint.data == .type_expr) tp.constraint.data.type_expr.name else "";
            if (std.mem.eql(u8, constraint_name, "Type")) continue;

            // Value param — extract comptime value from call arg
            has_comptime_values = true;
            for (fd.params, 0..) |param, pi| {
                if (!std.mem.eql(u8, param.name, tp.name)) continue;
                if (pi < call_node.args.len) {
                    try comptime_nodes.put(tp.name, @constCast(call_node.args[pi]));
                }
                break;
            }
        }

        // Normal generic call: Infer type bindings from arguments, widening across all args for the same type param
        var bindings = std.StringHashMap(Type).init(self.allocator);
        for (fd.params, 0..) |param, i| {
            if (param.is_comptime) continue;
            if (param.type_expr.data != .type_expr) continue;
            const type_name = param.type_expr.data.type_expr.name;
            // Check if this type name is a type parameter
            for (fd.type_params) |tp| {
                if (!std.mem.eql(u8, tp.name, type_name)) continue;
                if (i < call_node.args.len) {
                    const arg_ty = self.inferType(call_node.args[i]);
                    // Widen to the broader type to avoid data loss
                    const bound = if (bindings.get(type_name)) |existing| Type.widen(existing, arg_ty) else arg_ty;
                    try bindings.put(type_name, bound);
                }
                break;
            }
        }

        if (has_comptime_values) {
            return self.genComptimeCall(qualified_name, fd, call_node, bindings, comptime_nodes);
        }

        // Generate mangled name
        const mangled = try self.mangleGenericName(fd.name, fd.type_params, bindings);

        // Check cache
        const callee_fn = if (self.generic_instances.get(mangled)) |cached|
            cached
        else
            try self.instantiateGeneric(fd, bindings, mangled);

        // Scratch list of argument values; LLVMBuildCall2 copies the operands.
        var arg_vals = std.ArrayList(c.LLVMValueRef).empty;
        defer arg_vals.deinit(self.allocator);
        {
            // Generate arguments with type conversion to match parameter types.
            // The bindings are installed only while the parameter types are being
            // resolved, and the previous value is put back on every exit path
            // (including errors), the same save/restore pattern genComptimeCall
            // uses for its context fields.
            const saved_bindings = self.type_param_bindings;
            self.type_param_bindings = bindings;
            defer self.type_param_bindings = saved_bindings;

            for (call_node.args, 0..) |arg, i| {
                const val = if (i < fd.params.len)
                    try self.genExprAsType(arg, self.resolveType(fd.params[i].type_expr))
                else
                    try self.genExpr(arg);
                try arg_vals.append(self.allocator, val);
            }
        }

        const fn_type = c.LLVMGlobalGetValueType(callee_fn);
        // A void call must not be given a result name (same rule as genCallByName).
        const call_name: [*c]const u8 = if (c.LLVMGetReturnType(fn_type) == c.LLVMVoidTypeInContext(self.context)) "" else "calltmp";
        return c.LLVMBuildCall2(
            self.builder,
            fn_type,
            callee_fn,
            if (arg_vals.items.len > 0) arg_vals.items.ptr else null,
            @intCast(arg_vals.items.len),
            call_name,
        );
    }

    /// Generate a call to a generic function with comptime value parameters.
    /// Instantiates the function with the specific comptime values, then delegates to genCallByName
    /// with the mangled name and adjusted args (comptime args removed).
    fn genComptimeCall(
        self: *CodeGen,
        qualified_name: []const u8,
        fd: ast.FnDecl,
        call_node: ast.Call,
        type_bindings: std.StringHashMap(Type),
        comptime_nodes: std.StringHashMap(*Node),
    ) !c.LLVMValueRef {
        // Build mangled name including comptime values (use qualified name for namespace)
        var buf = std.ArrayList(u8).empty;
        try buf.appendSlice(self.allocator, qualified_name);
        try buf.appendSlice(self.allocator, "__");
        for (fd.type_params, 0..)
|tp, i| { + if (i > 0) try buf.append(self.allocator, '_'); + const constraint_name = if (tp.constraint.data == .type_expr) tp.constraint.data.type_expr.name else ""; + if (std.mem.eql(u8, constraint_name, "Type")) { + // Type param — include type name + if (type_bindings.get(tp.name)) |ty| { + const name = try ty.displayName(self.allocator); + try buf.appendSlice(self.allocator, name); + } + } else { + // Value param — include hash of value + if (comptime_nodes.get(tp.name)) |node| { + if (node.data == .string_literal) { + const hash = std.hash.Wyhash.hash(0, node.data.string_literal.raw); + var hash_buf: [16]u8 = undefined; + const hash_str = std.fmt.bufPrint(&hash_buf, "{x}", .{hash}) catch "0"; + try buf.appendSlice(self.allocator, hash_str); + } else if (node.data == .int_literal) { + var int_buf: [20]u8 = undefined; + const int_str = std.fmt.bufPrint(&int_buf, "{d}", .{node.data.int_literal.value}) catch "0"; + try buf.appendSlice(self.allocator, int_str); + } + } + } + } + const mangled = try buf.toOwnedSlice(self.allocator); + + // Instantiate if not cached + if (!self.generic_instances.contains(mangled)) { + // Set comptime param nodes for #insert substitution + const saved_comptime_nodes = self.comptime_param_nodes; + self.comptime_param_nodes = comptime_nodes; + defer self.comptime_param_nodes = saved_comptime_nodes; + + // Set namespace context if the qualified name is namespaced (e.g. 
"std.print") + const saved_namespace = self.current_namespace; + if (std.mem.indexOfScalar(u8, qualified_name, '.')) |dot_pos| { + self.current_namespace = qualified_name[0..dot_pos]; + } + defer self.current_namespace = saved_namespace; + + _ = try self.instantiateGeneric(fd, type_bindings, mangled); + + // Register variadic info for the mangled function (adjusted for removed comptime params) + var comptime_before_variadic: u32 = 0; + for (fd.params) |param| { + if (param.is_variadic) break; + if (param.is_comptime) comptime_before_variadic += 1; + } + for (fd.params, 0..) |param, i| { + if (param.is_variadic) { + const elem_name = if (param.type_expr.data == .type_expr) param.type_expr.data.type_expr.name else "s32"; + try self.variadic_functions.put(mangled, .{ + .fixed_param_count = @intCast(i - comptime_before_variadic), + .element_type_name = elem_name, + }); + break; + } + } + } + + // Build adjusted call args (skip comptime args) + var adjusted_args = std.ArrayList(*Node).empty; + for (call_node.args, 0..) |arg, i| { + if (i < fd.params.len and fd.params[i].is_comptime) continue; + try adjusted_args.append(self.allocator, @constCast(arg)); + } + const adjusted_args_slice = try adjusted_args.toOwnedSlice(self.allocator); + + const adjusted_call = ast.Call{ + .callee = call_node.callee, + .args = adjusted_args_slice, + }; + + // Call the instantiated function through normal path (handles variadic packing etc.) + return self.genCallByName(mangled, adjusted_call); + } + + /// Generate a generic function call with runtime type dispatch. + /// For each type tag in match_tags, monomorphize the generic function and dispatch via switch. 
+ fn genGenericCallWithRuntimeDispatch( + self: *CodeGen, + template: GenericTemplate, + call_node: ast.Call, + match_tags: []const u64, + ) !c.LLVMValueRef { + const fd = template.fd; + + // Find the cast argument and extract the runtime type tag + any value source + var cast_arg_idx: usize = 0; + var type_tag_node: *Node = undefined; + var any_val_node: *Node = undefined; + for (call_node.args, 0..) |arg, i| { + if (arg.data == .call and arg.data.call.callee.data == .identifier) { + const name = arg.data.call.callee.data.identifier.name; + if ((std.mem.eql(u8, name, "cast") or std.mem.eql(u8, name, "std.cast")) and arg.data.call.args.len == 2) { + cast_arg_idx = i; + type_tag_node = arg.data.call.args[0]; + any_val_node = arg.data.call.args[1]; + break; + } + } + } + + // Generate the runtime type tag value and the Any value + const type_tag_val = try self.genExpr(type_tag_node); + const any_val = try self.genExpr(any_val_node); + // Generate non-cast arguments (evaluated once, before the switch) + var other_arg_vals = std.ArrayList(?c.LLVMValueRef).empty; + for (call_node.args, 0..) 
|arg, i| { + if (i == cast_arg_idx) { + try other_arg_vals.append(self.allocator, null); // placeholder + } else { + try other_arg_vals.append(self.allocator, try self.genExpr(arg)); + } + } + + // Extract Any value i64 BEFORE the switch (switch is a terminator, nothing can follow it in the same BB) + const any_i64 = c.LLVMBuildExtractValue(self.builder, any_val, 1, "any_payload"); + + // Build dispatch switch + const function = self.current_function; + const dispatch_merge = c.LLVMAppendBasicBlockInContext(self.context, function, "dispatch_merge"); + const dispatch_default = c.LLVMAppendBasicBlockInContext(self.context, function, "dispatch_default"); + const sw = c.LLVMBuildSwitch(self.builder, type_tag_val, dispatch_default, @intCast(match_tags.len)); + + // Determine return type from function signature + const ret_ty = if (fd.return_type) |rt| self.resolveType(rt) else Type.void_type; + // We'll use the first monomorphized function's return to determine LLVM type + var result_llvm_ty: c.LLVMTypeRef = null; + + var phi_vals = std.ArrayList(c.LLVMValueRef).empty; + var phi_bbs = std.ArrayList(c.LLVMBasicBlockRef).empty; + + for (match_tags) |tag| { + // Find the AnyTypeEntry for this tag + var entry_type: ?Type = null; + var it = self.any_type_entries.iterator(); + while (it.next()) |entry| { + if (entry.value_ptr.tag_id == tag) { + entry_type = entry.value_ptr.sx_type; + break; + } + } + const sx_type = entry_type orelse continue; + + // Create case BB + const case_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "dispatch_case"); + c.LLVMAddCase(sw, c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), tag, 0), case_bb); + c.LLVMPositionBuilderAtEnd(self.builder, case_bb); + + // Convert Any payload to the concrete type + const concrete_val = try self.extractAnyToConcreteType(any_i64, sx_type); + + // Monomorphize the generic function with this type + var bindings = std.StringHashMap(Type).init(self.allocator); + // Bind the type parameter from the 
cast argument's position + if (cast_arg_idx < fd.params.len) { + if (fd.params[cast_arg_idx].type_expr.data == .type_expr) { + const tp_name = fd.params[cast_arg_idx].type_expr.data.type_expr.name; + for (fd.type_params) |tp| { + if (std.mem.eql(u8, tp.name, tp_name)) { + try bindings.put(tp.name, sx_type); + break; + } + } + } + } + + const mangled = try self.mangleGenericName(fd.name, fd.type_params, bindings); + const callee_fn = if (self.generic_instances.get(mangled)) |cached| + cached + else + try self.instantiateGeneric(fd, bindings, mangled); + + // Build argument list + self.type_param_bindings = bindings; + var arg_vals_list = std.ArrayList(c.LLVMValueRef).empty; + for (other_arg_vals.items, 0..) |maybe_val, ai| { + if (ai == cast_arg_idx) { + // Use the converted concrete value + try arg_vals_list.append(self.allocator, concrete_val); + } else if (maybe_val) |v| { + try arg_vals_list.append(self.allocator, v); + } + } + self.type_param_bindings = null; + + const args_slice = try arg_vals_list.toOwnedSlice(self.allocator); + const fn_type = c.LLVMGlobalGetValueType(callee_fn); + const call_result = c.LLVMBuildCall2( + self.builder, + fn_type, + callee_fn, + if (args_slice.len > 0) args_slice.ptr else null, + @intCast(args_slice.len), + if (ret_ty != .void_type) "dispatch_result" else "", + ); + + if (result_llvm_ty == null and ret_ty != .void_type) { + result_llvm_ty = c.LLVMTypeOf(call_result); + } + + if (ret_ty != .void_type) { + try phi_vals.append(self.allocator, call_result); + try phi_bbs.append(self.allocator, c.LLVMGetInsertBlock(self.builder)); + } + _ = c.LLVMBuildBr(self.builder, dispatch_merge); + } + + // Default case: return undef (should not be reached) + c.LLVMPositionBuilderAtEnd(self.builder, dispatch_default); + if (ret_ty != .void_type and result_llvm_ty != null) { + try phi_vals.append(self.allocator, c.LLVMGetUndef(result_llvm_ty.?)); + try phi_bbs.append(self.allocator, dispatch_default); + } + _ = c.LLVMBuildBr(self.builder, 
dispatch_merge); + + // Merge + c.LLVMPositionBuilderAtEnd(self.builder, dispatch_merge); + if (ret_ty != .void_type and result_llvm_ty != null) { + const vals_slice = try phi_vals.toOwnedSlice(self.allocator); + const bbs_slice = try phi_bbs.toOwnedSlice(self.allocator); + const phi = c.LLVMBuildPhi(self.builder, result_llvm_ty.?, "dispatch_phi"); + c.LLVMAddIncoming(phi, vals_slice.ptr, bbs_slice.ptr, @intCast(vals_slice.len)); + return phi; + } + + return null; + } + + /// Extract a concrete typed value from an Any i64 payload. + fn extractAnyToConcreteType(self: *CodeGen, any_i64: c.LLVMValueRef, sx_type: Type) !c.LLVMValueRef { + return switch (sx_type) { + .boolean => c.LLVMBuildTrunc(self.builder, any_i64, c.LLVMInt1TypeInContext(self.context), "any_to_bool"), + .signed => |w| if (w <= 32) + c.LLVMBuildTrunc(self.builder, any_i64, c.LLVMIntTypeInContext(self.context, w), "any_to_int") + else + any_i64, + .unsigned => |w| if (w <= 32) + c.LLVMBuildTrunc(self.builder, any_i64, c.LLVMIntTypeInContext(self.context, w), "any_to_uint") + else + any_i64, + .f32 => blk: { + const as_f64 = c.LLVMBuildBitCast(self.builder, any_i64, c.LLVMDoubleTypeInContext(self.context), "i64_to_f64"); + break :blk c.LLVMBuildFPTrunc(self.builder, as_f64, c.LLVMFloatTypeInContext(self.context), "any_to_f32"); + }, + .f64 => c.LLVMBuildBitCast(self.builder, any_i64, c.LLVMDoubleTypeInContext(self.context), "any_to_f64"), + .string_type => blk: { + const ptr = c.LLVMBuildIntToPtr(self.builder, any_i64, c.LLVMPointerTypeInContext(self.context, 0), "any_to_str_ptr"); + break :blk c.LLVMBuildLoad2(self.builder, self.getStringStructType(), ptr, "any_to_str"); + }, + .struct_type => |sname| blk: { + const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct '{s}'", .{sname}); + const ptr = c.LLVMBuildIntToPtr(self.builder, any_i64, c.LLVMPointerTypeInContext(self.context, 0), "any_to_struct_ptr"); + break :blk c.LLVMBuildLoad2(self.builder, info.llvm_type, 
ptr, "any_to_struct"); + }, + .enum_type => c.LLVMBuildTrunc(self.builder, any_i64, c.LLVMInt32TypeInContext(self.context), "any_to_enum"), + .union_type => |uname| blk: { + const info = self.union_types.get(uname) orelse return self.emitErrorFmt("unknown union '{s}'", .{uname}); + const ptr = c.LLVMBuildIntToPtr(self.builder, any_i64, c.LLVMPointerTypeInContext(self.context, 0), "any_to_union_ptr"); + break :blk c.LLVMBuildLoad2(self.builder, info.llvm_type, ptr, "any_to_union"); + }, + .vector_type, .array_type => blk: { + const llvm_ty = self.typeToLLVM(sx_type); + const ptr = c.LLVMBuildIntToPtr(self.builder, any_i64, c.LLVMPointerTypeInContext(self.context, 0), "any_to_vec_ptr"); + break :blk c.LLVMBuildLoad2(self.builder, llvm_ty, ptr, "any_to_vec"); + }, + else => c.LLVMBuildTrunc(self.builder, any_i64, c.LLVMInt32TypeInContext(self.context), "any_to_default"), + }; + } + + fn mangleGenericName(self: *CodeGen, base: []const u8, type_params: []const ast.StructTypeParam, bindings: std.StringHashMap(Type)) ![]const u8 { + var buf = std.ArrayList(u8).empty; + try buf.appendSlice(self.allocator, base); + try buf.appendSlice(self.allocator, "__"); + for (type_params, 0..) 
|tp, i| { + if (i > 0) try buf.append(self.allocator, '_'); + if (bindings.get(tp.name)) |ty| { + const name = try ty.displayName(self.allocator); + try buf.appendSlice(self.allocator, name); + } + } + return try buf.toOwnedSlice(self.allocator); + } + + fn instantiateGeneric(self: *CodeGen, fd: ast.FnDecl, bindings: std.StringHashMap(Type), mangled: []const u8) !c.LLVMValueRef { + // Save current codegen state + const saved_function = self.current_function; + const saved_return_type = self.current_return_type; + const saved_insert_bb = c.LLVMGetInsertBlock(self.builder); + + // Save named_values + var saved_named_values = std.StringHashMap(NamedValue).init(self.allocator); + var nv_iter = self.named_values.iterator(); + while (nv_iter.next()) |entry| { + try saved_named_values.put(entry.key_ptr.*, entry.value_ptr.*); + } + + // Save scope_saves and defer_stack — generic body must not pollute caller's scope tracking + const saved_scope_saves = self.scope_saves; + const saved_defer_stack = self.defer_stack; + self.scope_saves = std.ArrayList(std.ArrayList(ScopeEntry)).empty; + self.defer_stack = std.ArrayList(std.ArrayList(*Node)).empty; + + // Set type param bindings + self.type_param_bindings = bindings; + defer self.type_param_bindings = null; + + // Build the specialized function type + const fn_type = try self.buildFnType(fd.params, fd.return_type, mangled); + const mangled_z = try self.allocator.dupeZ(u8, mangled); + const function = c.LLVMAddFunction(self.module, mangled_z.ptr, fn_type); + + // Cache before generating body (in case of recursion) + try self.generic_instances.put(mangled, function); + + // Generate body + self.named_values.clearRetainingCapacity(); + self.current_function = function; + + const entry_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "entry"); + c.LLVMPositionBuilderAtEnd(self.builder, entry_bb); + + // Create allocas for parameters + var llvm_param_idx: u32 = 0; + for (fd.params) |param| { + if (param.is_comptime) { + 
// Comptime param: create a constant in named_values from the call-site value + if (self.comptime_param_nodes) |cpn| { + if (cpn.get(param.name)) |node| { + if (node.data == .string_literal) { + const raw = node.data.string_literal.raw; + const inner = if (raw.len >= 2 and raw[0] == '"' and raw[raw.len - 1] == '"') + raw[1 .. raw.len - 1] + else + raw; + const unescaped = try unescapeString(self.allocator, inner); + const str_val = self.buildConstStr(unescaped); + const param_name_z = try self.allocator.dupeZ(u8, param.name); + const alloca = c.LLVMBuildAlloca(self.builder, self.getStringStructType(), param_name_z.ptr); + _ = c.LLVMBuildStore(self.builder, str_val, alloca); + try self.named_values.put(param.name, .{ .ptr = alloca, .ty = .string_type }); + } else if (node.data == .int_literal) { + const ct_sx_ty = self.resolveType(param.type_expr); + const ct_llvm_ty = self.typeToLLVM(ct_sx_ty); + const const_val = c.LLVMConstInt(ct_llvm_ty, @bitCast(node.data.int_literal.value), 0); + const param_name_z = try self.allocator.dupeZ(u8, param.name); + const alloca = c.LLVMBuildAlloca(self.builder, ct_llvm_ty, param_name_z.ptr); + _ = c.LLVMBuildStore(self.builder, const_val, alloca); + try self.named_values.put(param.name, .{ .ptr = alloca, .ty = ct_sx_ty }); + } + } + } + continue; + } + // Variadic params: use slice_type (same as genFnBodyAs) + const sx_ty = if (param.is_variadic) blk: { + const elem_name = if (param.type_expr.data == .type_expr) param.type_expr.data.type_expr.name else "s32"; + break :blk Type{ .slice_type = .{ .element_name = elem_name } }; + } else self.resolveType(param.type_expr); + const llvm_ty = self.typeToLLVM(sx_ty); + const param_name_z = try self.allocator.dupeZ(u8, param.name); + const alloca = c.LLVMBuildAlloca(self.builder, llvm_ty, param_name_z.ptr); + const param_val = c.LLVMGetParam(function, llvm_param_idx); + _ = c.LLVMBuildStore(self.builder, param_val, alloca); + try self.named_values.put(param.name, .{ .ptr = alloca, .ty = 
sx_ty }); + llvm_param_idx += 1; + } + + // Generate body statements + const body = fd.body; + if (body.data != .block) return self.emitError("generic function body must be a block"); + + const ret_sx_type = self.resolveType(fd.return_type); + self.current_return_type = ret_sx_type; + + var last_val: c.LLVMValueRef = null; + for (body.data.block.stmts) |stmt| { + last_val = try self.genStmt(stmt); + } + + // Emit return if current block has no terminator + const current_bb = c.LLVMGetInsertBlock(self.builder); + if (c.LLVMGetBasicBlockTerminator(current_bb) == null) { + if (ret_sx_type == .void_type) { + _ = c.LLVMBuildRetVoid(self.builder); + } else if (last_val) |val| { + if (ret_sx_type.isStruct()) { + const sname = ret_sx_type.struct_type; + const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname}); + const loaded = c.LLVMBuildLoad2(self.builder, info.llvm_type, val, "retval"); + _ = c.LLVMBuildRet(self.builder, loaded); + } else { + const src_ty = self.llvmTypeToSxType(c.LLVMTypeOf(val)); + const converted = self.convertValue(val, src_ty, ret_sx_type); + _ = c.LLVMBuildRet(self.builder, converted); + } + } else { + _ = c.LLVMBuildUnreachable(self.builder); + } + } + + // Restore codegen state + self.current_function = saved_function; + self.current_return_type = saved_return_type; + if (saved_insert_bb) |bb| { + c.LLVMPositionBuilderAtEnd(self.builder, bb); + } + self.named_values.clearRetainingCapacity(); + var restore_iter = saved_named_values.iterator(); + while (restore_iter.next()) |entry| { + try self.named_values.put(entry.key_ptr.*, entry.value_ptr.*); + } + saved_named_values.deinit(); + + // Restore scope_saves and defer_stack + self.scope_saves = saved_scope_saves; + self.defer_stack = saved_defer_stack; + + return function; + } + + fn genIfExpr(self: *CodeGen, if_expr: ast.IfExpr) !c.LLVMValueRef { + // Generate condition + var cond_val = try self.genExpr(if_expr.condition); + + // Ensure 
condition is i1 (bool) + const cond_type = c.LLVMTypeOf(cond_val); + const i1_type = c.LLVMInt1TypeInContext(self.context); + if (cond_type != i1_type) { + cond_val = c.LLVMBuildICmp(self.builder, c.LLVMIntNE, cond_val, c.LLVMConstInt(cond_type, 0, 0), "tobool"); + } + + const function = self.current_function; + const has_else = if_expr.else_branch != null; + + var then_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "then"); + var else_bb: c.LLVMBasicBlockRef = if (has_else) + c.LLVMAppendBasicBlockInContext(self.context, function, "else") + else + null; + const merge_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "merge"); + + const false_dest = if (has_else) else_bb else merge_bb; + _ = c.LLVMBuildCondBr(self.builder, cond_val, then_bb, false_dest); + + // Then branch + c.LLVMPositionBuilderAtEnd(self.builder, then_bb); + const then_val = try self.genExpr(if_expr.then_branch); + then_bb = c.LLVMGetInsertBlock(self.builder); // may have changed due to nested control flow + _ = c.LLVMBuildBr(self.builder, merge_bb); + + // Else branch + var else_val: c.LLVMValueRef = null; + if (if_expr.else_branch) |else_branch| { + c.LLVMPositionBuilderAtEnd(self.builder, else_bb); + else_val = try self.genExpr(else_branch); + else_bb = c.LLVMGetInsertBlock(self.builder); + _ = c.LLVMBuildBr(self.builder, merge_bb); + } + + // Merge block + c.LLVMPositionBuilderAtEnd(self.builder, merge_bb); + + // PHI node if both branches produced values + if (then_val != null and else_val != null) { + const phi = c.LLVMBuildPhi(self.builder, c.LLVMTypeOf(then_val), "iftmp"); + var vals = [2]c.LLVMValueRef{ then_val, else_val }; + var blocks = [2]c.LLVMBasicBlockRef{ then_bb, else_bb }; + c.LLVMAddIncoming(phi, &vals, &blocks, 2); + return phi; + } + + return null; + } + + fn genWhileExpr(self: *CodeGen, while_expr: ast.WhileExpr) !c.LLVMValueRef { + const function = self.current_function; + + // Create basic blocks: condition, body, after + const cond_bb = 
c.LLVMAppendBasicBlockInContext(self.context, function, "while.cond"); + const body_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "while.body"); + const after_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "while.after"); + + // Branch from current block to condition check + _ = c.LLVMBuildBr(self.builder, cond_bb); + + // Condition block + c.LLVMPositionBuilderAtEnd(self.builder, cond_bb); + var cond_val = try self.genExpr(while_expr.condition); + + // Ensure condition is i1 (bool) + const cond_type = c.LLVMTypeOf(cond_val); + const i1_type = c.LLVMInt1TypeInContext(self.context); + if (cond_type != i1_type) { + cond_val = c.LLVMBuildICmp(self.builder, c.LLVMIntNE, cond_val, c.LLVMConstInt(cond_type, 0, 0), "tobool"); + } + + _ = c.LLVMBuildCondBr(self.builder, cond_val, body_bb, after_bb); + + // Body block — save and set loop context for break/continue + c.LLVMPositionBuilderAtEnd(self.builder, body_bb); + const saved_break_bb = self.loop_break_bb; + const saved_continue_bb = self.loop_continue_bb; + self.loop_break_bb = after_bb; + self.loop_continue_bb = cond_bb; + + _ = try self.genExpr(while_expr.body); + + // Restore loop context + self.loop_break_bb = saved_break_bb; + self.loop_continue_bb = saved_continue_bb; + + // Branch back to condition (if not already terminated by break/return) + const current_bb = c.LLVMGetInsertBlock(self.builder); + if (c.LLVMGetBasicBlockTerminator(current_bb) == null) { + _ = c.LLVMBuildBr(self.builder, cond_bb); + } + + // Position at after block + c.LLVMPositionBuilderAtEnd(self.builder, after_bb); + + return null; + } + + fn genForExpr(self: *CodeGen, for_expr: ast.ForExpr) !c.LLVMValueRef { + const function = self.current_function; + const i32_type = c.LLVMInt32TypeInContext(self.context); + + // Determine iterable type and get length + element access info + const iter_ty = self.inferType(for_expr.iterable); + var len_val: c.LLVMValueRef = undefined; + var elem_ty: Type = Type.s(32); + var 
iter_ptr: c.LLVMValueRef = undefined; // pointer to data + var is_slice = false; + + if (iter_ty.isSlice()) { + is_slice = true; + const info = iter_ty.slice_type; + elem_ty = Type.fromName(info.element_name) orelse Type.s(32); + // Load slice value from alloca + if (for_expr.iterable.data == .identifier) { + if (self.named_values.get(for_expr.iterable.data.identifier.name)) |entry| { + const slice_val = c.LLVMBuildLoad2(self.builder, self.getStringStructType(), entry.ptr, "for_slice"); + iter_ptr = c.LLVMBuildExtractValue(self.builder, slice_val, 0, "for_ptr"); + len_val = c.LLVMBuildExtractValue(self.builder, slice_val, 1, "for_len"); + } else return self.emitError("for: iterable not found"); + } else return self.emitError("for: slice iterable must be a variable"); + } else if (iter_ty.isArray()) { + const info = iter_ty.array_type; + elem_ty = Type.fromName(info.element_name) orelse Type.s(32); + len_val = c.LLVMConstInt(i32_type, info.length, 0); + // Get pointer to array + if (for_expr.iterable.data == .identifier) { + if (self.named_values.get(for_expr.iterable.data.identifier.name)) |entry| { + iter_ptr = entry.ptr; + } else return self.emitError("for: iterable not found"); + } else return self.emitError("for: array iterable must be a variable"); + } else { + return self.emitError("for loop requires a slice or array iterable"); + } + + const elem_llvm_ty = self.typeToLLVM(elem_ty); + + // Allocate it_index (s32) and it (element type) + const idx_alloca = c.LLVMBuildAlloca(self.builder, i32_type, "it_index"); + _ = c.LLVMBuildStore(self.builder, c.LLVMConstInt(i32_type, 0, 0), idx_alloca); + const it_alloca = c.LLVMBuildAlloca(self.builder, elem_llvm_ty, "it"); + + // Push scope and bind it, it_index + try self.pushScope(); + try self.named_values.put("it", .{ .ptr = it_alloca, .ty = elem_ty }); + try self.named_values.put("it_index", .{ .ptr = idx_alloca, .ty = Type.s(32) }); + + // Create basic blocks + const cond_bb = 
c.LLVMAppendBasicBlockInContext(self.context, function, "for.cond"); + const body_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "for.body"); + const after_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "for.after"); + + _ = c.LLVMBuildBr(self.builder, cond_bb); + + // Condition: it_index < len + c.LLVMPositionBuilderAtEnd(self.builder, cond_bb); + const cur_idx = c.LLVMBuildLoad2(self.builder, i32_type, idx_alloca, "cur_idx"); + const cond_val = c.LLVMBuildICmp(self.builder, c.LLVMIntSLT, cur_idx, len_val, "for_cond"); + _ = c.LLVMBuildCondBr(self.builder, cond_val, body_bb, after_bb); + + // Body: load it = iterable[it_index], then execute body + c.LLVMPositionBuilderAtEnd(self.builder, body_bb); + const body_idx = c.LLVMBuildLoad2(self.builder, i32_type, idx_alloca, "body_idx"); + + if (is_slice) { + // Slice: GEP through data pointer + var gep_indices = [_]c.LLVMValueRef{body_idx}; + const gep = c.LLVMBuildGEP2(self.builder, elem_llvm_ty, iter_ptr, &gep_indices, 1, "for_elem"); + const elem_val = c.LLVMBuildLoad2(self.builder, elem_llvm_ty, gep, "it_val"); + _ = c.LLVMBuildStore(self.builder, elem_val, it_alloca); + } else { + // Array: GEP with [0, idx] + const arr_llvm_ty = self.typeToLLVM(iter_ty); + const zero = c.LLVMConstInt(i32_type, 0, 0); + var indices = [_]c.LLVMValueRef{ zero, body_idx }; + const gep = c.LLVMBuildGEP2(self.builder, arr_llvm_ty, iter_ptr, &indices, 2, "for_elem"); + const elem_val = c.LLVMBuildLoad2(self.builder, elem_llvm_ty, gep, "it_val"); + _ = c.LLVMBuildStore(self.builder, elem_val, it_alloca); + } + + // Save and set loop context for break/continue + const saved_break_bb = self.loop_break_bb; + const saved_continue_bb = self.loop_continue_bb; + self.loop_break_bb = after_bb; + self.loop_continue_bb = cond_bb; + + _ = try self.genExpr(for_expr.body); + + self.loop_break_bb = saved_break_bb; + self.loop_continue_bb = saved_continue_bb; + + // Increment it_index + const current_bb = 
c.LLVMGetInsertBlock(self.builder); + if (c.LLVMGetBasicBlockTerminator(current_bb) == null) { + const inc_idx = c.LLVMBuildLoad2(self.builder, i32_type, idx_alloca, "inc_idx"); + const next_idx = c.LLVMBuildAdd(self.builder, inc_idx, c.LLVMConstInt(i32_type, 1, 0), "next_idx"); + _ = c.LLVMBuildStore(self.builder, next_idx, idx_alloca); + _ = c.LLVMBuildBr(self.builder, cond_bb); + } + + c.LLVMPositionBuilderAtEnd(self.builder, after_bb); + + try self.popScope(); + + return null; + } + + fn genEnumLiteral(self: *CodeGen, variant_name: []const u8, enum_type_name: []const u8) c.LLVMValueRef { + const i32_type = c.LLVMInt32TypeInContext(self.context); + const variants = self.enum_types.get(enum_type_name) orelse return c.LLVMConstInt(i32_type, 0, 0); + for (variants, 0..) |v, i| { + if (std.mem.eql(u8, v, variant_name)) { + return c.LLVMConstInt(i32_type, @intCast(i), 0); + } + } + return c.LLVMConstInt(i32_type, 0, 0); + } + + fn lookupVariantIndex(variants: ?[]const []const u8, name: []const u8) u64 { + if (variants) |vs| { + for (vs, 0..) 
|v, i| { + if (std.mem.eql(u8, v, name)) return i; + } + } + return 0; + } + + fn genMatchExpr(self: *CodeGen, match: ast.MatchExpr) !c.LLVMValueRef { + // Determine subject type for enum vs union dispatch + var enum_name: ?[]const u8 = null; + var union_name: ?[]const u8 = null; + if (match.subject.data == .identifier) { + if (self.named_values.get(match.subject.data.identifier.name)) |entry| { + if (entry.ty.isEnum()) enum_name = entry.ty.enum_type; + if (entry.ty.isUnion()) union_name = entry.ty.union_type; + } + } + + // Get the switch value: for unions, load the tag from field 0; for enums, use the value directly + const subject_val: c.LLVMValueRef = if (union_name != null) blk: { + // Union: load tag from field 0 of the alloca + const entry = self.named_values.get(match.subject.data.identifier.name).?; + const info = self.union_types.get(union_name.?).?; + const tag_gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, entry.ptr, 0, "tag"); + break :blk c.LLVMBuildLoad2(self.builder, c.LLVMInt32TypeInContext(self.context), tag_gep, "tag_val"); + } else try self.genExpr(match.subject); + + const variants: ?[]const []const u8 = if (union_name) |un| + (if (self.union_types.get(un)) |info| info.variant_names else null) + else if (enum_name) |en| + self.enum_types.get(en) + else + null; + + const function = self.current_function; + const i32_type = c.LLVMInt32TypeInContext(self.context); + const merge_bb = c.LLVMAppendBasicBlockInContext(self.context, function, "match_end"); + + // Create case basic blocks + var case_bbs = std.ArrayList(c.LLVMBasicBlockRef).empty; + for (match.arms) |_| { + try case_bbs.append(self.allocator, c.LLVMAppendBasicBlockInContext(self.context, function, "case")); + } + + // Find else arm (null pattern) — use its BB as the switch default + var else_arm_idx: ?usize = null; + for (match.arms, 0..) 
|arm, i| { + if (arm.pattern == null) { + else_arm_idx = i; + break; + } + } + const default_bb = if (else_arm_idx) |idx| + case_bbs.items[idx] + else + c.LLVMAppendBasicBlockInContext(self.context, function, "match_default"); + + // Build switch instruction + const sw = c.LLVMBuildSwitch(self.builder, subject_val, default_bb, @intCast(match.arms.len)); + for (match.arms, 0..) |arm, i| { + const pat = arm.pattern orelse continue; // skip else arm + if (pat.data == .enum_literal) { + const idx = lookupVariantIndex(variants, pat.data.enum_literal.name); + const case_val = c.LLVMConstInt(i32_type, idx, 0); + c.LLVMAddCase(sw, case_val, case_bbs.items[i]); + } else if (pat.data == .type_expr) { + // Type-match: resolve type name to Any tag value(s) + const tag_values = try self.resolveTypeMatchTags(pat.data.type_expr.name); + for (tag_values) |tag| { + c.LLVMAddCase(sw, c.LLVMConstInt(i32_type, tag, 0), case_bbs.items[i]); + } + } else if (pat.data == .identifier) { + // Named type (struct/enum/union name) or category (int/float) + const tag_values = try self.resolveTypeMatchTags(pat.data.identifier.name); + for (tag_values) |tag| { + c.LLVMAddCase(sw, c.LLVMConstInt(i32_type, tag, 0), case_bbs.items[i]); + } + } + } + + // Generate arm bodies and collect PHI info + var phi_vals = std.ArrayList(c.LLVMValueRef).empty; + var phi_bbs = std.ArrayList(c.LLVMBasicBlockRef).empty; + var has_value = false; + var value_type: c.LLVMTypeRef = null; + + // Pre-collect tag values for each arm (for runtime dispatch context) + var arm_tag_values = std.ArrayList([]const u64).empty; + for (match.arms) |arm| { + const tag_values: []const u64 = if (arm.pattern) |pat| blk: { + break :blk if (pat.data == .type_expr) + try self.resolveTypeMatchTags(pat.data.type_expr.name) + else if (pat.data == .identifier) + try self.resolveTypeMatchTags(pat.data.identifier.name) + else + &.{}; + } else &.{}; + try arm_tag_values.append(self.allocator, tag_values); + } + + for (match.arms, 0..) 
|arm, i| { + c.LLVMPositionBuilderAtEnd(self.builder, case_bbs.items[i]); + if (arm.is_break) { + _ = c.LLVMBuildBr(self.builder, merge_bb); + } else if (arm.pattern != null and arm_tag_values.items[i].len == 0 and + (arm.pattern.?.data == .identifier or arm.pattern.?.data == .type_expr)) + { + // Category/type arm with no matching types — BB is unreachable, skip body + _ = c.LLVMBuildBr(self.builder, merge_bb); + } else { + // Set match arm context for runtime type dispatch + const saved_match_tags = self.current_match_tags; + self.current_match_tags = arm_tag_values.items[i]; + const val = try self.genExpr(arm.body); + self.current_match_tags = saved_match_tags; + const bb = c.LLVMGetInsertBlock(self.builder); + _ = c.LLVMBuildBr(self.builder, merge_bb); + if (val != null and c.LLVMGetTypeKind(c.LLVMTypeOf(val)) != c.LLVMVoidTypeKind) { + has_value = true; + if (value_type == null) value_type = c.LLVMTypeOf(val); + try phi_vals.append(self.allocator, val); + try phi_bbs.append(self.allocator, bb); + } + } + } + + // Default block branches to merge (only if no else arm — else arm's body already generated above) + if (else_arm_idx == null) { + c.LLVMPositionBuilderAtEnd(self.builder, default_bb); + _ = c.LLVMBuildBr(self.builder, merge_bb); + } + + // Merge block + c.LLVMPositionBuilderAtEnd(self.builder, merge_bb); + + if (has_value and value_type != null) { + const undef_val = c.LLVMGetUndef(value_type); + // Add undef entries for break arms and default block + for (match.arms, 0..) 
|arm, i| { + if (arm.is_break) { + try phi_vals.append(self.allocator, undef_val); + try phi_bbs.append(self.allocator, case_bbs.items[i]); + } + } + if (else_arm_idx == null) { + try phi_vals.append(self.allocator, undef_val); + try phi_bbs.append(self.allocator, default_bb); + } + + const vals_slice = try phi_vals.toOwnedSlice(self.allocator); + const bbs_slice = try phi_bbs.toOwnedSlice(self.allocator); + const phi = c.LLVMBuildPhi(self.builder, value_type, "matchtmp"); + c.LLVMAddIncoming(phi, vals_slice.ptr, bbs_slice.ptr, @intCast(vals_slice.len)); + return phi; + } + + return null; + } + + /// Resolve a type name to one or more Any tag values for type-switch matching. + /// Categories: "int" matches s32+s64, "float" matches f32+f64. + /// Specific types: "s32", "f64", "string", "bool", "Type". + /// Named types: struct/enum/union names get dynamic IDs. + fn resolveTypeMatchTags(self: *CodeGen, name: []const u8) ![]const u64 { + // Category aliases + if (std.mem.eql(u8, name, "int")) { + const tags = try self.allocator.alloc(u64, 2); + tags[0] = ANY_TAG_S32; + tags[1] = ANY_TAG_S64; + return tags; + } + if (std.mem.eql(u8, name, "float")) { + const tags = try self.allocator.alloc(u64, 2); + tags[0] = ANY_TAG_F32; + tags[1] = ANY_TAG_F64; + return tags; + } + // Type category aliases: "struct", "enum", "union", "vector", "array", "slice" + const category: ?TypeCategory = if (std.mem.eql(u8, name, "struct")) + .struct_cat + else if (std.mem.eql(u8, name, "enum")) + .enum_cat + else if (std.mem.eql(u8, name, "union")) + .union_cat + else if (std.mem.eql(u8, name, "vector")) + .vector_cat + else if (std.mem.eql(u8, name, "array")) + .array_cat + else if (std.mem.eql(u8, name, "slice")) + .slice_cat + else + null; + if (category) |cat| { + var tag_list = std.ArrayList(u64).empty; + var it = self.any_type_entries.iterator(); + while (it.next()) |entry| { + if (entry.value_ptr.category == cat) { + try tag_list.append(self.allocator, entry.value_ptr.tag_id); + } + } 
+ if (tag_list.items.len > 0) { + return try tag_list.toOwnedSlice(self.allocator); + } + // No types registered for this category — return empty slice + return &.{}; + } + // Specific builtin types + const single_tag: ?u64 = if (std.mem.eql(u8, name, "bool")) + ANY_TAG_BOOL + else if (std.mem.eql(u8, name, "s32")) + ANY_TAG_S32 + else if (std.mem.eql(u8, name, "s64")) + ANY_TAG_S64 + else if (std.mem.eql(u8, name, "f32")) + ANY_TAG_F32 + else if (std.mem.eql(u8, name, "f64")) + ANY_TAG_F64 + else if (std.mem.eql(u8, name, "string")) + ANY_TAG_STRING + else if (std.mem.eql(u8, name, "Type")) + ANY_TAG_TYPE + else if (std.mem.eql(u8, name, "void")) + ANY_TAG_VOID + else + null; + if (single_tag) |t| { + const tags = try self.allocator.alloc(u64, 1); + tags[0] = t; + return tags; + } + // Named type (struct/enum/union) — get dynamic ID + const sx_type: Type = if (self.struct_types.contains(name)) + .{ .struct_type = name } + else if (self.enum_types.contains(name)) + .{ .enum_type = name } + else if (self.union_types.contains(name)) + .{ .union_type = name } + else + .{ .struct_type = name }; // fallback + const id = try self.getAnyTypeId(name, sx_type); + const tags = try self.allocator.alloc(u64, 1); + tags[0] = id; + return tags; + } + + /// Resolve a callee node to a function name string for type inference. + /// Handles identifiers, namespaced calls, and intra-namespace fallback. + fn resolveCalleeName(self: *CodeGen, call_node: ast.Call) ?[]const u8 { + if (call_node.callee.data == .identifier) { + return call_node.callee.data.identifier.name; + } + if (call_node.callee.data == .field_access) { + const fa = call_node.callee.data.field_access; + if (fa.object.data == .identifier) { + if (self.namespaces.contains(fa.object.data.identifier.name)) { + return std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ fa.object.data.identifier.name, fa.field }) catch return null; + } + } + } + return null; + } + + /// Resolve a builtin parameterized type (e.g. Vector(3, f32)). 
/// Strips any namespace prefix (`ns.Vector` -> `Vector`) to get the base name, then dispatches.
/// Only `Vector(length, element_type)` is recognised. Returns null for any other name,
/// for fewer than two arguments, for a length that does not fit in `u32`, or when an
/// allocation / type-id registration fails.
fn resolveBuiltinType(self: *CodeGen, name: []const u8, args: []const *Node) ?Type {
    const base = if (std.mem.lastIndexOfScalar(u8, name, '.')) |dot| name[dot + 1 ..] else name;
    if (!std.mem.eql(u8, base, "Vector") or args.len < 2) return null;

    // Checked narrowing: a negative or oversized length yields null instead of
    // tripping the safety check (or invoking UB in ReleaseFast) that @intCast would.
    const length = std.math.cast(u32, self.resolveValueArg(args[0])) orelse return null;
    const elem = self.resolveType(args[1]);
    const elem_name = elem.displayName(self.allocator) catch return null;
    const ty: Type = .{ .vector_type = .{ .element_name = elem_name, .length = length } };

    // Pre-register in any_type_entries so runtime dispatch knows about this type.
    const any_name = std.fmt.allocPrint(self.allocator, "vec[{d}]{s}", .{ length, elem_name }) catch return null;
    _ = self.getAnyTypeId(any_name, ty) catch return null;
    return ty;
}

/// Route a builtin call to its code generator.
/// `name` may be namespace-qualified; only the part after the last '.' is matched.
/// Unknown names are reported through `emitErrorFmt` using the full, unstripped name.
fn dispatchBuiltin(self: *CodeGen, name: []const u8, call_node: ast.Call) !c.LLVMValueRef {
    const base = if (std.mem.lastIndexOfScalar(u8, name, '.')) |dot| name[dot + 1 ..] else name;

    // `write` and `alloc` receive only the argument list; the rest take the whole call node.
    if (std.mem.eql(u8, base, "write")) return self.genWriteCall(call_node.args);
    if (std.mem.eql(u8, base, "sqrt")) return self.genSqrt(call_node);
    if (std.mem.eql(u8, base, "size_of")) return self.genSizeOf(call_node);
    if (std.mem.eql(u8, base, "cast")) return self.genCast(call_node);
    if (std.mem.eql(u8, base, "alloc")) return self.genAlloc(call_node.args);
    if (std.mem.eql(u8, base, "type_of")) return self.genTypeOf(call_node);
    if (std.mem.eql(u8, base, "type_name")) return self.genTypeName(call_node);
    if (std.mem.eql(u8, base, "field_count")) return self.genFieldCount(call_node);
    if (std.mem.eql(u8, base, "field_name")) return self.genFieldName(call_node);
    if (std.mem.eql(u8, base, "field_value")) return self.genFieldValue(call_node);
    return self.emitErrorFmt("unknown builtin function '{s}'", .{name});
}

/// Emit a `printf("%.*s", len, ptr)` call for the single string argument of `write`.
/// Errors (via `emitError`) when the argument count is not exactly 1 or when
/// `self.builtins` is null. Always returns a null value: the call result is discarded.
fn genWriteCall(self: *CodeGen, args: []const *Node) !c.LLVMValueRef {
    if (args.len != 1) return self.emitError("write expects exactly 1 argument");
    const builtins = self.builtins orelse return self.emitError("builtins not available (missing #builtin import)");

    const val = try self.genExpr(args[0]);
    // The string slice aggregate carries the pointer at index 0 and the length at index 1.
    const ptr = c.LLVMBuildExtractValue(self.builder, val, 0, "str_ptr");
    const len = c.LLVMBuildExtractValue(self.builder, val, 1, "str_len");
    const fmt = c.LLVMBuildGlobalStringPtr(self.builder, "%.*s", "write_fmt");

    const printf_fn = builtins.printf_fn;
    const fn_type = c.LLVMGlobalGetValueType(printf_fn);
    // "%.*s" consumes the precision (len) before the pointer, hence this order.
    var call_args = [_]c.LLVMValueRef{ fmt, len, ptr };
    _ = c.LLVMBuildCall2(self.builder, fn_type, printf_fn, &call_args, call_args.len, "");
    return null;
}

/// Helper: build a constant string slice in the current function.
/// Emits the bytes through the builder and wraps the pointer together with `s.len`
/// using `buildStringSlice`. Panics if the temporary NUL-terminated copy cannot be allocated.
fn buildConstStr(self: *CodeGen, s: []const u8) c.LLVMValueRef {
    // Allocation can genuinely fail, so panic with a message rather than `unreachable`
    // (which would be undefined behaviour in ReleaseFast).
    const sz = self.allocator.dupeZ(u8, s) catch @panic("out of memory");
    // NOTE(review): the bytes are handed over as a C string with no length, so an
    // embedded NUL in `s` presumably truncates the emitted data while the slice
    // length below stays `s.len` — confirm whether callers can pass such strings.
    const ptr = c.LLVMBuildGlobalStringPtr(self.builder, sz.ptr, "cstr");
    return self.buildStringSlice(ptr, @intCast(s.len));
}

/// Helper: build a constant string slice as a global constant (no builder needed).
/// The bytes live in a private, constant module global named `.str.<contents>`;
/// an existing global with that name is reused. Returns a constant `{ptr, i32}`
/// struct holding the global's address and `s.len`.
/// Panics if the global's name cannot be allocated.
fn buildConstStrGlobal(self: *CodeGen, s: []const u8) c.LLVMValueRef {
    const i32_ty = c.LLVMInt32TypeInContext(self.context);
    const i8_ty = c.LLVMInt8TypeInContext(self.context);

    // The constant is built from pointer + explicit length, so `s` can be passed
    // directly; the final 0 asks LLVM to append the terminating NUL itself.
    const str_const = c.LLVMConstStringInContext(self.context, s.ptr, @intCast(s.len), 0);

    // Assemble ".str.<s>\0" in a single allocation.
    const prefix = ".str.";
    const global_name = self.allocator.allocSentinel(u8, prefix.len + s.len, 0) catch @panic("out of memory");
    @memcpy(global_name[0..prefix.len], prefix);
    @memcpy(global_name[prefix.len..], s);

    // The name reaches LLVM as a C string, i.e. cut at the first NUL byte. Two
    // different payloads sharing a prefix up to an embedded NUL would therefore
    // map to the same name, so such strings skip the by-name cache lookup.
    const has_embedded_nul = std.mem.indexOfScalar(u8, s, 0) != null;
    var global: c.LLVMValueRef = if (has_embedded_nul) null else c.LLVMGetNamedGlobal(self.module, global_name.ptr);
    if (global == null) {
        // +1 for the NUL terminator appended by LLVMConstStringInContext.
        const arr_ty = c.LLVMArrayType2(i8_ty, s.len + 1);
        // Embedded-NUL payloads get an anonymous global so they never occupy a
        // truncated name that a later lookup could hit.
        global = c.LLVMAddGlobal(self.module, arr_ty, if (has_embedded_nul) "" else global_name.ptr);
        c.LLVMSetInitializer(global, str_const);
        c.LLVMSetGlobalConstant(global, 1);
        c.LLVMSetLinkage(global, c.LLVMPrivateLinkage);
    }

    // Build constant struct {ptr, i32}
    var fields = [_]c.LLVMValueRef{
        c.LLVMConstBitCast(global.?, c.LLVMPointerTypeInContext(self.context, 0)),
        c.LLVMConstInt(i32_ty, s.len, 0),
    };
    return c.LLVMConstStructInContext(self.context, &fields, fields.len, 0);
}

/// Check if a node refers to a type name. Returns the raw name or null.
/// A `type_expr` node always qualifies; an `identifier` qualifies only when
/// `resolveTypeName` recognises it. The returned name is the one written in the
/// node, not the resolved display name.
fn asTypeName(self: *CodeGen, node: *const Node) ?[]const u8 {
    return switch (node.data) {
        .type_expr => |te| te.name,
        .identifier => |ident| if (self.resolveTypeName(ident.name) != null) ident.name else null,
        else => null,
    };
}

/// Resolve a type name to its display string (null-terminated) for runtime use.
+ fn resolveDisplayName(self: *CodeGen, name: []const u8) [:0]const u8 { + // Type aliases → follow to target + if (self.type_aliases.get(name)) |target| { + if (self.struct_types.get(target)) |info| + return self.allocator.dupeZ(u8, info.display_name orelse target) catch unreachable; + return self.allocator.dupeZ(u8, target) catch unreachable; + } + // Struct types → use display name + if (self.struct_types.get(name)) |info| + return self.allocator.dupeZ(u8, info.display_name orelse name) catch unreachable; + // Primitive / enum / anything else → use as-is + return self.allocator.dupeZ(u8, name) catch unreachable; + } + + /// Resolve a name to a type display string, or null if not a type. + fn resolveTypeName(self: *CodeGen, name: []const u8) ?[]const u8 { + // Type aliases (e.g. x := f64, Vec3 :: Vec(3, f32)) — follow alias first + if (self.type_aliases.get(name)) |target| { + // Check if target is a struct with a display name + if (self.struct_types.get(target)) |info| return info.display_name orelse target; + return target; + } + // Primitive types + if (Type.fromName(name) != null) return name; + // Struct types + if (self.struct_types.get(name)) |info| return info.display_name orelse name; + // Enum types + if (self.enum_types.get(name) != null) return name; + // Union types + if (self.union_types.get(name) != null) return name; + return null; + } + + fn inferType(self: *CodeGen, node: *Node) Type { + return switch (node.data) { + .int_literal => Type.s(32), + .float_literal => .f32, + .bool_literal => .boolean, + .string_literal => .string_type, + .insert_expr => .void_type, + .comptime_expr => |ct| self.inferType(ct.expr), + .binary_op => |binop| { + switch (binop.op) { + .eq, .neq, .lt, .lte, .gt, .gte, .and_op, .or_op => return .boolean, + else => { + const lhs_ty = self.inferType(binop.lhs); + const rhs_ty = self.inferType(binop.rhs); + return Type.widen(lhs_ty, rhs_ty); + }, + } + }, + .chained_comparison => return .boolean, + .identifier => |ident| { + 
if (self.named_values.get(ident.name)) |entry| return entry.ty; + if (self.comptime_globals.get(ident.name)) |ct| return ct.ty; + return Type.s(32); + }, + .if_expr => |ie| { + return self.inferType(ie.then_branch); + }, + .block => |blk| { + if (blk.stmts.len > 0) { + return self.inferType(blk.stmts[blk.stmts.len - 1]); + } + return .void_type; + }, + .union_literal => |ul| { + if (ul.union_name) |uname| return .{ .union_type = uname }; + if (self.current_return_type.isUnion()) return self.current_return_type; + return .void_type; + }, + .enum_literal => { + if (self.current_return_type.isEnum()) return self.current_return_type; + if (self.current_return_type.isUnion()) return self.current_return_type; + return .{ .enum_type = "" }; + }, + .match_expr => |me| { + for (me.arms) |arm| { + if (!arm.is_break) return self.inferType(arm.body); + } + return .void_type; + }, + .call => |call_node| { + // Check for union literal pattern: Type.variant(payload) + if (call_node.callee.data == .field_access) { + const fa = call_node.callee.data.field_access; + const obj_ty = blk: { + if (fa.object.data == .identifier) { + const name = self.type_aliases.get(fa.object.data.identifier.name) orelse fa.object.data.identifier.name; + if (self.union_types.contains(name)) break :blk Type{ .union_type = name }; + } + const ty = self.resolveType(fa.object); + if (ty.isUnion()) break :blk ty; + break :blk @as(?Type, null); + }; + if (obj_ty) |uty| return uty; + } + const callee_name = self.resolveCalleeName(call_node) orelse return Type.s(32); + const base_name = if (std.mem.lastIndexOfScalar(u8, callee_name, '.')) |idx| callee_name[idx + 1 ..] 
else callee_name; + // Built-in: sqrt returns same type as argument + if (std.mem.eql(u8, base_name, "sqrt")) { + if (call_node.args.len > 0) return self.inferType(call_node.args[0]); + return .f32; + } + // Built-in: size_of returns s32 + if (std.mem.eql(u8, base_name, "size_of")) return Type.s(32); + // Built-in: type_of returns s32 (type tag) + if (std.mem.eql(u8, base_name, "type_of")) return Type.s(32); + // Built-in: type_name returns string + if (std.mem.eql(u8, base_name, "type_name")) return .string_type; + // Built-in: field_count returns s32 + if (std.mem.eql(u8, base_name, "field_count")) return Type.s(32); + // Built-in: field_name returns string + if (std.mem.eql(u8, base_name, "field_name")) return .string_type; + // Built-in: field_value returns Any + if (std.mem.eql(u8, base_name, "field_value")) return .{ .any_type = {} }; + // Built-in: cast returns the target type (first arg) + if (std.mem.eql(u8, base_name, "cast")) { + if (call_node.args.len > 0) return self.resolveType(call_node.args[0]); + return Type.s(32); + } + // Built-in: alloc returns string + if (std.mem.eql(u8, base_name, "alloc")) return .string_type; + // Check generic templates — infer return type from widened bindings + const template = self.generic_templates.get(callee_name) orelse blk: { + // Intra-namespace fallback + if (self.current_namespace) |ns| { + const qualified = std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns, callee_name }) catch return Type.s(32); + break :blk self.generic_templates.get(qualified); + } + break :blk null; + }; + if (template) |tmpl| { + const gfd = tmpl.fd; + // Build widened type bindings from all call args + var inferred_bindings = std.StringHashMap(Type).init(self.allocator); + for (gfd.params, 0..) 
|param, pi| { + if (param.type_expr.data == .type_expr) { + for (gfd.type_params) |tp| { + if (std.mem.eql(u8, tp.name, param.type_expr.data.type_expr.name)) { + if (pi < call_node.args.len) { + const arg_ty = self.inferType(call_node.args[pi]); + if (inferred_bindings.get(tp.name)) |existing| { + inferred_bindings.put(tp.name, Type.widen(existing, arg_ty)) catch {}; + } else { + inferred_bindings.put(tp.name, arg_ty) catch {}; + } + } + break; + } + } + } + } + // Resolve return type from bindings + if (gfd.return_type) |rt| { + if (rt.data == .type_expr) { + if (inferred_bindings.get(rt.data.type_expr.name)) |bound_ty| { + return bound_ty; + } + } + // Try resolving as a concrete type (e.g. -> string, -> s32) + const resolved = self.resolveType(rt); + if (!std.meta.eql(resolved, Type.void_type)) return resolved; + } + return Type.s(32); + } + // Check non-generic LLVM functions + const callee_name_z = self.allocator.dupeZ(u8, callee_name) catch return Type.s(32); + var callee_fn_opt = c.LLVMGetNamedFunction(self.module, callee_name_z.ptr); + // Intra-namespace fallback + if (callee_fn_opt == null) { + if (self.current_namespace) |ns| { + const q = std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns, callee_name }) catch return Type.s(32); + const qz = self.allocator.dupeZ(u8, q) catch return Type.s(32); + callee_fn_opt = c.LLVMGetNamedFunction(self.module, qz.ptr); + } + } + if (callee_fn_opt) |callee_fn| { + const fn_type = c.LLVMGlobalGetValueType(callee_fn); + const ret_llvm = c.LLVMGetReturnType(fn_type); + return self.llvmTypeToSxType(ret_llvm); + } + return Type.s(32); + }, + .unary_op => |unop| { + return self.inferType(unop.operand); + }, + .field_access => |fa| { + const obj_ty = self.inferType(fa.object); + if (obj_ty == .string_type) { + if (std.mem.eql(u8, fa.field, "len")) return Type.s(32); + if (std.mem.eql(u8, fa.field, "ptr")) return .string_type; + } + if (obj_ty.isSlice()) { + if (std.mem.eql(u8, fa.field, "len")) return Type.s(32); + } + if 
(obj_ty.isAny()) { + if (std.mem.eql(u8, fa.field, "tag")) return Type.s(32); + if (std.mem.eql(u8, fa.field, "value")) return Type.s(64); + } + if (obj_ty.isVector()) { + return obj_ty.vectorElementType() orelse Type.s(32); + } + if (obj_ty.isStruct()) { + if (self.struct_types.get(obj_ty.struct_type)) |info| { + if (self.findFieldIndex(info, fa.field)) |idx| { + return info.field_types[idx]; + } + } + } + if (obj_ty.isUnion()) { + if (self.union_types.get(obj_ty.union_type)) |info| { + for (info.variant_names, 0..) |vn, i| { + if (std.mem.eql(u8, vn, fa.field)) { + return info.variant_types[i]; + } + } + } + } + return Type.s(32); + }, + .index_expr => |ie| { + const obj_ty = self.inferType(ie.object); + if (obj_ty.isVector()) { + return obj_ty.vectorElementType() orelse Type.s(32); + } + if (obj_ty.isArray()) { + return Type.fromName(obj_ty.array_type.element_name) orelse Type.s(32); + } + if (obj_ty.isSlice()) { + return obj_ty.sliceElementType() orelse Type.s(32); + } + return Type.s(32); + }, + .array_literal => |al| { + if (al.elements.len == 0) return .void_type; + const elem_ty = self.inferType(al.elements[0]); + const elem_name = elem_ty.displayName(self.allocator) catch return Type.s(32); + return .{ .array_type = .{ .element_name = elem_name, .length = @intCast(al.elements.len) } }; + }, + .while_expr, .for_expr, .break_expr, .continue_expr => .void_type, + else => Type.s(32), + }; + } + + fn exprIsFloat(self: *CodeGen, node: *Node) bool { + return self.inferType(node).isFloat(); + } + + pub fn verify(self: *CodeGen) !void { + var err_msg: [*c]u8 = null; + if (c.LLVMVerifyModule(self.module, c.LLVMReturnStatusAction, &err_msg) != 0) { + defer c.LLVMDisposeMessage(err_msg); + const msg = std.mem.span(err_msg); + return self.emitErrorFmt("LLVM verification failed: {s}", .{msg}); + } + } + + pub fn printIR(self: *CodeGen) void { + const ir = c.LLVMPrintModuleToString(self.module); + defer c.LLVMDisposeMessage(ir); + const len = std.mem.len(ir); + 
std.debug.print("{s}\n", .{ir[0..len]}); + } + + pub fn emitObject(self: *CodeGen, output_path: [*:0]const u8) !void { + llvm.initAllTargets(); + + const triple = c.LLVMGetDefaultTargetTriple(); + defer c.LLVMDisposeMessage(triple); + + var target: c.LLVMTargetRef = null; + var err_msg: [*c]u8 = null; + + if (c.LLVMGetTargetFromTriple(triple, &target, &err_msg) != 0) { + defer c.LLVMDisposeMessage(err_msg); + const msg = std.mem.span(err_msg); + return self.emitErrorFmt("failed to get target: {s}", .{msg}); + } + + const tm = c.LLVMCreateTargetMachine( + target, + triple, + "generic", + "", + c.LLVMCodeGenLevelDefault, + c.LLVMRelocPIC, + c.LLVMCodeModelDefault, + ); + defer c.LLVMDisposeTargetMachine(tm); + + c.LLVMSetTarget(self.module, triple); + + var err_msg2: [*c]u8 = null; + if (c.LLVMTargetMachineEmitToFile(tm, self.module, output_path, c.LLVMObjectFile, &err_msg2) != 0) { + defer c.LLVMDisposeMessage(err_msg2); + const msg = std.mem.span(err_msg2); + return self.emitErrorFmt("failed to emit object file: {s}", .{msg}); + } + } + + pub fn link(io: std.Io, output_obj: []const u8, output_bin: []const u8) !void { + var child = std.process.spawn(io, .{ + .argv = &.{ "cc", output_obj, "-o", output_bin }, + }) catch return error.LinkError; + const result = child.wait(io) catch return error.LinkError; + if (result != .exited) return error.LinkError; + if (result.exited != 0) return error.LinkError; + } +}; diff --git a/src/comptime.zig b/src/comptime.zig new file mode 100644 index 0000000..67bc218 --- /dev/null +++ b/src/comptime.zig @@ -0,0 +1,1753 @@ +const std = @import("std"); +const types = @import("types.zig"); +const Type = types.Type; + +/// Runtime value for comptime evaluation. +/// Replaces codegen's JitResult with richer type support. 
pub const Value = union(enum) {
    int_val: i64,
    float_val: f64,
    float32_val: f32,
    bool_val: bool,
    string_val: []const u8,
    void_val: void,
    struct_val: StructValue,
    array_val: ArrayValue,
    type_val: Type,
    function_val: FunctionVal,

    /// Payload of `struct_val`: a type name plus field names and field values.
    /// `format` pairs `field_names[i]` with `fields[i]` and tolerates a shorter name list.
    pub const StructValue = struct {
        type_name: []const u8,
        field_names: []const []const u8,
        fields: []Value,
    };

    /// Payload of `array_val`.
    pub const ArrayValue = struct {
        elements: []Value,
    };

    /// Payload of `function_val`: a function referenced by name.
    pub const FunctionVal = struct {
        name: []const u8,
        param_count: u8,
    };

    /// True only for `int_val` (booleans are not counted, unlike in `asInt`).
    pub fn isInt(self: Value) bool {
        return self == .int_val;
    }

    /// True for either float width.
    pub fn isFloat(self: Value) bool {
        return switch (self) {
            .float_val, .float32_val => true,
            else => false,
        };
    }

    /// Integer view of the value: ints as-is, booleans as 1/0, anything else null.
    pub fn asInt(self: Value) ?i64 {
        return switch (self) {
            .int_val => |v| v,
            .bool_val => |v| if (v) @as(i64, 1) else 0,
            else => null,
        };
    }

    /// `f64` view of the value: floats widened, ints converted, anything else null.
    pub fn asFloat(self: Value) ?f64 {
        return switch (self) {
            .float_val => |v| v,
            .float32_val => |v| @floatCast(v),
            .int_val => |v| @floatFromInt(v),
            else => null,
        };
    }

    /// Render the value as text allocated from `allocator`.
    /// Structs print as `Name{field: value, ...}` and arrays as `[a, b, ...]`,
    /// recursing into nested values.
    /// NOTE(review): the strings produced for nested elements are not freed here —
    /// presumably callers pass an arena; confirm before using a leak-checking allocator.
    pub fn format(self: Value, allocator: std.mem.Allocator) ![]const u8 {
        return switch (self) {
            .int_val => |v| std.fmt.allocPrint(allocator, "{d}", .{v}),
            .float_val => |v| std.fmt.allocPrint(allocator, "{d}", .{v}),
            .float32_val => |v| std.fmt.allocPrint(allocator, "{d}", .{v}),
            .bool_val => |v| if (v) allocator.dupe(u8, "true") else allocator.dupe(u8, "false"),
            .string_val => |v| allocator.dupe(u8, v),
            .void_val => allocator.dupe(u8, "void"),
            .type_val => |v| v.displayName(allocator),
            // The format string must consume its argument: std.fmt rejects an
            // unused argument at compile time.
            .function_val => |v| std.fmt.allocPrint(allocator, "<fn {s}>", .{v.name}),
            .struct_val => |v| {
                var buf: std.ArrayList(u8) = .empty;
                // Release the partial buffer if any append below fails.
                errdefer buf.deinit(allocator);
                try buf.appendSlice(allocator, v.type_name);
                try buf.append(allocator, '{');
                for (v.fields, 0..) |field_value, i| {
                    if (i > 0) try buf.appendSlice(allocator, ", ");
                    if (i < v.field_names.len) {
                        try buf.appendSlice(allocator, v.field_names[i]);
                        try buf.appendSlice(allocator, ": ");
                    }
                    const rendered = try field_value.format(allocator);
                    try buf.appendSlice(allocator, rendered);
                }
                try buf.append(allocator, '}');
                // Hand back an exactly-sized slice the caller can own and free,
                // instead of a view into the list's over-allocated buffer.
                return buf.toOwnedSlice(allocator);
            },
            .array_val => |v| {
                var buf: std.ArrayList(u8) = .empty;
                errdefer buf.deinit(allocator);
                try buf.append(allocator, '[');
                for (v.elements, 0..) |elem, i| {
                    if (i > 0) try buf.appendSlice(allocator, ", ");
                    const rendered = try elem.format(allocator);
                    try buf.appendSlice(allocator, rendered);
                }
                try buf.append(allocator, ']');
                return buf.toOwnedSlice(allocator);
            },
        };
    }
};

/// Bytecode instruction for the comptime VM.
pub const Instruction = union(enum) {
    // Constants
    push_int: i64,
    push_float: f64,
    push_f32: f32,
    push_true,
    push_false,
    push_string: u32, // index into Chunk.strings
    push_void,
    push_type: Type,
    push_function: FnRef,

    // Local variables
    get_local: u16, // slot index in current frame
    set_local: u16,

    // Global variables (resolved lazily from root_decls)
    get_global: u32, // index into Chunk.strings for the global name

    // Arithmetic (type-dispatched at runtime via Value tag)
    add,
    sub,
    mul,
    div,
    mod,
    negate,

    // Comparison
    eq,
    neq,
    lt,
    lte,
    gt,
    gte,

    // Logic
    not,

    // Type conversion
    cast: CastInfo,

    // Control flow
    jump: i32, // relative offset
    jump_if_false: i32,
    jump_if_true: i32,
    pop,
    dup,

    // Functions
    call: CallInfo,
    call_builtin: BuiltinCall,
    ret,
    ret_void,

    // Structs
    make_struct: StructMake,
    get_field: u16,
    set_field: u16,

    // Arrays
    make_array: u32, // element count on stack
    get_index,
    set_index,

    // Strings
    concat,
    format_to_string, // convert top-of-stack value to string representation

    /// Operand of `cast`: the kind to convert to.
    pub const CastInfo = struct { to: ValueKind };
    /// Operand of `call`: callee name and number of arguments.
    pub const CallInfo = struct { func_name: []const u8, arg_count: u8 };
    /// Operand of `call_builtin`: which builtin and number of arguments.
    pub const BuiltinCall = struct { id: BuiltinId, arg_count: u8 };
    /// Operand of `make_struct`: type name, field count and field names.
    pub const StructMake = struct { type_name: []const u8, field_count: u16, field_names: []const []const u8 };
    /// Operand of `push_function`: a function referenced by name.
    pub const FnRef = struct { name: []const u8, param_count: u8 };
};

/// Target kinds for the `cast` instruction.
pub const ValueKind = enum { int, float, f32_k, bool_k, string };

/// Builtins that can be invoked through the `call_builtin` instruction.
pub const BuiltinId = enum { print, write, sqrt, size_of, cast, alloc };

/// A compiled function or expression — a flat sequence of instructions.
pub const Chunk = struct {
    code: []const Instruction,
    strings: []const []const u8, // string constant pool
    local_count: u16, // number of local variable slots
    name: []const u8, // function name (for debugging)
};

const ast = @import("ast.zig");
const Node = ast.Node;
const sema = @import("sema.zig");
const codegen_mod = @import("codegen.zig");
const llvm = @import("llvm_api.zig");

/// Compute byte size of a Type. Uses LLVM data layout via codegen if available,
/// otherwise falls back to known sizes for primitive types.
/// `void` is 0 on both paths; types the fallback does not list also report 0.
fn sizeOfType(ty: Type, cg: ?*codegen_mod.CodeGen) u64 {
    if (cg) |gen| {
        // Answer `void` directly rather than lowering it to an LLVM type.
        if (std.meta.eql(ty, Type.void_type)) return 0;
        const llvm_ty = gen.typeToLLVM(ty);
        const data_layout = llvm.c.LLVMGetModuleDataLayout(gen.module);
        return llvm.c.LLVMStoreSizeOfType(data_layout, llvm_ty);
    }
    // Fallback without codegen
    return switch (ty) {
        // Round the bit width up to whole bytes.
        .signed, .unsigned => |w| (w + 7) / 8,
        .f32 => 4,
        .f64 => 8,
        .boolean => 1,
        // NOTE(review): 8 may not match what the LLVM path reports for the
        // string slice representation — confirm.
        .string_type => 8,
        .void_type => 0,
        .enum_type => 4,
        else => 0,
    };
}

/// Compiles AST expressions into bytecode Chunks.
+pub const Compiler = struct { + allocator: std.mem.Allocator, + instructions: std.ArrayList(Instruction), + strings: std.ArrayList([]const u8), + locals: std.ArrayList(Local), + scope_depth: u16, + sema_result: ?*const sema.SemaResult, + root_decls: []const *Node, + codegen: ?*codegen_mod.CodeGen, + + // Loop context for break/continue + loop_start: ?usize = null, // instruction index of condition start (for continue) + break_patches: std.ArrayList(usize) = std.ArrayList(usize).empty, // indices of break jumps to patch + + const Local = struct { + name: []const u8, + depth: u16, + }; + + pub fn init(allocator: std.mem.Allocator, sema_result: ?*const sema.SemaResult, root_decls: []const *Node, cg: ?*codegen_mod.CodeGen) Compiler { + return .{ + .allocator = allocator, + .instructions = std.ArrayList(Instruction).empty, + .strings = std.ArrayList([]const u8).empty, + .locals = std.ArrayList(Local).empty, + .scope_depth = 0, + .sema_result = sema_result, + .root_decls = root_decls, + .codegen = cg, + }; + } + + pub fn compile(self: *Compiler, expr: *Node) !Chunk { + try self.compileNode(expr); + return .{ + .code = try self.instructions.toOwnedSlice(self.allocator), + .strings = try self.strings.toOwnedSlice(self.allocator), + .local_count = @intCast(self.locals.items.len), + .name = "", + }; + } + + pub fn compileFunction(self: *Compiler, fd: ast.FnDecl) !Chunk { + // Add params as locals + for (fd.params) |param| { + try self.locals.append(self.allocator, .{ .name = param.name, .depth = self.scope_depth }); + } + try self.compileNode(fd.body); + // Ensure there's a return at the end. + // If the function has a return type, emit `ret` (implicit return of last value). + // Otherwise emit `ret_void`. 
+ const code = self.instructions.items; + if (code.len == 0 or (code[code.len - 1] != .ret and code[code.len - 1] != .ret_void)) { + const has_return_type = fd.return_type != null; + if (has_return_type) { + try self.emit(.ret); + } else { + try self.emit(.ret_void); + } + } + return .{ + .code = try self.instructions.toOwnedSlice(self.allocator), + .strings = try self.strings.toOwnedSlice(self.allocator), + .local_count = @intCast(self.locals.items.len), + .name = fd.name, + }; + } + + fn emit(self: *Compiler, instruction: Instruction) !void { + try self.instructions.append(self.allocator, instruction); + } + + fn addString(self: *Compiler, str: []const u8) !u32 { + const idx: u32 = @intCast(self.strings.items.len); + try self.strings.append(self.allocator, str); + return idx; + } + + fn resolveLocal(self: *Compiler, name: []const u8) ?u16 { + var i = self.locals.items.len; + while (i > 0) { + i -= 1; + if (std.mem.eql(u8, self.locals.items[i].name, name)) { + return @intCast(i); + } + } + return null; + } + + /// Process escape sequences in a raw string literal. + fn unescapeString(allocator: std.mem.Allocator, raw: []const u8) ![]u8 { + var result = try allocator.alloc(u8, raw.len); + var i: usize = 0; + var j: usize = 0; + while (i < raw.len) { + if (raw[i] == '\\' and i + 1 < raw.len) { + i += 1; + switch (raw[i]) { + 'n' => result[j] = '\n', + 't' => result[j] = '\t', + 'r' => result[j] = '\r', + '\\' => result[j] = '\\', + '"' => result[j] = '"', + '0' => result[j] = 0, + else => result[j] = raw[i], + } + j += 1; + i += 1; + } else { + result[j] = raw[i]; + j += 1; + i += 1; + } + } + return result[0..j]; + } + + /// Compile a string literal with escape sequences and interpolation support. + /// Handles `{expr}` patterns by parsing and compiling the inner expressions, + /// then concatenating all segments together. 
+ /// + /// Strategy: emit each segment in order, and after each additional segment + /// (from the second one onward), emit a concat instruction to merge it with + /// the accumulated result so far. + fn compileStringLiteral(self: *Compiler, raw: []const u8) !void { + // String literals are plain text — {} is NOT interpolated here. + // String interpolation is handled by print() at the call site. + const unescaped = try unescapeString(self.allocator, raw); + const idx = try self.addString(unescaped); + try self.emit(.{ .push_string = idx }); + } + + fn compileNode(self: *Compiler, node: *Node) anyerror!void { + switch (node.data) { + .int_literal => |lit| { + try self.emit(.{ .push_int = lit.value }); + }, + .float_literal => |lit| { + try self.emit(.{ .push_float = lit.value }); + }, + .bool_literal => |lit| { + try self.emit(if (lit.value) .push_true else .push_false); + }, + .string_literal => |lit| { + try self.compileStringLiteral(lit.raw); + }, + .identifier => |ident| { + if (self.resolveLocal(ident.name)) |slot| { + try self.emit(.{ .get_local = slot }); + } else { + // Not a local — emit get_global to resolve lazily at runtime + const idx = try self.addString(ident.name); + try self.emit(.{ .get_global = idx }); + } + }, + .binary_op => |binop| { + if (binop.op == .and_op) { + // Short-circuit AND: LHS, dup, jump_if_false +N, pop, RHS + try self.compileNode(binop.lhs); + try self.emit(.dup); + const jump_idx = self.instructions.items.len; + try self.emit(.{ .jump_if_false = 0 }); + try self.emit(.pop); + try self.compileNode(binop.rhs); + self.instructions.items[jump_idx] = .{ + .jump_if_false = @intCast(@as(i64, @intCast(self.instructions.items.len)) - @as(i64, @intCast(jump_idx)) - 1), + }; + } else if (binop.op == .or_op) { + // Short-circuit OR: LHS, dup, jump_if_true +N, pop, RHS + try self.compileNode(binop.lhs); + try self.emit(.dup); + const jump_idx = self.instructions.items.len; + try self.emit(.{ .jump_if_true = 0 }); + try self.emit(.pop); + 
try self.compileNode(binop.rhs); + self.instructions.items[jump_idx] = .{ + .jump_if_true = @intCast(@as(i64, @intCast(self.instructions.items.len)) - @as(i64, @intCast(jump_idx)) - 1), + }; + } else { + try self.compileNode(binop.lhs); + try self.compileNode(binop.rhs); + try self.emit(switch (binop.op) { + .add => .add, + .sub => .sub, + .mul => .mul, + .div => .div, + .mod => .mod, + .eq => .eq, + .neq => .neq, + .lt => .lt, + .lte => .lte, + .gt => .gt, + .gte => .gte, + .and_op, .or_op => unreachable, + }); + } + }, + .chained_comparison => |chain| { + // Compile first pair + try self.compileNode(chain.operands[0]); + try self.compileNode(chain.operands[1]); + try self.emit(switch (chain.ops[0]) { + .lt => .lt, + .lte => .lte, + .gt => .gt, + .gte => .gte, + .eq => .eq, + .neq => .neq, + else => unreachable, + }); + // For each subsequent pair, short-circuit AND + var i: usize = 1; + while (i < chain.ops.len) : (i += 1) { + try self.emit(.dup); + const jump_idx = self.instructions.items.len; + try self.emit(.{ .jump_if_false = 0 }); + try self.emit(.pop); + try self.compileNode(chain.operands[i]); + try self.compileNode(chain.operands[i + 1]); + try self.emit(switch (chain.ops[i]) { + .lt => .lt, + .lte => .lte, + .gt => .gt, + .gte => .gte, + .eq => .eq, + .neq => .neq, + else => unreachable, + }); + self.instructions.items[jump_idx] = .{ + .jump_if_false = @intCast(@as(i64, @intCast(self.instructions.items.len)) - @as(i64, @intCast(jump_idx)) - 1), + }; + } + }, + .unary_op => |unop| { + try self.compileNode(unop.operand); + switch (unop.op) { + .negate => try self.emit(.negate), + .not => try self.emit(.not), + .xx => {}, // cast — handle later + } + }, + .comptime_expr => |ct| { + try self.compileNode(ct.expr); + }, + .block => |blk| { + self.scope_depth += 1; + const scope_start = self.locals.items.len; + for (blk.stmts) |stmt| { + try self.compileNode(stmt); + } + // Pop locals from this scope + while (self.locals.items.len > scope_start) { + _ = 
self.locals.pop(); + } + self.scope_depth -= 1; + }, + .var_decl => |vd| { + if (vd.value) |val| { + try self.compileNode(val); + } else { + try self.emit(.push_void); + } + const slot: u16 = @intCast(self.locals.items.len); + try self.locals.append(self.allocator, .{ .name = vd.name, .depth = self.scope_depth }); + try self.emit(.{ .set_local = slot }); + }, + .const_decl => |cd| { + try self.compileNode(cd.value); + const slot: u16 = @intCast(self.locals.items.len); + try self.locals.append(self.allocator, .{ .name = cd.name, .depth = self.scope_depth }); + try self.emit(.{ .set_local = slot }); + }, + .assignment => |asgn| { + if (asgn.target.data == .identifier) { + if (self.resolveLocal(asgn.target.data.identifier.name)) |slot| { + if (asgn.op != .assign) { + // Compound assignment: get current value, compile RHS, apply op, set + try self.emit(.{ .get_local = slot }); + try self.compileNode(asgn.value); + try self.emit(switch (asgn.op) { + .add_assign => .add, + .sub_assign => .sub, + .mul_assign => .mul, + .div_assign => .div, + .mod_assign => .mod, + .assign => unreachable, + }); + } else { + try self.compileNode(asgn.value); + } + try self.emit(.{ .set_local = slot }); + } else { + return error.UndefinedVariable; + } + } else if (asgn.target.data == .index_expr) { + // arr[i] = val → push arr, push idx, push val, set_index + const ie = asgn.target.data.index_expr; + try self.compileNode(ie.object); + try self.compileNode(ie.index); + if (asgn.op != .assign) { + // Compound: get current, apply op with RHS + try self.emit(.dup); // dup index + // We need the array and index for both get and set + // Stack: arr, idx — but we need arr[idx] for the compound op + // Simpler: just support simple assign for index targets + return error.UnsupportedExpression; + } + try self.compileNode(asgn.value); + try self.emit(.set_index); + // set_index pushes the modified container back; store it back if it's a local + if (ie.object.data == .identifier) { + if 
(self.resolveLocal(ie.object.data.identifier.name)) |slot| { + try self.emit(.{ .set_local = slot }); + } + } + } + }, + .return_stmt => |rs| { + if (rs.value) |val| { + try self.compileNode(val); + try self.emit(.ret); + } else { + try self.emit(.ret_void); + } + }, + .if_expr => |ie| { + try self.compileNode(ie.condition); + const jump_false_idx = self.instructions.items.len; + try self.emit(.{ .jump_if_false = 0 }); // placeholder + try self.compileNode(ie.then_branch); + if (ie.else_branch) |eb| { + const jump_end_idx = self.instructions.items.len; + try self.emit(.{ .jump = 0 }); // placeholder + // Patch jump_if_false to here + self.instructions.items[jump_false_idx] = .{ .jump_if_false = @intCast(self.instructions.items.len - jump_false_idx - 1) }; + try self.compileNode(eb); + // Patch jump to end + self.instructions.items[jump_end_idx] = .{ .jump = @intCast(self.instructions.items.len - jump_end_idx - 1) }; + } else { + // Patch jump_if_false to here + self.instructions.items[jump_false_idx] = .{ .jump_if_false = @intCast(self.instructions.items.len - jump_false_idx - 1) }; + } + }, + .call => |call_node| { + // Compile arguments + for (call_node.args) |arg| { + try self.compileNode(arg); + } + // Resolve callee name + const callee_name = if (call_node.callee.data == .identifier) + call_node.callee.data.identifier.name + else if (call_node.callee.data == .field_access) blk: { + const fa = call_node.callee.data.field_access; + if (fa.object.data == .identifier) { + break :blk fa.field; + } + break :blk null; + } else null; + + if (callee_name) |name| { + // Check if it's a builtin + const base = if (std.mem.lastIndexOfScalar(u8, name, '.')) |idx| name[idx + 1 ..] 
else name; + if (std.mem.eql(u8, base, "print")) { + try self.emit(.{ .call_builtin = .{ .id = .print, .arg_count = @intCast(call_node.args.len) } }); + } else if (std.mem.eql(u8, base, "write")) { + try self.emit(.{ .call_builtin = .{ .id = .write, .arg_count = @intCast(call_node.args.len) } }); + } else if (std.mem.eql(u8, base, "sqrt")) { + try self.emit(.{ .call_builtin = .{ .id = .sqrt, .arg_count = @intCast(call_node.args.len) } }); + } else if (std.mem.eql(u8, base, "size_of")) { + try self.emit(.{ .call_builtin = .{ .id = .size_of, .arg_count = @intCast(call_node.args.len) } }); + } else if (std.mem.eql(u8, base, "cast")) { + try self.emit(.{ .call_builtin = .{ .id = .cast, .arg_count = @intCast(call_node.args.len) } }); + } else if (std.mem.eql(u8, base, "alloc")) { + try self.emit(.{ .call_builtin = .{ .id = .alloc, .arg_count = @intCast(call_node.args.len) } }); + } else { + try self.emit(.{ .call = .{ .func_name = name, .arg_count = @intCast(call_node.args.len) } }); + } + } else { + return error.InvalidCallee; + } + }, + .match_expr => |me| { + try self.compileNode(me.subject); + var end_jumps = std.ArrayList(usize).empty; + for (me.arms) |arm| { + if (arm.pattern) |pattern| { + try self.emit(.dup); // duplicate subject for comparison + try self.compileNode(pattern); + try self.emit(.eq); + const jump_next_idx = self.instructions.items.len; + try self.emit(.{ .jump_if_false = 0 }); // placeholder + try self.emit(.pop); // pop the subject copy + try self.compileNode(arm.body); + try end_jumps.append(self.allocator, self.instructions.items.len); + try self.emit(.{ .jump = 0 }); // placeholder jump to end + // Patch jump_if_false + self.instructions.items[jump_next_idx] = .{ .jump_if_false = @intCast(self.instructions.items.len - jump_next_idx - 1) }; + } else { + // else arm: unconditionally execute body + try self.emit(.pop); // pop the subject copy + try self.compileNode(arm.body); + try end_jumps.append(self.allocator, self.instructions.items.len); + 
try self.emit(.{ .jump = 0 }); // placeholder jump to end + } + } + try self.emit(.pop); // pop remaining subject + // Patch all end jumps + for (end_jumps.items) |idx| { + self.instructions.items[idx] = .{ .jump = @intCast(self.instructions.items.len - idx - 1) }; + } + }, + .struct_literal => |sl| { + for (sl.field_inits) |fi| { + try self.compileNode(fi.value); + } + const name = sl.struct_name orelse ""; + const fnames = try self.allocator.alloc([]const u8, sl.field_inits.len); + for (sl.field_inits, 0..) |fi, i| { + fnames[i] = fi.name orelse ""; + } + try self.emit(.{ .make_struct = .{ .type_name = name, .field_count = @intCast(sl.field_inits.len), .field_names = fnames } }); + }, + .field_access => |fa| { + try self.compileNode(fa.object); + // Check for string field access (.len, .ptr) + if (self.sema_result) |sr| { + const obj_ty = sr.type_map.get(fa.object); + if (obj_ty != null and obj_ty.? == .string_type) { + if (std.mem.eql(u8, fa.field, "len")) { + try self.emit(.{ .get_field = 1 }); // len is field 1 in {ptr, len} + return; + } else if (std.mem.eql(u8, fa.field, "ptr")) { + try self.emit(.{ .get_field = 0 }); // ptr is field 0 + return; + } + } + } + // Look up field index from sema struct_types + if (self.sema_result) |sr| { + // Infer the object type to find the struct name + const obj_ty = sr.type_map.get(fa.object); + if (obj_ty != null and obj_ty.?.isStruct()) { + if (sr.struct_types.get(obj_ty.?.struct_type)) |info| { + for (info.field_names, 0..) 
|fname, idx| { + if (std.mem.eql(u8, fname, fa.field)) { + try self.emit(.{ .get_field = @intCast(idx) }); + return; + } + } + } + } + } + // Fallback: use field name for well-known string fields + // (sema may not have type info for nodes in imported function bodies) + if (std.mem.eql(u8, fa.field, "len")) { + try self.emit(.{ .get_field = 1 }); + } else { + try self.emit(.{ .get_field = 0 }); + } + }, + .array_literal => |al| { + for (al.elements) |elem| { + try self.compileNode(elem); + } + try self.emit(.{ .make_array = @intCast(al.elements.len) }); + }, + .index_expr => |ie| { + try self.compileNode(ie.object); + try self.compileNode(ie.index); + try self.emit(.get_index); + }, + .type_expr => |te| { + const resolved = if (self.sema_result) |sr| + sr.type_map.get(node) orelse Type.fromName(te.name) orelse .void_type + else + Type.fromName(te.name) orelse .void_type; + try self.emit(.{ .push_type = resolved }); + }, + .enum_literal => |el| { + const idx = try self.addString(el.name); + try self.emit(.{ .push_string = idx }); + }, + .while_expr => |we| { + // Save outer loop context + const saved_loop_start = self.loop_start; + const saved_break_patches = self.break_patches; + self.break_patches = std.ArrayList(usize).empty; + + // Record condition start position + const condition_start = self.instructions.items.len; + self.loop_start = condition_start; + + // Compile condition + try self.compileNode(we.condition); + + // Jump past body if false + const jump_false_idx = self.instructions.items.len; + try self.emit(.{ .jump_if_false = 0 }); // placeholder + + // Compile body + try self.compileNode(we.body); + + // Jump back to condition + const back_offset = @as(i32, @intCast(condition_start)) - @as(i32, @intCast(self.instructions.items.len)) - 1; + try self.emit(.{ .jump = back_offset }); + + // Patch jump_if_false to after the loop + const after_loop = self.instructions.items.len; + self.instructions.items[jump_false_idx] = .{ .jump_if_false = 
@intCast(after_loop - jump_false_idx - 1) }; + + // Patch all break jumps to after the loop + for (self.break_patches.items) |patch_idx| { + self.instructions.items[patch_idx] = .{ .jump = @as(i32, @intCast(after_loop)) - @as(i32, @intCast(patch_idx)) - 1 }; + } + + // Restore outer loop context + self.loop_start = saved_loop_start; + self.break_patches = saved_break_patches; + }, + .break_expr => { + // Emit placeholder jump, record for patching + try self.break_patches.append(self.allocator, self.instructions.items.len); + try self.emit(.{ .jump = 0 }); // placeholder — patched when while ends + }, + .continue_expr => { + // Jump back to condition start + const target = self.loop_start orelse return error.UnsupportedExpression; + const offset = @as(i32, @intCast(target)) - @as(i32, @intCast(self.instructions.items.len)) - 1; + try self.emit(.{ .jump = offset }); + }, + .defer_stmt => {}, // defer not meaningful in comptime + .insert_expr => {}, // handled by codegen, not VM + else => { + return error.UnsupportedExpression; + }, + } + } +}; + +/// Stack-based virtual machine for comptime bytecode execution. 
pub const VM = struct {
    /// Operand stack. Locals of a frame live at `base_slot + slot`.
    stack: [256]Value = undefined,
    /// Number of live entries in `stack`.
    sp: u16 = 0,
    /// Call frames; `frames[fp - 1]` is the frame currently executing.
    frames: [64]CallFrame = undefined,
    fp: u8 = 0,
    /// Compiled function chunks by name (by-value cache, kept for callers
    /// that inspect or pre-populate it).
    functions: std.StringHashMap(Chunk),
    /// Already-evaluated globals by name.
    globals: std.StringHashMap(Value),
    allocator: std.mem.Allocator,
    sema_result: ?*const sema.SemaResult,
    root_decls: []const *Node,
    codegen: ?*codegen_mod.CodeGen,
    /// Heap copies of the chunks in `functions`. Call frames point at these
    /// rather than into `functions`, because hash-map entry pointers are
    /// invalidated when the map grows on a later `put`.
    stable_chunks: std.StringHashMapUnmanaged(*const Chunk) = .empty,

    pub const CallFrame = struct {
        chunk: *const Chunk,
        ip: u32,
        base_slot: u16,
    };

    /// Creates a VM with empty function and global caches. Nothing is
    /// allocated until the first cache insertion.
    pub fn init(allocator: std.mem.Allocator, sema_result: ?*const sema.SemaResult, root_decls: []const *Node, cg: ?*codegen_mod.CodeGen) VM {
        return .{
            .functions = std.StringHashMap(Chunk).init(allocator),
            .globals = std.StringHashMap(Value).init(allocator),
            .allocator = allocator,
            .sema_result = sema_result,
            .root_decls = root_decls,
            .codegen = cg,
        };
    }

    /// Pushes `value`; fails with `StackOverflow` when the stack is full.
    fn push(self: *VM, value: Value) !void {
        if (self.sp >= self.stack.len) return error.StackOverflow;
        self.stack[self.sp] = value;
        self.sp += 1;
    }

    /// Pops the top value; fails with `StackUnderflow` when the stack is empty.
    fn pop(self: *VM) !Value {
        if (self.sp == 0) return error.StackUnderflow;
        self.sp -= 1;
        return self.stack[self.sp];
    }

    /// Returns the top value without removing it.
    fn peek(self: *VM) !Value {
        if (self.sp == 0) return error.StackUnderflow;
        return self.stack[self.sp - 1];
    }

    /// Truthiness used by `not` and the conditional jumps: bools are
    /// themselves, ints are true when non-zero, everything else is true.
    fn isTruthy(v: Value) bool {
        return switch (v) {
            .bool_val => |bv| bv,
            .int_val => |iv| iv != 0,
            else => true,
        };
    }

    /// Runs `chunk` as the outermost frame and returns its result.
    /// `chunk` must outlive the call.
    pub fn execute(self: *VM, chunk: *const Chunk) !Value {
        // Set up initial frame
        self.frames[0] = .{ .chunk = chunk, .ip = 0, .base_slot = 0 };
        self.fp = 1;

        return self.run();
    }

    /// Interpreter loop. Returns when the outermost frame executes `ret` /
    /// `ret_void` or runs off the end of its chunk.
    fn run(self: *VM) !Value {
        while (true) {
            const frame = &self.frames[self.fp - 1];
            if (frame.ip >= frame.chunk.code.len) {
                // End of chunk — result is top of stack, or void.
                const result: Value = if (self.sp > frame.base_slot)
                    try self.pop()
                else
                    .{ .void_val = {} };
                if (self.fp <= 1) return result;
                // A nested frame that falls off its chunk returns to its
                // caller instead of terminating the whole VM.
                self.fp -= 1;
                self.sp = frame.base_slot;
                try self.push(result);
                continue;
            }

            const instruction = frame.chunk.code[frame.ip];
            frame.ip += 1;

            switch (instruction) {
                // Constants
                .push_int => |v| try self.push(.{ .int_val = v }),
                .push_float => |v| try self.push(.{ .float_val = v }),
                .push_f32 => |v| try self.push(.{ .float32_val = v }),
                .push_true => try self.push(.{ .bool_val = true }),
                .push_false => try self.push(.{ .bool_val = false }),
                .push_string => |idx| {
                    if (idx < frame.chunk.strings.len) {
                        try self.push(.{ .string_val = frame.chunk.strings[idx] });
                    } else {
                        try self.push(.{ .string_val = "" });
                    }
                },
                .push_void => try self.push(.{ .void_val = {} }),
                .push_type => |t| try self.push(.{ .type_val = t }),
                .push_function => |fr| try self.push(.{ .function_val = .{ .name = fr.name, .param_count = fr.param_count } }),

                // Stack ops
                .pop => _ = try self.pop(),
                .dup => {
                    const v = try self.peek();
                    try self.push(v);
                },

                // Local variables
                .get_local => |slot| {
                    const abs_slot = frame.base_slot + slot;
                    if (abs_slot < self.sp) {
                        try self.push(self.stack[abs_slot]);
                    } else {
                        try self.push(.{ .void_val = {} });
                    }
                },
                .set_local => |slot| {
                    const abs_slot = frame.base_slot + slot;
                    // The grow loop below writes up to `abs_slot`; reject
                    // slots that do not fit in the fixed-size stack.
                    if (abs_slot >= self.stack.len) return error.StackOverflow;
                    const val = try self.pop();
                    // Grow stack if needed
                    while (self.sp <= abs_slot) {
                        self.stack[self.sp] = .{ .void_val = {} };
                        self.sp += 1;
                    }
                    self.stack[abs_slot] = val;
                },

                // Global variables (lazily resolved from root_decls)
                .get_global => |name_idx| {
                    const name = if (name_idx < frame.chunk.strings.len) frame.chunk.strings[name_idx] else return error.InvalidGlobal;
                    try self.push(try self.resolveGlobal(name));
                },

                // Arithmetic
                .add => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(try self.arith(a, b, .add_op));
                },
                .sub => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(try self.arith(a, b, .sub_op));
                },
                .mul => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(try self.arith(a, b, .mul_op));
                },
                .div => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(try self.arith(a, b, .div_op));
                },
                .mod => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(try self.arith(a, b, .mod_op));
                },
                .negate => {
                    const v = try self.pop();
                    try self.push(switch (v) {
                        .int_val => |i| Value{ .int_val = -i },
                        .float_val => |f| Value{ .float_val = -f },
                        .float32_val => |f| Value{ .float32_val = -f },
                        else => return error.TypeError,
                    });
                },

                // Comparison
                .eq => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(.{ .bool_val = self.valEqual(a, b) });
                },
                .neq => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(.{ .bool_val = !self.valEqual(a, b) });
                },
                .lt => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(.{ .bool_val = self.valLess(a, b) });
                },
                .lte => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(.{ .bool_val = self.valLess(a, b) or self.valEqual(a, b) });
                },
                .gt => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(.{ .bool_val = self.valLess(b, a) });
                },
                .gte => {
                    const b = try self.pop();
                    const a = try self.pop();
                    try self.push(.{ .bool_val = self.valLess(b, a) or self.valEqual(a, b) });
                },
                .not => {
                    const v = try self.pop();
                    try self.push(.{ .bool_val = !isTruthy(v) });
                },

                // Control flow (offsets are relative to the already-advanced ip)
                .jump => |offset| {
                    frame.ip = @intCast(@as(i64, frame.ip) + offset);
                },
                .jump_if_false => |offset| {
                    const v = try self.pop();
                    if (!isTruthy(v)) {
                        frame.ip = @intCast(@as(i64, frame.ip) + offset);
                    }
                },
                .jump_if_true => |offset| {
                    const v = try self.pop();
                    if (isTruthy(v)) {
                        frame.ip = @intCast(@as(i64, frame.ip) + offset);
                    }
                },

                // Functions
                .call => |ci| {
                    try self.callFunction(ci.func_name, ci.arg_count);
                },
                .call_builtin => |bi| {
                    try self.callBuiltin(bi.id, bi.arg_count);
                },
                .ret => {
                    const result = try self.pop();
                    if (self.fp <= 1) return result;
                    // Pop frame: discard the callee's args/locals, leave the result.
                    self.fp -= 1;
                    self.sp = frame.base_slot;
                    try self.push(result);
                },
                .ret_void => {
                    if (self.fp <= 1) return .{ .void_val = {} };
                    self.fp -= 1;
                    self.sp = frame.base_slot;
                    try self.push(.{ .void_val = {} });
                },

                // Structs
                .make_struct => |sm| {
                    const fields = try self.allocator.alloc(Value, sm.field_count);
                    // Field values were pushed in order, so pop them back to front.
                    var i: u16 = sm.field_count;
                    while (i > 0) {
                        i -= 1;
                        fields[i] = try self.pop();
                    }
                    try self.push(.{ .struct_val = .{ .type_name = sm.type_name, .field_names = sm.field_names, .fields = fields } });
                },
                .get_field => |idx| {
                    const obj = try self.pop();
                    if (obj == .struct_val) {
                        if (idx < obj.struct_val.fields.len) {
                            try self.push(obj.struct_val.fields[idx]);
                        } else {
                            try self.push(.{ .void_val = {} });
                        }
                    } else if (obj == .string_val) {
                        // String slice: field 0 = ptr (return string itself), field 1 = len
                        if (idx == 1) {
                            try self.push(.{ .int_val = @intCast(obj.string_val.len) });
                        } else {
                            try self.push(obj); // ptr → return string itself
                        }
                    } else {
                        return error.TypeError;
                    }
                },
                .set_field => |idx| {
                    const val = try self.pop();
                    const obj = try self.pop();
                    if (obj == .struct_val) {
                        // Out-of-range field indices are ignored.
                        if (idx < obj.struct_val.fields.len) {
                            obj.struct_val.fields[idx] = val;
                        }
                        try self.push(obj);
                    } else {
                        return error.TypeError;
                    }
                },

                // Arrays
                .make_array => |count| {
                    const elements = try self.allocator.alloc(Value, count);
                    var i: u32 = count;
                    while (i > 0) {
                        i -= 1;
                        elements[i] = try self.pop();
                    }
                    try self.push(.{ .array_val = .{ .elements = elements } });
                },
                .get_index => {
                    const idx_val = try self.pop();
                    const arr = try self.pop();
                    if (arr == .array_val) {
                        const raw = idx_val.asInt() orelse return error.TypeError;
                        // Checked cast: a negative index is out of bounds, not a crash.
                        const idx = std.math.cast(usize, raw) orelse return error.IndexOutOfBounds;
                        if (idx < arr.array_val.elements.len) {
                            try self.push(arr.array_val.elements[idx]);
                        } else {
                            return error.IndexOutOfBounds;
                        }
                    } else if (arr == .string_val) {
                        // String indexing: return byte as int
                        const raw = idx_val.asInt() orelse return error.TypeError;
                        const idx = std.math.cast(usize, raw) orelse return error.IndexOutOfBounds;
                        if (idx < arr.string_val.len) {
                            try self.push(.{ .int_val = @intCast(arr.string_val[idx]) });
                        } else {
                            return error.IndexOutOfBounds;
                        }
                    } else {
                        return error.TypeError;
                    }
                },
                .set_index => {
                    const val = try self.pop();
                    const idx_val = try self.pop();
                    const arr = try self.pop();
                    if (arr == .array_val) {
                        const raw = idx_val.asInt() orelse return error.TypeError;
                        // Out-of-range writes (including negative indices) are ignored.
                        if (std.math.cast(usize, raw)) |idx| {
                            if (idx < arr.array_val.elements.len) {
                                arr.array_val.elements[idx] = val;
                            }
                        }
                        try self.push(arr);
                    } else if (arr == .string_val) {
                        // String index assignment: mutate byte
                        const raw = idx_val.asInt() orelse return error.TypeError;
                        const byte_raw = val.asInt() orelse return error.TypeError;
                        // A value that does not fit in a byte is a type error.
                        const byte_val = std.math.cast(u8, byte_raw) orelse return error.TypeError;
                        if (std.math.cast(usize, raw)) |idx| {
                            if (idx < arr.string_val.len) {
                                // NOTE(review): writes through a const slice; this
                                // presumably relies on the string coming from the
                                // `alloc` builtin rather than a literal — confirm.
                                const mutable = @constCast(arr.string_val);
                                mutable[idx] = byte_val;
                            }
                        }
                        try self.push(arr);
                    } else {
                        return error.TypeError;
                    }
                },

                // Strings
                .concat => {
                    const b = try self.pop();
                    const a = try self.pop();
                    const sa = try a.format(self.allocator);
                    const sb = try b.format(self.allocator);
                    const result = try std.fmt.allocPrint(self.allocator, "{s}{s}", .{ sa, sb });
                    try self.push(.{ .string_val = result });
                },
                .format_to_string => {
                    const v = try self.pop();
                    const s = try v.format(self.allocator);
                    try self.push(.{ .string_val = s });
                },

                // Cast
                .cast => {
                    // TODO: implement type casting
                },
            }
        }
    }

    const ArithOp = enum { add_op, sub_op, mul_op, div_op, mod_op };

    /// Applies `op` to `a` and `b`. int×int stays int, f32×f32 stays f32,
    /// any other pair is promoted to f64 via `asFloat`, and `TypeError` is
    /// returned when a value has no float form. Integer division or
    /// remainder by zero returns `DivisionByZero`.
    fn arith(self: *VM, a: Value, b: Value, op: ArithOp) !Value {
        _ = self;
        // Both int
        if (a == .int_val and b == .int_val) {
            const ai = a.int_val;
            const bi = b.int_val;
            return .{ .int_val = switch (op) {
                .add_op => ai + bi,
                .sub_op => ai - bi,
                .mul_op => ai * bi,
                .div_op => if (bi != 0) @divTrunc(ai, bi) else return error.DivisionByZero,
                .mod_op => if (bi != 0) @rem(ai, bi) else return error.DivisionByZero,
            } };
        }

        // Both f32
        if (a == .float32_val and b == .float32_val) {
            const af = a.float32_val;
            const bf = b.float32_val;
            return .{ .float32_val = switch (op) {
                .add_op => af + bf,
                .sub_op => af - bf,
                .mul_op => af * bf,
                .div_op => af / bf,
                .mod_op => @rem(af, bf),
            } };
        }

        // Promote to f64
        const af = a.asFloat() orelse return error.TypeError;
        const bf = b.asFloat() orelse return error.TypeError;
        return .{ .float_val = switch (op) {
            .add_op => af + bf,
            .sub_op => af - bf,
            .mul_op => af * bf,
            .div_op => af / bf,
            .mod_op => @rem(af, bf),
        } };
    }

    /// Equality for ints, bools, strings (by content) and anything with a
    /// float form; every other pairing compares unequal.
    fn valEqual(self: *VM, a: Value, b: Value) bool {
        _ = self;
        if (a == .int_val and b == .int_val) return a.int_val == b.int_val;
        if (a == .bool_val and b == .bool_val) return a.bool_val == b.bool_val;
        if (a == .string_val and b == .string_val) return std.mem.eql(u8, a.string_val, b.string_val);
        // Float comparison
        const af = a.asFloat();
        const bf = b.asFloat();
        if (af != null and bf != null) return af.? == bf.?;
        return false;
    }

    /// `a < b` for ints and anything with a float form; false otherwise.
    fn valLess(self: *VM, a: Value, b: Value) bool {
        _ = self;
        if (a == .int_val and b == .int_val) return a.int_val < b.int_val;
        const af = a.asFloat();
        const bf = b.asFloat();
        if (af != null and bf != null) return af.? < bf.?;
        return false;
    }

    /// Returns the address-stable chunk cached for `name`, adopting an entry
    /// that exists only in `functions`. Returns null when nothing is cached.
    fn cachedChunk(self: *VM, name: []const u8) !?*const Chunk {
        if (self.stable_chunks.get(name)) |chunk| return chunk;
        if (self.functions.get(name)) |chunk| return try self.cacheChunk(name, chunk);
        return null;
    }

    /// Records `chunk` under `name` in both caches and returns a heap copy
    /// whose address stays valid while later functions are compiled.
    fn cacheChunk(self: *VM, name: []const u8, chunk: Chunk) !*const Chunk {
        const stable = try self.allocator.create(Chunk);
        errdefer self.allocator.destroy(stable);
        stable.* = chunk;
        try self.functions.put(name, chunk);
        try self.stable_chunks.put(self.allocator, name, stable);
        return stable;
    }

    /// Calls the function `name` with the top `arg_count` stack values as
    /// arguments, compiling it on first use from `root_decls` (top-level or
    /// one namespace deep). Fails with `UndefinedFunction` if no declaration
    /// matches.
    fn callFunction(self: *VM, name: []const u8, arg_count: u8) !void {
        // Look up chunk in cache
        if (try self.cachedChunk(name)) |chunk| {
            return self.invokeChunk(chunk, arg_count);
        }

        // On-demand compilation: find function AST in root_decls
        for (self.root_decls) |decl| {
            switch (decl.data) {
                .fn_decl => |fd| {
                    if (std.mem.eql(u8, fd.name, name)) {
                        var compiler = Compiler.init(self.allocator, self.sema_result, self.root_decls, self.codegen);
                        const chunk = try compiler.compileFunction(fd);
                        const stable = try self.cacheChunk(name, chunk);
                        return self.invokeChunk(stable, arg_count);
                    }
                },
                .namespace_decl => |ns| {
                    for (ns.decls) |d| {
                        if (d.data == .fn_decl and std.mem.eql(u8, d.data.fn_decl.name, name)) {
                            var compiler = Compiler.init(self.allocator, self.sema_result, self.root_decls, self.codegen);
                            const chunk = try compiler.compileFunction(d.data.fn_decl);
                            const stable = try self.cacheChunk(name, chunk);
                            return self.invokeChunk(stable, arg_count);
                        }
                    }
                },
                else => {},
            }
        }

        return error.UndefinedFunction;
    }

    /// Pushes a call frame for `chunk`. The top `arg_count` stack values
    /// become the callee's first local slots. `chunk` must stay valid until
    /// the frame returns.
    fn invokeChunk(self: *VM, chunk: *const Chunk, arg_count: u8) !void {
        if (self.fp >= self.frames.len) return error.StackOverflow;
        // Fewer values than declared arguments would underflow `base`.
        if (arg_count > self.sp) return error.StackUnderflow;

        // Args are on the stack. Set up new frame.
        const base = self.sp - @as(u16, arg_count);
        self.frames[self.fp] = .{ .chunk = chunk, .ip = 0, .base_slot = base };
        self.fp += 1;
    }

    /// Executes builtin `id`. Each handled branch pushes exactly one result
    /// value; output builtins write via `std.debug.print`.
    fn callBuiltin(self: *VM, id: BuiltinId, arg_count: u8) !void {
        switch (id) {
            .write => {
                // write(str) — raw string output
                if (arg_count >= 1) {
                    const val = try self.pop();
                    const str = try val.format(self.allocator);
                    std.debug.print("{s}", .{str});
                }
                try self.push(.{ .void_val = {} });
            },
            .print => {
                // print(fmt, args...) — positional {} formatting
                if (arg_count == 0) {
                    try self.push(.{ .void_val = {} });
                } else if (arg_count == 1) {
                    // Single arg: just print it
                    const val = try self.pop();
                    const str = try val.format(self.allocator);
                    std.debug.print("{s}", .{str});
                    try self.push(.{ .void_val = {} });
                } else {
                    // Pop args in reverse order (stack is LIFO)
                    var vals = std.ArrayList(Value).empty;
                    var j: u8 = 0;
                    while (j < arg_count) : (j += 1) {
                        try vals.append(self.allocator, try self.pop());
                    }
                    // vals[0] is last arg, vals[arg_count-1] is first (format string)
                    const fmt_val = vals.items[arg_count - 1];
                    const fmt_str = try fmt_val.format(self.allocator);

                    // Process format string with {} placeholders
                    var out = std.ArrayList(u8).empty;
                    var arg_idx: usize = 0;
                    var fi: usize = 0;
                    while (fi < fmt_str.len) {
                        if (fi + 1 < fmt_str.len and fmt_str[fi] == '{' and fmt_str[fi + 1] == '}') {
                            // Placeholders beyond the supplied args print nothing.
                            if (arg_idx < arg_count - 1) {
                                // vals are in reverse: vals[arg_count-2] is first value arg, vals[0] is last
                                const val_idx = arg_count - 2 - arg_idx;
                                const formatted = try vals.items[val_idx].format(self.allocator);
                                try out.appendSlice(self.allocator, formatted);
                                arg_idx += 1;
                            }
                            fi += 2;
                        } else if (fi + 1 < fmt_str.len and fmt_str[fi] == '{' and fmt_str[fi + 1] == '{') {
                            try out.append(self.allocator, '{');
                            fi += 2;
                        } else if (fi + 1 < fmt_str.len and fmt_str[fi] == '}' and fmt_str[fi + 1] == '}') {
                            try out.append(self.allocator, '}');
                            fi += 2;
                        } else {
                            try out.append(self.allocator, fmt_str[fi]);
                            fi += 1;
                        }
                    }
                    std.debug.print("{s}", .{out.items});
                    try self.push(.{ .void_val = {} });
                }
            },
            .sqrt => {
                if (arg_count >= 1) {
                    const val = try self.pop();
                    const f = val.asFloat() orelse return error.TypeError;
                    try self.push(.{ .float_val = @sqrt(f) });
                } else {
                    try self.push(.{ .float_val = 0.0 });
                }
            },
            .size_of => {
                if (arg_count >= 1) {
                    const val = try self.pop();
                    if (val == .type_val) {
                        const size = sizeOfType(val.type_val, self.codegen);
                        try self.push(.{ .int_val = @intCast(size) });
                    } else {
                        try self.push(.{ .int_val = 0 });
                    }
                } else {
                    try self.push(.{ .int_val = 0 });
                }
            },
            .cast => {
                // cast(Type, val) — explicit type conversion
                if (arg_count >= 2) {
                    const val = try self.pop(); // second arg (value)
                    const type_arg = try self.pop(); // first arg (type)
                    const target_ty: Type = if (type_arg == .type_val) type_arg.type_val else .void_type;
                    // Convert based on target type
                    if (target_ty.isFloat()) {
                        // Target is float — convert from int or other float
                        switch (val) {
                            .int_val => |v| try self.push(.{ .float_val = @floatFromInt(v) }),
                            .float32_val => |v| try self.push(.{ .float_val = @as(f64, v) }),
                            .float_val => try self.push(val),
                            else => try self.push(val),
                        }
                    } else if (target_ty.isInt()) {
                        // Target is int — convert from float.
                        // NOTE(review): @intFromFloat is illegal behavior for NaN,
                        // infinities and out-of-range values; not guarded here.
                        switch (val) {
                            .float_val => |v| try self.push(.{ .int_val = @intFromFloat(v) }),
                            .float32_val => |v| try self.push(.{ .int_val = @intFromFloat(v) }),
                            .int_val => try self.push(val),
                            else => try self.push(val),
                        }
                    } else {
                        try self.push(val); // pass through
                    }
                } else {
                    try self.push(.{ .int_val = 0 });
                }
            },
            .alloc => {
                // alloc(size) — allocate zeroed byte buffer, return as string
                if (arg_count >= 1) {
                    const val = try self.pop();
                    const size: usize = if (val.asInt()) |v| @intCast(@max(0, v)) else 0;
                    const buf = try self.allocator.alloc(u8, size);
                    @memset(buf, 0);
                    try self.push(.{ .string_val = buf });
                } else {
                    try self.push(.{ .string_val = "" });
                }
            },
        }
    }

    /// Explicit error set for global resolution. Spelled out because
    /// `resolveGlobal` and `evalInFreshVM` re-enter `execute`, so their
    /// error sets cannot be inferred.
    const VMError = error{
        CompileError,
        UndefinedVariable,
        UndefinedFunction,
        InvalidGlobal,
        InvalidCallee,
        TypeError,
        StackOverflow,
        StackUnderflow,
        IndexOutOfBounds,
        DivisionByZero,
        UnsupportedExpression,
        OutOfMemory,
    };

    /// Evaluate a chunk in a fresh VM to avoid corrupting this VM's state.
    /// Evaluation failures are reported as `CompileError`; `OutOfMemory`
    /// propagates unchanged.
    fn evalInFreshVM(self: *VM, chunk: *const Chunk) VMError!Value {
        var nested_vm = VM.init(self.allocator, self.sema_result, self.root_decls, self.codegen);
        // Share the globals cache so nested evaluations see already-resolved globals
        nested_vm.globals = self.globals;
        // The nested VM may grow (reallocate) the shared map even when it
        // ultimately fails, so take the handle back on every exit path;
        // otherwise `self.globals` could point at freed storage.
        defer self.globals = nested_vm.globals;
        const result = nested_vm.execute(chunk) catch |err| switch (err) {
            error.OutOfMemory => return error.OutOfMemory,
            else => return error.CompileError,
        };
        return result;
    }

    /// Resolve a global variable by name. Checks the globals cache first,
    /// then searches root_decls for a matching const_decl/var_decl (also
    /// const_decls one namespace deep) and evaluates its initializer, then
    /// struct/enum/union declarations, then primitive type names. Fails
    /// with `UndefinedVariable` when nothing matches. A `var_decl` without
    /// an initializer yields void and is not cached.
    fn resolveGlobal(self: *VM, name: []const u8) VMError!Value {
        // Check cache first
        if (self.globals.get(name)) |val| return val;

        // Search root_decls for matching declaration
        for (self.root_decls) |decl| {
            switch (decl.data) {
                .const_decl => |cd| {
                    if (std.mem.eql(u8, cd.name, name)) {
                        var compiler = Compiler.init(self.allocator, self.sema_result, self.root_decls, self.codegen);
                        const chunk = compiler.compile(cd.value) catch return error.CompileError;
                        const result = try self.evalInFreshVM(&chunk);
                        try self.globals.put(name, result);
                        return result;
                    }
                },
                .var_decl => |vd| {
                    if (std.mem.eql(u8, vd.name, name)) {
                        if (vd.value) |val_expr| {
                            var compiler = Compiler.init(self.allocator, self.sema_result, self.root_decls, self.codegen);
                            const chunk = compiler.compile(val_expr) catch return error.CompileError;
                            const result = try self.evalInFreshVM(&chunk);
                            try self.globals.put(name, result);
                            return result;
                        }
                        return .{ .void_val = {} };
                    }
                },
                .namespace_decl => |ns| {
                    // Check inside namespace for matching declarations
                    for (ns.decls) |d| {
                        if (d.data == .const_decl and std.mem.eql(u8, d.data.const_decl.name, name)) {
                            var compiler = Compiler.init(self.allocator, self.sema_result, self.root_decls, self.codegen);
                            const chunk = compiler.compile(d.data.const_decl.value) catch return error.CompileError;
                            const result = try self.evalInFreshVM(&chunk);
                            try self.globals.put(name, result);
                            return result;
                        }
                    }
                },
                else => {},
            }
        }

        // Check for struct/enum/union type declarations
        for (self.root_decls) |decl| {
            switch (decl.data) {
                .struct_decl => |sd| {
                    if (std.mem.eql(u8, sd.name, name)) {
                        const val = Value{ .type_val = .{ .struct_type = name } };
                        try self.globals.put(name, val);
                        return val;
                    }
                },
                .enum_decl => |ed| {
                    if (std.mem.eql(u8, ed.name, name)) {
                        const val = Value{ .type_val = .{ .enum_type = name } };
                        try self.globals.put(name, val);
                        return val;
                    }
                },
                .union_decl => |ud| {
                    if (std.mem.eql(u8, ud.name, name)) {
                        const val = Value{ .type_val = .{ .union_type = name } };
                        try self.globals.put(name, val);
                        return val;
                    }
                },
                else => {},
            }
        }

        // Check if it's a primitive type name (s32, f64, bool, etc.)
        if (Type.fromName(name)) |ty| {
            const val = Value{ .type_val = ty };
            try self.globals.put(name, val);
            return val;
        }

        return error.UndefinedVariable;
    }
};
+ +test "Value: basic operations" { + const a = Value{ .int_val = 42 }; + try std.testing.expect(a.isInt()); + try std.testing.expect(!a.isFloat()); + try std.testing.expectEqual(@as(i64, 42), a.asInt().?); + try std.testing.expectEqual(@as(f64, 42.0), a.asFloat().?); + + const b = Value{ .float_val = 3.14 }; + try std.testing.expect(!b.isInt()); + try std.testing.expect(b.isFloat()); + try std.testing.expectEqual(@as(f64, 3.14), b.asFloat().?); + + const c = Value{ .bool_val = true }; + try std.testing.expectEqual(@as(i64, 1), c.asInt().?); +} + +const parser_mod = @import("parser.zig"); + +fn compileAndRun(source: [:0]const u8) !Value { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var p = parser_mod.Parser.init(alloc, source); + const expr = try p.parseExpr(); + + var compiler = Compiler.init(alloc, null, &.{}, null); + const chunk = try compiler.compile(expr); + + var vm = VM.init(alloc, null, &.{}, null); + return vm.execute(&chunk); +} + +test "VM: 2 + 3 = 5" { + const result = try compileAndRun("2 + 3"); + try std.testing.expectEqual(@as(i64, 5), result.int_val); +} + +test "VM: arithmetic operations" { + // subtraction + const r1 = try compileAndRun("10 - 3"); + try std.testing.expectEqual(@as(i64, 7), r1.int_val); + + // multiplication + const r2 = try compileAndRun("6 * 7"); + try std.testing.expectEqual(@as(i64, 42), r2.int_val); + + // division + const r3 = try compileAndRun("20 / 4"); + try std.testing.expectEqual(@as(i64, 5), r3.int_val); + + // negation + const r4 = try compileAndRun("-42"); + try std.testing.expectEqual(@as(i64, -42), r4.int_val); +} + +test "VM: comparison operations" { + const r1 = try compileAndRun("3 == 3"); + try std.testing.expectEqual(true, r1.bool_val); + + const r2 = try
compileAndRun("3 != 4"); + try std.testing.expectEqual(true, r2.bool_val); + + const r3 = try compileAndRun("2 < 5"); + try std.testing.expectEqual(true, r3.bool_val); + + const r4 = try compileAndRun("5 > 2"); + try std.testing.expectEqual(true, r4.bool_val); +} + +test "VM: boolean literals" { + const r1 = try compileAndRun("true"); + try std.testing.expectEqual(true, r1.bool_val); + + const r2 = try compileAndRun("false"); + try std.testing.expectEqual(false, r2.bool_val); + + const r3 = try compileAndRun("!false"); + try std.testing.expectEqual(true, r3.bool_val); +} + +test "VM: float arithmetic" { + const r1 = try compileAndRun("1.5 + 2.5"); + try std.testing.expectEqual(@as(f64, 4.0), r1.float_val); + + const r2 = try compileAndRun("3.0 * 2.0"); + try std.testing.expectEqual(@as(f64, 6.0), r2.float_val); +} + +test "VM: if expression" { + const r1 = try compileAndRun("if true then 1 else 2"); + try std.testing.expectEqual(@as(i64, 1), r1.int_val); + + const r2 = try compileAndRun("if false then 1 else 2"); + try std.testing.expectEqual(@as(i64, 2), r2.int_val); +} + +test "VM: block with variables" { + // Parse a block expression: { x := 5; y := x + 3; y; } + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // Parse a block as a statement sequence + var p = parser_mod.Parser.init(alloc, "{ x := 5; y := x + 3; y; }"); + const expr = try p.parseExpr(); + + var compiler = Compiler.init(alloc, null, &.{}, null); + const chunk = try compiler.compile(expr); + + var vm = VM.init(alloc, null, &.{}, null); + const result = try vm.execute(&chunk); + try std.testing.expectEqual(@as(i64, 8), result.int_val); +} + +test "VM: nested if with variables" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var p = parser_mod.Parser.init(alloc, "{ x := 10; if x > 5 then 1 else 0; }"); + const expr = try p.parseExpr(); + 
+ var compiler = Compiler.init(alloc, null, &.{}, null); + const chunk = try compiler.compile(expr); + + var vm = VM.init(alloc, null, &.{}, null); + const result = try vm.execute(&chunk); + try std.testing.expectEqual(@as(i64, 1), result.int_val); +} + +/// Helper to compile and run a full program, executing a specific expression +/// after all declarations are registered. +fn compileAndRunProgram(source: [:0]const u8, expr_source: [:0]const u8) !Value { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // Parse the full program to get root decls + var prog_parser = parser_mod.Parser.init(alloc, source); + const root = try prog_parser.parse(); + const decls = root.data.root.decls; + + // Parse the expression to evaluate + var expr_parser = parser_mod.Parser.init(alloc, expr_source); + const expr = try expr_parser.parseExpr(); + + var compiler = Compiler.init(alloc, null, decls, null); + const chunk = try compiler.compile(expr); + + var vm = VM.init(alloc, null, decls, null); + return vm.execute(&chunk); +} + +test "VM: function call" { + const result = try compileAndRunProgram( + "add :: (a: s32, b: s32) -> s32 { a + b; }", + "add(2, 3)", + ); + try std.testing.expectEqual(@as(i64, 5), result.int_val); +} + +test "VM: nested function calls" { + const result = try compileAndRunProgram( + "double :: (x: s32) -> s32 { x * 2; } quad :: (x: s32) -> s32 { double(double(x)); }", + "quad(3)", + ); + try std.testing.expectEqual(@as(i64, 12), result.int_val); +} + +test "VM: match expression" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // Match on integer value + var p = parser_mod.Parser.init(alloc, "{ x := 2; if x == { case 1: 10; case 2: 20; case 3: 30; } }"); + const expr = try p.parseExpr(); + + var compiler = Compiler.init(alloc, null, &.{}, null); + const chunk = try compiler.compile(expr); + + var vm 
= VM.init(alloc, null, &.{}, null); + const result = try vm.execute(&chunk); + try std.testing.expectEqual(@as(i64, 20), result.int_val); +} + +test "VM: builtin sqrt" { + const result = try compileAndRun("sqrt(16.0)"); + try std.testing.expectEqual(@as(f64, 4.0), result.float_val); +} + +test "VM: struct literal and field access" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // Manually build a chunk that creates a struct and reads field 1 + const code = [_]Instruction{ + .{ .push_int = 10 }, // field 0: x + .{ .push_int = 20 }, // field 1: y + .{ .make_struct = .{ .type_name = "Point", .field_count = 2, .field_names = &.{ "x", "y" } } }, + .{ .get_field = 1 }, // get y + }; + const chunk = Chunk{ + .code = &code, + .strings = &.{}, + .local_count = 0, + .name = "", + }; + + var vm = VM.init(alloc, null, &.{}, null); + const result = try vm.execute(&chunk); + try std.testing.expectEqual(@as(i64, 20), result.int_val); +} + +test "VM: array literal and index" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // Manually build: make_array([10, 20, 30]), get_index(1) + const code = [_]Instruction{ + .{ .push_int = 10 }, + .{ .push_int = 20 }, + .{ .push_int = 30 }, + .{ .make_array = 3 }, + .{ .push_int = 1 }, // index + .get_index, + }; + const chunk = Chunk{ + .code = &code, + .strings = &.{}, + .local_count = 0, + .name = "", + }; + + var vm = VM.init(alloc, null, &.{}, null); + const result = try vm.execute(&chunk); + try std.testing.expectEqual(@as(i64, 20), result.int_val); +} + +test "VM: string concat" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const strings = [_][]const u8{ "hello ", "world" }; + const code = [_]Instruction{ + .{ .push_string = 0 }, + .{ .push_string = 1 }, + .concat, + }; + const chunk = 
Chunk{ + .code = &code, + .strings = &strings, + .local_count = 0, + .name = "", + }; + + var vm = VM.init(alloc, null, &.{}, null); + const result = try vm.execute(&chunk); + try std.testing.expectEqualStrings("hello world", result.string_val); +} + +test "VM: type value" { + const result = try compileAndRun("f64"); + try std.testing.expect(result == .type_val); +} + +test "VM: function with return statement" { + const result = try compileAndRunProgram( + "compute :: (x: s32) -> s32 { return x * x; }", + "compute(6)", + ); + try std.testing.expectEqual(@as(i64, 36), result.int_val); +} diff --git a/src/core.zig b/src/core.zig new file mode 100644 index 0000000..edd6756 --- /dev/null +++ b/src/core.zig @@ -0,0 +1,110 @@ +const std = @import("std"); +const ast = @import("ast.zig"); +const parser = @import("parser.zig"); +const imports = @import("imports.zig"); +const sema = @import("sema.zig"); +const codegen = @import("codegen.zig"); +const errors = @import("errors.zig"); +const Node = ast.Node; + +pub const Compilation = struct { + allocator: std.mem.Allocator, + io: std.Io, + file_path: []const u8, + source: [:0]const u8, + diagnostics: errors.DiagnosticList, + + // Pipeline results + root: ?*Node = null, + resolved_root: ?*Node = null, + import_sources: std.StringHashMap([:0]const u8), + sema_result: ?sema.SemaResult = null, + cg: ?codegen.CodeGen = null, + + pub fn init(allocator: std.mem.Allocator, io: std.Io, file_path: []const u8, source: [:0]const u8) Compilation { + return .{ + .allocator = allocator, + .io = io, + .file_path = file_path, + .source = source, + .diagnostics = errors.DiagnosticList.init(allocator, source, file_path), + .import_sources = std.StringHashMap([:0]const u8).init(allocator), + }; + } + + pub fn deinit(self: *Compilation) void { + if (self.cg) |*cg| cg.deinit(); + self.diagnostics.deinit(); + } + + pub fn parse(self: *Compilation) !void { + var p = parser.Parser.init(self.allocator, self.source); + p.diagnostics = &self.diagnostics; 
+ self.root = p.parse() catch return error.CompileError; + } + + pub fn resolveImports(self: *Compilation) !void { + const root = self.root orelse return error.CompileError; + var chain = std.StringHashMap(void).init(self.allocator); + var cache = imports.ModuleCache.init(self.allocator); + const base_dir = imports.dirName(self.file_path); + const mod = imports.resolveImports( + self.allocator, + self.io, + root, + base_dir, + self.file_path, + &chain, + &cache, + &self.import_sources, + &self.diagnostics, + ) catch return error.CompileError; + + // Build a root node from the resolved module's decls + const new_root = try self.allocator.create(Node); + new_root.* = .{ + .span = root.span, + .data = .{ .root = .{ .decls = mod.decls } }, + }; + self.resolved_root = new_root; + } + + pub fn analyze(self: *Compilation) !void { + const root = self.resolved_root orelse self.root orelse return error.CompileError; + var analyzer = sema.Analyzer.init(self.allocator); + self.sema_result = analyzer.analyze(root) catch return error.CompileError; + // Merge sema diagnostics into our list + if (self.sema_result) |sr| { + for (sr.diagnostics) |d| { + self.diagnostics.add(d.level, d.message, d.span); + } + } + } + + pub fn generateCode(self: *Compilation) !void { + const root = self.resolved_root orelse self.root orelse return error.CompileError; + var cg = codegen.CodeGen.init(self.allocator, "sx_module"); + cg.diagnostics = &self.diagnostics; + if (self.sema_result) |*sr| { + cg.sema_result = sr; + } + cg.generate(root) catch return error.CompileError; + self.cg = cg; + } + + pub fn renderErrors(self: *const Compilation) void { + for (self.diagnostics.items.items) |d| { + const level_str = switch (d.level) { + .err => "error", + .warn => "warning", + .note => "note", + }; + if (d.span) |span| { + const loc = errors.SourceLoc.compute(self.source, span.start); + std.debug.print("{s}:{d}:{d}: {s}: {s}\n", .{ self.file_path, loc.line, loc.col, level_str, d.message }); + } else { + 
/// A 1-based line/column position inside a source buffer.
pub const SourceLoc = struct {
    line: u32,
    col: u32,

    /// Convert `byte_offset` into a line/column pair by scanning `source`
    /// from its first byte.
    ///
    /// Lines and columns both start at 1. Every '\n' before the offset
    /// starts a new line and resets the column; every other byte advances
    /// the column by one (columns therefore count bytes, not code points).
    ///
    /// An offset beyond `source.len` is clamped to the end of the buffer, so
    /// a span that does not belong to `source` yields the end-of-buffer
    /// position instead of an out-of-bounds slice.
    pub fn compute(source: []const u8, byte_offset: u32) SourceLoc {
        // Clamp first: slicing past `source.len` is a panic in safe builds
        // and undefined behaviour in ReleaseFast.
        const end = @min(@as(usize, byte_offset), source.len);

        var line: u32 = 1;
        var col: u32 = 1;
        for (source[0..end]) |c| {
            if (c == '\n') {
                line += 1;
                col = 1;
            } else {
                col += 1;
            }
        }
        return .{ .line = line, .col = col };
    }
};
/// Return the directory part of `path`: everything before its last '/'.
///
/// When `path` contains no '/', the result is ".". The returned slice
/// otherwise aliases `path` and excludes the separator itself, so a path
/// whose only '/' is the leading one (e.g. "/main.sx") yields "".
pub fn dirName(path: []const u8) []const u8 {
    // Walk backwards so the first separator found is the last one in the path.
    var i = path.len;
    while (i > 0) {
        i -= 1;
        if (path[i] == '/') return path[0..i];
    }
    return ".";
}
+ pub fn mergeFlat(self: *ResolvedModule, allocator: std.mem.Allocator, list: *std.ArrayList(*Node), other: ResolvedModule) !void { + for (other.decls) |decl| { + _ = try self.addDecl(allocator, list, decl); + } + } + + /// Add another module as a namespaced import. + pub fn addNamespace(self: *ResolvedModule, allocator: std.mem.Allocator, list: *std.ArrayList(*Node), name: []const u8, other: ResolvedModule, span: ast.Span) !void { + const ns_node = try allocator.create(Node); + ns_node.* = .{ + .span = span, + .data = .{ .namespace_decl = .{ + .name = name, + .decls = other.decls, + } }, + }; + try self.scope.put(name, {}); + try list.append(allocator, ns_node); + } + + pub fn finalize(self: *ResolvedModule, allocator: std.mem.Allocator, list: *std.ArrayList(*Node)) !void { + self.decls = try list.toOwnedSlice(allocator); + } +}; + +/// Module cache: maps resolved file paths to their ResolvedModules. +pub const ModuleCache = std.StringHashMap(ResolvedModule); + +pub fn resolveImports( + allocator: std.mem.Allocator, + io: std.Io, + root: *Node, + base_dir: []const u8, + file_path: []const u8, + chain: *std.StringHashMap(void), + cache: *ModuleCache, + source_map: ?*std.StringHashMap([:0]const u8), + diagnostics: ?*errors.DiagnosticList, +) !ResolvedModule { + var mod = ResolvedModule{ + .path = file_path, + .decls = &.{}, + .scope = std.StringHashMap(void).init(allocator), + }; + + if (root.data != .root) { + mod.decls = &.{}; + return mod; + } + + var decl_list = std.ArrayList(*Node).empty; + + for (root.data.root.decls) |decl| { + if (decl.data != .import_decl) { + _ = try mod.addDecl(allocator, &decl_list, decl); + continue; + } + const imp = decl.data.import_decl; + + // Resolve path relative to base_dir + const resolved_path = if (std.mem.eql(u8, base_dir, ".")) + imp.path + else + try std.fmt.allocPrint(allocator, "{s}/{s}", .{ base_dir, imp.path }); + + // Circular import check — only along the current chain + if (chain.contains(resolved_path)) continue; + 
/// Hand-written tokenizer over a NUL-terminated source buffer.
///
/// `next` returns one token per call and keeps returning `.eof` once the end
/// of the buffer is reached. Token locations are byte offsets into `source`.
pub const Lexer = struct {
    source: [:0]const u8,
    index: u32,

    /// Spelling and token tag of one `#directive`.
    const Directive = struct { text: []const u8, tag: Tag };

    /// Directives recognised after '#', tried in this order.
    const directives = [_]Directive{
        .{ .text = "#import", .tag = .hash_import },
        .{ .text = "#insert", .tag = .hash_insert },
        .{ .text = "#run", .tag = .hash_run },
        .{ .text = "#builtin", .tag = .hash_builtin },
    };

    /// Create a lexer positioned at the first byte of `source`.
    pub fn init(source: [:0]const u8) Lexer {
        return .{ .source = source, .index = 0 };
    }

    /// Scan and return the next token, skipping whitespace and `//` comments.
    /// Bytes that start no known token produce a one-byte `.invalid` token.
    pub fn next(self: *Lexer) Token {
        // Skip whitespace and comments
        while (true) {
            if (self.index >= self.source.len) {
                return self.makeToken(.eof, self.index, self.index);
            }
            const c = self.source[self.index];
            if (c == ' ' or c == '\t' or c == '\n' or c == '\r') {
                self.index += 1;
                continue;
            }
            // Line comments run up to (not including) the next '\n'.
            if (c == '/' and self.index + 1 < self.source.len and self.source[self.index + 1] == '/') {
                while (self.index < self.source.len and self.source[self.index] != '\n') {
                    self.index += 1;
                }
                continue;
            }
            break;
        }

        const start = self.index;
        const c = self.source[start];

        // Integer / float literals
        if (isDigit(c)) return self.lexNumber(start);

        // Identifiers and keywords
        if (isIdentStart(c)) return self.lexIdentifier(start);

        // String literals
        if (c == '"') return self.lexString(start);

        // Directives: #import, #insert, #run, #builtin
        if (c == '#') return self.lexDirective(start);

        // Punctuation and operators. The first byte is consumed here, so
        // `peek` below looks at the byte that follows it.
        self.index += 1;
        switch (c) {
            ';' => return self.makeToken(.semicolon, start, self.index),
            ',' => return self.makeToken(.comma, start, self.index),
            '(' => return self.makeToken(.l_paren, start, self.index),
            ')' => return self.makeToken(.r_paren, start, self.index),
            '{' => return self.makeToken(.l_brace, start, self.index),
            '}' => return self.makeToken(.r_brace, start, self.index),
            '[' => return self.makeToken(.l_bracket, start, self.index),
            ']' => return self.makeToken(.r_bracket, start, self.index),
            '$' => return self.makeToken(.dollar, start, self.index),
            '.' => {
                if (self.peek() == '.') {
                    self.index += 1;
                    return self.makeToken(.dot_dot, start, self.index);
                }
                return self.makeToken(.dot, start, self.index);
            },
            ':' => {
                if (self.peek() == ':') {
                    self.index += 1;
                    return self.makeToken(.colon_colon, start, self.index);
                }
                return self.opMaybeEqual(start, .colon_equal, .colon);
            },
            '=' => {
                if (self.peek() == '>') {
                    self.index += 1;
                    return self.makeToken(.fat_arrow, start, self.index);
                }
                return self.opMaybeEqual(start, .equal_equal, .equal);
            },
            '-' => {
                // `---`: one '-' is already consumed, so two more must follow.
                if (self.peek() == '-' and (self.index + 1) < self.source.len and self.source[self.index + 1] == '-') {
                    self.index += 2;
                    return self.makeToken(.triple_minus, start, self.index);
                }
                if (self.peek() == '>') {
                    self.index += 1;
                    return self.makeToken(.arrow, start, self.index);
                }
                return self.opMaybeEqual(start, .minus_equal, .minus);
            },
            '+' => return self.opMaybeEqual(start, .plus_equal, .plus),
            '*' => return self.opMaybeEqual(start, .star_equal, .star),
            '/' => return self.opMaybeEqual(start, .slash_equal, .slash),
            '%' => return self.opMaybeEqual(start, .percent_equal, .percent),
            '!' => return self.opMaybeEqual(start, .bang_equal, .bang),
            '<' => return self.opMaybeEqual(start, .less_equal, .less),
            '>' => return self.opMaybeEqual(start, .greater_equal, .greater),
            else => return self.makeToken(.invalid, start, self.index),
        }
    }

    /// Finish an operator whose first byte is already consumed: emit
    /// `with_equal` (consuming the '=') when '=' follows, otherwise `plain`.
    fn opMaybeEqual(self: *Lexer, start: u32, with_equal: Tag, plain: Tag) Token {
        if (self.peek() == '=') {
            self.index += 1;
            return self.makeToken(with_equal, start, self.index);
        }
        return self.makeToken(plain, start, self.index);
    }

    /// Lex a `#directive` starting at `start` (which holds '#').
    /// A directive only matches when it is not immediately followed by an
    /// identifier character, so "#running" is not `#run`. When nothing
    /// matches, the lone '#' becomes a one-byte `.invalid` token.
    fn lexDirective(self: *Lexer, start: u32) Token {
        const rest = self.source[start..];
        for (directives) |d| {
            if (!std.mem.startsWith(u8, rest, d.text)) continue;
            if (rest.len > d.text.len and isIdentContinue(rest[d.text.len])) continue;
            self.index = start + @as(u32, @intCast(d.text.len));
            return self.makeToken(d.tag, start, self.index);
        }
        self.index += 1;
        return self.makeToken(.invalid, start, self.index);
    }

    /// Lex a numeric literal whose first digit is at `start`.
    /// Handles `0x`/`0X` hex and `0b`/`0B` binary prefixes, decimal integers,
    /// and decimal floats of the form `digits.digits`.
    fn lexNumber(self: *Lexer, start: u32) Token {
        // Advance past the initial digit that was already matched
        self.index += 1;

        // Check for hex (0x/0X) or binary (0b/0B) prefix
        if (self.source[start] == '0' and self.index < self.source.len) {
            const prefix = self.source[self.index];
            if (prefix == 'x' or prefix == 'X') {
                self.index += 1; // skip 'x'/'X'
                while (self.index < self.source.len and isHexDigit(self.source[self.index])) {
                    self.index += 1;
                }
                return self.makeToken(.int_literal, start, self.index);
            }
            if (prefix == 'b' or prefix == 'B') {
                self.index += 1; // skip 'b'/'B'
                while (self.index < self.source.len and (self.source[self.index] == '0' or self.source[self.index] == '1')) {
                    self.index += 1;
                }
                return self.makeToken(.int_literal, start, self.index);
            }
        }

        while (self.index < self.source.len and isDigit(self.source[self.index])) {
            self.index += 1;
        }
        // A '.' only makes this a float when a digit follows it; otherwise the
        // '.' is left for the next token (e.g. `1.foo` or `1..5`).
        if (self.index < self.source.len and self.source[self.index] == '.') {
            if (self.index + 1 < self.source.len and isDigit(self.source[self.index + 1])) {
                self.index += 1; // skip '.'
                while (self.index < self.source.len and isDigit(self.source[self.index])) {
                    self.index += 1;
                }
                return self.makeToken(.float_literal, start, self.index);
            }
        }
        return self.makeToken(.int_literal, start, self.index);
    }

    /// Lex an identifier starting at `start`; returns the keyword tag when
    /// `getKeyword` recognises the text, `.identifier` otherwise.
    fn lexIdentifier(self: *Lexer, start: u32) Token {
        while (self.index < self.source.len and isIdentContinue(self.source[self.index])) {
            self.index += 1;
        }
        const text = self.source[start..self.index];
        if (getKeyword(text)) |kw| {
            return self.makeToken(kw, start, self.index);
        }
        return self.makeToken(.identifier, start, self.index);
    }

    /// Lex a double-quoted string whose opening quote is at `start`.
    /// A backslash escapes the byte after it, so `\"` does not end the
    /// string. Reaching the end of input first yields `.invalid` spanning
    /// the rest of the buffer.
    fn lexString(self: *Lexer, start: u32) Token {
        self.index += 1; // skip opening "
        while (self.index < self.source.len) {
            const ch = self.source[self.index];
            if (ch == '"') {
                self.index += 1;
                return self.makeToken(.string_literal, start, self.index);
            }
            // Skip the escaped byte too, but only when one exists: a trailing
            // backslash must not push `index` (and the token end) past
            // `source.len`.
            if (ch == '\\' and self.index + 1 < self.source.len) {
                self.index += 1;
            }
            self.index += 1;
        }
        // Unterminated string
        return self.makeToken(.invalid, start, self.index);
    }

    /// Return the byte at `index` without consuming it, or 0 at end of input.
    fn peek(self: *const Lexer) u8 {
        if (self.index < self.source.len) {
            return self.source[self.index];
        }
        return 0;
    }

    /// Build a token with tag `tag` covering bytes `start..end`.
    fn makeToken(_: *const Lexer, tag: Tag, start: u32, end: u32) Token {
        return .{ .tag = tag, .loc = .{ .start = start, .end = end } };
    }

    fn isDigit(c: u8) bool {
        return c >= '0' and c <= '9';
    }

    fn isIdentStart(c: u8) bool {
        return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '_';
    }

    fn isHexDigit(c: u8) bool {
        return isDigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F');
    }

    fn isIdentContinue(c: u8) bool {
        return isIdentStart(c) or isDigit(c);
    }
};
lex.next().tag); + } +} + +test "lex hash_run" { + var lex = Lexer.init("#run"); + try std.testing.expectEqual(Tag.hash_run, lex.next().tag); + try std.testing.expectEqual(Tag.eof, lex.next().tag); + + // #run followed by identifier + var lex2 = Lexer.init("#run compute(5)"); + try std.testing.expectEqual(Tag.hash_run, lex2.next().tag); + try std.testing.expectEqual(Tag.identifier, lex2.next().tag); + + // #running should not match (identContinue after "run") + var lex3 = Lexer.init("#running"); + try std.testing.expectEqual(Tag.invalid, lex3.next().tag); +} + +test "lex hash_import" { + var lex = Lexer.init("#import \"foo.sx\""); + try std.testing.expectEqual(Tag.hash_import, lex.next().tag); + try std.testing.expectEqual(Tag.string_literal, lex.next().tag); + try std.testing.expectEqual(Tag.eof, lex.next().tag); + + // #importing should not match + var lex2 = Lexer.init("#importing"); + try std.testing.expectEqual(Tag.invalid, lex2.next().tag); +} + +test "lex hash_insert" { + var lex = Lexer.init("#insert #run generate()"); + try std.testing.expectEqual(Tag.hash_insert, lex.next().tag); + try std.testing.expectEqual(Tag.hash_run, lex.next().tag); + try std.testing.expectEqual(Tag.identifier, lex.next().tag); + + // #inserting should not match + var lex2 = Lexer.init("#inserting"); + try std.testing.expectEqual(Tag.invalid, lex2.next().tag); +} + +test "lex string" { + var lex = Lexer.init("\"Hello\""); + const tok = lex.next(); + try std.testing.expectEqual(Tag.string_literal, tok.tag); + try std.testing.expectEqualStrings("\"Hello\"", tok.slice("\"Hello\"")); +} + +test "lex hex literal" { + var lex = Lexer.init("0xFF 0X1A"); + const tok1 = lex.next(); + try std.testing.expectEqual(Tag.int_literal, tok1.tag); + try std.testing.expectEqualStrings("0xFF", tok1.slice("0xFF 0X1A")); + const tok2 = lex.next(); + try std.testing.expectEqual(Tag.int_literal, tok2.tag); + try std.testing.expectEqualStrings("0X1A", tok2.slice("0xFF 0X1A")); +} + +test "lex binary 
literal" { + var lex = Lexer.init("0b1010 0B110"); + const tok1 = lex.next(); + try std.testing.expectEqual(Tag.int_literal, tok1.tag); + try std.testing.expectEqualStrings("0b1010", tok1.slice("0b1010 0B110")); + const tok2 = lex.next(); + try std.testing.expectEqual(Tag.int_literal, tok2.tag); + try std.testing.expectEqualStrings("0B110", tok2.slice("0b1010 0B110")); +} diff --git a/src/llvm_api.zig b/src/llvm_api.zig new file mode 100644 index 0000000..6a2c701 --- /dev/null +++ b/src/llvm_api.zig @@ -0,0 +1,54 @@ +pub const c = @cImport({ + @cInclude("llvm-c/Core.h"); + @cInclude("llvm-c/Analysis.h"); + @cInclude("llvm-c/BitWriter.h"); + @cInclude("llvm-c/Target.h"); + @cInclude("llvm-c/TargetMachine.h"); + @cInclude("llvm-c/LLJIT.h"); + @cInclude("llvm-c/Orc.h"); + @cInclude("llvm-c/Error.h"); +}); + +extern fn sx_llvm_init_all_targets() void; +extern fn sx_llvm_init_native_target() void; + +pub fn initAllTargets() void { + sx_llvm_init_all_targets(); +} + +pub fn initNativeTarget() void { + sx_llvm_init_native_target(); +} + +// Type aliases for ergonomics +pub const Context = c.LLVMContextRef; +pub const Module = c.LLVMModuleRef; +pub const Builder = c.LLVMBuilderRef; +pub const Value = c.LLVMValueRef; +pub const Type = c.LLVMTypeRef; +pub const BasicBlock = c.LLVMBasicBlockRef; +pub const TargetMachine = c.LLVMTargetMachineRef; + +pub fn createContext() Context { + return c.LLVMContextCreate(); +} + +pub fn disposeContext(ctx: Context) void { + c.LLVMContextDispose(ctx); +} + +pub fn moduleCreateWithName(name: [*:0]const u8) Module { + return c.LLVMModuleCreateWithNameInContext(name, c.LLVMGetGlobalContext()); +} + +pub fn disposeModule(module: Module) void { + c.LLVMDisposeModule(module); +} + +pub fn createBuilderInContext(ctx: Context) Builder { + return c.LLVMCreateBuilderInContext(ctx); +} + +pub fn disposeBuilder(builder: Builder) void { + c.LLVMDisposeBuilder(builder); +} diff --git a/src/lsp/document.zig b/src/lsp/document.zig new file mode 100644 
/// In-memory store of open text documents, keyed by document URI.
///
/// The store owns a private copy of every URI and text it holds; callers keep
/// ownership of the slices they pass in. Call `deinit` to release everything.
pub const DocumentStore = struct {
    documents: std.StringHashMap(Document),
    allocator: std.mem.Allocator,

    pub const Document = struct {
        uri: []const u8,
        text: []const u8,
        version: i64,
    };

    /// Create an empty store that allocates from `allocator`.
    pub fn init(allocator: std.mem.Allocator) DocumentStore {
        return .{
            .documents = std.StringHashMap(Document).init(allocator),
            .allocator = allocator,
        };
    }

    /// Free every stored URI and text, then the map itself.
    pub fn deinit(self: *DocumentStore) void {
        var it = self.documents.iterator();
        while (it.next()) |entry| {
            self.allocator.free(entry.value_ptr.text);
            // `Document.uri` aliases the key, so freeing the key covers both.
            self.allocator.free(entry.key_ptr.*);
        }
        self.documents.deinit();
    }

    /// Start tracking `uri` with the given `text` and `version`.
    ///
    /// Opening a URI that is already tracked replaces its text and version
    /// in place. On allocation failure nothing is leaked and the store is
    /// left as it was.
    pub fn open(self: *DocumentStore, uri: []const u8, text: []const u8, version: i64) !void {
        // Re-open: `put` would keep the existing key, so storing a second URI
        // copy would leak it (and the old text). Reuse the entry instead.
        if (self.documents.getPtr(uri)) |doc| {
            return self.replaceText(doc, text, version);
        }

        const uri_copy = try self.allocator.dupe(u8, uri);
        errdefer self.allocator.free(uri_copy);
        const text_copy = try self.allocator.dupe(u8, text);
        errdefer self.allocator.free(text_copy);

        try self.documents.put(uri_copy, .{
            .uri = uri_copy,
            .text = text_copy,
            .version = version,
        });
    }

    /// Replace the text and version of an already-open document.
    /// Does nothing when `uri` is not tracked.
    pub fn update(self: *DocumentStore, uri: []const u8, text: []const u8, version: i64) !void {
        const doc = self.documents.getPtr(uri) orelse return;
        try self.replaceText(doc, text, version);
    }

    /// Stop tracking `uri` and free its stored copies. No-op when unknown.
    pub fn close(self: *DocumentStore, uri: []const u8) void {
        if (self.documents.fetchRemove(uri)) |kv| {
            self.allocator.free(kv.value.text);
            self.allocator.free(kv.key);
        }
    }

    /// Look up an open document, or null when `uri` is not tracked.
    /// The pointer refers to storage inside the map, so do not hold it across
    /// calls that add or remove documents.
    pub fn get(self: *const DocumentStore, uri: []const u8) ?*const Document {
        return self.documents.getPtr(uri);
    }

    /// Swap `doc`'s text for a fresh copy of `text` and set its version.
    /// The copy is made before the old text is freed, so a failed allocation
    /// leaves `doc` untouched and `text` may alias the current `doc.text`.
    fn replaceText(self: *DocumentStore, doc: *Document, text: []const u8, version: i64) !void {
        const text_copy = try self.allocator.dupe(u8, text);
        self.allocator.free(doc.text);
        doc.text = text_copy;
        doc.version = version;
    }
};
@import("types.zig"); +const DocumentStore = @import("document.zig").DocumentStore; +const Transport = @import("transport.zig").Transport; +const Analyzer = sx.sema.Analyzer; +const SemaResult = sx.sema.SemaResult; + +pub const Server = struct { + allocator: std.mem.Allocator, + documents: DocumentStore, + transport: *Transport, + io: std.Io, + shutdown_requested: bool = false, + /// Cached sema results per document URI. + sema_cache: std.StringHashMap(DocumentAnalysis), + + const DocumentAnalysis = struct { + source: [:0]const u8, + root: *sx.ast.Node, + sema: SemaResult, + /// Namespace name → resolved file path (for namespace member lookups). + import_map: std.StringHashMap([]const u8), + /// Resolved import file path → source content (populated by resolveImports). + import_sources: std.StringHashMap([:0]const u8), + }; + + pub fn init(allocator: std.mem.Allocator, transport: *Transport, io: std.Io) Server { + return .{ + .allocator = allocator, + .documents = DocumentStore.init(allocator), + .transport = transport, + .io = io, + .sema_cache = std.StringHashMap(DocumentAnalysis).init(allocator), + }; + } + + pub fn handleMessage(self: *Server, raw: []const u8) bool { + const parsed = std.json.parseFromSlice(std.json.Value, self.allocator, raw, .{}) catch { + return true; + }; + const root = parsed.value; + + const method = jsonStr(jsonGet(root, "method") orelse return true) orelse return true; + + const id = jsonGet(root, "id"); + const params = jsonGet(root, "params"); + + if (std.mem.eql(u8, method, "initialize")) { + self.handleInitialize(id) catch |e| self.logError(method, e); + } else if (std.mem.eql(u8, method, "initialized")) { + // Nothing to do + } else if (std.mem.eql(u8, method, "shutdown")) { + self.shutdown_requested = true; + if (id) |req_id| { + const id_json = lsp.valueToJson(self.allocator, req_id) catch return true; + const resp = lsp.jsonRpcResponse(self.allocator, id_json, "null") catch return true; + self.transport.writeMessage(resp) catch 
{}; + } + } else if (std.mem.eql(u8, method, "exit")) { + return false; + } else if (std.mem.eql(u8, method, "textDocument/didOpen")) { + if (params) |p| self.handleDidOpen(p) catch |e| self.logError(method, e); + } else if (std.mem.eql(u8, method, "textDocument/didChange")) { + if (params) |p| self.handleDidChange(p) catch |e| self.logError(method, e); + } else if (std.mem.eql(u8, method, "textDocument/didClose")) { + if (params) |p| self.handleDidClose(p) catch |e| self.logError(method, e); + } else if (std.mem.eql(u8, method, "textDocument/definition")) { + if (params) |p| self.handleDefinition(id, p) catch |e| self.logError(method, e); + } else if (std.mem.eql(u8, method, "textDocument/hover")) { + if (params) |p| self.handleHover(id, p) catch |e| self.logError(method, e); + } else if (std.mem.eql(u8, method, "textDocument/documentSymbol")) { + if (params) |p| self.handleDocumentSymbol(id, p) catch |e| self.logError(method, e); + } else if (std.mem.eql(u8, method, "textDocument/completion")) { + if (params) |p| self.handleCompletion(id, p) catch |e| self.logError(method, e); + } else if (std.mem.eql(u8, method, "textDocument/signatureHelp")) { + if (params) |p| self.handleSignatureHelp(id, p) catch |e| self.logError(method, e); + } else if (std.mem.eql(u8, method, "textDocument/semanticTokens/full")) { + if (params) |p| self.handleSemanticTokens(id, p) catch |e| self.logError(method, e); + } + + return true; + } + + fn logError(self: *Server, method: []const u8, err: anyerror) void { + const stderr = std.Io.File.stderr(); + var buf: [256]u8 = undefined; + const msg = std.fmt.bufPrint(&buf, "lsp: {s} failed: {s}\n", .{ method, @errorName(err) }) catch return; + stderr.writeStreamingAll(self.io, msg) catch {}; + } + + fn handleInitialize(self: *Server, id: ?std.json.Value) !void { + const req_id = id orelse return; + const id_json = try lsp.valueToJson(self.allocator, req_id); + const result_json = try lsp.initializeResultJson(self.allocator); + const resp = try 
lsp.jsonRpcResponse(self.allocator, id_json, result_json); + try self.transport.writeMessage(resp); + } + + fn handleDidOpen(self: *Server, params: std.json.Value) !void { + const td = jsonGet(params, "textDocument") orelse return; + const uri = jsonStr(jsonGet(td, "uri") orelse return) orelse return; + const text = jsonStr(jsonGet(td, "text") orelse return) orelse return; + const version = jsonInt(jsonGet(td, "version") orelse return) orelse return; + + try self.documents.open(uri, text, version); + try self.analyzeAndPublish(uri, text); + } + + fn handleDidChange(self: *Server, params: std.json.Value) !void { + const td = jsonGet(params, "textDocument") orelse return; + const uri = jsonStr(jsonGet(td, "uri") orelse return) orelse return; + const version = jsonInt(jsonGet(td, "version") orelse return) orelse return; + + const changes_arr = jsonArr(jsonGet(params, "contentChanges") orelse return) orelse return; + if (changes_arr.len == 0) return; + + const last = changes_arr[changes_arr.len - 1]; + const text = jsonStr(jsonGet(last, "text") orelse return) orelse return; + + try self.documents.update(uri, text, version); + try self.analyzeAndPublish(uri, text); + } + + fn handleDidClose(self: *Server, params: std.json.Value) !void { + const td = jsonGet(params, "textDocument") orelse return; + const uri = jsonStr(jsonGet(td, "uri") orelse return) orelse return; + + self.documents.close(uri); + _ = self.sema_cache.remove(uri); + try self.sendDiagnostics(uri, &.{}); + } + + fn handleDefinition(self: *Server, id: ?std.json.Value, params: std.json.Value) !void { + const req_id = id orelse return; + const id_json = try lsp.valueToJson(self.allocator, req_id); + + const td = jsonGet(params, "textDocument") orelse return; + const uri = jsonStr(jsonGet(td, "uri") orelse return) orelse return; + const position = jsonGet(params, "position") orelse return; + const line = std.math.cast(u32, jsonInt(jsonGet(position, "line") orelse return) orelse return) orelse return; + const 
character = std.math.cast(u32, jsonInt(jsonGet(position, "character") orelse return) orelse return) orelse return; + + const analysis = self.sema_cache.get(uri) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + return; + }; + + const offset = positionToOffset(analysis.source, line, character) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + return; + }; + + // Check if cursor is on a reference → jump to definition + if (sx.sema.findReferenceAtOffset(analysis.sema.references, offset)) |ref_idx| { + const ref = analysis.sema.references[ref_idx]; + if (ref.symbol_index < analysis.sema.symbols.len) { + const sym = analysis.sema.symbols[ref.symbol_index]; + if (try self.sendSymbolLocation(id_json, uri, analysis, sym)) return; + } + } + + // Check if cursor is on a symbol definition name + if (findSymbolNameAtOffset(analysis.sema.symbols, analysis.source, offset)) |sym_idx| { + const sym = analysis.sema.symbols[sym_idx]; + if (try self.sendSymbolLocation(id_json, uri, analysis, sym)) return; + } + + // Check if cursor is on a qualified name (e.g. 
"std.print") + if (extractQualifiedName(analysis.source, offset)) |qn| { + if (try self.resolveImportedLocation(id_json, analysis, qn.ns, qn.member)) |_| return; + } + + // Check if cursor is on an #import "path" string → open the file + if (findImportPathAtOffset(analysis.source, offset)) |import_path| { + const file_path = uriToFilePath(uri) orelse ""; + const base_dir = sx.imports.dirName(file_path); + const resolved = if (std.mem.eql(u8, base_dir, ".")) + import_path + else + try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ base_dir, import_path }); + const target_uri = try std.fmt.allocPrint(self.allocator, "file://{s}", .{resolved}); + const range = lsp.Range{ + .start = .{ .line = 0, .character = 0 }, + .end = .{ .line = 0, .character = 0 }, + }; + const loc_json = try lsp.locationJson(self.allocator, target_uri, range); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, loc_json); + try self.transport.writeMessage(resp); + return; + } + + // Fallback: identifier inside string interpolation (not in sema) + if (extractIdentAtOffset(analysis.source, offset)) |name| { + const name_start = @as(u32, @intCast(@intFromPtr(name.ptr) - @intFromPtr(analysis.source.ptr))); + const is_qualified = name_start > 0 and analysis.source[name_start - 1] == '.'; + if (!is_qualified) { + if (findSymbolByName(analysis.sema.symbols, name)) |si| { + const sym = analysis.sema.symbols[si]; + if (try self.sendSymbolLocation(id_json, uri, analysis, sym)) return; + } + } + } + + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + } + + fn handleHover(self: *Server, id: ?std.json.Value, params: std.json.Value) !void { + const req_id = id orelse return; + const id_json = try lsp.valueToJson(self.allocator, req_id); + + const td = jsonGet(params, "textDocument") orelse return; + const uri = jsonStr(jsonGet(td, "uri") orelse return) orelse return; + const position = jsonGet(params, "position") orelse return; + 
const line = std.math.cast(u32, jsonInt(jsonGet(position, "line") orelse return) orelse return) orelse return; + const character = std.math.cast(u32, jsonInt(jsonGet(position, "character") orelse return) orelse return) orelse return; + + const analysis = self.sema_cache.get(uri) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + return; + }; + + const offset = positionToOffset(analysis.source, line, character) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + return; + }; + + // Check if cursor is on a qualified name (e.g. std.print) — source-based, no AST + if (extractQualifiedName(analysis.source, offset)) |qn| { + // Namespace member hover + if (analysis.import_map.get(qn.ns)) |import_path| { + if (try self.formatNamespaceMemberHover(analysis, qn.ns, qn.member, import_path)) |hover_text| { + const hover_json = try lsp.hoverJson(self.allocator, hover_text); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, hover_json); + try self.transport.writeMessage(resp); + return; + } + } + // Struct field hover (e.g. point.x) + if (try self.formatStructFieldHover(analysis, qn.ns, qn.member)) |hover_text| { + const hover_json = try lsp.hoverJson(self.allocator, hover_text); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, hover_json); + try self.transport.writeMessage(resp); + return; + } + } + + // Check if cursor is on an enum variant or other AST node + if (sx.sema.findNodeAtOffset(analysis.root, offset)) |node| { + // Enum variant hover (e.g. 
.red) + if (node.data == .enum_literal) { + if (try self.formatEnumVariantHover(analysis, node.data.enum_literal.name)) |hover_text| { + const hover_json = try lsp.hoverJson(self.allocator, hover_text); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, hover_json); + try self.transport.writeMessage(resp); + return; + } + } + } + + // Find symbol via reference or definition name + var sym_idx: ?usize = null; + if (sx.sema.findReferenceAtOffset(analysis.sema.references, offset)) |ref_idx| { + const si = analysis.sema.references[ref_idx].symbol_index; + if (si < analysis.sema.symbols.len) sym_idx = si; + } else { + sym_idx = findSymbolNameAtOffset(analysis.sema.symbols, analysis.source, offset); + } + + // Fallback: identifier inside string interpolation (not in sema) + if (sym_idx == null) { + if (extractIdentAtOffset(analysis.source, offset)) |name| { + // If preceded by '.', this is a qualified access — don't match bare name + const name_start = @as(u32, @intCast(@intFromPtr(name.ptr) - @intFromPtr(analysis.source.ptr))); + const is_qualified = name_start > 0 and analysis.source[name_start - 1] == '.'; + if (!is_qualified) { + sym_idx = findSymbolByName(analysis.sema.symbols, name); + } + } + } + + if (sym_idx) |si| { + const sym = analysis.sema.symbols[si]; + const resolved = resolveSource(analysis, sym); + const source_for_hover = if (resolved) |r| r.source else analysis.source; + const hover_text = try formatSymbolHover(self.allocator, sym, analysis.root, source_for_hover); + const hover_json = try lsp.hoverJson(self.allocator, hover_text); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, hover_json); + try self.transport.writeMessage(resp); + return; + } + + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + } + + fn handleDocumentSymbol(self: *Server, id: ?std.json.Value, params: std.json.Value) !void { + const req_id = id orelse return; + const id_json = try 
lsp.valueToJson(self.allocator, req_id); + + const td = jsonGet(params, "textDocument") orelse return; + const uri = jsonStr(jsonGet(td, "uri") orelse return) orelse return; + + const analysis = self.sema_cache.get(uri) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "[]"); + try self.transport.writeMessage(resp); + return; + }; + + var doc_symbols = std.ArrayList(lsp.DocumentSymbol).empty; + for (analysis.sema.symbols) |sym| { + // Only top-level symbols (scope_depth == 0) + if (sym.scope_depth != 0) continue; + // Skip symbols from imported files (their spans don't map to this source) + if (!isSymbolInMainSource(analysis.source, sym)) continue; + + const kind: u32 = switch (sym.kind) { + .function => @intFromEnum(lsp.SymbolKindLsp.Function), + .variable => @intFromEnum(lsp.SymbolKindLsp.Variable), + .constant => @intFromEnum(lsp.SymbolKindLsp.Constant), + .enum_type => @intFromEnum(lsp.SymbolKindLsp.Enum), + .struct_type => @intFromEnum(lsp.SymbolKindLsp.Struct), + .type_alias => @intFromEnum(lsp.SymbolKindLsp.Class), + .param => @intFromEnum(lsp.SymbolKindLsp.Variable), + .namespace => @intFromEnum(lsp.SymbolKindLsp.Namespace), + }; + + const range = spanToRange(analysis.source, sym.def_span); + try doc_symbols.append(self.allocator, .{ + .name = sym.name, + .kind = kind, + .range = range, + .selection_range = range, + }); + } + + const symbols_json = try lsp.documentSymbolsJson(self.allocator, doc_symbols.items); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, symbols_json); + try self.transport.writeMessage(resp); + } + + fn handleCompletion(self: *Server, id: ?std.json.Value, params: std.json.Value) !void { + const req_id = id orelse return; + const id_json = try lsp.valueToJson(self.allocator, req_id); + + const td = jsonGet(params, "textDocument") orelse return; + const uri = jsonStr(jsonGet(td, "uri") orelse return) orelse return; + const position = jsonGet(params, "position") orelse return; + const line = 
std.math.cast(u32, jsonInt(jsonGet(position, "line") orelse return) orelse return) orelse return; + const character = std.math.cast(u32, jsonInt(jsonGet(position, "character") orelse return) orelse return) orelse return; + + // Check if cursor is right after a dot — if so, do dot-completion + if (self.documents.get(uri)) |doc| { + if (positionToOffset(doc.text, line, character)) |off| { + if (off > 0 and doc.text[off - 1] == '.') { + try self.handleDotCompletion(id_json, uri, doc.text, off); + return; + } + } + } + + // Regular completion: all in-scope symbols + keywords + const analysis = self.sema_cache.get(uri) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "[]"); + try self.transport.writeMessage(resp); + return; + }; + + var items = std.ArrayList(lsp.CompletionItem).empty; + + for (analysis.sema.symbols) |sym| { + const kind: u32 = switch (sym.kind) { + .function => @intFromEnum(lsp.CompletionItemKind.Function), + .variable => @intFromEnum(lsp.CompletionItemKind.Variable), + .constant => @intFromEnum(lsp.CompletionItemKind.Constant), + .enum_type => @intFromEnum(lsp.CompletionItemKind.Enum), + .struct_type => @intFromEnum(lsp.CompletionItemKind.Struct), + .type_alias => @intFromEnum(lsp.CompletionItemKind.Class), + .param => @intFromEnum(lsp.CompletionItemKind.Variable), + .namespace => @intFromEnum(lsp.CompletionItemKind.Module), + }; + + const detail = if (sym.ty) |ty| try ty.displayName(self.allocator) else null; + + try items.append(self.allocator, .{ + .label = sym.name, + .kind = kind, + .detail = detail, + }); + } + + const keywords = [_][]const u8{ + "if", "else", "then", "return", "defer", + "case", "break", "enum", "struct", "true", + "false", "xx", "while", "continue", + "and", "or", "union", + }; + + const builtins = [_]struct { label: []const u8, detail: []const u8 }{ + .{ .label = "type_of", .detail = "(val: $T) -> Type" }, + .{ .label = "type_name", .detail = "($T: Type) -> string" }, + .{ .label = "field_count", 
.detail = "($T: Type) -> s32" }, + .{ .label = "field_name", .detail = "($T: Type, idx: s32) -> string" }, + .{ .label = "field_value", .detail = "(s: $T, idx: s32) -> Any" }, + .{ .label = "size_of", .detail = "($T: Type) -> s32" }, + .{ .label = "cast", .detail = "(Type) expr — prefix type cast" }, + .{ .label = "alloc", .detail = "(size: s32) -> string" }, + .{ .label = "sqrt", .detail = "(x: $T) -> T" }, + }; + for (&keywords) |kw| { + try items.append(self.allocator, .{ + .label = kw, + .kind = @intFromEnum(lsp.CompletionItemKind.Keyword), + }); + } + for (&builtins) |b| { + try items.append(self.allocator, .{ + .label = b.label, + .kind = @intFromEnum(lsp.CompletionItemKind.Function), + .detail = b.detail, + }); + } + + const items_json = try lsp.completionItemsJson(self.allocator, items.items); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, items_json); + try self.transport.writeMessage(resp); + } + + fn handleDotCompletion(self: *Server, id_json: []const u8, uri: []const u8, text: []const u8, cursor_offset: u32) !void { + var items = std.ArrayList(lsp.CompletionItem).empty; + + if (extractDotPrefix(text, cursor_offset)) |prefix| { + if (self.sema_cache.get(uri)) |analysis| { + // Check if prefix is a namespace — offer its inner declarations + if (!try self.collectNamespaceCompletions(&items, analysis, prefix)) { + // Otherwise look up prefix as a struct/enum type name in sema symbols + try self.collectMemberCompletions(&items, analysis, prefix); + } + } + } + + const items_json = try lsp.completionItemsJson(self.allocator, items.items); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, items_json); + try self.transport.writeMessage(resp); + } + + fn collectMemberCompletions(self: *Server, items: *std.ArrayList(lsp.CompletionItem), analysis: DocumentAnalysis, name: []const u8) !void { + for (analysis.sema.symbols) |sym| { + if (!std.mem.eql(u8, sym.name, name)) continue; + + if (sym.kind == .struct_type) { + if 
(sx.sema.findNodeAtOffset(analysis.root, sym.def_span.start)) |node| { + if (node.data == .struct_decl) { + const sd = node.data.struct_decl; + for (sd.field_names, 0..) |field_name, fi| { + const detail: ?[]const u8 = if (fi < sd.field_types.len) blk: { + const ft = sd.field_types[fi]; + break :blk if (ft.data == .type_expr) ft.data.type_expr.name else null; + } else null; + + try items.append(self.allocator, .{ + .label = field_name, + .kind = @intFromEnum(lsp.CompletionItemKind.Field), + .detail = detail, + }); + } + } + } + } else if (sym.kind == .enum_type) { + if (sx.sema.findNodeAtOffset(analysis.root, sym.def_span.start)) |node| { + if (node.data == .enum_decl) { + const ed = node.data.enum_decl; + for (ed.variants) |variant| { + try items.append(self.allocator, .{ + .label = variant, + .kind = @intFromEnum(lsp.CompletionItemKind.EnumMember), + }); + } + } + } + } + break; // Found the symbol + } + } + + fn collectNamespaceCompletions(self: *Server, items: *std.ArrayList(lsp.CompletionItem), analysis: DocumentAnalysis, name: []const u8) !bool { + // Find a namespace symbol matching the prefix + for (analysis.sema.symbols) |sym| { + if (sym.kind != .namespace or !std.mem.eql(u8, sym.name, name)) continue; + + // Find the namespace_decl AST node + if (sx.sema.findNodeAtOffset(analysis.root, sym.def_span.start)) |node| { + if (node.data == .namespace_decl) { + for (node.data.namespace_decl.decls) |decl| { + switch (decl.data) { + .fn_decl => |fd| { + var detail_buf = std.ArrayList(u8).empty; + try detail_buf.append(self.allocator, '('); + for (fd.params, 0..) 
|param, pi| { + if (pi > 0) try detail_buf.appendSlice(self.allocator, ", "); + try detail_buf.appendSlice(self.allocator, param.name); + try detail_buf.appendSlice(self.allocator, ": "); + if (param.type_expr.data == .type_expr) { + try detail_buf.appendSlice(self.allocator, param.type_expr.data.type_expr.name); + } else { + try detail_buf.appendSlice(self.allocator, "?"); + } + } + try detail_buf.append(self.allocator, ')'); + if (fd.return_type) |rt| { + try detail_buf.appendSlice(self.allocator, " -> "); + if (rt.data == .type_expr) { + try detail_buf.appendSlice(self.allocator, rt.data.type_expr.name); + } + } + try items.append(self.allocator, .{ + .label = fd.name, + .kind = @intFromEnum(lsp.CompletionItemKind.Function), + .detail = detail_buf.items, + }); + }, + .const_decl => |cd| { + const kind: u32 = if (cd.value.data == .lambda) + @intFromEnum(lsp.CompletionItemKind.Function) + else + @intFromEnum(lsp.CompletionItemKind.Constant); + try items.append(self.allocator, .{ + .label = cd.name, + .kind = kind, + }); + }, + .enum_decl => |ed| { + try items.append(self.allocator, .{ + .label = ed.name, + .kind = @intFromEnum(lsp.CompletionItemKind.Enum), + }); + }, + .struct_decl => |sd| { + try items.append(self.allocator, .{ + .label = sd.name, + .kind = @intFromEnum(lsp.CompletionItemKind.Struct), + }); + }, + .union_decl => |ud| { + try items.append(self.allocator, .{ + .label = ud.name, + .kind = @intFromEnum(lsp.CompletionItemKind.Enum), + }); + }, + else => {}, + } + } + return true; + } + } + break; + } + return false; + } + + fn handleSignatureHelp(self: *Server, id: ?std.json.Value, params: std.json.Value) !void { + const req_id = id orelse return; + const id_json = try lsp.valueToJson(self.allocator, req_id); + + const td = jsonGet(params, "textDocument") orelse return; + const uri = jsonStr(jsonGet(td, "uri") orelse return) orelse return; + const position = jsonGet(params, "position") orelse return; + const line = std.math.cast(u32, 
jsonInt(jsonGet(position, "line") orelse return) orelse return) orelse return; + const character = std.math.cast(u32, jsonInt(jsonGet(position, "character") orelse return) orelse return) orelse return; + + const doc = self.documents.get(uri) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + return; + }; + + const offset = positionToOffset(doc.text, line, character) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + return; + }; + + const ctx = findCallContext(doc.text, offset) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + return; + }; + + // Built-in function signatures + const builtin_sigs = [_]struct { name: []const u8, label: []const u8, params: []const []const u8 }{ + .{ .name = "type_of", .label = "type_of(val: $T) -> Type", .params = &.{"val: $T"} }, + .{ .name = "type_name", .label = "type_name($T: Type) -> string", .params = &.{"$T: Type"} }, + .{ .name = "field_count", .label = "field_count($T: Type) -> s32", .params = &.{"$T: Type"} }, + .{ .name = "field_name", .label = "field_name($T: Type, idx: s32) -> string", .params = &.{ "$T: Type", "idx: s32" } }, + .{ .name = "field_value", .label = "field_value(s: $T, idx: s32) -> Any", .params = &.{ "s: $T", "idx: s32" } }, + .{ .name = "size_of", .label = "size_of($T: Type) -> s32", .params = &.{"$T: Type"} }, + .{ .name = "cast", .label = "cast(Type) expr", .params = &.{"Type"} }, + .{ .name = "alloc", .label = "alloc(size: s32) -> string", .params = &.{"size: s32"} }, + .{ .name = "sqrt", .label = "sqrt(x: $T) -> T", .params = &.{"x: $T"} }, + .{ .name = "print", .label = "print(fmt: string, args: ..Any)", .params = &.{ "fmt: string", "args: ..Any" } }, + .{ .name = "write", .label = "write(str: string) -> void", .params = &.{"str: string"} }, + }; + for (&builtin_sigs) |b| { + const 
matches = std.mem.eql(u8, ctx.name, b.name) or + (std.mem.startsWith(u8, ctx.name, "std.") and std.mem.eql(u8, ctx.name[4..], b.name)); + if (matches) { + const sig_json = try lsp.signatureHelpJson(self.allocator, b.label, b.params, ctx.active_param); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, sig_json); + try self.transport.writeMessage(resp); + return; + } + } + + // Look up function in sema cache + const analysis = self.sema_cache.get(uri) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + return; + }; + + // Try to find the function — either at top level or inside a namespace + const fn_node = findFnDeclByName(analysis, ctx.name); + + if (fn_node) |fd| { + var label_buf = std.ArrayList(u8).empty; + try label_buf.appendSlice(self.allocator, fd.name); + try label_buf.append(self.allocator, '('); + + var param_labels = std.ArrayList([]const u8).empty; + for (fd.params, 0..) |param, pi| { + if (pi > 0) try label_buf.appendSlice(self.allocator, ", "); + const param_start = label_buf.items.len; + try label_buf.appendSlice(self.allocator, param.name); + try label_buf.appendSlice(self.allocator, ": "); + if (param.type_expr.data == .type_expr) { + try label_buf.appendSlice(self.allocator, param.type_expr.data.type_expr.name); + } else { + try label_buf.appendSlice(self.allocator, "?"); + } + const param_label = try self.allocator.dupe(u8, label_buf.items[param_start..]); + try param_labels.append(self.allocator, param_label); + } + try label_buf.append(self.allocator, ')'); + + if (fd.return_type) |rt| { + try label_buf.appendSlice(self.allocator, " -> "); + if (rt.data == .type_expr) { + try label_buf.appendSlice(self.allocator, rt.data.type_expr.name); + } + } + + const sig_json = try lsp.signatureHelpJson(self.allocator, label_buf.items, param_labels.items, ctx.active_param); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, sig_json); + try 
self.transport.writeMessage(resp); + return; + } + + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "null"); + try self.transport.writeMessage(resp); + } + + fn handleSemanticTokens(self: *Server, id: ?std.json.Value, params: std.json.Value) !void { + const req_id = id orelse return; + const id_json = try lsp.valueToJson(self.allocator, req_id); + + const td = jsonGet(params, "textDocument") orelse return; + const uri = jsonStr(jsonGet(td, "uri") orelse return) orelse return; + + const analysis = self.sema_cache.get(uri) orelse { + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, "{\"data\":[]}"); + try self.transport.writeMessage(resp); + return; + }; + + var data = std.ArrayList(u32).empty; + var prev_line: u32 = 0; + var prev_char: u32 = 0; + + // Re-lex the cached source to get all tokens + var lexer = sx.lexer.Lexer.init(analysis.source); + while (true) { + const tok = lexer.next(); + if (tok.tag == .eof) break; + + if (tok.tag == .string_literal) { + try emitStringParts(&data, self.allocator, analysis.source, tok.loc.start, tok.loc.end, &prev_line, &prev_char); + continue; + } + + const token_type = classifyToken(tok, analysis) orelse continue; + try emitToken(&data, self.allocator, analysis.source, tok.loc.start, tok.loc.end, token_type, &prev_line, &prev_char); + } + + const result_json = try lsp.semanticTokensJson(self.allocator, data.items); + const resp = try lsp.jsonRpcResponse(self.allocator, id_json, result_json); + try self.transport.writeMessage(resp); + } + + fn classifyToken(tok: sx.token.Token, analysis: DocumentAnalysis) ?u32 { + const ST = lsp.SemanticTokenType; + return switch (tok.tag) { + // Keywords + .kw_if, + .kw_else, + .kw_then, + .kw_true, + .kw_false, + .kw_enum, + .kw_case, + .kw_break, + .kw_continue, + .kw_while, + .kw_for, + .kw_return, + .kw_defer, + .kw_struct, + .kw_union, + .kw_xx, + .kw_and, + .kw_or, + .hash_run, + .hash_import, + .hash_insert, + .hash_builtin, + => ST.keyword, + + // Type 
keywords + .kw_f32, .kw_f64, .kw_Type => ST.type_, + + // Literals + .int_literal, .float_literal => ST.number, + .string_literal => null, // let TextMate grammar handle strings (interpolation) + + // Operators + .plus, + .minus, + .star, + .slash, + .equal, + .equal_equal, + .bang, + .bang_equal, + .less, + .less_equal, + .greater, + .greater_equal, + .plus_equal, + .minus_equal, + .star_equal, + .slash_equal, + .percent, + .percent_equal, + .arrow, + .fat_arrow, + .colon_colon, + .colon_equal, + .triple_minus, + => ST.operator_, + + // Identifiers — need sema lookup + .identifier => classifyIdentifier(tok, analysis), + + // Punctuation — no semantic coloring + .colon, + .semicolon, + .comma, + .dot, + .dot_dot, + .dollar, + .l_paren, + .r_paren, + .l_brace, + .r_brace, + .l_bracket, + .r_bracket, + .eof, + .invalid, + => null, + }; + } + + fn classifyIdentifier(tok: sx.token.Token, analysis: DocumentAnalysis) ?u32 { + const ST = lsp.SemanticTokenType; + const offset = tok.loc.start; + if (tok.loc.start >= analysis.source.len or tok.loc.end > analysis.source.len or tok.loc.start >= tok.loc.end) return null; + const name = analysis.source[tok.loc.start..tok.loc.end]; + + // Check if it's a reference to a known symbol + if (sx.sema.findReferenceAtOffset(analysis.sema.references, offset)) |ref_idx| { + const si = analysis.sema.references[ref_idx].symbol_index; + if (si >= analysis.sema.symbols.len) return null; + const sym = analysis.sema.symbols[si]; + return symbolKindToTokenType(sym.kind); + } + + // Check if it's a symbol definition by matching exact start position and name + for (analysis.sema.symbols) |sym| { + if (sym.def_span.start == offset and std.mem.eql(u8, sym.name, name)) { + return symbolKindToTokenType(sym.kind); + } + } + + // Check if it's a built-in type name (s32, u8, bool, string, etc.) 
+ if (sx.types.Type.fromName(name) != null) { + return ST.type_; + } + + return null; + } + + fn symbolKindToTokenType(kind: sx.sema.SymbolKind) u32 { + const ST = lsp.SemanticTokenType; + return switch (kind) { + .function => ST.function, + .variable => ST.variable, + .constant => ST.variable, + .param => ST.parameter, + .enum_type => ST.enum_, + .struct_type => ST.struct_, + .type_alias => ST.type_, + .namespace => ST.namespace, + }; + } + + fn emitToken( + data: *std.ArrayList(u32), + allocator: std.mem.Allocator, + source: [:0]const u8, + start: u32, + end: u32, + token_type: u32, + prev_line: *u32, + prev_char: *u32, + ) !void { + if (start >= source.len or end > source.len or start >= end) return; + const loc = sx.errors.SourceLoc.compute(source, start); + if (loc.line == 0 or loc.col == 0) return; + const line = loc.line - 1; + const col = loc.col - 1; + const length = end - start; + + if (line < prev_line.*) return; // out-of-order token, skip + const delta_line = line - prev_line.*; + const delta_char = if (delta_line == 0) (if (col >= prev_char.*) col - prev_char.* else return) else col; + + try data.append(allocator, delta_line); + try data.append(allocator, delta_char); + try data.append(allocator, length); + try data.append(allocator, token_type); + try data.append(allocator, 0); + + prev_line.* = line; + prev_char.* = col; + } + + fn emitStringParts( + data: *std.ArrayList(u32), + allocator: std.mem.Allocator, + source: [:0]const u8, + tok_start: u32, + tok_end: u32, + prev_line: *u32, + prev_char: *u32, + ) !void { + const ST = lsp.SemanticTokenType; + var pos = tok_start; + var seg_start = tok_start; + var in_interp = false; + + while (pos < tok_end) : (pos += 1) { + if (in_interp) { + if (source[pos] == '}') { + in_interp = false; + seg_start = pos + 1; + } + } else { + if (source[pos] == '\\' and pos + 1 < tok_end) { + pos += 1; // skip escaped char + } else if (source[pos] == '{') { + if (pos > seg_start) { + try emitToken(data, allocator, 
source, seg_start, pos, ST.string_, prev_line, prev_char); + } + in_interp = true; + } + } + } + + // Emit remaining segment (after last interpolation, or the whole string if none) + if (!in_interp and seg_start < tok_end) { + try emitToken(data, allocator, source, seg_start, tok_end, ST.string_, prev_line, prev_char); + } + } + + fn analyzeAndPublish(self: *Server, uri: []const u8, text: []const u8) !void { + const source = try self.allocator.dupeZ(u8, text); + const file_path = uriToFilePath(uri) orelse ""; + + var comp = sx.core.Compilation.init(self.allocator, self.io, file_path, source); + defer comp.deinit(); + + comp.parse() catch { + try self.sendDiagnostics(uri, diagListToLsp(self.allocator, source, &comp.diagnostics)); + return; + }; + comp.resolveImports() catch {}; + comp.analyze() catch {}; + + // Only run codegen when earlier stages produced no errors. + // Codegen has unreachable/force-unwrap paths that panic on broken ASTs. + const has_errors = for (comp.diagnostics.items.items) |d| { + if (d.level == .err) break true; + } else false; + if (!has_errors) { + comp.generateCode() catch {}; + } + + // Build import_map (namespace name → resolved file path) for go-to-definition + var import_map = std.StringHashMap([]const u8).init(self.allocator); + const resolved_root = comp.resolved_root orelse comp.root orelse { + try self.sendDiagnostics(uri, diagListToLsp(self.allocator, source, &comp.diagnostics)); + return; + }; + if (resolved_root.data == .root) { + for (resolved_root.data.root.decls) |decl| { + if (decl.data == .namespace_decl) { + const ns = decl.data.namespace_decl; + var it = comp.import_sources.keyIterator(); + while (it.next()) |path| { + if (ns.decls.len > 0) { + const first_decl_name = ns.decls[0].data.declName() orelse continue; + const imp_src = comp.import_sources.get(path.*) orelse continue; + const start = ns.decls[0].span.start; + const end = start + @as(u32, @intCast(first_decl_name.len)); + if (end <= imp_src.len and 
std.mem.eql(u8, imp_src[start..end], first_decl_name)) { + import_map.put(ns.name, path.*) catch {}; + break; + } + } + } + } + } + } + + // Cache the analysis result + const uri_owned = try self.allocator.dupe(u8, uri); + try self.sema_cache.put(uri_owned, .{ + .source = source, + .root = resolved_root, + .sema = comp.sema_result orelse SemaResult{ + .symbols = &.{}, + .references = &.{}, + .diagnostics = &.{}, + .fn_signatures = std.StringHashMap(sx.sema.FnSignature).init(self.allocator), + .struct_types = std.StringHashMap(sx.sema.StructTypeInfo).init(self.allocator), + .enum_types = std.StringHashMap([]const []const u8).init(self.allocator), + .type_aliases = std.StringHashMap([]const u8).init(self.allocator), + .type_map = sx.sema.TypeMap.init(self.allocator), + }, + .import_map = import_map, + .import_sources = comp.import_sources, + }); + + // Publish all diagnostics (parse + import + sema + codegen) + try self.sendDiagnostics(uri, diagListToLsp(self.allocator, source, &comp.diagnostics)); + } + + fn sendDiagnostics(self: *Server, uri: []const u8, diagnostics: []const lsp.Diagnostic) !void { + const params_json = try lsp.publishDiagnosticsJson(self.allocator, uri, diagnostics); + const body = try lsp.jsonRpcNotification(self.allocator, "textDocument/publishDiagnostics", params_json); + try self.transport.writeMessage(body); + } + + fn diagListToLsp(allocator: std.mem.Allocator, source: [:0]const u8, diag_list: *const sx.errors.DiagnosticList) []const lsp.Diagnostic { + var result = std.ArrayList(lsp.Diagnostic).empty; + for (diag_list.items.items) |d| { + const range = if (d.span) |span| spanToRange(source, span) else lsp.Range{ + .start = .{ .line = 0, .character = 0 }, + .end = .{ .line = 0, .character = 1 }, + }; + const severity: u32 = switch (d.level) { + .err => 1, + .warn => 2, + .note => 3, + }; + result.append(allocator, .{ + .range = range, + .severity = severity, + .message = d.message, + }) catch continue; + } + return result.items; + } + + // 
// ---- Safe JSON accessors (avoid panicking on wrong union tag) ----

/// Look up `key` in `val`.
/// Returns null when `val` is not a JSON object or the key is absent.
fn jsonGet(val: std.json.Value, key: []const u8) ?std.json.Value {
    return switch (val) {
        .object => |obj| obj.get(key),
        else => null,
    };
}

/// Return the string payload of `val`, or null when `val` is not a JSON string.
fn jsonStr(val: std.json.Value) ?[]const u8 {
    return switch (val) {
        .string => |s| s,
        else => null,
    };
}

/// Return the integer payload of `val`, or null when `val` does not carry the
/// `.integer` tag (other numeric tags are not converted).
fn jsonInt(val: std.json.Value) ?i64 {
    return switch (val) {
        .integer => |i| i,
        else => null,
    };
}

/// Return the elements of `val`, or null when `val` is not a JSON array.
/// The slice aliases the array's own storage; nothing is copied.
fn jsonArr(val: std.json.Value) ?[]std.json.Value {
    return switch (val) {
        .array => |a| a.items,
        else => null,
    };
}

// ---- Helpers ----

/// Find the first symbol whose definition name covers `offset`.
///
/// A symbol matches when `offset` lies in
/// `[def_span.start, def_span.start + name.len)` and the bytes of `source` at
/// that range are exactly the symbol's name. The text comparison rejects
/// symbols whose span does not actually point at their name in this buffer
/// (presumably symbols that came from another file; verify against the
/// producer of `def_span`).
///
/// Returns the index into `symbols`, or null when nothing matches.
fn findSymbolNameAtOffset(symbols: []const sx.sema.Symbol, source: [:0]const u8, offset: u32) ?usize {
    for (symbols, 0..) |sym, i| {
        const name_start = sym.def_span.start;
        // Checked arithmetic: a span that cannot be represented cannot match,
        // so skip the symbol instead of trapping.
        const name_len = std.math.cast(u32, sym.name.len) orelse continue;
        const name_end = std.math.add(u32, name_start, name_len) catch continue;

        if (offset < name_start or offset >= name_end) continue;
        if (name_end > source.len) continue;
        if (std.mem.eql(u8, source[name_start..name_end], sym.name)) return i;
    }
    return null;
}

/// Convert a zero-based (line, character) position into a byte offset in `source`.
///
/// Lines are separated by '\n'. A `character` beyond the end of its line is
/// clamped to the end of that line, including on a final line that has no
/// trailing newline, where the result is `source.len`.
///
/// Returns null when `line` is past the last line, or when `source` is too
/// large for its offsets to fit in `u32`.
///
/// NOTE(review): `character` is counted in bytes. That is only correct for
/// non-ASCII text if the UTF-8 position encoding was negotiated with the
/// client; confirm against the initialize response.
fn positionToOffset(source: []const u8, line: u32, character: u32) ?u32 {
    // With this guard every index and column below fits in u32, so the
    // @intCast calls and the counters cannot overflow.
    if (source.len > std.math.maxInt(u32)) return null;

    var cur_line: u32 = 0;
    var cur_col: u32 = 0;
    for (source, 0..) |ch, i| {
        const on_target_line = cur_line == line;
        if (on_target_line and cur_col == character) return @intCast(i);

        if (ch == '\n') {
            // Reached the end of the requested line before `character`: clamp.
            if (on_target_line) return @intCast(i);
            cur_line += 1;
            cur_col = 0;
        } else {
            cur_col += 1;
        }
    }

    // Ran off the end of the buffer while on the requested line: either the
    // position is exactly end-of-file, or `character` overshoots a last line
    // that has no trailing newline. Both map to the end of the source.
    if (cur_line == line) return @intCast(source.len);
    return null;
}

/// Extract the dotted prefix before a dot at cursor_offset.
/// E.g. for "io.stdout." with cursor at 10, returns "io.stdout".
/// For "Color." with cursor at 6, returns "Color".
fn extractDotPrefix(source: []const u8, cursor_offset: u32) ?[]const u8 {
    // Needs at least one prefix character plus the dot, and the cursor must
    // lie inside the buffer (a stale offset must not index out of bounds).
    if (cursor_offset < 2 or cursor_offset > source.len) return null;
    const dot_pos = cursor_offset - 1;
    if (source[dot_pos] != '.') return null;

    // Walk left over identifier characters and dots.
    var start: u32 = dot_pos;
    while (start > 0) : (start -= 1) {
        const ch = source[start - 1];
        if (!(std.ascii.isAlphanumeric(ch) or ch == '_' or ch == '.')) break;
    }
    if (start == dot_pos) return null;

    // Strip leading dots (e.g. ".Color" → "Color")
    const prefix = std.mem.trimStart(u8, source[start..dot_pos], ".");
    if (prefix.len == 0) return null;
    return prefix;
}

/// Scan backwards from cursor to find the enclosing function call context.
/// Returns the function name (including dotted path) and active parameter index
/// (the number of top-level commas between the opening paren and the cursor).
/// Returns null when no unmatched `(` precedes the cursor, when no name
/// precedes that paren, or when `cursor_offset` is outside `source`.
fn findCallContext(source: []const u8, cursor_offset: u32) ?struct { name: []const u8, active_param: u32 } {
    if (cursor_offset == 0 or cursor_offset > source.len) return null;

    var depth: i32 = 0; // nesting level of already-closed (...) groups
    var comma_count: u32 = 0;
    var pos: u32 = cursor_offset;

    while (pos > 0) {
        pos -= 1;
        switch (source[pos]) {
            ')' => depth += 1,
            ',' => {
                // Commas inside nested calls do not separate our arguments.
                if (depth == 0) comma_count += 1;
            },
            '(' => {
                if (depth > 0) {
                    depth -= 1;
                    continue;
                }
                // Found the unmatched opening paren — extract function name before it
                if (pos == 0) return null;
                var name_start: u32 = pos;
                while (name_start > 0) : (name_start -= 1) {
                    const nc = source[name_start - 1];
                    if (!(std.ascii.isAlphanumeric(nc) or nc == '_' or nc == '.')) break;
                }
                if (name_start == pos) return null;
                // Trim leading dots
                const name = std.mem.trimStart(u8, source[name_start..pos], ".");
                if (name.len == 0) return null;
                return .{ .name = name, .active_param = comma_count };
            },
            else => {},
        }
    }

    return null;
}

/// Extract the last segment of a dotted name. "io.foo" → "foo", "bar" → "bar".
fn extractLastSegment(name: []const u8) []const u8 {
    const dot = std.mem.lastIndexOfScalar(u8, name, '.') orelse return name;
    return name[dot + 1 ..];
}

/// Extract the identifier word surrounding the given offset.
/// Returns null when `offset` is at or past the end of `source`, or when no
/// identifier character touches the offset.
fn extractIdentAtOffset(source: []const u8, offset: u32) ?[]const u8 {
    if (offset >= source.len) return null;
    var start: u32 = offset;
    while (start > 0 and isIdentChar(source[start - 1])) start -= 1;
    var end: u32 = offset;
    while (end < source.len and isIdentChar(source[end])) end += 1;
    if (start == end) return null;
    return source[start..end];
}

/// True for ASCII letters, ASCII digits and underscore.
fn isIdentChar(c: u8) bool {
    return std.ascii.isAlphanumeric(c) or c == '_';
}

/// Check if cursor is inside an `#import "..."` string and return the path.
///
/// Only the cursor's own line is examined. The cursor matches when it sits on
/// a path character or on the closing quote; a cursor on the opening quote,
/// an unterminated string and an empty path all yield null.
fn findImportPathAtOffset(source: []const u8, offset: u32) ?[]const u8 {
    if (offset >= source.len) return null;

    // Isolate the cursor's line so a quote on an earlier line can never be
    // mistaken for the opening quote.
    const line_start = if (std.mem.lastIndexOfScalar(u8, source[0..offset], '\n')) |nl| nl + 1 else 0;
    const line_end = std.mem.indexOfScalarPos(u8, source, offset, '\n') orelse source.len;
    const line = source[line_start..line_end];
    const col = offset - line_start;

    const kw = "#import";
    var search_from: usize = 0;
    while (std.mem.indexOfPos(u8, line, search_from, kw)) |kw_pos| {
        search_from = kw_pos + kw.len;

        // Optional blanks between the keyword and the opening quote.
        var open = search_from;
        while (open < line.len and (line[open] == ' ' or line[open] == '\t')) : (open += 1) {}
        if (open >= line.len or line[open] != '"') continue;

        // No closing quote means no further quote exists on this line at all.
        const close = std.mem.indexOfScalarPos(u8, line, open + 1, '"') orelse return null;
        if (col > open and col <= close) {
            if (close == open + 1) return null; // empty path
            return line[open + 1 .. close];
        }
    }
    return null;
}

/// Extract a qualified name (namespace.member) at a given byte offset.
/// Works directly from source text — no AST traversal needed.
/// The cursor may be on either half; returns null for an unqualified word.
fn extractQualifiedName(source: []const u8, offset: u32) ?struct { ns: []const u8, member: []const u8 } {
    if (offset >= source.len) return null;

    // Find the word at offset
    var end: u32 = offset;
    while (end < source.len and isIdentChar(source[end])) end += 1;
    var start: u32 = offset;
    while (start > 0 and isIdentChar(source[start - 1])) start -= 1;

    if (start == end) return null;

    // Check if preceded by a dot and another identifier (ns.member)
    if (start >= 2 and source[start - 1] == '.') {
        const dot = start - 1;
        var ns_start: u32 = dot;
        while (ns_start > 0 and isIdentChar(source[ns_start - 1])) ns_start -= 1;
        if (ns_start < dot) {
            return .{
                .ns = source[ns_start..dot],
                .member = source[start..end],
            };
        }
    }

    // Check if followed by a dot and another identifier (cursor on ns part)
    if (end < source.len and source[end] == '.') {
        var member_end: u32 = end + 1;
        while (member_end < source.len and isIdentChar(source[member_end])) member_end += 1;
        if (member_end > end + 1) {
            return .{
                .ns = source[start..end],
                .member = source[end + 1 .. member_end],
            };
        }
    }

    return null;
}

/// Find a symbol by name (searches backwards for innermost match).
fn findSymbolByName(symbols: []const sx.sema.Symbol, name: []const u8) ?usize {
    var i = symbols.len;
    while (i > 0) {
        i -= 1;
        if (std.mem.eql(u8, symbols[i].name, name)) return i;
    }
    return null;
}

/// Check if a symbol's def_span corresponds to valid text in the given source.
fn isSymbolInMainSource(source: [:0]const u8, sym: sx.sema.Symbol) bool {
    const name_begin = sym.def_span.start;
    const name_finish = name_begin + @as(u32, @intCast(sym.name.len));
    // The span is trusted only when the bytes found there spell the symbol's name.
    return name_finish <= source.len and
        std.mem.eql(u8, source[name_begin..name_finish], sym.name);
}

/// Locate `sym`'s defining buffer (main document or an import) and reply to
/// request `id_json` with a Location for its def_span.
/// Returns false, without sending anything, when no buffer matches the symbol.
fn sendSymbolLocation(self: *Server, id_json: []const u8, uri: []const u8, analysis: DocumentAnalysis, sym: sx.sema.Symbol) !bool {
    const hit = resolveSource(analysis, sym) orelse return false;
    const range = spanToRange(hit.source, sym.def_span);

    // Symbols from an import point at that file; local ones reuse the request URI.
    var target_uri = uri;
    if (hit.import_path) |ip| {
        target_uri = try std.fmt.allocPrint(self.allocator, "file://{s}", .{ip});
    }

    const loc_json = try lsp.locationJson(self.allocator, target_uri, range);
    const resp = try lsp.jsonRpcResponse(self.allocator, id_json, loc_json);
    try self.transport.writeMessage(resp);
    return true;
}

/// A source buffer together with the import path it was loaded from
/// (null when the buffer is the main document).
const ResolvedSource = struct {
    source: [:0]const u8,
    import_path: ?[]const u8,
};

/// Resolve the source buffer that contains a symbol's definition.
/// The main source is tried first, then every imported source in map
/// iteration order; null when the name is found at the span in none of them.
fn resolveSource(analysis: DocumentAnalysis, sym: sx.sema.Symbol) ?ResolvedSource {
    if (isSymbolInMainSource(analysis.source, sym)) {
        return .{ .source = analysis.source, .import_path = null };
    }

    var imports = analysis.import_sources.iterator();
    while (imports.next()) |entry| {
        const candidate = entry.value_ptr.*;
        // Same span-versus-name test as for the main buffer.
        if (!isSymbolInMainSource(candidate, sym)) continue;
        return .{ .source = candidate, .import_path = entry.key_ptr.* };
    }
    return null;
}

/// Resolve a namespace member reference to a Location in the imported file.
/// Yields a non-null value once a response has been sent, null when no match was found.
fn resolveImportedLocation(
    self: *Server,
    id_json: []const u8,
    analysis: DocumentAnalysis,
    ns_name: []const u8,
    member_name: []const u8,
) !?void {
    // Namespace name -> import path -> imported source text.
    const import_path = analysis.import_map.get(ns_name) orelse return null;
    const imp_source = analysis.import_sources.get(import_path) orelse return null;

    // The member's declaration node is looked up in the main AST's namespace_decl.
    const members = findNamespaceDeclsByName(analysis.root, ns_name) orelse return null;
    const target = findDeclInNamespace(members, member_name) orelse return null;

    const target_uri = try std.fmt.allocPrint(self.allocator, "file://{s}", .{import_path});
    const loc_json = try lsp.locationJson(
        self.allocator,
        target_uri,
        spanToRange(imp_source, target.span),
    );
    const resp = try lsp.jsonRpcResponse(self.allocator, id_json, loc_json);
    try self.transport.writeMessage(resp);
    return;
}

/// Find a fn_decl by name. Supports dotted names like "ns.func" by looking
/// inside namespace_decl nodes. Falls back to top-level sema symbol lookup
/// using the last dotted segment.
fn findFnDeclByName(analysis: DocumentAnalysis, name: []const u8) ?sx.ast.FnDecl {
    // Qualified form: split at the first dot (e.g. "std.print").
    if (std.mem.indexOfScalar(u8, name, '.')) |dot_idx| {
        const ns_name = name[0..dot_idx];
        const fn_name = name[dot_idx + 1 ..];

        for (analysis.sema.symbols) |sym| {
            if (sym.kind != .namespace) continue;
            if (!std.mem.eql(u8, sym.name, ns_name)) continue;

            // Only the first namespace symbol with this name is inspected.
            const node = sx.sema.findNodeAtOffset(analysis.root, sym.def_span.start) orelse break;
            if (node.data != .namespace_decl) break;
            for (node.data.namespace_decl.decls) |decl| {
                if (decl.data != .fn_decl) continue;
                if (std.mem.eql(u8, decl.data.fn_decl.name, fn_name)) return decl.data.fn_decl;
            }
            break;
        }
    }

    // Top-level lookup
    const func_name = extractLastSegment(name);
    for (analysis.sema.symbols) |sym| {
        if (sym.kind != .function) continue;
        if (!std.mem.eql(u8, sym.name, func_name)) continue;

        // As above, only the first matching function symbol is tried.
        const node = sx.sema.findNodeAtOffset(analysis.root, sym.def_span.start) orelse break;
        if (node.data == .fn_decl) return node.data.fn_decl;
        break;
    }

    return null;
}

/// Convert a file:// URI to a local file path. Returns null for non-file URIs.
/// The remainder after the scheme is returned as-is (no percent-decoding).
fn uriToFilePath(uri: []const u8) ?[]const u8 {
    const scheme = "file://";
    if (!std.mem.startsWith(u8, uri, scheme)) return null;
    return uri[scheme.len..];
}

/// Convert a byte span into an LSP range, clamping both ends to the source length.
/// SourceLoc line/col values are turned into 0-based LSP values by subtracting one.
fn spanToRange(source: [:0]const u8, span: sx.ast.Span) lsp.Range {
    const limit: u32 = @intCast(source.len);
    const first = sx.errors.SourceLoc.compute(source, @min(span.start, limit));
    const last = sx.errors.SourceLoc.compute(source, @min(span.end, limit));
    return .{
        .start = .{ .line = first.line - 1, .character = first.col - 1 },
        .end = .{ .line = last.line - 1, .character = last.col - 1 },
    };
}

/// Extract consecutive // comment lines immediately above a declaration.
fn extractDocComment(source: []const u8, def_start: u32) ?[]const u8 {
    if (def_start == 0 or def_start > source.len) return null;

    // Find start of the declaration line
    var pos: u32 = def_start;
    while (pos > 0 and source[pos - 1] != '\n') : (pos -= 1) {}
    if (pos == 0) return null; // declaration is on the first line: nothing above it

    // Scan backward for consecutive comment lines
    const block_end = pos;
    var block_start = pos;

    while (block_start > 0) {
        var scan = block_start - 1; // position of \n
        while (scan > 0 and source[scan - 1] != '\n') : (scan -= 1) {}
        // scan..block_start is the previous line (incl trailing \n)
        const line = std.mem.trimEnd(u8, source[scan..block_start], "\r\n");
        const trimmed = std.mem.trimStart(u8, line, " \t");
        if (!std.mem.startsWith(u8, trimmed, "//")) break;
        block_start = scan;
    }

    if (block_start >= block_end) return null;
    // Drop the line terminator(s) that close the comment block.
    var end = block_end;
    while (end > block_start and (source[end - 1] == '\n' or source[end - 1] == '\r')) : (end -= 1) {}
    if (end <= block_start) return null;
    return source[block_start..end];
}

/// Format hover text for an AST declaration node with its source (for doc comments).
///
/// The result is the comment block found above the declaration (if any)
/// followed by a fenced `sx` code block holding a one-line signature.
/// Type positions that are not a plain `type_expr` are shown as `?`, for
/// parameters and for the return type alike.
/// Caller owns the returned slice, which is allocated with `allocator`.
fn formatDeclHover(allocator: std.mem.Allocator, decl: *sx.ast.Node, source: []const u8) ![]const u8 {
    var buf = std.ArrayList(u8).empty;
    errdefer buf.deinit(allocator); // do not leak the partial text when an append fails

    if (extractDocComment(source, decl.span.start)) |comment| {
        try buf.appendSlice(allocator, comment);
        try buf.appendSlice(allocator, "\n\n");
    }

    try buf.appendSlice(allocator, "```sx\n");

    switch (decl.data) {
        .fn_decl => |fd| {
            try buf.appendSlice(allocator, fd.name);
            try buf.appendSlice(allocator, " :: (");
            for (fd.params, 0..) |param, pi| {
                if (pi > 0) try buf.appendSlice(allocator, ", ");
                try buf.appendSlice(allocator, param.name);
                try buf.appendSlice(allocator, ": ");
                try buf.appendSlice(
                    allocator,
                    if (param.type_expr.data == .type_expr) param.type_expr.data.type_expr.name else "?",
                );
            }
            try buf.append(allocator, ')');
            if (fd.return_type) |rt| {
                try buf.appendSlice(allocator, " -> ");
                // Same fallback as for parameters, so the arrow is never left dangling.
                try buf.appendSlice(
                    allocator,
                    if (rt.data == .type_expr) rt.data.type_expr.name else "?",
                );
            }
        },
        .enum_decl => |ed| {
            try buf.appendSlice(allocator, ed.name);
            try buf.appendSlice(allocator, " :: enum { ");
            for (ed.variants, 0..) |v, i| {
                if (i > 0) try buf.appendSlice(allocator, ", ");
                try buf.append(allocator, '.');
                try buf.appendSlice(allocator, v);
            }
            try buf.appendSlice(allocator, " }");
        },
        .struct_decl => |sd| {
            try buf.appendSlice(allocator, sd.name);
            try buf.appendSlice(allocator, " :: struct { ");
            for (sd.field_names, 0..) |fn_, fi| {
                if (fi > 0) try buf.appendSlice(allocator, ", ");
                try buf.appendSlice(allocator, fn_);
                // field_types may be shorter than field_names; untyped fields show the name only.
                if (fi < sd.field_types.len and sd.field_types[fi].data == .type_expr) {
                    try buf.appendSlice(allocator, ": ");
                    try buf.appendSlice(allocator, sd.field_types[fi].data.type_expr.name);
                }
            }
            try buf.appendSlice(allocator, " }");
        },
        .union_decl => |ud| {
            try buf.appendSlice(allocator, ud.name);
            try buf.appendSlice(allocator, " :: union { ");
            for (ud.variant_names, 0..) |vn, i| {
                if (i > 0) try buf.appendSlice(allocator, ", ");
                try buf.appendSlice(allocator, vn);
            }
            try buf.appendSlice(allocator, " }");
        },
        .const_decl => |cd| {
            try buf.appendSlice(allocator, cd.name);
            try buf.appendSlice(allocator, " :: ");
            if (cd.type_annotation) |ta| {
                if (ta.data == .type_expr) {
                    try buf.appendSlice(allocator, ta.data.type_expr.name);
                }
            }
        },
        .var_decl => |vd| {
            try buf.appendSlice(allocator, vd.name);
            if (vd.type_annotation) |ta| {
                if (ta.data == .type_expr) {
                    try buf.appendSlice(allocator, " : ");
                    try buf.appendSlice(allocator, ta.data.type_expr.name);
                }
            }
        },
        else => {
            try buf.appendSlice(allocator, "(declaration)");
        },
    }

    try buf.appendSlice(allocator, "\n```");
    // Hand back an exactly-sized allocation so the caller can free it.
    return try buf.toOwnedSlice(allocator);
}

/// Find namespace_decl's inner decls by namespace name.
/// Only direct children of the root node are searched.
fn findNamespaceDeclsByName(root: *sx.ast.Node, name: []const u8) ?[]const *sx.ast.Node {
    if (root.data != .root) return null;
    for (root.data.root.decls) |decl| {
        if (decl.data != .namespace_decl) continue;
        if (std.mem.eql(u8, decl.data.namespace_decl.name, name)) {
            return decl.data.namespace_decl.decls;
        }
    }
    return null;
}

/// Find a specific declaration node inside a namespace by member name.
fn findDeclInNamespace(ns_decls: []const *sx.ast.Node, member_name: []const u8) ?*sx.ast.Node {
    for (ns_decls) |decl| {
        const decl_name = decl.data.declName() orelse continue;
        if (std.mem.eql(u8, decl_name, member_name)) return decl;
    }
    return null;
}

/// Build hover text for a namespace member (e.g. std.print). Reads imported file for doc comments.
fn formatNamespaceMemberHover(self: *Server, analysis: DocumentAnalysis, ns_name: []const u8, member_name: []const u8, import_path: []const u8) !?[]const u8 {
    // Declaration node comes from the main AST; doc comments come from the imported text.
    const members = findNamespaceDeclsByName(analysis.root, ns_name) orelse return null;
    const target = findDeclInNamespace(members, member_name) orelse return null;
    const imp_source = analysis.import_sources.get(import_path) orelse return null;

    return try formatDeclHover(self.allocator, target, imp_source);
}

/// Build hover for a struct field: StructName.field : type, with doc comment.
/// `obj_name` is the variable or parameter being accessed. Returns null when
/// its struct type, the struct's declaration or the field cannot be found.
fn formatStructFieldHover(self: *Server, analysis: DocumentAnalysis, obj_name: []const u8, field_name: []const u8) !?[]const u8 {
    const struct_name = resolveStructTypeName(analysis, obj_name) orelse return null;
    const src = analysis.source;

    // Only the first struct symbol carrying that name is considered.
    const sym = for (analysis.sema.symbols) |candidate| {
        if (candidate.kind == .struct_type and std.mem.eql(u8, candidate.name, struct_name)) break candidate;
    } else return null;

    if (sym.def_span.start >= src.len) return null;
    const node = sx.sema.findNodeAtOffset(analysis.root, sym.def_span.start) orelse return null;
    if (node.data != .struct_decl) return null;
    const sd = node.data.struct_decl;

    for (sd.field_names, 0..) |fn_, fi| {
        if (!std.mem.eql(u8, fn_, field_name)) continue;

        var buf = std.ArrayList(u8).empty;

        // Doc comment above field: when the field-name slice points into the
        // source buffer, its address gives the field's byte offset.
        const name_addr = @intFromPtr(fn_.ptr);
        const base_addr = @intFromPtr(src.ptr);
        if (name_addr >= base_addr and name_addr < base_addr + src.len) {
            const field_offset = @as(u32, @intCast(name_addr - base_addr));
            if (extractDocComment(src, field_offset)) |comment| {
                try buf.appendSlice(self.allocator, comment);
                try buf.appendSlice(self.allocator, "\n\n");
            }
        }

        try buf.appendSlice(self.allocator, "```sx\n");
        try buf.appendSlice(self.allocator, struct_name);
        try buf.append(self.allocator, '.');
        try buf.appendSlice(self.allocator, field_name);
        if (fi < sd.field_types.len and sd.field_types[fi].data == .type_expr) {
            try buf.appendSlice(self.allocator, " : ");
            try buf.appendSlice(self.allocator, sd.field_types[fi].data.type_expr.name);
        }
        try buf.appendSlice(self.allocator, "\n```");
        return buf.items;
    }
    return null;
}

/// Try to resolve a variable/param name to its struct type name.
fn resolveStructTypeName(analysis: DocumentAnalysis, var_name: []const u8) ?[]const u8 {
    const src = analysis.source;
    const symbols = analysis.sema.symbols;

    // Walk from the end so the most recently recorded symbol with this name
    // wins; only that one symbol is ever examined.
    var remaining = symbols.len;
    while (remaining > 0) {
        remaining -= 1;
        const sym = symbols[remaining];
        if (!std.mem.eql(u8, sym.name, var_name)) continue;

        const ty = sym.ty orelse return null;
        if (ty != .struct_type) return null;

        switch (sym.kind) {
            // For params, def_span is the type_expr span — read the type name from source
            .param => {
                const span = sym.def_span;
                if (span.start < src.len and span.end <= src.len) {
                    return src[span.start..span.end];
                }
            },
            // For variables, find the var_decl node and check type_annotation
            .variable => {
                if (sym.def_span.start < src.len) {
                    if (sx.sema.findNodeAtOffset(analysis.root, sym.def_span.start)) |node| {
                        if (node.data == .var_decl) {
                            if (node.data.var_decl.type_annotation) |ta| {
                                if (ta.data == .type_expr) return ta.data.type_expr.name;
                            }
                        }
                    }
                }
            },
            else => {},
        }

        return null;
    }
    return null;
}

/// Build hover for an enum variant like .red — show EnumName.variant with doc comment.
fn formatEnumVariantHover(self: *Server, analysis: DocumentAnalysis, variant_name: []const u8) !?[]const u8 {
    const src = analysis.source;

    // The first enum (in symbol order) declaring a variant with this name wins.
    for (analysis.sema.symbols) |sym| {
        if (sym.kind != .enum_type) continue;
        if (sym.def_span.start >= src.len) continue;

        const node = sx.sema.findNodeAtOffset(analysis.root, sym.def_span.start) orelse continue;
        if (node.data != .enum_decl) continue;

        for (node.data.enum_decl.variants) |v| {
            if (!std.mem.eql(u8, v, variant_name)) continue;

            var buf = std.ArrayList(u8).empty;

            // Doc comment above variant: when the variant-name slice points into
            // the source buffer, its address gives the variant's byte offset.
            const name_addr = @intFromPtr(v.ptr);
            const base_addr = @intFromPtr(src.ptr);
            if (name_addr >= base_addr and name_addr < base_addr + src.len) {
                const variant_offset = @as(u32, @intCast(name_addr - base_addr));
                if (extractDocComment(src, variant_offset)) |comment| {
                    try buf.appendSlice(self.allocator, comment);
                    try buf.appendSlice(self.allocator, "\n\n");
                }
            }

            try buf.appendSlice(self.allocator, "```sx\n");
            try buf.appendSlice(self.allocator, sym.name);
            try buf.append(self.allocator, '.');
            try buf.appendSlice(self.allocator, variant_name);
            try buf.appendSlice(self.allocator, "\n```");
            return buf.items;
        }
    }
    return null;
}

/// Find a top-level declaration node by name.
+ fn findDeclByName(root: *sx.ast.Node, name: []const u8) ?*sx.ast.Node { + if (root.data != .root) return null; + for (root.data.root.decls) |decl| { + if (decl.data.declName()) |dn| { + if (std.mem.eql(u8, dn, name)) return decl; + } + } + return null; + } + + fn formatSymbolHover(allocator: std.mem.Allocator, sym: sx.sema.Symbol, root: *sx.ast.Node, source: [:0]const u8) ![]const u8 { + // Try offset-based AST lookup (works when symbol is in the same source buffer) + if (sym.def_span.start < source.len) { + if (sx.sema.findNodeAtOffset(root, sym.def_span.start)) |node| { + if (node.data.declName()) |dn| { + if (std.mem.eql(u8, dn, sym.name)) { + return try formatDeclHover(allocator, node, source); + } + } + } + } + + // Fallback: name-based lookup (works for imported symbols whose offsets collide) + if (findDeclByName(root, sym.name)) |decl| { + return try formatDeclHover(allocator, decl, source); + } + + // Last resort: simple format (for params, namespace symbols, etc.) + var buf = std.ArrayList(u8).empty; + + if (sym.def_span.start < source.len) { + if (extractDocComment(source, sym.def_span.start)) |comment| { + try buf.appendSlice(allocator, comment); + try buf.appendSlice(allocator, "\n\n"); + } + } + + try buf.appendSlice(allocator, "```sx\n"); + + switch (sym.kind) { + .function => { + try buf.appendSlice(allocator, sym.name); + try buf.appendSlice(allocator, " :: (...)"); + if (sym.ty) |ty| { + try buf.appendSlice(allocator, " -> "); + const type_name = try ty.displayName(allocator); + try buf.appendSlice(allocator, type_name); + } + }, + .variable => { + try buf.appendSlice(allocator, sym.name); + if (sym.ty) |ty| { + try buf.appendSlice(allocator, " : "); + const type_name = try ty.displayName(allocator); + try buf.appendSlice(allocator, type_name); + } + }, + .constant => { + try buf.appendSlice(allocator, sym.name); + try buf.appendSlice(allocator, " :: "); + if (sym.ty) |ty| { + const type_name = try ty.displayName(allocator); + try 
buf.appendSlice(allocator, type_name); + } else { + try buf.appendSlice(allocator, "(constant)"); + } + }, + .param => { + try buf.appendSlice(allocator, sym.name); + if (sym.ty) |ty| { + try buf.appendSlice(allocator, " : "); + const type_name = try ty.displayName(allocator); + try buf.appendSlice(allocator, type_name); + } + }, + .enum_type => { + try buf.appendSlice(allocator, sym.name); + try buf.appendSlice(allocator, " :: enum { ... }"); + }, + .struct_type => { + try buf.appendSlice(allocator, sym.name); + try buf.appendSlice(allocator, " :: struct { ... }"); + }, + .type_alias => { + try buf.appendSlice(allocator, sym.name); + try buf.appendSlice(allocator, " :: (type)"); + }, + .namespace => { + try buf.appendSlice(allocator, sym.name); + try buf.appendSlice(allocator, " :: (namespace)"); + }, + } + + try buf.appendSlice(allocator, "\n```"); + return buf.items; + } +}; diff --git a/src/lsp/transport.zig b/src/lsp/transport.zig new file mode 100644 index 0000000..32cd509 --- /dev/null +++ b/src/lsp/transport.zig @@ -0,0 +1,75 @@ +const std = @import("std"); + +pub const Transport = struct { + in: *std.Io.Reader, + out_file: std.Io.File, + io: std.Io, + allocator: std.mem.Allocator, + + pub fn init(allocator: std.mem.Allocator, io: std.Io, in: *std.Io.Reader, out_file: std.Io.File) Transport { + return .{ + .in = in, + .out_file = out_file, + .io = io, + .allocator = allocator, + }; + } + + /// Read one LSP message: parse Content-Length header, read body. 
+ pub fn readMessage(self: *Transport) ![]const u8 { + var content_length: ?usize = null; + + // Parse headers (terminated by \r\n\r\n) + while (true) { + const line = try self.readLine(); + if (line.len == 0) break; // empty line = end of headers + + if (std.mem.startsWith(u8, line, "Content-Length: ")) { + content_length = std.fmt.parseInt(usize, line["Content-Length: ".len..], 10) catch + return error.InvalidContentLength; + } + } + + const len = content_length orelse return error.MissingContentLength; + + const body = try self.allocator.alloc(u8, len); + try self.in.readSliceAll(body); + + return body; + } + + /// Write one LSP message: Content-Length header + body. + pub fn writeMessage(self: *Transport, body: []const u8) !void { + var buf: [32]u8 = undefined; + const len_str = std.fmt.bufPrint(&buf, "{d}", .{body.len}) catch unreachable; + + self.out_file.writeStreamingAll(self.io, "Content-Length: ") catch return error.WriteFailed; + self.out_file.writeStreamingAll(self.io, len_str) catch return error.WriteFailed; + self.out_file.writeStreamingAll(self.io, "\r\n\r\n") catch return error.WriteFailed; + self.out_file.writeStreamingAll(self.io, body) catch return error.WriteFailed; + } + + /// Read a single line terminated by \r\n. Returns content without \r\n. + fn readLine(self: *Transport) ![]const u8 { + var buf = std.ArrayList(u8).empty; + while (true) { + const byte = self.in.takeByte() catch |err| switch (err) { + error.EndOfStream => { + if (buf.items.len == 0) return error.EndOfStream; + return buf.items; + }, + else => return error.ReadFailed, + }; + + if (byte == '\n') { + const line = buf.items; + if (line.len > 0 and line[line.len - 1] == '\r') { + return line[0 .. 
line.len - 1]; + } + return line; + } + + try buf.append(self.allocator, byte); + } + } +}; diff --git a/src/lsp/types.zig b/src/lsp/types.zig new file mode 100644 index 0000000..74e2f9a --- /dev/null +++ b/src/lsp/types.zig @@ -0,0 +1,331 @@ +const std = @import("std"); + +pub const Position = struct { + line: u32, + character: u32, +}; + +pub const Range = struct { + start: Position, + end: Position, +}; + +pub const Location = struct { + uri: []const u8, + range: Range, +}; + +pub const Diagnostic = struct { + range: Range, + severity: u32, + message: []const u8, + source: []const u8 = "sx", +}; + +/// Build a JSON-RPC response with a pre-built result JSON string. +pub fn jsonRpcResponse(allocator: std.mem.Allocator, id_json: []const u8, result_json: []const u8) ![]const u8 { + return std.fmt.allocPrint(allocator, "{{\"jsonrpc\":\"2.0\",\"id\":{s},\"result\":{s}}}", .{ id_json, result_json }); +} + +/// Build a JSON-RPC notification. +pub fn jsonRpcNotification(allocator: std.mem.Allocator, method: []const u8, params_json: []const u8) ![]const u8 { + return std.fmt.allocPrint(allocator, "{{\"jsonrpc\":\"2.0\",\"method\":\"{s}\",\"params\":{s}}}", .{ method, params_json }); +} + +/// Serialize a JSON Value to string. +pub fn valueToJson(allocator: std.mem.Allocator, value: std.json.Value) ![]const u8 { + var buf = std.ArrayList(u8).empty; + try writeJsonValue(&buf, allocator, value); + return buf.items; +} + +/// Escape a string for JSON. 
pub fn jsonString(allocator: std.mem.Allocator, s: []const u8) ![]const u8 {
    // Produces `s` as a quoted JSON string literal. The result is an
    // exactly-sized allocation: caller owns it and may free it with `allocator`.
    var buf = std.ArrayList(u8).empty;
    errdefer buf.deinit(allocator);
    try appendJsonEscaped(&buf, allocator, s);
    return try buf.toOwnedSlice(allocator);
}

/// Append `s` to `buf` as a quoted JSON string literal.
/// Quote and backslash are backslash-escaped, the common control characters
/// use their short forms, and every other byte below 0x20 is written as
/// `\u00XX`, since JSON forbids raw control characters inside strings.
/// Bytes from 0x20 upward are copied through unchanged.
fn appendJsonEscaped(buf: *std.ArrayList(u8), allocator: std.mem.Allocator, s: []const u8) !void {
    const hex = "0123456789abcdef";
    try buf.append(allocator, '"');
    for (s) |ch| {
        switch (ch) {
            '"' => try buf.appendSlice(allocator, "\\\""),
            '\\' => try buf.appendSlice(allocator, "\\\\"),
            '\n' => try buf.appendSlice(allocator, "\\n"),
            '\r' => try buf.appendSlice(allocator, "\\r"),
            '\t' => try buf.appendSlice(allocator, "\\t"),
            0x08 => try buf.appendSlice(allocator, "\\b"),
            0x0C => try buf.appendSlice(allocator, "\\f"),
            0x00...0x07, 0x0B, 0x0E...0x1F => {
                const escape = [_]u8{ '\\', 'u', '0', '0', hex[ch >> 4], hex[ch & 0x0F] };
                try buf.appendSlice(allocator, &escape);
            },
            else => try buf.append(allocator, ch),
        }
    }
    try buf.append(allocator, '"');
}

/// Serialize `value` as compact JSON (no whitespace) onto the end of `buf`.
/// Object members are written in the map's iteration order. Non-finite floats
/// have no JSON representation and are written as `null`.
fn writeJsonValue(buf: *std.ArrayList(u8), allocator: std.mem.Allocator, value: std.json.Value) !void {
    switch (value) {
        .null => try buf.appendSlice(allocator, "null"),
        .bool => |b| try buf.appendSlice(allocator, if (b) "true" else "false"),
        .integer => |i| {
            const s = try std.fmt.allocPrint(allocator, "{d}", .{i});
            defer allocator.free(s); // temporary: already copied into buf
            try buf.appendSlice(allocator, s);
        },
        .float => |f| {
            if (!std.math.isFinite(f)) {
                try buf.appendSlice(allocator, "null");
                return;
            }
            const s = try std.fmt.allocPrint(allocator, "{d}", .{f});
            defer allocator.free(s);
            try buf.appendSlice(allocator, s);
        },
        // Escape straight into the output instead of building a temporary string.
        .string => |s| try appendJsonEscaped(buf, allocator, s),
        .array => |arr| {
            try buf.append(allocator, '[');
            for (arr.items, 0..) |item, idx| {
                if (idx > 0) try buf.append(allocator, ',');
                try writeJsonValue(buf, allocator, item);
            }
            try buf.append(allocator, ']');
        },
        .object => |obj| {
            try buf.append(allocator, '{');
            var first = true;
            var it = obj.iterator();
            while (it.next()) |entry| {
                if (!first) try buf.append(allocator, ',');
                first = false;
                try appendJsonEscaped(buf, allocator, entry.key_ptr.*);
                try buf.append(allocator, ':');
                try writeJsonValue(buf, allocator, entry.value_ptr.*);
            }
            try buf.append(allocator, '}');
        },
        // Already the textual form of a number: copied through verbatim.
        .number_string => |s| try buf.appendSlice(allocator, s),
    }
}

/// Build the initialize result JSON.
pub fn initializeResultJson(allocator: std.mem.Allocator) ![]const u8 {
    // The capabilities are fixed, so the payload is a plain literal copied
    // into allocator-owned memory.
    const payload =
        "{\"capabilities\":{\"textDocumentSync\":1,\"definitionProvider\":true,\"hoverProvider\":true,\"documentSymbolProvider\":true," ++
        "\"completionProvider\":{\"triggerCharacters\":[\".\"]}," ++
        "\"signatureHelpProvider\":{\"triggerCharacters\":[\"(\",\",\"]}," ++
        "\"semanticTokensProvider\":{\"legend\":{" ++
        "\"tokenTypes\":[\"namespace\",\"type\",\"enum\",\"struct\",\"parameter\",\"variable\",\"enumMember\",\"function\",\"keyword\",\"number\",\"string\",\"operator\"]," ++
        "\"tokenModifiers\":[\"declaration\",\"readonly\"]" ++
        "},\"full\":true}}}";
    const copy = try allocator.dupe(u8, payload);
    return copy;
}

/// LSP SymbolKind enum values.
pub const SymbolKindLsp = enum(u32) {
    File = 1,
    Module = 2,
    Namespace = 3,
    Package = 4,
    Class = 5,
    Method = 6,
    Property = 7,
    Field = 8,
    Constructor = 9,
    Enum = 10,
    Interface = 11,
    Function = 12,
    Variable = 13,
    Constant = 14,
    String = 15,
    Number = 16,
    Boolean = 17,
    Array = 18,
    Object = 19,
    Key = 20,
    Null = 21,
    EnumMember = 22,
    Struct = 23,
    Event = 24,
    Operator = 25,
    TypeParameter = 26,
};

/// LSP CompletionItemKind enum values.
pub const CompletionItemKind = enum(u32) {
    Text = 1,
    Method = 2,
    Function = 3,
    Constructor = 4,
    Field = 5,
    Variable = 6,
    Class = 7,
    Interface = 8,
    Module = 9,
    Property = 10,
    Unit = 11,
    Value = 12,
    Enum = 13,
    Keyword = 14,
    Snippet = 15,
    Color = 16,
    File = 17,
    Reference = 18,
    Folder = 19,
    EnumMember = 20,
    Constant = 21,
    Struct = 22,
    Event = 23,
    Operator = 24,
    TypeParameter = 25,
};

/// Build document symbols JSON array.
/// Each element carries name, kind, range and selectionRange.
pub fn documentSymbolsJson(allocator: std.mem.Allocator, symbols: []const DocumentSymbol) ![]const u8 {
    // One object per symbol; split across lines purely for readability.
    const item_fmt =
        "{{\"name\":{s},\"kind\":{d}," ++
        "\"range\":{{\"start\":{{\"line\":{d},\"character\":{d}}},\"end\":{{\"line\":{d},\"character\":{d}}}}}," ++
        "\"selectionRange\":{{\"start\":{{\"line\":{d},\"character\":{d}}},\"end\":{{\"line\":{d},\"character\":{d}}}}}}}";

    var out = std.ArrayList(u8).empty;
    try out.append(allocator, '[');
    for (symbols, 0..) |sym, idx| {
        if (idx > 0) try out.append(allocator, ',');
        const quoted_name = try jsonString(allocator, sym.name);
        const full = sym.range;
        const sel = sym.selection_range;
        const item = try std.fmt.allocPrint(allocator, item_fmt, .{
            quoted_name,        sym.kind,
            full.start.line,    full.start.character,
            full.end.line,      full.end.character,
            sel.start.line,     sel.start.character,
            sel.end.line,       sel.end.character,
        });
        try out.appendSlice(allocator, item);
    }
    try out.append(allocator, ']');
    return out.items;
}

/// One entry of a documentSymbol response; `kind` is the numeric LSP SymbolKind.
pub const DocumentSymbol = struct {
    name: []const u8,
    kind: u32,
    range: Range,
    selection_range: Range,
};

/// Build completion items JSON array.
/// The "detail" member is emitted only for items that have one.
pub fn completionItemsJson(allocator: std.mem.Allocator, items: []const CompletionItem) ![]const u8 {
    var out = std.ArrayList(u8).empty;
    try out.append(allocator, '[');
    for (items, 0..) |item, idx| {
        if (idx > 0) try out.append(allocator, ',');
        const quoted_label = try jsonString(allocator, item.label);
        const json = if (item.detail) |detail| with_detail: {
            const quoted_detail = try jsonString(allocator, detail);
            break :with_detail try std.fmt.allocPrint(
                allocator,
                "{{\"label\":{s},\"kind\":{d},\"detail\":{s}}}",
                .{ quoted_label, item.kind, quoted_detail },
            );
        } else try std.fmt.allocPrint(
            allocator,
            "{{\"label\":{s},\"kind\":{d}}}",
            .{ quoted_label, item.kind },
        );
        try out.appendSlice(allocator, json);
    }
    try out.append(allocator, ']');
    return out.items;
}

/// One completion entry; `kind` is the numeric LSP CompletionItemKind.
pub const CompletionItem = struct {
    label: []const u8,
    kind: u32,
    detail: ?[]const u8 = null,
};

/// Build a Location JSON response (for go-to-definition).
/// `uri` is JSON-escaped with `jsonString`; `range` is written as `start`/`end`
/// objects holding `line` and `character`. The caller owns the returned slice.
pub fn locationJson(allocator: std.mem.Allocator, uri: []const u8, range: Range) ![]const u8 {
    const uri_escaped = try jsonString(allocator, uri);
    return std.fmt.allocPrint(allocator,
        "{{\"uri\":{s},\"range\":{{\"start\":{{\"line\":{d},\"character\":{d}}},\"end\":{{\"line\":{d},\"character\":{d}}}}}}}",
        .{ uri_escaped, range.start.line, range.start.character, range.end.line, range.end.character },
    );
}

/// Build a Hover JSON response.
/// `contents` is JSON-escaped and emitted as the `value` of a `markdown` content object.
/// The caller owns the returned slice.
pub fn hoverJson(allocator: std.mem.Allocator, contents: []const u8) ![]const u8 {
    const escaped = try jsonString(allocator, contents);
    return std.fmt.allocPrint(allocator,
        "{{\"contents\":{{\"kind\":\"markdown\",\"value\":{s}}}}}",
        .{escaped},
    );
}

/// Build a SignatureHelp JSON response.
/// Emits a single signature (`activeSignature` is always 0) whose parameters are
/// `param_labels` in order; `active_param` is written as `activeParameter` without
/// being checked against `param_labels.len`.
/// Returns an exactly-sized slice allocated with `allocator`; the caller owns it.
pub fn signatureHelpJson(allocator: std.mem.Allocator, label: []const u8, param_labels: []const []const u8, active_param: u32) ![]const u8 {
    var buf = std.ArrayList(u8).empty;
    // Release the partially built buffer if any later step fails.
    errdefer buf.deinit(allocator);

    const label_escaped = try jsonString(allocator, label);

    try buf.appendSlice(allocator, "{\"signatures\":[{\"label\":");
    try buf.appendSlice(allocator, label_escaped);
    try buf.appendSlice(allocator, ",\"parameters\":[");

    for (param_labels, 0..) |pl, idx| {
        if (idx > 0) try buf.append(allocator, ',');
        const pl_escaped = try jsonString(allocator, pl);
        try buf.appendSlice(allocator, "{\"label\":");
        try buf.appendSlice(allocator, pl_escaped);
        try buf.append(allocator, '}');
    }

    try buf.appendSlice(allocator, "]}],\"activeSignature\":0,\"activeParameter\":");
    // Format the index straight into the buffer instead of via a temporary string.
    try buf.print(allocator, "{d}", .{active_param});
    try buf.append(allocator, '}');

    // Hand back a slice that matches its allocation so callers may free it.
    return try buf.toOwnedSlice(allocator);
}

/// Semantic token type indices (must match legend in initializeResultJson).
/// Each constant is the position of the same-named entry in the `tokenTypes`
/// array; trailing underscores avoid Zig keywords / primitive names.
pub const SemanticTokenType = struct {
    pub const namespace: u32 = 0;
    pub const type_: u32 = 1;
    pub const enum_: u32 = 2;
    pub const struct_: u32 = 3;
    pub const parameter: u32 = 4;
    pub const variable: u32 = 5;
    pub const enum_member: u32 = 6;
    pub const function: u32 = 7;
    pub const keyword: u32 = 8;
    pub const number: u32 = 9;
    pub const string_: u32 = 10;
    pub const operator_: u32 = 11;
};

/// Build a SemanticTokens JSON response.
/// `data` is written verbatim as the comma-separated `data` array.
/// Returns an exactly-sized slice allocated with `allocator`; the caller owns it.
pub fn semanticTokensJson(allocator: std.mem.Allocator, data: []const u32) ![]const u8 {
    var buf = std.ArrayList(u8).empty;
    errdefer buf.deinit(allocator);

    try buf.appendSlice(allocator, "{\"data\":[");
    for (data, 0..) |val, idx| {
        if (idx > 0) try buf.append(allocator, ',');
        // Format into the buffer directly; no temporary string per value.
        try buf.print(allocator, "{d}", .{val});
    }
    try buf.appendSlice(allocator, "]}");
    return try buf.toOwnedSlice(allocator);
}

/// Build publishDiagnostics params JSON.
/// Emits `uri` followed by one object per diagnostic with `range`, `severity`,
/// `message` and `source`; string fields are escaped with `jsonString`.
/// Returns an exactly-sized slice allocated with `allocator`; the caller owns it.
pub fn publishDiagnosticsJson(allocator: std.mem.Allocator, uri: []const u8, diagnostics: []const Diagnostic) ![]const u8 {
    var buf = std.ArrayList(u8).empty;
    errdefer buf.deinit(allocator);

    const uri_escaped = try jsonString(allocator, uri);

    try buf.appendSlice(allocator, "{\"uri\":");
    try buf.appendSlice(allocator, uri_escaped);
    try buf.appendSlice(allocator, ",\"diagnostics\":[");

    for (diagnostics, 0..) |d, idx| {
        if (idx > 0) try buf.append(allocator, ',');
        const msg_escaped = try jsonString(allocator, d.message);
        const src_escaped = try jsonString(allocator, d.source);
        try buf.print(allocator,
            "{{\"range\":{{\"start\":{{\"line\":{d},\"character\":{d}}},\"end\":{{\"line\":{d},\"character\":{d}}}}},\"severity\":{d},\"message\":{s},\"source\":{s}}}",
            .{ d.range.start.line, d.range.start.character, d.range.end.line, d.range.end.character, d.severity, msg_escaped, src_escaped },
        );
    }

    try buf.appendSlice(allocator, "]}");
    return try buf.toOwnedSlice(allocator);
}
// diff --git a/src/main.zig b/src/main.zig
// new file mode 100644
// index 0000000..caea656
// --- /dev/null
// +++ b/src/main.zig
// @@ -0,0 +1,158 @@
const std = @import("std");
const sx = @import("sx");

/// Command-line entry point: dispatches `lsp`, `build`, `ir` and `run`.
/// Missing arguments or an unknown command print the usage text.
/// A failing `build`, `ir` or `run` terminates the process with exit status 1.
pub fn main(init: std.process.Init) !void {
    const allocator = init.arena.allocator();
    const io = init.io;
    const args = try init.minimal.args.toSlice(allocator);

    if (args.len < 2) {
        printUsage();
        return;
    }

    const command = args[1];

    // LSP subcommand doesn't need a file argument
    if (std.mem.eql(u8, command, "lsp")) {
        runLsp(allocator, io);
        return;
    }

    if (args.len < 3) {
        printUsage();
        return;
    }

    const input_path = args[2];

    // The helpers below print their own diagnostics; here we only make sure
    // the failure is visible to the caller through the exit status.
    if (std.mem.eql(u8, command, "build")) {
        const output_name = deriveOutputName(input_path);
        compile(allocator, io, input_path, output_name) catch std.process.exit(1);
        std.debug.print("compiled: {s}\n", .{output_name});
    } else if (std.mem.eql(u8, command, "ir")) {
        emitIR(allocator, io, input_path) catch std.process.exit(1);
    } else if (std.mem.eql(u8, command, "run")) {
        runProgram(allocator, io, input_path) catch std.process.exit(1);
    } else {
        printUsage();
    }
}

/// Compile `input_path` to a temporary binary, run it, and delete the binary.
/// Returns an error (after printing a message) when compiling, spawning or
/// waiting fails, so the temp-file cleanup still runs before the process exits.
fn runProgram(allocator: std.mem.Allocator, io: std.Io, input_path: []const u8) !void {
    // NOTE(review): fixed path — two concurrent `sx run` invocations would
    // overwrite each other's binary.
    const tmp_bin = "/tmp/sx_run_tmp";
    try compile(allocator, io, input_path, tmp_bin);
    defer {
        std.Io.Dir.deleteFile(.cwd(), io, tmp_bin) catch {};
    }
    var child = std.process.spawn(io, .{
        .argv = &.{tmp_bin},
    }) catch {
        std.debug.print("error: failed to run program\n", .{});
        return error.RunFailed;
    };
    // NOTE(review): the child's termination status is discarded, so the
    // program's own exit code is not forwarded.
    _ = child.wait(io) catch {
        std.debug.print("error: program execution failed\n", .{});
        return error.RunFailed;
    };
}

/// Print the command-line usage summary via `std.debug.print`.
fn printUsage() void {
    std.debug.print(
        \\Usage: sx <command> [file.sx]
        \\
        \\Commands:
        \\ run Build and run immediately
        \\ build Build binary in current directory
        \\ ir Print LLVM IR to stdout
        \\ lsp Start language server (LSP)
        \\
    , .{});
}

/// Run the language server: read messages from stdin through the transport and
/// hand each to the server until the input ends, a read fails, or
/// `handleMessage` returns false.
fn runLsp(allocator: std.mem.Allocator, io: std.Io) void {
    const Transport = sx.lsp.transport.Transport;
    const Server = sx.lsp.server.Server;

    const stdin_file = std.Io.File.stdin();
    const stdout_file = std.Io.File.stdout();

    var read_buf: [4096]u8 = undefined;
    var stdin_reader = stdin_file.readerStreaming(io, &read_buf);

    var transport = Transport.init(allocator, io, &stdin_reader.interface, stdout_file);
    var server = Server.init(allocator, &transport, io);

    while (true) {
        const msg = transport.readMessage() catch |err| {
            // End of input is the normal shutdown path; anything else is reported.
            if (err == error.EndOfStream) break;
            std.debug.print("lsp: read error: {}\n", .{err});
            break;
        };

        const keep_going = server.handleMessage(msg);

        if (!keep_going) break;
    }
}

/// Derive the output binary name from `input_path`: the part after the last
/// '/' with a trailing ".sx" removed. The result is a sub-slice of `input_path`.
fn deriveOutputName(input_path: []const u8) []const u8 {
    // Get basename (strip directory)
    var start: usize = 0;
    for (input_path, 0..) |ch, i| {
        if (ch == '/') start = i + 1;
    }
    const basename = input_path[start..];
    // Strip .sx extension
    if (std.mem.endsWith(u8, basename, ".sx")) {
        return basename[0 .. basename.len - 3];
    }
    return basename;
}

/// Read `input_path` (at most 10 MiB) and return a NUL-terminated copy.
/// Prints a message and returns `error.CompileError` when the file cannot be read.
fn readSource(allocator: std.mem.Allocator, io: std.Io, input_path: []const u8) ![:0]const u8 {
    const source_bytes = std.Io.Dir.readFileAlloc(.cwd(), io, input_path, allocator, .limited(10 * 1024 * 1024)) catch |err| {
        std.debug.print("error: cannot read '{s}': {}\n", .{ input_path, err });
        return error.CompileError;
    };
    return try allocator.dupeZ(u8, source_bytes);
}

/// Parse, resolve imports, generate and verify code for `input_path`, then
/// print the IR. Any stage failure renders the collected errors and returns
/// `error.CompileError`.
fn emitIR(allocator: std.mem.Allocator, io: std.Io, input_path: []const u8) !void {
    const source = try readSource(allocator, io, input_path);

    var comp = sx.core.Compilation.init(allocator, io, input_path, source);
    defer comp.deinit();

    comp.parse() catch { comp.renderErrors(); return error.CompileError; };
    comp.resolveImports() catch { comp.renderErrors(); return error.CompileError; };
    comp.generateCode() catch { comp.renderErrors(); return error.CompileError; };

    var cg = &comp.cg.?;
    cg.verify() catch { comp.renderErrors(); return error.CompileError; };
    cg.printIR();
}

/// Compile `input_path` into an executable at `output_path`.
/// Runs the same front-end stages as `emitIR`, writes `<output_path>.o`, links
/// it, and removes the object file afterwards. Any failure prints a diagnostic
/// and returns `error.CompileError`.
fn compile(allocator: std.mem.Allocator, io: std.Io, input_path: []const u8, output_path: []const u8) !void {
    const source = try readSource(allocator, io, input_path);

    var comp = sx.core.Compilation.init(allocator, io, input_path, source);
    defer comp.deinit();

    comp.parse() catch { comp.renderErrors(); return error.CompileError; };
    comp.resolveImports() catch { comp.renderErrors(); return error.CompileError; };
    comp.generateCode() catch { comp.renderErrors(); return error.CompileError; };

    var cg = &comp.cg.?;
    cg.verify() catch { comp.renderErrors(); return error.CompileError; };

    // Emit object file
    const obj_path = try std.fmt.allocPrintSentinel(allocator, "{s}.o", .{output_path}, 0);
    cg.emitObject(obj_path.ptr) catch { comp.renderErrors(); return error.CompileError; };
    // Clean up object file on every exit path from here on, including a
    // failed link; removal is best-effort.
    defer std.Io.Dir.deleteFile(.cwd(), io, obj_path) catch {};

    // Link
    sx.codegen.CodeGen.link(io, obj_path, output_path) catch {
        std.debug.print("error: linking failed\n", .{});
        return error.CompileError;
    };
}
// diff --git a/src/parser.zig b/src/parser.zig
// new file mode 100644
// index 0000000..d1c227f
// --- /dev/null
// +++ b/src/parser.zig
// @@ -0,0 +1,1573 @@
const std = @import("std");
const Token = @import("token.zig").Token;
const Tag = @import("token.zig").Tag;
const Lexer = @import("lexer.zig").Lexer;
const ast = @import("ast.zig");
const Node = ast.Node;
const Type = @import("types.zig").Type;
const errors = @import("errors.zig");

/// Parser that turns the token stream of one source buffer into `ast.Node`s,
/// keeping a single token of lookahead in `current`.
pub const Parser = struct {
    lexer: Lexer,
    /// The token currently being examined.
    current: Token,
    source: [:0]const u8,
    /// Used for every node, list and string the parser creates.
    allocator: std.mem.Allocator,
    err_msg: ?[]const u8,
    err_offset: ?u32 = null,
    /// Used as the `end` of every span built by `createNode`.
    prev_end: u32 = 0,
    diagnostics: ?*errors.DiagnosticList = null,

    /// Create a parser over `source` and prime `current` with the first token.
    pub fn init(allocator: std.mem.Allocator, source: [:0]const u8) Parser {
        var lexer = Lexer.init(source);
        const first = lexer.next();
        return .{
            .lexer = lexer,
            .current = first,
            .source = source,
            .allocator = allocator,
            .err_msg = null,
            .err_offset = null,
        };
    }

    /// Allocate a node spanning from `start` to `prev_end` carrying `data`.
    fn createNode(self: *Parser, start: u32, data: Node.Data) !*Node {
        const node = try self.allocator.create(Node);
        node.* = .{ .span = .{ .start = start, .end = self.prev_end }, .data = data };
        return node;
    }

    /// Parse top-level declarations until end of file and return the `root` node.
    pub fn parse(self: *Parser) anyerror!*Node {
        var decls = std.ArrayList(*Node).empty;
        while (self.current.tag != .eof) {
            const decl = try self.parseTopLevel();
            try decls.append(self.allocator, decl);
        }
        const node = try self.createNode(0, .{ .root = .{ .decls = try decls.toOwnedSlice(self.allocator) } });
        return node;
    }

    /// Parse one top-level item: a flat `#import`, a `#run` directive, or a
    /// declaration introduced by an identifier followed by `::`, `:` or `:=`.
    fn parseTopLevel(self: *Parser) anyerror!*Node {
        const start = self.current.loc.start;

        // Top-level flat import: #import "path";
        if (self.current.tag == .hash_import) {
            self.advance();
            if (self.current.tag != .string_literal) {
                return self.fail("expected string path after '#import'");
            }
            const raw = self.tokenSlice(self.current);
            // Drop the first and last character of the token (the quotes).
            const path = raw[1 .. raw.len - 1];
            self.advance();
            try self.expect(.semicolon);
            return try self.createNode(start, .{ .import_decl = .{ .path = path, .name = null } });
        }

        // Top-level #run directive
        if (self.current.tag == .hash_run) {
            self.advance();
            const expr = try self.parseExpr();
            try self.expect(.semicolon);
            return try self.createNode(start, .{ .comptime_expr = .{ .expr = expr } });
        }

        // All top-level declarations start with an identifier
        if (self.current.tag != .identifier) {
            return self.fail("expected identifier at top level");
        }
        const name = self.tokenSlice(self.current);
        self.advance();

        // IDENT :: ...
        if (self.current.tag == .colon_colon) {
            self.advance();
            return self.parseConstBinding(name, start);
        }

        // IDENT : type : value; (typed constant)
        // IDENT : type = value; (typed variable)
        if (self.current.tag == .colon) {
            self.advance();
            return self.parseTypedBinding(name, start);
        }

        // IDENT := value; (variable)
        if (self.current.tag == .colon_equal) {
            self.advance();
            const value = try self.parseExpr();
            try self.expectSemicolonAfter(value);
            return try self.createNode(start, .{ .var_decl = .{ .name = name, .type_annotation = null, .value = value } });
        }

        return self.fail("expected '::', ':=', or ':' after identifier");
    }

    /// Parse what follows `name ::`: an import, `#run`, `#builtin`, an
    /// enum/struct/union declaration, a function, a bare-block function, or a
    /// constant expression. `start_pos` is the offset of `name`.
    fn parseConstBinding(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node {
        // After `::`
        // Could be: #run expr, enum { ... }, (params) -> type { body }, or expr;

        // Namespaced import: name :: #import "path";
        if (self.current.tag == .hash_import) {
            self.advance();
            if (self.current.tag != .string_literal) {
                return self.fail("expected string path after '#import'");
            }
            const raw = self.tokenSlice(self.current);
            const path = raw[1 .. raw.len - 1];
            self.advance();
            try self.expect(.semicolon);
            return try self.createNode(start_pos, .{ .import_decl = .{ .path = path, .name = name } });
        }

        // Compile-time evaluation: name :: #run expr;
        if (self.current.tag == .hash_run) {
            const run_start = self.current.loc.start;
            self.advance();
            const inner = try self.parseExpr();
            try self.expect(.semicolon);
            const ct = try self.createNode(run_start, .{ .comptime_expr = .{ .expr = inner } });
            return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = ct } });
        }

        // Built-in declaration: name :: #builtin;
        if (self.current.tag == .hash_builtin) {
            const bi_start = self.current.loc.start;
            self.advance();
            try self.expect(.semicolon);
            const bi = try self.createNode(bi_start, .{ .builtin_expr = {} });
            return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = bi } });
        }

        // Enum declaration
        if (self.current.tag == .kw_enum) {
            return self.parseEnumDecl(name, start_pos);
        }

        // Struct declaration
        if (self.current.tag == .kw_struct) {
            return self.parseStructDecl(name, start_pos);
        }

        // Union declaration
        if (self.current.tag == .kw_union) {
            return self.parseUnionDecl(name, start_pos);
        }

        // Function declaration: (params) -> type { body } or () { body }
        if (self.current.tag == .l_paren) {
            // Look ahead: is this a function or an expression starting with `(`?
            // Heuristic: if after matching parens we see `{` or `->`, it's a function.
            if (self.isFunctionDef()) {
                return self.parseFnDecl(name, start_pos);
            }
        }

        // Bare block shorthand: name :: { body } is equivalent to name :: () { body }
        if (self.current.tag == .l_brace) {
            const body = try self.parseBlock();
            return try self.createNode(start_pos, .{ .fn_decl = .{ .name = name, .params = &.{}, .return_type = null, .body = body } });
        }

        // Otherwise it's a constant expression
        const value = try self.parseExpr();
        try self.expect(.semicolon);
        return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = null, .value = value } });
    }

    /// Parse what follows `name :`: a type, then `: value;` (constant),
    /// `= value;` (variable) or `;` (variable without initializer).
    fn parseTypedBinding(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node {
        // After `name :`
        // Parse type
        const type_node = try self.parseTypeExpr();

        if (self.current.tag == .colon) {
            // name : type : value; (typed constant)
            self.advance();
            const value = try self.parseExpr();
            try self.expectSemicolonAfter(value);
            return try self.createNode(start_pos, .{ .const_decl = .{ .name = name, .type_annotation = type_node, .value = value } });
        }

        if (self.current.tag == .equal) {
            // name : type = value; (typed variable)
            self.advance();
            const value = try self.parseExpr();
            try self.expectSemicolonAfter(value);
            return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .type_annotation = type_node, .value = value } });
        }

        if (self.current.tag == .semicolon) {
            // name : type; (default-initialized variable)
            self.advance();
            return try self.createNode(start_pos, .{ .var_decl = .{ .name = name, .type_annotation = type_node, .value = null } });
        }

        return self.fail("expected ':', '=' or ';' after type annotation");
    }

    /// Parse a type expression: `[N]T`, `$T`, a (possibly dotted) type name,
    /// or a parameterized type such as `Vector(N, T)`.
    /// A malformed or out-of-range integer type argument is a parse error.
    fn parseTypeExpr(self: *Parser) anyerror!*Node {
        const start = self.current.loc.start;

        // Array type: [N]T
        if (self.current.tag == .l_bracket) {
            self.advance(); // skip '['
            const len_node = try self.parseExpr();
            try self.expect(.r_bracket);
            const elem_type = try self.parseTypeExpr();
            return try self.createNode(start, .{ .array_type_expr = .{ .length = len_node, .element_type = elem_type } });
        }

        // Generic type parameter introduction: $T
        if (self.current.tag == .dollar) {
            self.advance();
            if (self.current.tag != .identifier) {
                return self.fail("expected type parameter name after '$'");
            }
            const name = self.tokenSlice(self.current);
            self.advance();
            return try self.createNode(start, .{ .type_expr = .{ .name = name, .is_generic = true } });
        }
        if (self.current.tag.isTypeKeyword() or self.current.tag == .identifier) {
            var name = self.tokenSlice(self.current);
            self.advance();

            // Qualified name: ns.Type or ns.Type(args)
            while (self.current.tag == .dot) {
                self.advance();
                if (self.current.tag == .identifier or self.current.tag.isTypeKeyword()) {
                    name = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ name, self.tokenSlice(self.current) });
                    self.advance();
                } else break;
            }

            // Parameterized type: Vector(N, T) or later generic struct instantiation
            if (self.current.tag == .l_paren) {
                self.advance(); // skip '('
                var args = std.ArrayList(*Node).empty;
                while (self.current.tag != .r_paren and self.current.tag != .eof) {
                    if (args.items.len > 0) {
                        try self.expect(.comma);
                    }
                    // Args can be int literals (for lengths) or type expressions
                    if (self.current.tag == .int_literal) {
                        const arg_start = self.current.loc.start;
                        const text = self.tokenSlice(self.current);
                        // Report an unparsable literal instead of silently using 0.
                        const value = std.fmt.parseInt(i64, text, 10) catch
                            return self.fail("invalid integer literal in type argument");
                        self.advance();
                        try args.append(self.allocator, try self.createNode(arg_start, .{ .int_literal = .{ .value = value } }));
                    } else {
                        try args.append(self.allocator, try self.parseTypeExpr());
                    }
                }
                try self.expect(.r_paren);
                return try self.createNode(start, .{ .parameterized_type_expr = .{
                    .name = name,
                    .args = try args.toOwnedSlice(self.allocator),
                } });
            }

            return try self.createNode(start, .{ .type_expr = .{ .name = name } });
        }
        return self.fail("expected type name");
    }

    /// Parse `enum { A; B; ... }` after `name ::`; the `;` after a variant is optional.
    fn parseEnumDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node {
        self.advance(); // skip 'enum'
        try self.expect(.l_brace);
        var variants = std.ArrayList([]const u8).empty;
        while (self.current.tag != .r_brace and self.current.tag != .eof) {
            if (self.current.tag != .identifier) {
                return self.fail("expected variant name");
            }
            try variants.append(self.allocator, self.tokenSlice(self.current));
            self.advance();
            if (self.current.tag == .semicolon) {
                self.advance();
            }
        }
        try self.expect(.r_brace);
        return try self.createNode(start_pos, .{ .enum_decl = .{ .name = name, .variants = try variants.toOwnedSlice(self.allocator) } });
    }

    /// Parse `union { a: T; b; ... }` after `name ::`. `variant_types` is
    /// parallel to `variant_names`, with null for variants written without a type.
    fn parseUnionDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node {
        self.advance(); // skip 'union'
        try self.expect(.l_brace);
        var variant_names = std.ArrayList([]const u8).empty;
        var variant_types = std.ArrayList(?*Node).empty;
        while (self.current.tag != .r_brace and self.current.tag != .eof) {
            if (self.current.tag != .identifier) {
                return self.fail("expected variant name in union");
            }
            try variant_names.append(self.allocator, self.tokenSlice(self.current));
            self.advance();
            if (self.current.tag == .colon) {
                // Typed variant: name: type;
                self.advance();
                const vtype = try self.parseTypeExpr();
                try variant_types.append(self.allocator, vtype);
            } else {
                // Void variant: name;
                try variant_types.append(self.allocator, null);
            }
            if (self.current.tag == .semicolon) {
                self.advance();
            }
        }
        try self.expect(.r_brace);
        return try self.createNode(start_pos, .{ .union_decl = .{
            .name = name,
            .variant_names = try variant_names.toOwnedSlice(self.allocator),
            .variant_types = try variant_types.toOwnedSlice(self.allocator),
        } });
    }

    /// Parse `struct` with optional `($N: u32, $T: Type)` parameters and a
    /// field list. Fields may be grouped (`a, b: T = default;`); every name in
    /// a group shares the same type node and default node.
    fn parseStructDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node {
        self.advance(); // skip 'struct'

        // Optional type params: struct($N: u32, $T: Type) { ... }
        var type_params = std.ArrayList(ast.StructTypeParam).empty;
        if (self.current.tag == .l_paren) {
            self.advance(); // skip '('
            while (self.current.tag != .r_paren and self.current.tag != .eof) {
                if (type_params.items.len > 0) {
                    try self.expect(.comma);
                }
                // Expect $name : constraint
                try self.expect(.dollar);
                if (self.current.tag != .identifier) {
                    return self.fail("expected type parameter name after '$'");
                }
                const param_name = self.tokenSlice(self.current);
                self.advance();
                try self.expect(.colon);
                const constraint = try self.parseTypeExpr();
                try type_params.append(self.allocator, .{ .name = param_name, .constraint = constraint });
            }
            try self.expect(.r_paren);
        }

        try self.expect(.l_brace);

        var field_names = std.ArrayList([]const u8).empty;
        var field_types = std.ArrayList(*Node).empty;
        var field_defaults = std.ArrayList(?*Node).empty;

        while (self.current.tag != .r_brace and self.current.tag != .eof) {
            // Parse field group: name1, name2, ...: type (= default)?;
            var group_names = std.ArrayList([]const u8).empty;

            if (self.current.tag != .identifier) {
                return self.fail("expected field name in struct");
            }
            try group_names.append(self.allocator, self.tokenSlice(self.current));
            self.advance();

            while (self.current.tag == .comma) {
                self.advance(); // skip ','
                if (self.current.tag != .identifier) {
                    return self.fail("expected field name after ','");
                }
                try group_names.append(self.allocator, self.tokenSlice(self.current));
                self.advance();
            }

            try self.expect(.colon);
            const field_type = try self.parseTypeExpr();

            // Check for default value: = expr
            var default_val: ?*Node = null;
            if (self.current.tag == .equal) {
                self.advance();
                default_val = try self.parseExpr();
            }

            // All names in the group share the same type and default
            for (group_names.items) |fname| {
                try field_names.append(self.allocator, fname);
                try field_types.append(self.allocator, field_type);
                try field_defaults.append(self.allocator, default_val);
            }

            if (self.current.tag == .semicolon) {
                self.advance();
            }
        }
        try self.expect(.r_brace);

        return try self.createNode(start_pos, .{ .struct_decl = .{
            .name = name,
            .field_names = try field_names.toOwnedSlice(self.allocator),
            .field_types = try field_types.toOwnedSlice(self.allocator),
            .field_defaults = try field_defaults.toOwnedSlice(self.allocator),
            .type_params = try type_params.toOwnedSlice(self.allocator),
        } });
    }

    /// Parse a `{ ... }` struct literal body. Each entry is `name = expr`, a
    /// bare identifier (shorthand for `name = name`), or a positional expression.
    /// `struct_name` / `type_expr` are stored on the node unchanged.
    fn parseStructLiteral(self: *Parser, struct_name: ?[]const u8, type_expr: ?*Node, start_pos: u32) anyerror!*Node {
        try self.expect(.l_brace);

        var field_inits = std.ArrayList(ast.StructFieldInit).empty;

        while (self.current.tag != .r_brace and self.current.tag != .eof) {
            if (field_inits.items.len > 0) {
                try self.expect(.comma);
            }

            // Check if this is a named field: identifier followed by '='
            if (self.current.tag == .identifier) {
                // Snapshot parser state so the identifier can be re-read as
                // the start of an expression if it is not a field name.
                const saved_lexer = self.lexer;
                const saved_current = self.current;
                const saved_prev_end = self.prev_end;
                const fname = self.tokenSlice(self.current);
                const ident_start = self.current.loc.start;
                self.advance();

                if (self.current.tag == .equal) {
                    // Named field: name = expr
                    self.advance(); // skip '='
                    const value = try self.parseExpr();
                    try field_inits.append(self.allocator, .{ .name = fname, .value = value });
                    continue;
                } else if (self.current.tag == .comma or self.current.tag == .r_brace) {
                    // Shorthand: just an identifier (name = identifier with same name)
                    const ident_node = try self.createNode(ident_start, .{ .identifier = .{ .name = fname } });
                    try field_inits.append(self.allocator, .{ .name = fname, .value = ident_node });
                    continue;
                }

                // Not named — backtrack and parse as positional expression
                self.lexer = saved_lexer;
                self.current = saved_current;
                self.prev_end = saved_prev_end;
            }

            // Positional field: just an expression
            const value = try self.parseExpr();
            try field_inits.append(self.allocator, .{ .name = null, .value = value });
        }
        try self.expect(.r_brace);

        return try self.createNode(start_pos, .{ .struct_literal = .{
            .struct_name = struct_name,
            .type_expr = type_expr,
            .field_inits = try field_inits.toOwnedSlice(self.allocator),
        } });
    }

    /// Rebuild a dotted name (`a.b.c`) from an identifier or a chain of field
    /// accesses; any other node kind yields `error.ParseError`.
    fn reconstructQualifiedName(self: *Parser, node: *Node) ![]const u8 {
        if (node.data == .identifier) return node.data.identifier.name;
        if (node.data == .field_access) {
            const obj_name = try self.reconstructQualifiedName(node.data.field_access.object);
            return std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ obj_name, node.data.field_access.field });
        }
        return error.ParseError;
    }

    /// Parse a parenthesized parameter list: `(name: type, $T: Type, args: ..Any)`
    /// Handles `$` generic params, `..` variadic marker, and comptime detection.
    /// The opening `(` must not have been consumed yet — this function consumes `(` through `)`.
    fn parseParams(self: *Parser) anyerror![]const ast.Param {
        try self.expect(.l_paren);
        var params = std.ArrayList(ast.Param).empty;
        while (self.current.tag != .r_paren and self.current.tag != .eof) {
            if (params.items.len > 0) {
                try self.expect(.comma);
            }
            var is_ct_param = false;
            if (self.current.tag == .dollar) {
                is_ct_param = true;
                self.advance();
            }
            if (self.current.tag != .identifier) {
                return self.fail("expected parameter name");
            }
            const param_name = self.tokenSlice(self.current);
            const param_name_span = ast.Span{ .start = self.current.loc.start, .end = self.current.loc.end };
            self.advance();
            try self.expect(.colon);
            const is_variadic = self.current.tag == .dot_dot;
            if (is_variadic) self.advance();
            const param_type = try self.parseTypeExpr();
            var is_comptime_param = false;
            if (is_ct_param and param_type.data == .type_expr) {
                const constraint_name = param_type.data.type_expr.name;
                if (std.mem.eql(u8, constraint_name, "Type")) {
                    // `$T: Type` — rewrite the annotation into a generic type
                    // expression named after the parameter itself.
                    param_type.data = .{ .type_expr = .{ .name = param_name, .is_generic = true } };
                } else {
                    // `$N: u32` and similar — a compile-time value parameter.
                    is_comptime_param = true;
                }
            }
            try params.append(self.allocator, .{ .name = param_name, .name_span = param_name_span, .type_expr = param_type, .is_variadic = is_variadic, .is_comptime = is_comptime_param });
        }
        for (params.items, 0..) |param, i| {
            if (param.is_variadic and i != params.items.len - 1) {
                return self.fail("variadic parameter must be the last parameter");
            }
        }
        try self.expect(.r_paren);
        return try params.toOwnedSlice(self.allocator);
    }

    /// Collect generic type params and comptime value params from parameter annotations.
    /// Each name is recorded once, in first-occurrence order; generic type
    /// params get a synthesized `Type` constraint node.
    fn collectTypeParams(self: *Parser, params: []const ast.Param) ![]const ast.StructTypeParam {
        var type_params = std.ArrayList(ast.StructTypeParam).empty;
        for (params) |param| {
            if (param.is_comptime) {
                var found = false;
                for (type_params.items) |existing| {
                    if (std.mem.eql(u8, existing.name, param.name)) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    try type_params.append(self.allocator, .{ .name = param.name, .constraint = param.type_expr });
                }
            } else if (param.type_expr.data == .type_expr and param.type_expr.data.type_expr.is_generic) {
                var found = false;
                for (type_params.items) |existing| {
                    if (std.mem.eql(u8, existing.name, param.type_expr.data.type_expr.name)) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    const type_constraint = try self.createNode(param.type_expr.span.start, .{ .type_expr = .{ .name = "Type" } });
                    try type_params.append(self.allocator, .{ .name = param.type_expr.data.type_expr.name, .constraint = type_constraint });
                }
            }
        }
        return try type_params.toOwnedSlice(self.allocator);
    }

    /// Parse a function after `name ::`: parameters, optional `-> type`, and a
    /// body that is a block, `=> expr;` (wrapped in a one-statement block), or
    /// `#builtin;`.
    fn parseFnDecl(self: *Parser, name: []const u8, start_pos: u32) anyerror!*Node {
        const params = try self.parseParams();

        // Optional return type
        var return_type: ?*Node = null;
        if (self.current.tag == .arrow) {
            self.advance();
            return_type = try self.parseTypeExpr();
        }

        // Body: block `{ ... }`, arrow `=> expr;`, or #builtin marker
        const body = if (self.current.tag == .hash_builtin) blk: {
            const bi_start = self.current.loc.start;
            self.advance();
            try self.expect(.semicolon);
            break :blk try self.createNode(bi_start, .{ .builtin_expr = {} });
        } else if (self.current.tag == .fat_arrow) blk: {
            self.advance();
            const expr = try self.parseExpr();
            try self.expect(.semicolon);
            const stmts = try self.allocator.alloc(*Node, 1);
            stmts[0] = expr;
            const block_start = expr.span.start;
            const block = try self.createNode(block_start, .{ .block = .{ .stmts = stmts } });
            break :blk block;
        } else try self.parseBlock();

        const type_params = try self.collectTypeParams(params);

        return try self.createNode(start_pos, .{ .fn_decl = .{
            .name = name,
            .params = params,
            .return_type = return_type,
            .body = body,
            .type_params = type_params,
        } });
    }

    /// Parse `{ stmt* }` and return a `block` node.
    fn parseBlock(self: *Parser) anyerror!*Node {
        const start = self.current.loc.start;
        try self.expect(.l_brace);
        var stmts = std.ArrayList(*Node).empty;
        while (self.current.tag != .r_brace and self.current.tag != .eof) {
            const stmt = try self.parseStmt();
            try stmts.append(self.allocator, stmt);
        }
        try self.expect(.r_brace);
        return try self.createNode(start, .{ .block = .{ .stmts = try stmts.toOwnedSlice(self.allocator) } });
    }

    /// Block-form if/match/while/bare blocks don't require trailing semicolon.
/// Consumes the terminator that follows a statement-level expression.
///
/// A `;` is required unless `expr` is block-shaped: a non-inline `if`, a
/// match, a `while`, a `for`, or a bare block. After a block-shaped
/// expression a `;` is optional and is swallowed when present.
fn expectSemicolonAfter(self: *Parser, expr: *Node) anyerror!void {
    const semicolon_optional = switch (expr.data) {
        .if_expr => |if_node| !if_node.is_inline,
        .match_expr, .while_expr, .for_expr, .block => true,
        else => false,
    };
    if (!semicolon_optional) return self.expect(.semicolon);
    if (self.current.tag == .semicolon) self.advance();
}

/// Parses one statement and returns its node.
///
/// Forms are tried in this order:
///   * `name :: ...`, `name := expr`, `name : ...` (bindings);
///   * `name <assign-op> expr;` (assignment to a plain identifier);
///   * `return [expr];`, `defer expr;`, `break;`, `continue;`, `#insert expr;`;
///   * `expr <assign-op> expr;` (e.g. `a.b = 1;`) or a bare expression
///     statement, terminated according to `expectSemicolonAfter`.
pub fn parseStmt(self: *Parser) anyerror!*Node {
    if (self.current.tag == .identifier) {
        // Snapshot the scanner so the identifier can be re-read as the start
        // of an ordinary expression when no binding/assignment form matches.
        const checkpoint_lexer = self.lexer;
        const checkpoint_token = self.current;
        const checkpoint_prev_end = self.prev_end;

        const name_start = self.current.loc.start;
        const name = self.tokenSlice(self.current);
        self.advance();

        switch (self.current.tag) {
            .colon_colon => {
                self.advance();
                return self.parseConstBinding(name, name_start);
            },
            .colon_equal => {
                self.advance();
                const initializer = try self.parseExpr();
                try self.expectSemicolonAfter(initializer);
                return self.createNode(name_start, .{ .var_decl = .{
                    .name = name,
                    .type_annotation = null,
                    .value = initializer,
                } });
            },
            .colon => {
                self.advance();
                return self.parseTypedBinding(name, name_start);
            },
            else => {},
        }

        if (self.isAssignOp()) {
            const assign_op = self.assignOp();
            self.advance();
            const assigned = try self.parseExpr();
            try self.expect(.semicolon);
            // The target node is created only after the ';' has been consumed,
            // exactly where the statement ends.
            const target = try self.createNode(name_start, .{ .identifier = .{ .name = name } });
            return self.createNode(name_start, .{ .assignment = .{
                .target = target,
                .op = assign_op,
                .value = assigned,
            } });
        }

        // Neither a binding nor an assignment: rewind to the identifier.
        self.lexer = checkpoint_lexer;
        self.current = checkpoint_token;
        self.prev_end = checkpoint_prev_end;
    }

    // Keyword-introduced statements.
    const stmt_start = self.current.loc.start;
    switch (self.current.tag) {
        .kw_return => {
            self.advance();
            // `return;` carries no value; otherwise an expression precedes ';'.
            const result: ?*Node = if (self.current.tag == .semicolon) null else try self.parseExpr();
            try self.expect(.semicolon);
            return self.createNode(stmt_start, .{ .return_stmt = .{ .value = result } });
        },
        .kw_defer => {
            self.advance();
            const deferred = try self.parseExpr();
            try self.expect(.semicolon);
            return self.createNode(stmt_start, .{ .defer_stmt = .{ .expr = deferred } });
        },
        .kw_break => {
            self.advance();
            try self.expect(.semicolon);
            return self.createNode(stmt_start, .{ .break_expr = {} });
        },
        .kw_continue => {
            self.advance();
            try self.expect(.semicolon);
            return self.createNode(stmt_start, .{ .continue_expr = {} });
        },
        .hash_insert => {
            self.advance();
            const inserted = try self.parseExpr();
            try self.expect(.semicolon);
            return self.createNode(stmt_start, .{ .insert_expr = .{ .expr = inserted } });
        },
        else => {},
    }

    // Expression statement, possibly the left-hand side of an assignment
    // such as `a.b = 1;`.
    const lhs = try self.parseExpr();
    if (self.isAssignOp()) {
        const assign_op = self.assignOp();
        self.advance();
        const assigned = try self.parseExpr();
        try self.expect(.semicolon);
        return self.createNode(lhs.span.start, .{ .assignment = .{
            .target = lhs,
            .op = assign_op,
            .value = assigned,
        } });
    }

    try self.expectSemicolonAfter(lhs);
    return lhs;
}

// ---- Expression parsing (Pratt / precedence climbing) ----

/// Parses a full expression, i.e. a binary expression with no minimum
/// precedence.
pub fn parseExpr(self: *Parser) anyerror!*Node {
    return self.parseBinary(0);
}

/// Parses a unary operand and then every binary operator whose precedence is
/// at least `min_prec`.
fn parseBinary(self: *Parser, min_prec: u8) anyerror!*Node {
    return self.parseBinaryRhs(try self.parseUnary(), min_prec);
}

/// Precedence-climbing loop: folds `<op> <operand>` pairs onto `initial_lhs`
/// while the current token is a binary operator of precedence >= `min_prec`.
///
/// A run of two or more comparison operators (`a < b <= c`) becomes one
/// `chained_comparison` node holding every operand and operator in source
/// order; everything else becomes a left-nested `binary_op`.
fn parseBinaryRhs(self: *Parser, initial_lhs: *Node, min_prec: u8) anyerror!*Node {
    var left = initial_lhs;

    while (true) {
        const level = self.binaryPrec();
        if (level == 0 or level < min_prec) return left;

        const first_op = self.binaryOp() orelse return left;
        self.advance();
        const right = try self.parseBinary(level + 1);

        // A chain starts when a comparison is directly followed by another
        // comparison token at the same precedence.
        const starts_chain = isComparisonOp(first_op) and
            self.binaryPrec() == level and
            self.isComparisonToken();

        if (!starts_chain) {
            left = try self.createNode(left.span.start, .{ .binary_op = .{
                .op = first_op,
                .lhs = left,
                .rhs = right,
            } });
            continue;
        }

        var chain_operands = std.ArrayList(*Node).empty;
        var chain_ops = std.ArrayList(ast.BinaryOp.Op).empty;
        try chain_operands.append(self.allocator, left);
        try chain_operands.append(self.allocator, right);
        try chain_ops.append(self.allocator, first_op);

        while (self.binaryPrec() == level and self.isComparisonToken()) {
            const next_op = self.binaryOp() orelse break;
            self.advance();
            const next_operand = try self.parseBinary(level + 1);
            try chain_operands.append(self.allocator, next_operand);
            try chain_ops.append(self.allocator, next_op);
        }

        left = try self.createNode(left.span.start, .{ .chained_comparison = .{
            .operands = try chain_operands.toOwnedSlice(self.allocator),
            .ops = try chain_ops.toOwnedSlice(self.allocator),
        } });
    }
}

/// Parses the prefix forms `-x`, `!x`, `xx x` and `cast(Type) x`; anything
/// else is handed to `parsePostfix`.
fn parseUnary(self: *Parser) anyerror!*Node {
    if (self.current.tag == .minus) {
        const start = self.current.loc.start;
        self.advance();
        const operand = try self.parseUnary();
        return try self.createNode(start, .{ .unary_op = .{ .op = .negate, .operand = operand } });
    }
    if (self.current.tag == .bang) {
        const start = self.current.loc.start;
        self.advance();
        const operand = try self.parseUnary();
        return try self.createNode(start, .{ .unary_op = .{ .op = .not, .operand = operand } });
    }
    if (self.current.tag == .kw_xx) {
        const start = self.current.loc.start;
        self.advance();
        const operand = try self.parseUnary();
        return try self.createNode(start, .{ .unary_op = .{ .op = .xx, .operand = operand } });
    }
    // cast(Type) expr — prefix operator with type parameter
    if (self.current.tag == .identifier and std.mem.eql(u8, self.tokenSlice(self.current), "cast")) {
        // Peek one token past `cast` without consuming anything.
        const saved_lexer = self.lexer;
        const next_tok = self.lexer.next();
        self.lexer = saved_lexer;
        if (next_tok.tag == .l_paren) {
            const start = self.current.loc.start;
// parseUnary, continued: body of the `cast(Type) expr` branch.
            self.advance(); // consume 'cast'
            self.advance(); // consume '('
            const type_arg = try self.parseExpr();
            try self.expect(.r_paren);
            const operand = try self.parseUnary();
            // The cast is represented as a call to `cast` with two arguments:
            // the type expression and the operand.
            const callee = try self.createNode(start, .{ .identifier = .{ .name = "cast" } });
            const args = try self.allocator.alloc(*Node, 2);
            args[0] = type_arg;
            args[1] = operand;
            return try self.createNode(start, .{ .call = .{ .callee = callee, .args = args } });
        }
    }
    return self.parsePostfix();
}

/// Parses a primary expression followed by any number of postfix forms:
/// calls `f(a, ..rest)`, struct literals `T.{ ... }`, field accesses `a.b`
/// and index expressions `a[i]`.
fn parsePostfix(self: *Parser) anyerror!*Node {
    var expr = try self.parsePrimary();

    while (true) {
        if (self.current.tag == .l_paren) {
            // Call
            self.advance();
            var args = std.ArrayList(*Node).empty;
            while (self.current.tag != .r_paren and self.current.tag != .eof) {
                // Arguments after the first must be preceded by a comma.
                if (args.items.len > 0) {
                    try self.expect(.comma);
                }
                // Spread operator: ..expr
                if (self.current.tag == .dot_dot) {
                    const spread_start = self.current.loc.start;
                    self.advance();
                    const operand = try self.parseExpr();
                    try args.append(self.allocator, try self.createNode(spread_start, .{ .spread_expr = .{ .operand = operand } }));
                } else {
                    try args.append(self.allocator, try self.parseExpr());
                }
            }
            try self.expect(.r_paren);
            expr = try self.createNode(expr.span.start, .{ .call = .{ .callee = expr, .args = try args.toOwnedSlice(self.allocator) } });
        } else if (self.current.tag == .dot) {
            self.advance();
            if (self.current.tag == .l_brace) {
                // Struct literal: Type.{ ... }
                if (expr.data == .identifier) {
                    // Simple name: Vec4.{ ... }
                    expr = try self.parseStructLiteral(expr.data.identifier.name, null, expr.span.start);
                } else if (expr.data == .field_access) {
                    // Qualified name: std.Vec4.{ ... }
                    const qname = try self.reconstructQualifiedName(expr);
                    expr = try self.parseStructLiteral(qname, null, expr.span.start);
                } else {
                    // Expression type: Vec(3, f32).{ ... }
                    expr = try self.parseStructLiteral(null, expr, expr.span.start);
                }
            } else {
                // Field access
                if (self.current.tag != .identifier) {
                    return self.fail("expected field name after '.'");
                }
                const field = self.tokenSlice(self.current);
                self.advance();
                expr = try self.createNode(expr.span.start, .{ .field_access = .{ .object = expr, .field = field } });
            }
        } else if (self.current.tag == .l_bracket) {
            // Index access: expr[expr]
            self.advance();
            const index = try self.parseExpr();
            try self.expect(.r_bracket);
            expr = try self.createNode(expr.span.start, .{ .index_expr = .{ .object = expr, .index = index } });
        } else {
            break;
        }
    }

    return expr;
}

/// Parses a primary expression: literals, identifiers and type names,
/// `.`-prefixed literals (anonymous struct, array, enum, union), lambdas and
/// parenthesised expressions, anonymous struct/union types, the control-flow
/// expressions (`if`, `while`, `for`, `break`, `continue`, `return`), blocks,
/// `---` and `#run`.
///
/// Fails with `error.ParseError` (message recorded through `fail`) when the
/// current token cannot start an expression or a literal is malformed.
fn parsePrimary(self: *Parser) anyerror!*Node {
    const start = self.current.loc.start;
    switch (self.current.tag) {
        .int_literal => {
            const text = self.tokenSlice(self.current);
            // A `0x`/`0b` prefix (either case) selects the base, but only when
            // at least one character follows it.
            const base: u8 = if (text.len > 2 and text[0] == '0' and (text[1] == 'x' or text[1] == 'X'))
                16
            else if (text.len > 2 and text[0] == '0' and (text[1] == 'b' or text[1] == 'B'))
                2
            else
                10;
            const digits = if (base != 10) text[2..] else text;
            // Report the actual failure: only a value outside i64 is an
            // overflow; anything else is a malformed literal (e.g. a bare
            // `0x`, or a digit that is invalid for the base).
            const value = std.fmt.parseInt(i64, digits, base) catch |err| switch (err) {
                error.Overflow => return self.fail("integer literal overflow"),
                error.InvalidCharacter => return self.fail("invalid integer literal"),
            };
            self.advance();
            return try self.createNode(start, .{ .int_literal = .{ .value = value } });
        },
        .float_literal => {
            const text = self.tokenSlice(self.current);
            const value = std.fmt.parseFloat(f64, text) catch {
                return self.fail("float literal overflow");
            };
            self.advance();
            return try self.createNode(start, .{ .float_literal = .{ .value = value } });
        },
        .string_literal => {
            // raw includes quotes
            const raw = self.tokenSlice(self.current);
            // Both delimiters must be present before they can be stripped;
            // slicing a shorter token would be an out-of-bounds slice.
            if (raw.len < 2) {
                return self.fail("malformed string literal");
            }
            self.advance();
            return try self.createNode(start, .{ .string_literal = .{ .raw = raw[1 .. raw.len - 1] } });
        },
        .kw_true => {
            self.advance();
            return try self.createNode(start, .{ .bool_literal = .{ .value = true } });
        },
        .kw_false => {
            self.advance();
            return try self.createNode(start, .{ .bool_literal = .{ .value = false } });
        },
        .identifier => {
            const name = self.tokenSlice(self.current);
            // Check if this identifier is a type name (e.g. s32, u8, s128)
            if (Type.fromName(name) != null) {
                self.advance();
                return try self.createNode(start, .{ .type_expr = .{ .name = name } });
            }
            self.advance();
            return try self.createNode(start, .{ .identifier = .{ .name = name } });
        },
        .dot => {
            self.advance();
            // Anonymous struct literal: .{ ... }
            if (self.current.tag == .l_brace) {
                return self.parseStructLiteral(null, null, start);
            }
            // Array literal: .[expr, expr, ...]
            if (self.current.tag == .l_bracket) {
                self.advance(); // skip '['
                var elements = std.ArrayList(*Node).empty;
                while (self.current.tag != .r_bracket and self.current.tag != .eof) {
                    if (elements.items.len > 0) {
                        try self.expect(.comma);
                    }
                    const elem = try self.parseExpr();
                    try elements.append(self.allocator, elem);
                }
                try self.expect(.r_bracket);
                return try self.createNode(start, .{ .array_literal = .{ .elements = try elements.toOwnedSlice(self.allocator) } });
            }
            // Enum literal: .variant_name
            if (self.current.tag != .identifier) {
                return self.fail("expected variant name, '{', or '[' after '.'");
            }
            const name = self.tokenSlice(self.current);
            self.advance();
            // Union literal: .variant(payload)
            if (self.current.tag == .l_paren) {
                self.advance(); // skip '('
                const payload = try self.parseExpr();
                try self.expect(.r_paren);
                return try self.createNode(start, .{ .union_literal = .{
                    .union_name = null,
                    .variant_name = name,
                    .payload = payload,
                } });
            }
            return try self.createNode(start, .{ .enum_literal = .{ .name = name } });
        },
        .l_paren => {
            // Lambda: (params) => expr
            if (self.isLambda()) {
                return self.parseLambda();
            }
            // Grouped expression
            self.advance();
            const expr = try self.parseExpr();
            try self.expect(.r_paren);
            return expr;
        },
        .kw_f32, .kw_f64, .kw_Type => {
            // Type keyword used as expression (for type aliases: SOME_TYPE :: f64;)
            const name = self.tokenSlice(self.current);
            self.advance();
            return try self.createNode(start, .{ .type_expr = .{ .name = name } });
        },
        .kw_struct => {
            // Anonymous struct expression: struct { value: T; count: u32; }
            return try self.parseStructDecl("__anon", start);
        },
        .kw_union => {
            // Anonymous union expression: union { variant: T; other: u32; }
            return try self.parseUnionDecl("__anon", start);
        },
        .kw_if => {
            return self.parseIfExpr();
        },
        .kw_while => {
            return self.parseWhileExpr();
        },
        .kw_for => {
            return self.parseForExpr();
        },
        .kw_break => {
            self.advance();
            return try self.createNode(start, .{ .break_expr = {} });
        },
        .kw_continue => {
            self.advance();
            return try self.createNode(start, .{ .continue_expr = {} });
        },
        .kw_return => {
            self.advance();
            // return with optional value
            const value = if (self.current.tag != .semicolon and self.current.tag != .eof)
                try self.parseExpr()
            else
                null;
            return try self.createNode(start, .{ .return_stmt = .{ .value = value } });
        },
        .l_brace => {
            return self.parseBlock();
        },
        .triple_minus => {
            self.advance();
            return try self.createNode(start, .{ .undef_literal = {} });
        },
        .hash_run => {
            self.advance(); // skip '#run'
            const inner = try self.parseExpr();
            return try self.createNode(start, .{ .comptime_expr = .{ .expr = inner } });
        },
        else => {
            return self.fail("unexpected token in expression");
        },
    }
}

/// Parses an `if` in inline form (`if c then a [else b]`) or block form
/// (`if c { ... } [else ...]`), or — when the condition is directly followed
/// by `== {` — a match expression via `parseMatchBody`.
fn parseIfExpr(self: *Parser) anyerror!*Node {
    const start = self.current.loc.start;
    self.advance(); // skip 'if'

    // Parse condition at prec 5 (arithmetic+), leaving all comparisons
    // unconsumed for manual handling with match disambiguation.
// parseIfExpr, continued: parse the condition.
    var condition = try self.parseBinary(5);

    // Handle comparisons with chain detection and match disambiguation.
    // All comparisons (< <= > >= == !=) are at the same precedence.
    if (self.isComparisonToken()) {
        var operands = std.ArrayList(*Node).empty;
        var ops = std.ArrayList(ast.BinaryOp.Op).empty;
        try operands.append(self.allocator, condition);

        while (self.isComparisonToken()) {
            // Match disambiguation: == followed by { is a match expression
            if (self.current.tag == .equal_equal) {
                self.advance();
                if (self.current.tag == .l_brace) {
                    // Match expression: if expr == { case ... }
                    // Only valid as the first comparison (no chain before it)
                    if (ops.items.len == 0) {
                        return self.parseMatchBody(condition, start);
                    }
                    // Chain followed by == { is an error — fall through to
                    // regular comparison (will likely fail at parse time)
                }
                const rhs = try self.parseBinary(5);
                try operands.append(self.allocator, rhs);
                try ops.append(self.allocator, .eq);
            } else {
                const cmp_op = self.binaryOp() orelse break;
                self.advance();
                const rhs = try self.parseBinary(5);
                try operands.append(self.allocator, rhs);
                try ops.append(self.allocator, cmp_op);
            }
        }

        if (ops.items.len == 1) {
            // Single comparison — regular binary_op
            condition = try self.createNode(condition.span.start, .{ .binary_op = .{
                .op = ops.items[0],
                .lhs = operands.items[0],
                .rhs = operands.items[1],
            } });
        } else {
            // Chained comparison
            condition = try self.createNode(condition.span.start, .{ .chained_comparison = .{
                .operands = try operands.toOwnedSlice(self.allocator),
                .ops = try ops.toOwnedSlice(self.allocator),
            } });
        }
    }

    // Handle and/or with proper Pratt precedence
    condition = try self.parseBinaryRhs(condition, 1);

    // Inline form: if cond then expr [else expr]
    if (self.current.tag == .kw_then) {
        self.advance();
        const then_branch = try self.parseExpr();
        var else_branch: ?*Node = null;
        if (self.current.tag == .kw_else) {
            self.advance();
            else_branch = try self.parseExpr();
        }
        return try self.createNode(start, .{ .if_expr = .{
            .condition = condition,
            .then_branch = then_branch,
            .else_branch = else_branch,
            .is_inline = true,
        } });
    }

    // Block form: if cond { ... } else { ... }
    const then_branch = try self.parseBlock();
    var else_branch: ?*Node = null;
    if (self.current.tag == .kw_else) {
        self.advance();
        if (self.current.tag == .kw_if) {
            else_branch = try self.parseIfExpr();
        } else {
            else_branch = try self.parseBlock();
        }
    }
    return try self.createNode(start, .{ .if_expr = .{
        .condition = condition,
        .then_branch = then_branch,
        .else_branch = else_branch,
        .is_inline = false,
    } });
}

/// Parses `while <condition> <block>`. The current token is skipped
/// unconditionally as the `while` keyword.
fn parseWhileExpr(self: *Parser) anyerror!*Node {
    const loop_start = self.current.loc.start;
    self.advance(); // skip 'while'

    const loop_condition = try self.parseExpr();
    const loop_body = try self.parseBlock();
    return self.createNode(loop_start, .{ .while_expr = .{
        .condition = loop_condition,
        .body = loop_body,
    } });
}

/// Parses `for <iterable> <block>`. The current token is skipped
/// unconditionally as the `for` keyword.
fn parseForExpr(self: *Parser) anyerror!*Node {
    const loop_start = self.current.loc.start;
    self.advance(); // skip 'for'

    const source_expr = try self.parseExpr();
    const loop_body = try self.parseBlock();
    return self.createNode(loop_start, .{ .for_expr = .{
        .iterable = source_expr,
        .body = loop_body,
    } });
}

/// Parses the braced body of a match expression (`if subject == { ... }`),
/// starting at the `{`.
///
/// The body is a sequence of `case <pattern>: <stmts>` arms, optionally
/// followed by one `else: <stmts>` arm, which is stored with a null pattern.
/// An arm written `case <pattern>: break;` gets an empty block and
/// `is_break = true`. The resulting `match_expr` node starts at `start_pos`.
fn parseMatchBody(self: *Parser, subject: *Node, start_pos: u32) anyerror!*Node {
    try self.expect(.l_brace);

    var arm_list = std.ArrayList(ast.MatchArm).empty;
    while (self.current.tag == .kw_case) {
        const case_start = self.current.loc.start;
        self.advance(); // skip 'case'

        const arm_pattern: *Node = switch (self.current.tag) {
            // The keywords struct/enum/union are accepted as type-category
            // names and stored as plain identifiers.
            .kw_struct, .kw_enum, .kw_union => blk: {
                const keyword = self.tokenSlice(self.current);
                self.advance();
                break :blk try self.createNode(case_start, .{ .identifier = .{ .name = keyword } });
            },
            // Anything else is an ordinary primary expression, e.g. `.variant`.
            else => try self.parsePrimary(),
        };
        try self.expect(.colon);

        if (self.current.tag == .kw_break) {
            self.advance();
            try self.expect(.semicolon);
            const empty_body = try self.createNode(case_start, .{ .block = .{ .stmts = &.{} } });
            try arm_list.append(self.allocator, .{ .pattern = arm_pattern, .body = empty_body, .is_break = true });
            continue;
        }

        // Statements run until the next arm, the else arm, or the end of the
        // match body.
        const body_start = self.current.loc.start;
        var body_stmts = std.ArrayList(*Node).empty;
        while (true) {
            switch (self.current.tag) {
                .kw_case, .kw_else, .r_brace, .eof => break,
                else => try body_stmts.append(self.allocator, try self.parseStmt()),
            }
        }
        const arm_body = try self.createNode(body_start, .{ .block = .{ .stmts = try body_stmts.toOwnedSlice(self.allocator) } });
        try arm_list.append(self.allocator, .{ .pattern = arm_pattern, .body = arm_body, .is_break = false });
    }

    // Optional default arm.
    if (self.current.tag == .kw_else) {
        const default_start = self.current.loc.start;
        self.advance(); // skip 'else'
        try self.expect(.colon);

        var default_stmts = std.ArrayList(*Node).empty;
        while (true) {
            switch (self.current.tag) {
                .r_brace, .eof => break,
                else => try default_stmts.append(self.allocator, try self.parseStmt()),
            }
        }
        const default_body = try self.createNode(default_start, .{ .block = .{ .stmts = try default_stmts.toOwnedSlice(self.allocator) } });
        try arm_list.append(self.allocator, .{ .pattern = null, .body = default_body, .is_break = false });
    }

    try self.expect(.r_brace);
    return self.createNode(start_pos, .{ .match_expr = .{
        .subject = subject,
        .arms = try arm_list.toOwnedSlice(self.allocator),
    } });
}

/// Lookahead: reports whether the parenthesised group starting at the current
/// `(` is a lambda parameter list, i.e. whether its matching `)` is followed
/// by `=>`, or by `->`, a run of type-like tokens, and then `=>`.
/// All scanner state is restored before returning.
fn isLambda(self: *Parser) bool {
    const lexer_snapshot = self.lexer;
    const token_snapshot = self.current;
    const prev_end_snapshot = self.prev_end;
    defer {
        self.lexer = lexer_snapshot;
        self.current = token_snapshot;
        self.prev_end = prev_end_snapshot;
    }

    self.advance(); // step past '('
    var open_parens: u32 = 1;
    while (open_parens > 0 and self.current.tag != .eof) {
        switch (self.current.tag) {
            .l_paren => open_parens += 1,
            .r_paren => open_parens -= 1,
            else => {},
        }
        // Stop on the matching ')' instead of stepping over it.
        if (open_parens > 0) self.advance();
    }
    if (self.current.tag != .r_paren) return false;

    self.advance(); // step past ')'
    switch (self.current.tag) {
        .fat_arrow => return true,
        .arrow => {},
        else => return false,
    }

    // (params) -> ReturnType => expr
    self.advance(); // step past '->'
    while (true) {
        const tag = self.current.tag;
        if (tag == .fat_arrow) return true;
        if (tag == .eof) return false;

        // Tokens that may appear in a return type: identifiers, dots, parens,
        // type keywords, dollar, brackets, commas and integer literals.
        const type_like = switch (tag) {
            .identifier,
            .dot,
            .dollar,
            .l_bracket,
            .r_bracket,
            .l_paren,
            .r_paren,
            .comma,
            .int_literal,
            => true,
            else => tag.isTypeKeyword(),
        };
        if (!type_like) return false;
        self.advance();
    }
}

/// Parses `(params) [-> ReturnType] => expr` into a `lambda` node, including
/// the type parameters collected from `params`.
fn parseLambda(self: *Parser) anyerror!*Node {
    const lambda_start = self.current.loc.start;
    const params = try self.parseParams();

    // Optional return type: (params) -> Type => expr
    const return_type: ?*Node = if (self.current.tag == .arrow) blk: {
        self.advance();
        break :blk try self.parseTypeExpr();
    } else null;

    try self.expect(.fat_arrow);
    const body = try self.parseExpr();
    const type_params = try self.collectTypeParams(params);
    return self.createNode(lambda_start, .{ .lambda = .{
        .params = params,
        .return_type = return_type,
        .body = body,
        .type_params = type_params,
    } });
}

// ---- Helpers ----

/// Lookahead: reports whether the parenthesised group starting at the current
/// `(` is followed by `{`, `->`, `#builtin` or `=>`, i.e. whether it opens a
/// function definition. All scanner state is restored before returning.
fn
isFunctionDef(self: *Parser) bool {
    // Pure lookahead: every piece of scanner state touched below is put back
    // when the function returns.
    const lexer_snapshot = self.lexer;
    const token_snapshot = self.current;
    const prev_end_snapshot = self.prev_end;
    defer {
        self.lexer = lexer_snapshot;
        self.current = token_snapshot;
        self.prev_end = prev_end_snapshot;
    }

    self.advance(); // step past '('
    var open_parens: u32 = 1;
    while (open_parens > 0 and self.current.tag != .eof) {
        switch (self.current.tag) {
            .l_paren => open_parens += 1,
            .r_paren => open_parens -= 1,
            else => {},
        }
        // Stop on the matching ')' instead of stepping over it.
        if (open_parens > 0) self.advance();
    }
    if (self.current.tag != .r_paren) return false;

    self.advance(); // step past ')'
    // Function if followed by '{', '->', '#builtin', or '=>'
    return switch (self.current.tag) {
        .l_brace, .arrow, .hash_builtin, .fat_arrow => true,
        else => false,
    };
}

/// True when the current token is `=` or one of the compound assignment
/// operators `+=`, `-=`, `*=`, `/=`, `%=`.
fn isAssignOp(self: *const Parser) bool {
    switch (self.current.tag) {
        .equal,
        .plus_equal,
        .minus_equal,
        .star_equal,
        .slash_equal,
        .percent_equal,
        => return true,
        else => return false,
    }
}

/// Maps the current assignment token to its AST operator.
/// Only valid when `isAssignOp()` is true; any other token is `unreachable`.
fn assignOp(self: *const Parser) ast.Assignment.Op {
    const tag = self.current.tag;
    return switch (tag) {
        .percent_equal => .mod_assign,
        .slash_equal => .div_assign,
        .star_equal => .mul_assign,
        .minus_equal => .sub_assign,
        .plus_equal => .add_assign,
        .equal => .assign,
        else => unreachable,
    };
}

/// Binding strength of the current token as a binary operator, or 0 when it
/// is not one. Larger values bind tighter: `or` (1), `and` (2), comparisons
/// (4), `+ -` (5), `* / %` (6).
fn binaryPrec(self: *const Parser) u8 {
    const tag = self.current.tag;
    return switch (tag) {
        .star, .slash, .percent => 6,
        .plus, .minus => 5,
        .less, .less_equal, .greater, .greater_equal, .equal_equal, .bang_equal => 4,
        .kw_and => 2,
        .kw_or => 1,
        else => 0,
    };
}

/// Maps the current token to its binary AST operator, or null when the token
/// is not a binary operator.
fn binaryOp(self: *const Parser) ?ast.BinaryOp.Op {
    const tag = self.current.tag;
    return switch (tag) {
        // logical
        .kw_or => .or_op,
        .kw_and => .and_op,
        // comparison
        .equal_equal => .eq,
        .bang_equal => .neq,
        .less => .lt,
        .less_equal => .lte,
        .greater => .gt,
        .greater_equal => .gte,
        // arithmetic
        .plus => .add,
        .minus => .sub,
        .star => .mul,
        .slash => .div,
        .percent => .mod,
        else => null,
    };
}

/// True for the six comparison operators (`< <= > >= == !=`).
fn isComparisonOp(op: ast.BinaryOp.Op) bool {
    return switch (op) {
        .lt, .lte, .gt, .gte, .eq, .neq => true,
        else => false,
    };
}

/// True when the current token is one of the six comparison operators.
fn isComparisonToken(self: *const Parser) bool {
    return switch (self.current.tag) {
        .less, .less_equal, .greater, .greater_equal, .equal_equal, .bang_equal => true,
        else => false,
    };
}

/// Moves to the next token, remembering where the token just left ended.
fn advance(self: *Parser) void {
    self.prev_end = self.current.loc.end;
    self.current = self.lexer.next();
}

/// Consumes the current token if it has the given tag; otherwise records an
/// "expected '...'" error (using the tag's lexeme, or its tag name when it
/// has no fixed lexeme) and returns `error.ParseError`.
fn expect(self: *Parser, tag: Tag) !void {
    if (self.current.tag != tag) {
        const expected = tag.lexeme() orelse @tagName(tag);
        return self.failFmt("expected '{s}'", .{expected});
    }
    self.advance();
}

/// Like `fail`, but with a formatted message allocated from `self.allocator`.
///
/// If the allocation itself fails, a fixed message is recorded instead, so
/// that the error message, offset and diagnostics always describe this
/// failure rather than being left unset or stale.
fn failFmt(self: *Parser, comptime fmt: []const u8, args: anytype) error{ParseError} {
    const msg = std.fmt.allocPrint(self.allocator, fmt, args) catch {
        return self.fail("parse error (out of memory while formatting the message)");
    };
    return self.fail(msg);
}

/// Returns the source text covered by `token`.
fn tokenSlice(self: *const Parser, token: Token) []const u8 {
    return self.source[token.loc.start..token.loc.end];
}

/// Records `msg` as the parser error at the current token (message, offset,
/// and a diagnostic entry when a diagnostics sink is attached) and returns
/// `error.ParseError` for the caller to propagate.
fn fail(self: *Parser, msg: []const u8) error{ParseError} {
    self.err_msg = msg;
    self.err_offset = self.current.loc.start;
    if (self.diagnostics) |diags| {
        diags.add(.err, msg, .{ .start = self.current.loc.start, .end = self.current.loc.end });
    }
    return error.ParseError;
}
};

test "parse minimal main" {
    const source = "main :: () { 42; }";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    try std.testing.expect(root.data == .root);
    try std.testing.expectEqual(@as(usize, 1), root.data.root.decls.len);
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .fn_decl);
    try std.testing.expectEqualStrings("main", decl.data.fn_decl.name);
    const body = decl.data.fn_decl.body;
    try std.testing.expect(body.data == .block);
    try std.testing.expectEqual(@as(usize, 1), body.data.block.stmts.len);
    try std.testing.expect(body.data.block.stmts[0].data == .int_literal);
    try std.testing.expectEqual(@as(i64, 42), body.data.block.stmts[0].data.int_literal.value);
}

test "parse #run const binding" {
    const source = "x :: #run compute(5);";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    try std.testing.expectEqual(@as(usize, 1), root.data.root.decls.len);
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .const_decl);
    try std.testing.expectEqualStrings("x", decl.data.const_decl.name);
    try std.testing.expect(decl.data.const_decl.value.data == .comptime_expr);
    // inner expr is a call
    try std.testing.expect(decl.data.const_decl.value.data.comptime_expr.expr.data == .call);
}

test "parse top-level #run" {
    const source = "#run main();";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    try std.testing.expectEqual(@as(usize, 1), root.data.root.decls.len);
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .comptime_expr);
    // inner expr is a call
    try std.testing.expect(decl.data.comptime_expr.expr.data == .call);
}

test "parse flat import" {
    const source = "#import \"modules/std/math.sx\";";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    try std.testing.expectEqual(@as(usize, 1), root.data.root.decls.len);
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .import_decl);
    try std.testing.expectEqualStrings("modules/std/math.sx", decl.data.import_decl.path);
    try std.testing.expect(decl.data.import_decl.name == null);
}

test "parse namespaced import" {
    const source = "std :: #import \"modules/std/std.sx\";";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    try std.testing.expectEqual(@as(usize, 1), root.data.root.decls.len);
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .import_decl);
    try std.testing.expectEqualStrings("modules/std/std.sx", decl.data.import_decl.path);
    try std.testing.expectEqualStrings("std", decl.data.import_decl.name.?);
}

test "parse void function with builtin body" {
    const source = "foo :: () #builtin;";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    try std.testing.expectEqual(@as(usize, 1), root.data.root.decls.len);
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .fn_decl);
    try std.testing.expectEqualStrings("foo", decl.data.fn_decl.name);
    try std.testing.expect(decl.data.fn_decl.body.data == .builtin_expr);
}

test "parse void function with arrow body" {
    const source = "foo :: () => 42;";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    try std.testing.expectEqual(@as(usize, 1), root.data.root.decls.len);
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .fn_decl);
    try std.testing.expectEqualStrings("foo", decl.data.fn_decl.name);
    try std.testing.expect(decl.data.fn_decl.body.data == .int_literal);
    try std.testing.expectEqual(@as(i64, 42), decl.data.fn_decl.body.data.int_literal.value);
}

test "parse hex and binary literals" {
    const source = "main :: () { 0xFF; 0b1010; }";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    const body = root.data.root.decls[0].data.fn_decl.body;
    try std.testing.expectEqual(@as(usize, 2), body.data.block.stmts.len);
    try std.testing.expectEqual(@as(i64, 255), body.data.block.stmts[0].data.int_literal.value);
    try std.testing.expectEqual(@as(i64, 10), body.data.block.stmts[1].data.int_literal.value);
}

test "parse array type with identifier length" {
    const source = "foo :: (arr: [N]f32) => arr;";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .fn_decl);
    const param_type = decl.data.fn_decl.params[0].type_expr;
    try std.testing.expect(param_type.data == .array_type_expr);
    // length is an identifier "N", not an int literal
    try std.testing.expect(param_type.data.array_type_expr.length.data == .identifier);
    try std.testing.expectEqualStrings("N", param_type.data.array_type_expr.length.data.identifier.name);
    try std.testing.expect(param_type.data.array_type_expr.element_type.data == .type_expr);
}

test "parse lambda with generic params" {
    const source = "f :: (x: $T) => x;";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .const_decl);
    const lambda = decl.data.const_decl.value;
    try std.testing.expect(lambda.data == .lambda);
    try std.testing.expectEqual(@as(usize, 1), lambda.data.lambda.params.len);
    try std.testing.expectEqualStrings("x", lambda.data.lambda.params[0].name);
    // has generic type param
    try std.testing.expectEqual(@as(usize, 1), lambda.data.lambda.type_params.len);
    try std.testing.expectEqualStrings("T", lambda.data.lambda.type_params[0].name);
}

test "parse lambda with return type" {
    const source = "f :: (x: s32) -> s32 => x;";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    const decl = root.data.root.decls[0];
    try std.testing.expect(decl.data == .const_decl);
    const lambda = decl.data.const_decl.value;
    try std.testing.expect(lambda.data == .lambda);
    try std.testing.expect(lambda.data.lambda.return_type != null);
    try std.testing.expect(lambda.data.lambda.return_type.?.data == .type_expr);
    try std.testing.expectEqualStrings("s32", lambda.data.lambda.return_type.?.data.type_expr.name);
}

test "parse match with else arm" {
    const source =
        \\main :: () {
        \\ x := 5;
        \\ if x == {
        \\ case 1: 10;
        \\ else: 99;
        \\ };
        \\}
    ;
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const root = try parser.parse();
    const body = root.data.root.decls[0].data.fn_decl.body;
    // second stmt is the match expr (after var decl)
    const match_node = body.data.block.stmts[1];
    try std.testing.expect(match_node.data == .match_expr);
    const arms = match_node.data.match_expr.arms;
    try std.testing.expectEqual(@as(usize, 2), arms.len);
    // first arm has a pattern
    try std.testing.expect(arms[0].pattern != null);
    // second arm is the else arm (null pattern)
    try std.testing.expect(arms[1].pattern == null);
}

test "integer literal overflow error" {
    const source = "main :: () { 99999999999999999999; }";
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = Parser.init(arena.allocator(), source);
    const result = parser.parse();
    try std.testing.expectError(error.ParseError, result);
    try std.testing.expectEqualStrings("integer literal overflow", parser.err_msg.?);
+} diff --git a/src/root.zig b/src/root.zig new file mode 100644 index 0000000..96d64ea --- /dev/null +++ b/src/root.zig @@ -0,0 +1,19 @@ +pub const llvm_api = @import("llvm_api.zig"); +pub const token = @import("token.zig"); +pub const lexer = @import("lexer.zig"); +pub const ast = @import("ast.zig"); +pub const parser = @import("parser.zig"); +pub const types = @import("types.zig"); +pub const codegen = @import("codegen.zig"); +pub const builtins = @import("builtins.zig"); +pub const errors = @import("errors.zig"); +pub const sema = @import("sema.zig"); +pub const imports = @import("imports.zig"); +pub const core = @import("core.zig"); + +pub const lsp = struct { + pub const server = @import("lsp/server.zig"); + pub const transport = @import("lsp/transport.zig"); + pub const types = @import("lsp/types.zig"); + pub const document = @import("lsp/document.zig"); +}; diff --git a/src/sema.zig b/src/sema.zig new file mode 100644 index 0000000..72ee22d --- /dev/null +++ b/src/sema.zig @@ -0,0 +1,1006 @@ +const std = @import("std"); +const ast = @import("ast.zig"); +const Node = ast.Node; +const Span = ast.Span; +const Type = @import("types.zig").Type; +const errors = @import("errors.zig"); +const Diagnostic = errors.Diagnostic; + +pub const SymbolKind = enum { + variable, + constant, + function, + enum_type, + struct_type, + type_alias, + param, + namespace, +}; + +pub const Symbol = struct { + name: []const u8, + kind: SymbolKind, + ty: ?Type, + def_span: Span, + scope_depth: u32, +}; + +pub const Reference = struct { + span: Span, + symbol_index: u32, +}; + +pub const FnSignature = struct { + param_types: []const Type, + return_type: Type, + is_variadic: bool = false, +}; + +pub const StructTypeInfo = struct { + field_names: []const []const u8, + field_types: []const Type, +}; + +pub const TypeMap = std.AutoHashMap(*const Node, Type); + +pub const SemaResult = struct { + symbols: []const Symbol, + references: []const Reference, + diagnostics: []const Diagnostic, + 
fn_signatures: std.StringHashMap(FnSignature), + struct_types: std.StringHashMap(StructTypeInfo), + enum_types: std.StringHashMap([]const []const u8), + type_aliases: std.StringHashMap([]const u8), + type_map: TypeMap, +}; + +pub const Analyzer = struct { + allocator: std.mem.Allocator, + symbols: std.ArrayList(Symbol), + references: std.ArrayList(Reference), + diagnostics: std.ArrayList(Diagnostic), + scope_depth: u32, + /// Stack of symbol counts at each scope entry, for popScope cleanup. + scope_starts: std.ArrayList(u32), + // Type registries + fn_signatures: std.StringHashMap(FnSignature), + struct_types: std.StringHashMap(StructTypeInfo), + enum_types: std.StringHashMap([]const []const u8), + type_aliases: std.StringHashMap([]const u8), + type_map: TypeMap, + + pub fn init(allocator: std.mem.Allocator) Analyzer { + return .{ + .allocator = allocator, + .symbols = std.ArrayList(Symbol).empty, + .references = std.ArrayList(Reference).empty, + .diagnostics = std.ArrayList(Diagnostic).empty, + .scope_depth = 0, + .scope_starts = std.ArrayList(u32).empty, + .fn_signatures = std.StringHashMap(FnSignature).init(allocator), + .struct_types = std.StringHashMap(StructTypeInfo).init(allocator), + .enum_types = std.StringHashMap([]const []const u8).init(allocator), + .type_aliases = std.StringHashMap([]const u8).init(allocator), + .type_map = TypeMap.init(allocator), + }; + } + + pub fn analyze(self: *Analyzer, root: *Node) !SemaResult { + if (root.data != .root) return error.InvalidRoot; + + // Pass 1: Register all top-level declarations so forward references work. + for (root.data.root.decls) |decl| { + try self.registerTopLevelDecl(decl); + } + + // Pass 2: Analyze bodies (all top-level names are now in scope). 
+ for (root.data.root.decls) |decl| { + try self.analyzeTopLevelDecl(decl); + } + + return .{ + .symbols = try self.symbols.toOwnedSlice(self.allocator), + .references = try self.references.toOwnedSlice(self.allocator), + .diagnostics = try self.diagnostics.toOwnedSlice(self.allocator), + .fn_signatures = self.fn_signatures, + .struct_types = self.struct_types, + .enum_types = self.enum_types, + .type_aliases = self.type_aliases, + .type_map = self.type_map, + }; + } + + /// Pass 1: register the name/kind/type of a top-level declaration without + /// analysing its body or value expression. + fn registerTopLevelDecl(self: *Analyzer, node: *Node) !void { + try self.registerTopLevelDeclPrefixed(node, null); + } + + fn registerTopLevelDeclPrefixed(self: *Analyzer, node: *Node, ns_prefix: ?[]const u8) !void { + switch (node.data) { + .fn_decl => |fd| { + const ret_ty = resolveReturnType(fd); + try self.addSymbol(fd.name, .function, ret_ty, node.span); + // Populate fn_signatures registry + var param_types = std.ArrayList(Type).empty; + var has_variadic = false; + for (fd.params) |param| { + const pt = Type.fromTypeExpr(param.type_expr) orelse Type.s(32); + if (param.is_variadic) { + has_variadic = true; + // Variadic param becomes a slice type + const elem_name = if (param.type_expr.data == .type_expr) param.type_expr.data.type_expr.name else "s32"; + try param_types.append(self.allocator, .{ .slice_type = .{ .element_name = elem_name } }); + } else { + try param_types.append(self.allocator, pt); + } + } + const key = if (ns_prefix) |pfx| + try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ pfx, fd.name }) + else + fd.name; + try self.fn_signatures.put(key, .{ + .param_types = try param_types.toOwnedSlice(self.allocator), + .return_type = ret_ty orelse .void_type, + .is_variadic = has_variadic, + }); + }, + .const_decl => |cd| { + const ty = resolveTypeAnnotation(cd.type_annotation) orelse inferValueType(cd.value); + const kind = classifyConstDecl(cd); + try 
self.addSymbol(cd.name, kind, ty, node.span); + // Populate type_aliases registry + if (cd.value.data == .type_expr) { + try self.type_aliases.put(cd.name, cd.value.data.type_expr.name); + } + // Lambda as function + if (cd.value.data == .lambda) { + const lam = cd.value.data.lambda; + var param_types = std.ArrayList(Type).empty; + for (lam.params) |param| { + const pt = Type.fromTypeExpr(param.type_expr) orelse Type.s(32); + try param_types.append(self.allocator, pt); + } + const ret = if (lam.return_type) |rt| Type.fromTypeExpr(rt) orelse .void_type else .void_type; + const key = if (ns_prefix) |pfx| + try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ pfx, cd.name }) + else + cd.name; + try self.fn_signatures.put(key, .{ + .param_types = try param_types.toOwnedSlice(self.allocator), + .return_type = ret, + }); + } + }, + .var_decl => |vd| { + const ty = resolveTypeAnnotation(vd.type_annotation); + try self.addSymbol(vd.name, .variable, ty, node.span); + }, + .enum_decl => |ed| { + try self.addSymbol(ed.name, .enum_type, .{ .enum_type = ed.name }, node.span); + try self.enum_types.put(ed.name, ed.variants); + }, + .struct_decl => |sd| { + try self.addSymbol(sd.name, .struct_type, .{ .struct_type = sd.name }, node.span); + // Populate struct_types registry + var field_types = std.ArrayList(Type).empty; + for (sd.field_types) |ft| { + const resolved = Type.fromTypeExpr(ft) orelse Type.s(32); + try field_types.append(self.allocator, resolved); + } + try self.struct_types.put(sd.name, .{ + .field_names = sd.field_names, + .field_types = try field_types.toOwnedSlice(self.allocator), + }); + }, + .union_decl => |ud| { + try self.addSymbol(ud.name, .enum_type, .{ .union_type = ud.name }, node.span); + }, + .namespace_decl => |ns| { + try self.addSymbol(ns.name, .namespace, null, node.span); + // Recurse into namespace decls with qualified prefix (in own scope + // so inner names don't collide with flat imports of the same names) + try self.pushScope(); + for 
(ns.decls) |d| { + try self.registerTopLevelDeclPrefixed(d, ns.name); + } + self.popScope(); + }, + else => {}, + } + } + + /// Resolve a type annotation node to a Type. + /// Handles primitives, type_expr, array_type_expr, parameterized_type_expr, + /// type aliases, enum types, and struct types. + pub fn resolveTypeNode(self: *Analyzer, type_node: ?*Node) Type { + if (type_node) |tn| { + if (Type.fromTypeExpr(tn)) |t| return t; + // Array type: [N]T + if (tn.data == .array_type_expr) { + const ate = tn.data.array_type_expr; + const length: u32 = @intCast(ate.length.data.int_literal.value); + const elem_type = self.resolveTypeNode(ate.element_type); + const elem_name = elem_type.displayName(self.allocator) catch return .void_type; + return .{ .array_type = .{ .element_name = elem_name, .length = length } }; + } + // Parameterized type: Vector(N, T) or generic struct + if (tn.data == .parameterized_type_expr) { + // For now, skip generic instantiation — just return void_type + // (will be extended when generic support is added to sema) + return .void_type; + } + // type_expr or identifier — check aliases, enums, structs + if (tn.data == .type_expr or tn.data == .identifier) { + const name = if (tn.data == .type_expr) tn.data.type_expr.name else tn.data.identifier.name; + if (Type.fromName(name)) |t| return t; + if (self.type_aliases.get(name)) |target| { + if (Type.fromName(target)) |t| return t; + if (self.struct_types.contains(target)) return .{ .struct_type = target }; + } + if (self.enum_types.contains(name)) return .{ .enum_type = name }; + if (self.struct_types.contains(name)) return .{ .struct_type = name }; + } + return .void_type; + } + return .void_type; + } + + /// Infer the type of an expression node without LLVM. + /// Uses fn_signatures for call return types, struct_types for field access, + /// symbols for identifier types, and Type.widen for arithmetic promotion. 
+ pub fn inferExprType(self: *Analyzer, node: *const Node) Type { + return switch (node.data) { + .int_literal => Type.s(32), + .float_literal => .f32, + .bool_literal => .boolean, + .string_literal => .string_type, + .insert_expr => .void_type, + .comptime_expr => |ct| self.inferExprType(ct.expr), + .binary_op => |binop| { + switch (binop.op) { + .eq, .neq, .lt, .lte, .gt, .gte, .and_op, .or_op => return .boolean, + else => { + const lhs_ty = self.inferExprType(binop.lhs); + const rhs_ty = self.inferExprType(binop.rhs); + return Type.widen(lhs_ty, rhs_ty); + }, + } + }, + .chained_comparison => .boolean, + .identifier => |ident| { + // Search symbols backwards for matching name at or above current scope + var i = self.symbols.items.len; + while (i > 0) { + i -= 1; + const sym = self.symbols.items[i]; + if (sym.scope_depth <= self.scope_depth and std.mem.eql(u8, sym.name, ident.name)) { + return sym.ty orelse Type.s(32); + } + } + return Type.s(32); + }, + .if_expr => |ie| { + return self.inferExprType(ie.then_branch); + }, + .block => |blk| { + if (blk.stmts.len > 0) { + return self.inferExprType(blk.stmts[blk.stmts.len - 1]); + } + return .void_type; + }, + .match_expr => |me| { + for (me.arms) |arm| { + if (!arm.is_break) return self.inferExprType(arm.body); + } + return .void_type; + }, + .call => |call_node| { + const callee_name = self.resolveCalleeName(call_node) orelse return Type.s(32); + // Check fn_signatures registry + if (self.fn_signatures.get(callee_name)) |sig| { + return sig.return_type; + } + // Built-in: sqrt returns same type as argument + const base = if (std.mem.lastIndexOfScalar(u8, callee_name, '.')) |idx| callee_name[idx + 1 ..] 
else callee_name; + if (std.mem.eql(u8, base, "sqrt")) { + if (call_node.args.len > 0) return self.inferExprType(call_node.args[0]); + return .f32; + } + return Type.s(32); + }, + .unary_op => |unop| { + return self.inferExprType(unop.operand); + }, + .field_access => |fa| { + const obj_ty = self.inferExprType(fa.object); + if (obj_ty == .string_type) { + if (std.mem.eql(u8, fa.field, "len")) return Type.s(32); + if (std.mem.eql(u8, fa.field, "ptr")) return .string_type; + } + if (obj_ty.isStruct()) { + if (self.struct_types.get(obj_ty.struct_type)) |info| { + for (info.field_names, 0..) |fname, idx| { + if (std.mem.eql(u8, fname, fa.field)) { + return info.field_types[idx]; + } + } + } + } + if (obj_ty.isArray()) { + return Type.fromName(obj_ty.array_type.element_name) orelse Type.s(32); + } + return Type.s(32); + }, + .index_expr => |ie| { + const obj_ty = self.inferExprType(ie.object); + if (obj_ty == .string_type) return Type.s(32); + if (obj_ty.isArray()) { + return Type.fromName(obj_ty.array_type.element_name) orelse Type.s(32); + } + return Type.s(32); + }, + .while_expr => .void_type, + .for_expr => .void_type, + .spread_expr => .void_type, + .break_expr => .void_type, + .continue_expr => .void_type, + .enum_literal => .{ .enum_type = "" }, + .union_literal => |ul| { + if (ul.union_name) |name| return .{ .union_type = name }; + return .void_type; + }, + .struct_literal => |sl| { + if (sl.struct_name) |name| { + if (self.struct_types.contains(name)) return .{ .struct_type = name }; + if (self.type_aliases.get(name)) |target| { + if (self.struct_types.contains(target)) return .{ .struct_type = target }; + } + } else if (sl.type_expr) |te| { + return self.inferExprType(te); + } + return .void_type; + }, + .array_literal => .void_type, + .type_expr => |te| .{ .meta_type = .{ .name = te.name } }, + else => .void_type, + }; + } + + /// Resolve the callee name from a call node (handles identifiers and field_access). 
+ fn resolveCalleeName(self: *Analyzer, call_node: ast.Call) ?[]const u8 { + _ = self; + if (call_node.callee.data == .identifier) { + return call_node.callee.data.identifier.name; + } + if (call_node.callee.data == .field_access) { + const fa = call_node.callee.data.field_access; + if (fa.object.data == .identifier) { + // Return qualified name — caller will look up in fn_signatures + // We can't allocate here easily, so just return the field name + // and let the caller try both qualified and unqualified + return fa.field; + } + } + return null; + } + + /// Pass 2: analyse the body/value of a top-level declaration. + /// The symbol itself was already registered in Pass 1. + fn analyzeTopLevelDecl(self: *Analyzer, node: *Node) !void { + switch (node.data) { + .fn_decl => |fd| { + try self.pushScope(); + for (fd.params) |param| { + const param_type = Type.fromTypeExpr(param.type_expr); + try self.addSymbol(param.name, .param, param_type, param.name_span); + } + try self.analyzeNode(fd.body); + self.popScope(); + }, + .const_decl => |cd| { + try self.analyzeNode(cd.value); + }, + .var_decl => |vd| { + if (vd.value) |val| { + try self.analyzeNode(val); + } + }, + .enum_decl, .struct_decl, .union_decl, .array_type_expr, .array_literal, .parameterized_type_expr, .index_expr, .insert_expr => {}, + .namespace_decl => |ns| { + try self.pushScope(); + for (ns.decls) |d| { + try self.registerTopLevelDecl(d); + } + for (ns.decls) |d| { + try self.analyzeTopLevelDecl(d); + } + self.popScope(); + }, + else => { + try self.analyzeNode(node); + }, + } + } + + fn pushScope(self: *Analyzer) !void { + try self.scope_starts.append(self.allocator, @intCast(self.symbols.items.len)); + self.scope_depth += 1; + } + + fn popScope(self: *Analyzer) void { + if (self.scope_starts.items.len > 0) { + _ = self.scope_starts.pop(); + self.scope_depth -= 1; + } + } + + fn addSymbol(self: *Analyzer, name: []const u8, kind: SymbolKind, ty: ?Type, span: Span) !void { + // Check for duplicate only 
within the current scope window. + const scope_start: usize = if (self.scope_starts.items.len > 0) + self.scope_starts.items[self.scope_starts.items.len - 1] + else + 0; + for (self.symbols.items[scope_start..]) |sym| { + if (sym.scope_depth == self.scope_depth and std.mem.eql(u8, sym.name, name)) { + try self.diagnostics.append(self.allocator, .{ + .level = .warn, + .span = span, + .message = "duplicate declaration", + }); + break; + } + } + + try self.symbols.append(self.allocator, .{ + .name = name, + .kind = kind, + .ty = ty, + .def_span = span, + .scope_depth = self.scope_depth, + }); + } + + fn resolveIdentifier(self: *Analyzer, name: []const u8, span: Span) !void { + // Search backwards to find the most recent declaration with this name + // that is at or above the current scope depth. + var i = self.symbols.items.len; + while (i > 0) { + i -= 1; + const sym = self.symbols.items[i]; + if (sym.scope_depth <= self.scope_depth and std.mem.eql(u8, sym.name, name)) { + try self.references.append(self.allocator, .{ + .span = span, + .symbol_index = @intCast(i), + }); + return; + } + } + + // Built-in names that aren't declared in source + if (std.mem.eql(u8, name, "io")) return; + if (std.mem.eql(u8, name, "true") or std.mem.eql(u8, name, "false")) return; + if (std.mem.eql(u8, name, "cast")) return; + + try self.diagnostics.append(self.allocator, .{ + .level = .warn, + .span = span, + .message = "undefined variable", + }); + } + + fn analyzeNode(self: *Analyzer, node: *Node) !void { + switch (node.data) { + .fn_decl => |fd| { + try self.addSymbol(fd.name, .function, resolveReturnType(fd), node.span); + try self.pushScope(); + // Add params as symbols + for (fd.params) |param| { + const param_type = Type.fromTypeExpr(param.type_expr); + try self.addSymbol(param.name, .param, param_type, param.name_span); + } + try self.analyzeNode(fd.body); + self.popScope(); + }, + .block => |blk| { + try self.pushScope(); + for (blk.stmts) |stmt| { + try self.analyzeNode(stmt); 
+ } + self.popScope(); + }, + .const_decl => |cd| { + // Analyze value first (so it can't reference itself) + try self.analyzeNode(cd.value); + const ty = resolveTypeAnnotation(cd.type_annotation) orelse inferValueType(cd.value); + const kind = classifyConstDecl(cd); + try self.addSymbol(cd.name, kind, ty, node.span); + }, + .var_decl => |vd| { + if (vd.value) |val| { + try self.analyzeNode(val); + } + const ty = resolveTypeAnnotation(vd.type_annotation); + try self.addSymbol(vd.name, .variable, ty, node.span); + }, + .enum_decl => |ed| { + try self.addSymbol(ed.name, .enum_type, .{ .enum_type = ed.name }, node.span); + }, + .struct_decl => |sd| { + try self.addSymbol(sd.name, .struct_type, .{ .struct_type = sd.name }, node.span); + }, + .identifier => |id| { + try self.resolveIdentifier(id.name, node.span); + }, + .binary_op => |bop| { + try self.analyzeNode(bop.lhs); + try self.analyzeNode(bop.rhs); + }, + .chained_comparison => |cc| { + for (cc.operands) |operand| { + try self.analyzeNode(operand); + } + }, + .unary_op => |uop| { + try self.analyzeNode(uop.operand); + }, + .call => |call| { + try self.analyzeNode(call.callee); + for (call.args) |arg| { + try self.analyzeNode(arg); + } + }, + .field_access => |fa| { + try self.analyzeNode(fa.object); + }, + .if_expr => |ie| { + try self.analyzeNode(ie.condition); + try self.analyzeNode(ie.then_branch); + if (ie.else_branch) |eb| { + try self.analyzeNode(eb); + } + }, + .match_expr => |me| { + try self.analyzeNode(me.subject); + for (me.arms) |arm| { + try self.analyzeNode(arm.body); + } + }, + .while_expr => |we| { + try self.analyzeNode(we.condition); + try self.analyzeNode(we.body); + }, + .for_expr => |fe| { + try self.analyzeNode(fe.iterable); + try self.analyzeNode(fe.body); + }, + .spread_expr => |se| try self.analyzeNode(se.operand), + .break_expr, .continue_expr => {}, + .assignment => |asgn| { + try self.analyzeNode(asgn.target); + try self.analyzeNode(asgn.value); + }, + .return_stmt => |ret| { + if 
(ret.value) |val| { + try self.analyzeNode(val); + } + }, + .defer_stmt => |ds| { + try self.analyzeNode(ds.expr); + }, + .comptime_expr => |ct| { + try self.analyzeNode(ct.expr); + }, + .insert_expr => |ins| { + try self.analyzeNode(ins.expr); + }, + .lambda => |lam| { + try self.pushScope(); + for (lam.params) |param| { + const param_type = Type.fromTypeExpr(param.type_expr); + try self.addSymbol(param.name, .param, param_type, param.name_span); + } + try self.analyzeNode(lam.body); + self.popScope(); + }, + .struct_literal => |sl| { + if (sl.type_expr) |te| try self.analyzeNode(te); + for (sl.field_inits) |fi| { + try self.analyzeNode(fi.value); + } + }, + .union_decl => |ud| { + try self.addSymbol(ud.name, .enum_type, .{ .union_type = ud.name }, node.span); + }, + .union_literal => |ul| { + if (ul.payload) |p| { + try self.analyzeNode(p); + } + }, + // Leaf nodes — nothing to recurse into + .int_literal, + .float_literal, + .bool_literal, + .string_literal, + .enum_literal, + .type_expr, + .param, + .match_arm, + .undef_literal, + .builtin_expr, + .import_decl, + .array_type_expr, + .array_literal, + .parameterized_type_expr, + .index_expr, + => {}, + .namespace_decl => |ns| { + for (ns.decls) |d| { + try self.analyzeNode(d); + } + }, + .root => { + // Should not appear nested + }, + } + + // Populate TypeMap for expression nodes + switch (node.data) { + .int_literal, + .float_literal, + .bool_literal, + .string_literal, + .identifier, + .binary_op, + .chained_comparison, + .unary_op, + .call, + .field_access, + .if_expr, + .match_expr, + .block, + .comptime_expr, + .enum_literal, + .struct_literal, + .union_literal, + .array_literal, + .index_expr, + .type_expr, + .insert_expr, + .while_expr, + .for_expr, + .spread_expr, + .break_expr, + .continue_expr, + => { + const ty = self.inferExprType(node); + self.type_map.put(node, ty) catch {}; + }, + else => {}, + } + } + + fn resolveReturnType(fd: ast.FnDecl) ?Type { + if (fd.return_type) |rt| { + return 
Type.fromTypeExpr(rt); + } + return null; + } + + fn resolveTypeAnnotation(type_node: ?*Node) ?Type { + if (type_node) |tn| { + return Type.fromTypeExpr(tn); + } + return null; + } + + fn inferValueType(value: *Node) ?Type { + return switch (value.data) { + .int_literal => Type.s(32), + .float_literal => .f64, + .bool_literal => .boolean, + .string_literal => .string_type, + .type_expr => null, // type alias — no value type + .lambda => null, + .comptime_expr => null, + .insert_expr => null, + else => null, + }; + } + + fn classifyConstDecl(cd: ast.ConstDecl) SymbolKind { + return switch (cd.value.data) { + .type_expr => .type_alias, + .lambda => .function, + else => .constant, + }; + } +}; + +/// Convenience: parse and analyze in one call. +pub fn analyzeSource(allocator: std.mem.Allocator, root: *Node) !SemaResult { + var analyzer = Analyzer.init(allocator); + return analyzer.analyze(root); +} + +/// Find the symbol whose definition span contains the given byte offset. +pub fn findSymbolAtOffset(symbols: []const Symbol, offset: u32) ?usize { + for (symbols, 0..) |sym, i| { + if (offset >= sym.def_span.start and offset < sym.def_span.end) { + return i; + } + } + return null; +} + +/// Find the reference at the given byte offset. +pub fn findReferenceAtOffset(references: []const Reference, offset: u32) ?usize { + for (references, 0..) |ref_, i| { + if (offset >= ref_.span.start and offset < ref_.span.end) { + return i; + } + } + return null; +} + +/// Walk the AST to find the innermost node whose span contains the offset. 
+pub fn findNodeAtOffset(node: *Node, offset: u32) ?*Node { + if (offset < node.span.start or offset >= node.span.end) return null; + + // Try to find a more specific child node + switch (node.data) { + .root => |r| { + for (r.decls) |decl| { + if (findNodeAtOffset(decl, offset)) |found| return found; + } + }, + .fn_decl => |fd| { + if (fd.return_type) |rt| { + if (findNodeAtOffset(rt, offset)) |found| return found; + } + if (findNodeAtOffset(fd.body, offset)) |found| return found; + }, + .block => |blk| { + for (blk.stmts) |stmt| { + if (findNodeAtOffset(stmt, offset)) |found| return found; + } + }, + .const_decl => |cd| { + if (cd.type_annotation) |ta| { + if (findNodeAtOffset(ta, offset)) |found| return found; + } + if (findNodeAtOffset(cd.value, offset)) |found| return found; + }, + .var_decl => |vd| { + if (vd.type_annotation) |ta| { + if (findNodeAtOffset(ta, offset)) |found| return found; + } + if (vd.value) |val| { + if (findNodeAtOffset(val, offset)) |found| return found; + } + }, + .binary_op => |bop| { + if (findNodeAtOffset(bop.lhs, offset)) |found| return found; + if (findNodeAtOffset(bop.rhs, offset)) |found| return found; + }, + .chained_comparison => |cc| { + for (cc.operands) |operand| { + if (findNodeAtOffset(operand, offset)) |found| return found; + } + }, + .unary_op => |uop| { + if (findNodeAtOffset(uop.operand, offset)) |found| return found; + }, + .call => |call| { + if (findNodeAtOffset(call.callee, offset)) |found| return found; + for (call.args) |arg| { + if (findNodeAtOffset(arg, offset)) |found| return found; + } + }, + .field_access => |fa| { + if (findNodeAtOffset(fa.object, offset)) |found| return found; + }, + .if_expr => |ie| { + if (findNodeAtOffset(ie.condition, offset)) |found| return found; + if (findNodeAtOffset(ie.then_branch, offset)) |found| return found; + if (ie.else_branch) |eb| { + if (findNodeAtOffset(eb, offset)) |found| return found; + } + }, + .match_expr => |me| { + if (findNodeAtOffset(me.subject, offset)) |found| 
return found; + for (me.arms) |arm| { + if (findNodeAtOffset(arm.body, offset)) |found| return found; + if (arm.pattern) |pat| { + if (findNodeAtOffset(pat, offset)) |found| return found; + } + } + }, + .while_expr => |we| { + if (findNodeAtOffset(we.condition, offset)) |found| return found; + if (findNodeAtOffset(we.body, offset)) |found| return found; + }, + .for_expr => |fe| { + if (findNodeAtOffset(fe.iterable, offset)) |found| return found; + if (findNodeAtOffset(fe.body, offset)) |found| return found; + }, + .spread_expr => |se| { + if (findNodeAtOffset(se.operand, offset)) |found| return found; + }, + .break_expr, .continue_expr => {}, + .assignment => |asgn| { + if (findNodeAtOffset(asgn.target, offset)) |found| return found; + if (findNodeAtOffset(asgn.value, offset)) |found| return found; + }, + .return_stmt => |ret| { + if (ret.value) |val| { + if (findNodeAtOffset(val, offset)) |found| return found; + } + }, + .defer_stmt => |ds| { + if (findNodeAtOffset(ds.expr, offset)) |found| return found; + }, + .comptime_expr => |ct| { + if (findNodeAtOffset(ct.expr, offset)) |found| return found; + }, + .insert_expr => |ins| { + if (findNodeAtOffset(ins.expr, offset)) |found| return found; + }, + .lambda => |lam| { + if (findNodeAtOffset(lam.body, offset)) |found| return found; + }, + .struct_literal => |sl| { + for (sl.field_inits) |fi| { + if (findNodeAtOffset(fi.value, offset)) |found| return found; + } + }, + .union_literal => |ul| { + if (ul.payload) |p| { + if (findNodeAtOffset(p, offset)) |found| return found; + } + }, + // Leaf nodes + .identifier, + .int_literal, + .float_literal, + .bool_literal, + .string_literal, + .enum_literal, + .type_expr, + .param, + .match_arm, + .undef_literal, + .builtin_expr, + .enum_decl, + .struct_decl, + .union_decl, + .import_decl, + .array_type_expr, + .array_literal, + .parameterized_type_expr, + .index_expr, + => {}, + .namespace_decl => |ns| { + for (ns.decls) |d| { + if (findNodeAtOffset(d, offset)) |found| return 
found; + } + }, + } + + return node; +} + +test "sema: collect top-level declarations" { + const parser_mod = @import("parser.zig"); + + const source = "main :: () { 42; }"; + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var parser = parser_mod.Parser.init(alloc, source); + const root = try parser.parse(); + + var analyzer = Analyzer.init(alloc); + const result = try analyzer.analyze(root); + + // Should have one symbol: main (function) + try std.testing.expectEqual(@as(usize, 1), result.symbols.len); + try std.testing.expectEqualStrings("main", result.symbols[0].name); + try std.testing.expectEqual(SymbolKind.function, result.symbols[0].kind); +} + +test "sema: function params as symbols" { + const parser_mod = @import("parser.zig"); + + const source = "add :: (a: s32, b: s32) -> s32 { a + b; }"; + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var parser = parser_mod.Parser.init(alloc, source); + const root = try parser.parse(); + + var analyzer = Analyzer.init(alloc); + const result = try analyzer.analyze(root); + + // Symbols: add (function), a (param), b (param) + try std.testing.expectEqual(@as(usize, 3), result.symbols.len); + try std.testing.expectEqualStrings("add", result.symbols[0].name); + try std.testing.expectEqual(SymbolKind.function, result.symbols[0].kind); + try std.testing.expectEqualStrings("a", result.symbols[1].name); + try std.testing.expectEqual(SymbolKind.param, result.symbols[1].kind); + try std.testing.expectEqualStrings("b", result.symbols[2].name); + try std.testing.expectEqual(SymbolKind.param, result.symbols[2].kind); + + // References: a and b used in body should be resolved + try std.testing.expect(result.references.len >= 2); +} + +test "sema: variable declaration and reference" { + const parser_mod = @import("parser.zig"); + + const source = "main :: () { x := 42; x; 
}"; + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var parser = parser_mod.Parser.init(alloc, source); + const root = try parser.parse(); + + var analyzer = Analyzer.init(alloc); + const result = try analyzer.analyze(root); + + // Symbols: main (function), x (variable) + try std.testing.expectEqual(@as(usize, 2), result.symbols.len); + try std.testing.expectEqualStrings("main", result.symbols[0].name); + try std.testing.expectEqualStrings("x", result.symbols[1].name); + try std.testing.expectEqual(SymbolKind.variable, result.symbols[1].kind); + + // x should have a reference + try std.testing.expect(result.references.len >= 1); + // The reference should point to symbol index 1 (x) + try std.testing.expectEqual(@as(u32, 1), result.references[0].symbol_index); +} + +test "sema: undefined variable diagnostic" { + const parser_mod = @import("parser.zig"); + + const source = "main :: () { y; }"; + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var parser = parser_mod.Parser.init(alloc, source); + const root = try parser.parse(); + + var analyzer = Analyzer.init(alloc); + const result = try analyzer.analyze(root); + + // Should have a diagnostic for undefined 'y' + try std.testing.expect(result.diagnostics.len >= 1); + try std.testing.expectEqualStrings("undefined variable", result.diagnostics[0].message); +} + +test "sema: enum and struct declarations" { + const parser_mod = @import("parser.zig"); + + const source = "Color :: enum { red; green; blue; } Vec2 :: struct { x, y: f32; } main :: () { 0; }"; + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var parser = parser_mod.Parser.init(alloc, source); + const root = try parser.parse(); + + var analyzer = Analyzer.init(alloc); + const result = try analyzer.analyze(root); + + // 
Symbols: Color (enum), Vec2 (struct), main (function)
+    try std.testing.expectEqual(@as(usize, 3), result.symbols.len);
+    try std.testing.expectEqualStrings("Color", result.symbols[0].name);
+    try std.testing.expectEqual(SymbolKind.enum_type, result.symbols[0].kind);
+    try std.testing.expectEqualStrings("Vec2", result.symbols[1].name);
+    try std.testing.expectEqual(SymbolKind.struct_type, result.symbols[1].kind);
+    try std.testing.expectEqualStrings("main", result.symbols[2].name);
+}
diff --git a/src/token.zig b/src/token.zig
new file mode 100644
index 0000000..e946907
--- /dev/null
+++ b/src/token.zig
@@ -0,0 +1,185 @@
+/// Kind of a token: literals, identifiers and keywords, punctuation,
+/// operators, delimiters, arrows, `#` directives, plus `eof` and `invalid`.
+pub const Tag = enum {
+    // Literals
+    int_literal,
+    float_literal,
+    string_literal,
+
+    // Identifiers and keywords
+    identifier,
+    kw_if,
+    kw_else,
+    kw_then,
+    kw_true,
+    kw_false,
+    kw_enum,
+    kw_case,
+    kw_break,
+    kw_continue,
+    kw_while,
+    kw_for,
+    kw_return,
+    kw_defer,
+    kw_f32,
+    kw_f64,
+    kw_struct,
+    kw_union,
+    kw_xx,
+    kw_and,
+    kw_or,
+    kw_Type, // Type (metatype keyword)
+
+    // Symbols
+    colon, // :
+    colon_colon, // ::
+    colon_equal, // :=
+    semicolon, // ;
+    comma, // ,
+    dot, // .
+    dot_dot, // ..
+    dollar, // $
+
+    // Operators
+    plus, // +
+    minus, // -
+    star, // *
+    slash, // /
+    equal, // =
+    equal_equal, // ==
+    bang, // !
+    bang_equal, // !=
+    less, // <
+    less_equal, // <=
+    greater, // >
+    greater_equal, // >=
+    plus_equal, // +=
+    minus_equal, // -=
+    star_equal, // *=
+    slash_equal, // /=
+    percent, // %
+    percent_equal, // %=
+
+    // Delimiters
+    l_paren, // (
+    r_paren, // )
+    l_brace, // {
+    r_brace, // }
+    l_bracket, // [
+    r_bracket, // ]
+
+    // Arrows
+    arrow, // ->
+    fat_arrow, // =>
+
+    // Directives
+    hash_run, // #run
+    hash_import, // #import
+    hash_insert, // #insert
+    hash_builtin, // #builtin
+    triple_minus, // ---
+
+    // Special
+    eof,
+    invalid,
+
+    /// Fixed source text for punctuation, operator, delimiter and arrow tags
+    /// (and `---`); null for tags with no entry in the table below.
+    pub fn lexeme(tag: Tag) ?[]const u8 {
+        return switch (tag) {
+            .colon => ":",
+            .colon_colon => "::",
+            .colon_equal => ":=",
+            .semicolon => ";",
+            .comma => ",",
+            .dot => ".",
+            .dot_dot => "..",
+            .dollar => "$",
+            .plus => "+",
+            .minus => "-",
+            .star => "*",
+            .slash => "/",
+            .equal => "=",
+            .equal_equal => "==",
+            .bang => "!",
+            .bang_equal => "!=",
+            .less => "<",
+            .less_equal => "<=",
+            .greater => ">",
+            .greater_equal => ">=",
+            .plus_equal => "+=",
+            .minus_equal => "-=",
+            .star_equal => "*=",
+            .slash_equal => "/=",
+            .percent => "%",
+            .percent_equal => "%=",
+            .l_paren => "(",
+            .r_paren => ")",
+            .l_brace => "{",
+            .r_brace => "}",
+            .l_bracket => "[",
+            .r_bracket => "]",
+            .arrow => "->",
+            .fat_arrow => "=>",
+            .triple_minus => "---",
+            else => null,
+        };
+    }
+
+    /// True for the keyword tags that name a type: `f32`, `f64` and `Type`.
+    pub fn isTypeKeyword(tag: Tag) bool {
+        return switch (tag) {
+            .kw_f32, .kw_f64, .kw_Type => true,
+            else => false,
+        };
+    }
+};
+
+/// A tag plus the source span it covers.
+pub const Token = struct {
+    tag: Tag,
+    loc: Loc,
+
+    /// Byte range in the source; `end` is exclusive (see `slice`).
+    pub const Loc = struct {
+        start: u32,
+        end: u32,
+    };
+
+    /// The token's text within `source`.
+    pub fn slice(self: Token, source: []const u8) []const u8 {
+        return source[self.loc.start..self.loc.end];
+    }
+};
+
+/// Maps keyword spellings to their tags.
+pub const keywords = std.StaticStringMap(Tag).initComptime(.{
+    .{ "if", .kw_if },
+    .{ "else", .kw_else },
+    .{ "then", .kw_then },
+    .{ "true", .kw_true },
+    .{ "false", .kw_false },
+    .{ "enum", .kw_enum },
+    .{ "case", .kw_case },
+    .{ "break", .kw_break },
+    .{ "continue", .kw_continue },
+    .{ "while", .kw_while },
+    .{ "for", .kw_for },
+    .{ "return", .kw_return },
+    .{ "defer", .kw_defer },
+    .{ "f32", .kw_f32 },
+    .{ "f64", .kw_f64 },
+    .{ "struct", .kw_struct },
+    .{ "union", .kw_union },
+    .{ "xx", .kw_xx },
+    .{ "and", .kw_and },
+    .{ "or", .kw_or },
+    .{ "Type", .kw_Type },
+});
+
+/// Tag for `bytes` if it is a keyword, otherwise null.
+pub fn getKeyword(bytes: []const u8) ?Tag {
+    return keywords.get(bytes);
+}
+
+const std = @import("std");
diff --git a/src/types.zig b/src/types.zig
new file mode 100644
index 0000000..e06aeda
--- /dev/null
+++ b/src/types.zig
@@ -0,0 +1,285 @@
+const std = @import("std");
+const ast = @import("ast.zig");
+const Node = ast.Node;
+
+/// A type as a tagged union: variable-width integers, floats, a few builtins,
+/// and named or parameterised types that refer to other types by name.
+pub const Type = union(enum) {
+    // Variable-width integers (1–64 bits)
+    signed: u8,
+    unsigned: u8,
+    // Fixed-width floats
+    f32,
+    f64,
+    // Other
+    void_type,
+    boolean,
+    string_type,
+    enum_type: []const u8,
+    struct_type: []const u8,
+    union_type: []const u8,
+    array_type: ArrayTypeInfo,
+    slice_type: SliceTypeInfo,
+    vector_type: VectorTypeInfo,
+    any_type,
+    meta_type: MetaTypeInfo,
+
+    pub const SliceTypeInfo = struct {
+        element_name: []const u8,
+    };
+
+    pub const ArrayTypeInfo = struct {
+        element_name: []const u8,
+        length: u32,
+    };
+
+    pub const VectorTypeInfo = struct {
+        element_name: []const u8,
+        length: u32,
+    };
+
+    pub const MetaTypeInfo = struct {
+        name: []const u8,
+    };
+
+    // Convenience constructors
+    pub fn s(width: u8) Type {
+        return .{ .signed = width };
+    }
+
+    pub fn u(width: u8) Type {
+        return .{ .unsigned = width };
+    }
+
+    /// Look up a builtin type by name: "string", "bool", "f32", "f64", "Any",
+    /// or an integer "s1".."s64" / "u1".."u64". Returns null for anything else.
+    pub fn fromName(name: []const u8) ?Type {
+        // Named types (check before variable-width integers since "string" starts with 's')
+        if (std.mem.eql(u8, name, "string")) return .string_type;
+        if (std.mem.eql(u8, name, "bool")) return .boolean;
+        if (std.mem.eql(u8, name, "f32")) return .f32;
+        if (std.mem.eql(u8, name, "f64")) return .f64;
+        if (std.mem.eql(u8, name, "Any")) return .any_type;
+        // Variable-width integers: s1..s64, u1..u64
+        if (name.len >= 2 and (name[0] == 's' or name[0] == 'u')) {
+            const digits = name[1..];
+            // parseInt also accepts a leading sign and `_` separators, so check
+            // the spelling first: plain decimal digits, no leading zero.
+            // Otherwise "s+8", "u1_6" or "s08" would resolve to integer types.
+            if (digits[0] == '0') return null;
+            for (digits) |c| {
+                if (!std.ascii.isDigit(c)) return null;
+            }
+            const width = std.fmt.parseInt(u8, digits, 10) catch return null;
+            if (width < 1 or width > 64) return null;
+            return if (name[0] == 's') Type.s(width) else Type.u(width);
+        }
+        return null;
+    }
+
+    /// Resolve a `type_expr` node through `fromName`; null for any other node.
+    pub fn fromTypeExpr(node: *Node) ?Type {
+        if (node.data != .type_expr) return null;
+        return fromName(node.data.type_expr.name);
+    }
+
+    pub fn isEnum(self: Type) bool {
+        return self == .enum_type;
+    }
+
+    pub fn isStruct(self: Type) bool {
+        return self == .struct_type;
+    }
+
+    pub fn isUnion(self: Type) bool {
+        return self == .union_type;
+    }
+
+    pub fn isAny(self: Type) bool {
+        return self == .any_type;
+    }
+
+    pub fn isSlice(self: Type) bool {
+        return self == .slice_type;
+    }
+
+    /// Element type of a slice, resolved with `fromName`; null when `self` is
+    /// not a slice or the element name is not one `fromName` recognises.
+    pub fn sliceElementType(self: Type) ?Type {
+        return switch (self) {
+            .slice_type => |info| fromName(info.element_name),
+            else => null,
+        };
+    }
+
+    pub fn isArray(self: Type) bool {
+        return self == .array_type;
+    }
+
+    pub fn isVector(self: Type) bool {
+        return self == .vector_type;
+    }
+
+    /// Element type of a vector, resolved with `fromName`; null when `self` is
+    /// not a vector or the element name is not one `fromName` recognises.
+    pub fn vectorElementType(self: Type) ?Type {
+        return switch (self) {
+            .vector_type => |info| fromName(info.element_name),
+            else => null,
+        };
+    }
+
+    pub fn isFloat(self: Type) bool {
+        return switch (self) {
+            .f32, .f64 => true,
+            else => false,
+        };
+    }
+
+    pub fn isInt(self: Type) bool {
+        return self.isSigned() or self.isUnsigned();
+    }
+
+    pub fn isSigned(self: Type) bool {
+        return self == .signed;
+    }
+
+    pub fn isUnsigned(self: Type) bool {
+        return self == .unsigned;
+    }
+
+    /// Width in bits of an integer, float or bool; 0 for every other type.
+    pub fn bitWidth(self: Type) u32 {
+        return switch (self) {
+            .signed => |w| w,
+            .unsigned => |w| w,
+            .f32 => 32,
+            .f64 => 64,
+            .boolean => 1,
+            else => 0,
+        };
+    }
+
+    /// Check if this type can be implicitly converted to `target` without `xx`.
+    /// Safe (implicit) conversions:
+    /// - Same type
+    /// - Both unsigned int, target width >= source width
+    /// - Both signed int, target width >= source width
+    /// - Unsigned to signed, target width strictly > source width
+    /// - Any int to any float
+    /// - Float to wider float (f32 → f64)
+    /// Everything else requires `xx`.
+    pub fn isImplicitlyConvertibleTo(self: Type, target: Type) bool {
+        if (std.meta.eql(self, target)) return true;
+
+        const src_float = self.isFloat();
+        const dst_float = target.isFloat();
+        const src_int = self.isInt();
+
+        // Float → wider float
+        if (src_float and dst_float) {
+            return target.bitWidth() >= self.bitWidth();
+        }
+
+        // Int → float (always safe)
+        if (src_int and dst_float) return true;
+
+        // Both unsigned → target width >= source width
+        if (self.isUnsigned() and target.isUnsigned()) {
+            return target.bitWidth() >= self.bitWidth();
+        }
+
+        // Both signed → target width >= source width
+        if (self.isSigned() and target.isSigned()) {
+            return target.bitWidth() >= self.bitWidth();
+        }
+
+        // Unsigned → signed: target must be strictly wider
+        if (self.isUnsigned() and target.isSigned()) {
+            return target.bitWidth() > self.bitWidth();
+        }
+
+        // Everything else requires xx
+        return false;
+    }
+
+    /// Format type name for mangling and display (e.g. "s32", "u8", "f64").
+    /// Integer, slice, array and vector names are allocated with `allocator`;
+    /// every other name is a static string or borrowed from the type itself,
+    /// so the caller cannot free the result uniformly.
+    /// NOTE(review): presumably called with an arena allocator — confirm.
+    pub fn displayName(self: Type, allocator: std.mem.Allocator) ![]const u8 {
+        // One allocPrint per name: a single allocation, so nothing is left
+        // half-built (and leaked) when the allocator fails part-way.
+        return switch (self) {
+            .signed => |w| try std.fmt.allocPrint(allocator, "s{d}", .{w}),
+            .unsigned => |w| try std.fmt.allocPrint(allocator, "u{d}", .{w}),
+            .f32 => "f32",
+            .f64 => "f64",
+            .boolean => "bool",
+            .string_type => "string",
+            .void_type => "void",
+            .any_type => "Any",
+            .enum_type => |name| name,
+            .struct_type => |name| name,
+            .union_type => |name| name,
+            .slice_type => |info| try std.fmt.allocPrint(allocator, "[]{s}", .{info.element_name}),
+            .array_type => |info| try std.fmt.allocPrint(allocator, "[{d}]{s}", .{ info.length, info.element_name }),
+            .vector_type => |info| try std.fmt.allocPrint(allocator, "Vector({d},{s})", .{ info.length, info.element_name }),
+            .meta_type => |info| info.name,
+        };
+    }
+
+    /// Widen two types to a common type for binary operations.
+    /// Used for arithmetic type promotion (e.g., s16 + s32 → s32, int + float → float).
+    /// Mixed signed/unsigned operands widen to a signed type with room for
+    /// every value of the unsigned operand (e.g., u32 + s16 → s33).
+    pub fn widen(a: Type, b: Type) Type {
+        // Same type → return it
+        if (std.meta.eql(a, b)) return a;
+
+        // Vector + vector → return a (dimensions are not compared here)
+        // Vector + scalar → return vector (scalar will be broadcast)
+        if (a.isVector()) return a;
+        if (b.isVector()) return b;
+
+        const a_float = a.isFloat();
+        const b_float = b.isFloat();
+        const a_int = a.isInt();
+        const b_int = b.isInt();
+
+        // Both float → wider float
+        if (a_float and b_float) {
+            return if (a.bitWidth() >= b.bitWidth()) a else b;
+        }
+
+        // int + float → float
+        if (a_int and b_float) return b;
+        if (b_int and a_float) return a;
+
+        // Both signed → wider signed
+        if (a.isSigned() and b.isSigned()) {
+            return Type.s(@intCast(@max(a.bitWidth(), b.bitWidth())));
+        }
+
+        // Both unsigned → wider unsigned
+        if (a.isUnsigned() and b.isUnsigned()) {
+            return Type.u(@intCast(@max(a.bitWidth(), b.bitWidth())));
+        }
+
+        // signed + unsigned (mixed)
+        if (a_int and b_int) {
+            const signed_w = if (a.isSigned()) a.bitWidth() else b.bitWidth();
+            const unsigned_w = if (a.isSigned()) b.bitWidth() else a.bitWidth();
+            // A signed type holds every uN value only when strictly wider than
+            // N (the same rule as isImplicitlyConvertibleTo), so the unsigned
+            // operand needs unsigned_w + 1 bits even when it is the wider one.
+            const need: u32 = @max(signed_w, unsigned_w + 1);
+            const capped: u8 = @intCast(@min(need, 128));
+            return Type.s(capped);
+        }
+
+        // At least one operand is neither int nor float: fall back to `a`
+        return a;
+    }
+};