diff --git a/src/main.zig b/src/main.zig index ad79863b..f8534b65 100644 --- a/src/main.zig +++ b/src/main.zig @@ -23,6 +23,13 @@ pub fn main(init: std.process.Init) !void { return; } + // `migrate` has its own flag (`--dry-run`) the generic flag loop below would + // reject, so dispatch it here before that loop runs. + if (std.mem.eql(u8, command, "migrate")) { + runMigrate(allocator, io, args[2..]); + return; + } + // Parse flags and positional arguments var input_path: ?[]const u8 = null; var target_config = sx.target.TargetConfig{}; @@ -407,6 +414,7 @@ fn printUsage() void { \\ ir Print LLVM IR to stdout \\ asm Emit assembly (.s) file \\ lsp Start language server (LSP) + \\ migrate Rewrite old tuple syntax to new (`(a,b)`->`.(a,b)`, type `(A,B)`->`Tuple(A,B)`); `--dry-run` prints only the worklist, `--force` emits output despite unmigrated ambiguous sites \\ \\Options: \\ --target Target triple or shorthand: wasm, macos, linux, windows, ios, ios-sim (default: host) @@ -517,6 +525,72 @@ fn compilePipeline(allocator: std.mem.Allocator, io: std.Io, input_path: []const return comp; } +/// `sx migrate [--dry-run] [--force] ` — tuple-syntax migration tool. +/// +/// Without flags: parse-only, rewrite the old tuple syntax, print the migrated +/// source to stdout and any ambiguous-site worklist entries to stderr. A +/// NON-EMPTY worklist is a hard failure (exit 2) — the migration is incomplete, +/// so we do NOT print the rewritten source (which could be redirected over the +/// input, silently shipping half-migrated code) unless `--force` is passed. +/// +/// With `--dry-run`: print ONLY the worklist (to stderr), no rewritten source — +/// so ambiguous sites can be audited first. A non-empty worklist still exits 2. +/// +/// With `--force`: print the rewritten source even when the worklist is +/// non-empty (the ambiguous sites are left in the OLD syntax). Exit is still 2 +/// so a script can detect the partial migration. 
/// Exit behavior: returns normally when the file migrated cleanly; exits 1 on
/// a usage, read, parse or stdout-write failure; exits 2 when at least one
/// ambiguous site was left unmigrated.
fn runMigrate(allocator: std.mem.Allocator, io: std.Io, sub_args: []const []const u8) void {
    var dry_run = false;
    var force = false;
    var input_path: ?[]const u8 = null;
    for (sub_args) |arg| {
        if (std.mem.eql(u8, arg, "--dry-run")) {
            dry_run = true;
        } else if (std.mem.eql(u8, arg, "--force")) {
            force = true;
        } else if (std.mem.startsWith(u8, arg, "-")) {
            std.debug.print("error: unknown flag '{s}' for migrate\n", .{arg});
            std.process.exit(1);
        } else if (input_path) |first| {
            // Exactly one input is supported; silently letting the last path
            // win would migrate a different file than the user may expect.
            std.debug.print(
                "error: migrate takes a single input file (got '{s}' and '{s}')\n",
                .{ first, arg },
            );
            std.process.exit(1);
        } else {
            input_path = arg;
        }
    }
    const path = input_path orelse {
        std.debug.print("usage: sx migrate [--dry-run] [--force] <file.sx>\n", .{});
        std.process.exit(1);
    };

    const source = readSource(allocator, io, path) catch |err| {
        std.debug.print("error: cannot read '{s}': {}\n", .{ path, err });
        std.process.exit(1);
    };
    const result = sx.migrate.migrateSource(allocator, io, path, source) catch |err| {
        std.debug.print("error: migrate failed for '{s}': {}\n", .{ path, err });
        std.process.exit(1);
    };

    // Worklist (ambiguous sites) always goes to stderr.
    for (result.worklist) |w| {
        std.debug.print("{s}:{d}:{d}: {s}: {s}\n", .{ path, w.line, w.col, w.reason, w.text });
    }

    const has_worklist = result.worklist.len > 0;

    // Emit the rewritten source unless we'd be shipping a half-migrated file: a
    // non-empty worklist in non-dry-run mode suppresses output unless --force.
    if (!dry_run and (!has_worklist or force)) {
        // write(2) may accept fewer bytes than requested (pipes, signals), so
        // keep writing until the whole buffer is out. A failed or zero-length
        // write is reported instead of silently truncating the output.
        var remaining: []const u8 = result.output;
        while (remaining.len > 0) {
            const written = std.c.write(1, remaining.ptr, remaining.len);
            if (written <= 0) {
                std.debug.print("error: failed to write migrated source to stdout\n", .{});
                std.process.exit(1);
            }
            const advanced: usize = @intCast(written);
            remaining = remaining[advanced..];
        }
    }

    if (has_worklist) {
        std.debug.print(
            "{d} ambiguous site(s) unmigrated; resolve by hand or pass --force\n",
            .{result.worklist.len},
        );
        std.process.exit(2);
    }
}

// Tests for migrate.zig — the `sx migrate` tuple-syntax rewriter.
//
// Each case parses an in-memory snippet (full decls, so it parses standalone),
// runs the AST-walk migrator, and asserts the rewritten text and/or worklist.
// The compiler grammar is UNCHANGED here: the migrator READS the old tuple
// syntax `(a, b)` / `(A, B)` and EMITS the new `.(a, b)` / `Tuple(A, B)` text.

const std = @import("std");
const Parser = @import("parser.zig").Parser;
const migrate = @import("migrate.zig");

/// Parse `src` (must be valid old-syntax sx decls), migrate, return the
/// rewritten text. Asserts the worklist is empty (use `runWith` for ambiguous
/// cases).
fn run(alloc: std.mem.Allocator, src: [:0]const u8) ![]const u8 {
    const res = try runWith(alloc, src);
    try std.testing.expectEqual(@as(usize, 0), res.worklist.len);
    return res.output;
}

fn runWith(alloc: std.mem.Allocator, src: [:0]const u8) !migrate.MigrationResult {
    var parser = Parser.init(alloc, src);
    const root = try parser.parse();
    return migrate.migrateRoot(alloc, src, root);
}

/// Assert that `needle` appears in `haystack` (substring), with a helpful
/// failure message that prints the full migrated text.
+fn expectContains(haystack: []const u8, needle: []const u8) !void { + if (std.mem.indexOf(u8, haystack, needle) == null) { + std.debug.print("\nexpected to find:\n {s}\nin migrated output:\n{s}\n", .{ needle, haystack }); + return error.NotFound; + } +} + +fn expectNotContains(haystack: []const u8, needle: []const u8) !void { + if (std.mem.indexOf(u8, haystack, needle) != null) { + std.debug.print("\nexpected NOT to find:\n {s}\nin migrated output:\n{s}\n", .{ needle, haystack }); + return error.UnexpectedlyFound; + } +} + +// ── VALUE tuples → .(...) ──────────────────────────────────────────────── + +test "migrate value: positional (40,2) -> .(40,2)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { x := (40, 2); }\n"); + try expectContains(out, ".(40, 2)"); + try expectNotContains(out, " (40, 2)"); // the old, un-dotted form is gone +} + +test "migrate value: named (x:1,y:2) -> .(x = 1, y = 2)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { x := (x: 1, y: 2); }\n"); + try expectContains(out, ".(x = 1, y = 2)"); +} + +test "migrate value: 1-tuple (x,) -> .(x)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { y := 9; x := (y,); }\n"); + try expectContains(out, ".(y)"); + try expectNotContains(out, "(y,)"); +} + +test "migrate value: spread (..xs) -> .(..xs)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: (xs: i32) { t := (..xs); }\n"); + try expectContains(out, ".(..xs)"); +} + +test "migrate value: operator operands (1,2)==(1,2)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { b := (1, 2) == 
(1, 2); }\n"); + // Both operands rewritten. + try expectContains(out, ".(1, 2) == .(1, 2)"); +} + +test "migrate value+type: return body -> Tuple(i64,i64){ .(b,a) }" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), + \\swap :: (a: i64, b: i64) -> (i64, i64) { (b, a) } + \\ + ); + try expectContains(out, "-> Tuple(i64, i64)"); + try expectContains(out, ".(b, a)"); +} + +test "migrate value: empty () value -> .()" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + // `x := ()` — empty tuple value. + const out = try run(arena.allocator(), "f :: () { x := (); }\n"); + try expectContains(out, ".()"); +} + +// ── TYPE tuples → Tuple(...) ───────────────────────────────────────────── + +test "migrate type: annotation a:(i32,string) -> a:Tuple(i32,string)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { a : (i32, string) = ---; }\n"); + try expectContains(out, "Tuple(i32, string)"); +} + +test "migrate type: named (x:i32,y:string) -> Tuple(x: i32, y: string) keeps colon" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { a : (x: i32, y: string) = ---; }\n"); + try expectContains(out, "Tuple(x: i32, y: string)"); +} + +test "migrate type: struct field xs:(i32,i32) -> Tuple(i32,i32)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "S :: struct { xs: (i32, i32); }\n"); + try expectContains(out, "Tuple(i32, i32)"); +} + +test "migrate type: pack (..Ts) -> Tuple(..Ts)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "S :: struct { xs: (..Ts); }\n"); + try expectContains(out, "Tuple(..Ts)"); +} 
+ +test "migrate type: 1-tuple (T,) -> Tuple(T) drops comma" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "S :: struct { xs: (i32,); }\n"); + try expectContains(out, "Tuple(i32)"); + try expectNotContains(out, "(i32,)"); +} + +// ── Worklist: ambiguous value-vs-type call arg ────────────────────────── + +test "migrate worklist: size_of((Box,i32)) is NOT rewritten, records worklist" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const res = try runWith(arena.allocator(), + \\f :: () { n := size_of((Box, i32)); } + \\ + ); + // Ambiguous inner tuple left untouched: no `.(` rewrite of `(Box, i32)`. + try expectNotContains(res.output, ".(Box, i32)"); + try expectContains(res.output, "(Box, i32)"); + // One worklist entry recorded. + try std.testing.expectEqual(@as(usize, 1), res.worklist.len); + try expectContains(res.worklist[0].text, "(Box, i32)"); +} + +test "migrate value: call arg with literal-only tuple IS rewritten" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + // `take((1, 2))` — all elements are concrete values → safe to rewrite. + const res = try runWith(arena.allocator(), "f :: () { take((1, 2)); }\n"); + try expectContains(res.output, ".(1, 2)"); + try std.testing.expectEqual(@as(usize, 0), res.worklist.len); +} + +// ── Nested tuples (recursive rewrite, ONE edit per outermost tuple) ────── + +test "migrate nested value: ((1,2),(3,4)) -> .(.(1, 2), .(3, 4))" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { x := ((1, 2), (3, 4)); }\n"); + try expectContains(out, ".(.(1, 2), .(3, 4))"); + // No stray un-migrated inner tuple, no trailing junk paren. 
+ try expectNotContains(out, ".(1, 2), 3)"); +} + +test "migrate nested value: ((1,2),3) -> .(.(1, 2), 3)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { x := ((1, 2), 3); }\n"); + try expectContains(out, ".(.(1, 2), 3)"); + try expectNotContains(out, "(1, 2), 3))"); // the broken old output +} + +test "migrate nested named value: (a:(p:1,q:2),b:3) -> .(a = .(p = 1, q = 2), b = 3)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { n := (a: (p: 1, q: 2), b: 3); }\n"); + try expectContains(out, ".(a = .(p = 1, q = 2), b = 3)"); +} + +test "migrate nested type: ((i32,i32),i64) -> Tuple(Tuple(i32, i32), i64)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { a : ((i32, i32), i64) = ---; }\n"); + try expectContains(out, "Tuple(Tuple(i32, i32), i64)"); +} + +// ── Failable multi-returns: `!` channel stays OUTSIDE Tuple(...) ───────── + +test "migrate failable: -> (T, !) -> -> T !" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () -> (i32, !) { }\n"); + try expectContains(out, "-> i32 !"); + try expectNotContains(out, "Tuple("); + try expectNotContains(out, ".("); +} + +test "migrate failable: -> (T, !Named) keeps the named set" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), + \\E :: error { Bad } + \\f :: () -> (i32, !E) { } + \\ + ); + try expectContains(out, "-> i32 !E"); + try expectNotContains(out, "Tuple("); +} + +test "migrate failable: -> (T1, T2, !) -> -> Tuple(T1, T2) !" 
{ + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () -> (i32, i64, !) { }\n"); + try expectContains(out, "-> Tuple(i32, i64) !"); +} + +test "migrate failable: bare -> ! unchanged" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () -> ! { }\n"); + try expectContains(out, "-> !"); + try expectNotContains(out, "Tuple"); +} + +// ── Inverted call-arg classification (conservative) ───────────────────── + +test "migrate worklist: empty () call arg is worklisted (unit type ambiguity)" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const res = try runWith(arena.allocator(), "f :: () { n := size_of(()); }\n"); + // NOT silently rewritten to `.()`. + try expectNotContains(res.output, "size_of(.())"); + try expectContains(res.output, "size_of(())"); + try std.testing.expectEqual(@as(usize, 1), res.worklist.len); +} + +test "migrate worklist: Vec(3) call-arg element is worklisted" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const res = try runWith(arena.allocator(), "f :: () { n := size_of((Vec(3), i32)); }\n"); + try expectNotContains(res.output, ".(Vec(3), i32)"); + try expectContains(res.output, "(Vec(3), i32)"); + try std.testing.expectEqual(@as(usize, 1), res.worklist.len); +} + +test "migrate worklist: pkg.T qualified path call-arg element is worklisted" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const res = try runWith(arena.allocator(), "f :: () { n := size_of((pkg.T, i32)); }\n"); + try expectNotContains(res.output, ".(pkg.T, i32)"); + try expectContains(res.output, "(pkg.T, i32)"); + try std.testing.expectEqual(@as(usize, 1), res.worklist.len); +} + +// ── Negatives: distinct AST nodes must NOT be touched ──────────────────── + +test 
"migrate negative: function type (i32,i32)->i32 unchanged" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { g : (i32, i32) -> i32 = ---; }\n"); + try expectContains(out, "(i32, i32) -> i32"); + try expectNotContains(out, "Tuple(i32, i32)"); +} + +test "migrate negative: function param list (self:*T,x:i32) unchanged" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "S :: struct {}\nm :: (self: *S, x: i32) { }\n"); + try expectContains(out, "(self: *S, x: i32)"); + try expectNotContains(out, "Tuple("); + try expectNotContains(out, ".(self"); +} + +test "migrate negative: array literal .[1,2,3] unchanged" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { a := .[1, 2, 3]; }\n"); + try expectContains(out, ".[1, 2, 3]"); +} + +test "migrate negative: struct literal .{x=1} unchanged" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { a := .{ x = 1 }; }\n"); + try expectContains(out, ".{ x = 1 }"); +} + +test "migrate negative: Closure(i32)->i32 type unchanged" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: () { c : Closure(i32) -> i32 = ---; }\n"); + try expectContains(out, "Closure(i32) -> i32"); + try expectNotContains(out, "Tuple("); +} + +test "migrate negative: grouping (a+b)*c unchanged" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), "f :: (a: i32, b: i32, c: i32) { x := (a + b) * c; }\n"); + try expectContains(out, "(a + b) * c"); + try expectNotContains(out, ".(a + b)"); +} + +test "migrate negative: match capture case 
.some: (val) unchanged" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const out = try run(arena.allocator(), + \\check :: (v: ?i32) -> i32 { + \\ return if v == { + \\ case .some: (val) { val } + \\ case .none: { 0 } + \\ }; + \\} + \\ + ); + try expectContains(out, "case .some: (val)"); + try expectNotContains(out, ".(val)"); +} diff --git a/src/migrate.zig b/src/migrate.zig new file mode 100644 index 00000000..9fe79cbb --- /dev/null +++ b/src/migrate.zig @@ -0,0 +1,512 @@ +//! Tuple-syntax migration tool (`sx migrate`). +//! +//! Reads OLD-syntax `.sx` source (tuple TYPES `(A, B)`, tuple VALUES `(a, b)`) +//! and emits NEW-syntax text (`Tuple(A, B)` / `.(a, b)`). The compiler grammar +//! is UNCHANGED — this tool only reads the old syntax and rewrites it as text. +//! +//! Strategy: parse-only (read -> Compilation -> parse), then walk the parsed +//! AST with a comptime-reflection child walker that recurses into every +//! `*Node`-bearing field of every node variant. Two node kinds drive a rewrite: +//! +//! * `tuple_type_expr` — produced by the parser in grammatically-forced TYPE +//! positions (`-> (...)`, `: (...)` annotations, struct-field/param types). +//! Rewritten to `Tuple(...)`. SPECIAL CASE: a failable multi-return whose +//! last element is the error-channel marker `!` keeps the channel OUTSIDE +//! the `Tuple(...)` (see `rewriteTupleType`). +//! +//! * `tuple_literal` — produced in VALUE positions. Rewritten to `.(...)`. +//! In CALL-ARG position the value/type distinction is ambiguous, so we only +//! auto-rewrite when EVERY element is a concrete value literal; anything +//! else (bare identifier, `Vec(3)`, `pkg.T`, empty `()`, ...) is recorded on +//! the worklist and left untouched — never guess (CLAUDE.md silent-fallback +//! rule). +//! +//! Nesting: the rewrite is RECURSIVE but emits exactly ONE edit per OUTERMOST +//! tuple. 
The replacement text for a tuple is built by recursively migrating its +//! nested tuple elements (and any non-tuple subexpressions, e.g. calls) directly +//! into that text. We never emit a separate, overlapping child edit for anything +//! inside a tuple's span — `applyEdits` asserts non-overlap as a tripwire. +//! +//! Edits are collected against the ORIGINAL source byte offsets and applied +//! DESCENDING by start offset so earlier offsets stay valid; comments and +//! formatting outside the edited spans are preserved verbatim. + +const std = @import("std"); +const ast = @import("ast.zig"); +const core = @import("core.zig"); + +const Node = ast.Node; + +/// A single text replacement against the original source: `source[start..end]` +/// becomes `replacement`. +pub const Edit = struct { + start: u32, + end: u32, + replacement: []const u8, +}; + +/// An ambiguous site we refused to rewrite. `line`/`col` are 1-based. +pub const Worklist = struct { + line: u32, + col: u32, + text: []const u8, + reason: []const u8, +}; + +pub const MigrationResult = struct { + /// The rewritten source (a fresh allocation owning its bytes). + output: []const u8, + /// Ambiguous sites left untouched, in source order. + worklist: []const Worklist, +}; + +/// Walk state: collects edits + worklist entries while recursing the AST. +const Walker = struct { + allocator: std.mem.Allocator, + source: []const u8, + edits: std.ArrayList(Edit) = .empty, + worklist: std.ArrayList(Worklist) = .empty, + + /// Recurse into `node`. `is_call_arg` is true when this node is a DIRECT + /// argument of a `call` / `ffi_intrinsic_call` — the only context in which a + /// `tuple_literal` may be value-vs-type ambiguous. + /// + /// On hitting an OUTERMOST tuple we compute its full replacement (recursively + /// baking any nested tuples / subexprs into the text) and emit a SINGLE edit; + /// we do NOT continue the edit-emitting walk into the tuple's span (that would + /// produce overlapping edits). 
Worklist collection for ambiguous nested + /// call-args still happens, inside the recursive text builder. + fn walk(self: *Walker, node: *const Node, is_call_arg: bool) anyerror!void { + switch (node.data) { + .tuple_type_expr => |tt| { + const replacement = try self.buildTupleTypeText(node, tt); + if (replacement) |rep| { + try self.edits.append(self.allocator, .{ + .start = node.span.start, + .end = node.span.end, + .replacement = rep, + }); + } + // Do NOT recurse into the tuple's element subtrees here — they + // are already baked into `replacement`. (A `null` replacement + // means "leave unchanged"; that only happens for `-> !`, which + // has no value elements to rewrite anyway.) + return; + }, + .tuple_literal => |tl| { + if (is_call_arg and !tupleIsAllConcreteValues(tl)) { + // Ambiguous in call-arg position (could be a type argument, + // a parameterized type, a qualified path, the unit type + // `()`, ...). Refuse to guess — record + leave untouched, and + // keep walking into elements so nested unambiguous tuples are + // still migrated. + try self.recordWorklist(node); + for (tl.elements) |el| try self.walk(el.value, false); + } else { + const rep = try self.buildTupleValueText(node, tl); + try self.edits.append(self.allocator, .{ + .start = node.span.start, + .end = node.span.end, + .replacement = rep, + }); + } + return; + }, + // A `call`'s direct args get the call-arg flag; the callee does not. + .call => |c| { + try self.walk(c.callee, false); + for (c.args) |a| try self.walk(a, true); + return; + }, + .ffi_intrinsic_call => |c| { + try self.walk(c.return_type, false); + for (c.args) |a| try self.walk(a, true); + return; + }, + else => {}, + } + // Generic recursion for every other node: visit each child *Node found + // by reflection over the active union payload. Call-arg context does NOT + // propagate past a non-call node. 
+ try self.walkChildren(node); + } + + /// Reflect over the active payload of `node.data` and recurse into every + /// `*Node` reachable through its fields (directly, through optionals, + /// slices, and nested aggregate structs/unions). + fn walkChildren(self: *Walker, node: *const Node) anyerror!void { + switch (node.data) { + inline else => |payload| { + try self.walkValue(@TypeOf(payload), payload); + }, + } + } + + /// Recurse into any `*Node` reachable from `value` of type `T`. + fn walkValue(self: *Walker, comptime T: type, value: T) anyerror!void { + if (T == *Node or T == *const Node) { + try self.walk(value, false); + return; + } + switch (@typeInfo(T)) { + .pointer => |ptr| { + switch (ptr.size) { + .slice => { + if (comptime containsNode(ptr.child)) { + for (value) |elem| try self.walkValue(ptr.child, elem); + } + }, + // Non-slice pointers other than *Node (handled above) carry + // no AST children we rewrite. + else => {}, + } + }, + .optional => |opt| { + if (comptime containsNode(opt.child)) { + if (value) |inner| try self.walkValue(opt.child, inner); + } + }, + .@"struct" => |st| { + inline for (st.fields) |f| { + if (comptime containsNode(f.type)) { + try self.walkValue(f.type, @field(value, f.name)); + } + } + }, + .@"union" => |un| { + if (comptime unionContainsNode(un)) { + switch (value) { + inline else => |inner| try self.walkValue(@TypeOf(inner), inner), + } + } + }, + else => {}, + } + } + + /// Build the replacement text for a `tuple_type_expr`, baking nested tuples + /// recursively. Returns `null` when the node should be left unchanged. + /// + /// Failable multi-return handling — the error channel `!` (an + /// `error_type_expr` element, always last) stays OUTSIDE the `Tuple(...)`: + /// * `(!)` → unchanged (no value tuple). + /// * `(T, !)` → `T !` (single value: drop the parens). + /// * `(T1, T2, !)` → `Tuple(T1, T2) !`. 
+ fn buildTupleTypeText(self: *Walker, node: *const Node, tt: ast.TupleTypeExpr) !?[]const u8 { + // Detect a trailing error-channel marker. + const n = tt.field_types.len; + const has_err = n > 0 and tt.field_types[n - 1].data == .error_type_expr; + + if (has_err) { + const err_node = tt.field_types[n - 1]; + // Raw text of the error marker, e.g. `!` or `!JsonError`. + const err_text = self.source[err_node.span.start..err_node.span.end]; + const value_count = n - 1; + if (value_count == 0) { + // `-> !` (no value tuple) — leave unchanged. + return null; + } + if (value_count == 1) { + // `(T, !)` → `T !` — strip the parens, no Tuple wrapper. + const t_text = try self.migratedTypeElement(tt.field_types[0]); + return try std.fmt.allocPrint(self.allocator, "{s} {s}", .{ t_text, err_text }); + } + // `(T1, T2, ..., !)` → `Tuple(T1, T2, ...) !`. + const inner = try self.buildTypeInner(node, tt, value_count); + return try std.fmt.allocPrint(self.allocator, "Tuple{s} {s}", .{ inner, err_text }); + } + + // Ordinary type tuple: `Tuple(...)`, names keep `:`. + const inner = try self.buildTypeInner(node, tt, n); + return try std.fmt.allocPrint(self.allocator, "Tuple{s}", .{inner}); + } + + /// Build the parenthesized inner `(...)` for a type tuple covering the first + /// `count` field types (a failable return passes `count < field_types.len` to + /// exclude the trailing `!`). Names keep their `:`. A 1-tuple drops its + /// trailing comma. + fn buildTypeInner(self: *Walker, node: *const Node, tt: ast.TupleTypeExpr, count: usize) ![]const u8 { + var out = std.ArrayList(u8).empty; + try out.append(self.allocator, '('); + for (tt.field_types[0..count], 0..) |ft, i| { + if (i != 0) try out.appendSlice(self.allocator, ", "); + // Named type tuple keeps `name: ` verbatim. + if (tt.field_names) |names| { + // Synthetic `_` names mark positional slots — emit nothing. 
+ if (!isSyntheticName(names[i], i)) { + try out.appendSlice(self.allocator, names[i]); + try out.appendSlice(self.allocator, ": "); + } + } + const el_text = try self.migratedTypeElement(ft); + try out.appendSlice(self.allocator, el_text); + } + try out.append(self.allocator, ')'); + _ = node; + return out.toOwnedSlice(self.allocator); + } + + /// Migrate a single TYPE element subtree to text. A nested tuple type is + /// baked recursively; everything else is copied verbatim from source but with + /// any nested tuples inside it rewritten. + fn migratedTypeElement(self: *Walker, ft: *const Node) anyerror![]const u8 { + if (ft.data == .tuple_type_expr) { + const rep = try self.buildTupleTypeText(ft, ft.data.tuple_type_expr); + return rep orelse self.source[ft.span.start..ft.span.end]; + } + return self.migratedSubtree(ft, false); + } + + /// Build the replacement text for a `tuple_literal`, baking nested tuples + /// recursively. Names flip `:` → ` = `. + fn buildTupleValueText(self: *Walker, node: *const Node, tl: ast.TupleLiteral) ![]const u8 { + var out = std.ArrayList(u8).empty; + try out.appendSlice(self.allocator, ".("); + for (tl.elements, 0..) |el, i| { + if (i != 0) try out.appendSlice(self.allocator, ", "); + if (el.name) |name| { + try out.appendSlice(self.allocator, name); + try out.appendSlice(self.allocator, " = "); + } + // Spread element: `..xs` — the parser models it as a spread_expr + // whose operand is the spread target; copy its source verbatim + // (its own nested tuples, if any, get migrated by migratedSubtree). + const el_text = try self.migratedValueElement(el.value); + try out.appendSlice(self.allocator, el_text); + } + try out.append(self.allocator, ')'); + _ = node; + return out.toOwnedSlice(self.allocator); + } + + /// Migrate a single VALUE element subtree to text. A nested tuple literal is + /// baked recursively; everything else is copied verbatim with nested tuples + /// inside rewritten. 
+ fn migratedValueElement(self: *Walker, value: *const Node) anyerror![]const u8 { + if (value.data == .tuple_literal) { + const tl = value.data.tuple_literal; + // A nested tuple in a VALUE position is unambiguously a value (it is + // never itself a direct call-arg), so always rewrite it. + return self.buildTupleValueText(value, tl); + } + return self.migratedSubtree(value, false); + } + + /// Return the migrated text for an arbitrary subtree by collecting the edits + /// its descendants produce (relative to `node.span`) and splicing them into + /// the raw source slice. Worklist entries discovered inside are appended to + /// the shared worklist. This is how a NON-tuple element of a tuple (e.g. a + /// `call` with its own nested tuple args) gets its inner tuples migrated + /// while preserving its surrounding formatting verbatim. + fn migratedSubtree(self: *Walker, node: *const Node, is_call_arg: bool) ![]const u8 { + // Sub-walk with a private edit list but the SHARED worklist. + var sub = Walker{ + .allocator = self.allocator, + .source = self.source, + .worklist = self.worklist, + }; + try sub.walk(node, is_call_arg); + // Carry any worklist entries the sub-walk found back to the parent. + self.worklist = sub.worklist; + + const base = node.span.start; + const raw = self.source[node.span.start..node.span.end]; + if (sub.edits.items.len == 0) return raw; + // Splice sub-edits (offsets are absolute; rebase to the slice). 
+ return applyEditsRebased(self.allocator, raw, base, sub.edits.items); + } + + fn recordWorklist(self: *Walker, node: *const Node) !void { + const lc = lineCol(self.source, node.span.start); + try self.worklist.append(self.allocator, .{ + .line = lc.line, + .col = lc.col, + .text = self.source[node.span.start..node.span.end], + .reason = "ambiguous value-vs-type call arg; resolve to `Tuple(...)` or `.(...)` by hand", + }); + } +}; + +/// A synthetic positional name is exactly `_` for slot `i` (the parser +/// fills these in for positional slots of an otherwise-named tuple). Treat such +/// a name as "no name" so a mixed tuple's positional slots stay positional. +fn isSyntheticName(name: []const u8, i: usize) bool { + if (name.len < 2 or name[0] != '_') return false; + var buf: [24]u8 = undefined; + const expect = std.fmt.bufPrint(&buf, "_{d}", .{i}) catch return false; + return std.mem.eql(u8, name, expect); +} + +/// True when EVERY element of a call-arg `tuple_literal` is a concrete value +/// literal (or an unambiguous value-operator expression over such). Only then is +/// it safe to auto-rewrite the tuple to `.(...)` in call-arg position — anything +/// else (bare identifier, parameterized type `Vec(3)`, qualified path `pkg.T`, +/// empty `()`, ...) is ambiguous and goes to the worklist. +fn tupleIsAllConcreteValues(tl: ast.TupleLiteral) bool { + // An empty `()` in call-arg position is ambiguous (unit type vs empty value). + if (tl.elements.len == 0) return false; + for (tl.elements) |el| { + if (!nodeIsConcreteValue(el.value)) return false; + } + return true; +} + +/// A node is a "concrete value" when it can only denote a runtime value — never +/// a type. Conservative: int/float/string/bool/char literals, null/undef, enum +/// literals, array/struct literals, and value-operator expressions (binary / +/// unary ops, comparisons) whose operands are themselves concrete values. A +/// nested tuple literal of concrete values is concrete too. 
/// Everything else (identifiers, calls, field access, parameterized/qualified
/// type syntax, ...) is NOT — it could be or contain a type.
fn nodeIsConcreteValue(node: *const Node) bool {
    return switch (node.data) {
        .int_literal,
        .float_literal,
        .bool_literal,
        .string_literal,
        .null_literal,
        .undef_literal,
        .enum_literal,
        .array_literal,
        .struct_literal,
        => true,
        .binary_op => |b| nodeIsConcreteValue(b.lhs) and nodeIsConcreteValue(b.rhs),
        .chained_comparison => |c| blk: {
            for (c.operands) |o| {
                if (!nodeIsConcreteValue(o)) break :blk false;
            }
            break :blk true;
        },
        .unary_op => |u| nodeIsConcreteValue(u.operand),
        .tuple_literal => |t| tupleIsAllConcreteValues(t),
        else => false,
    };
}

/// Comptime: does type `T` (transitively) contain a `*Node` we'd recurse into?
/// Prunes the reflection walk so we never descend into pure-scalar payloads.
fn containsNode(comptime T: type) bool {
    if (T == *Node or T == *const Node or T == Node) return true;
    return switch (@typeInfo(T)) {
        .pointer => |ptr| switch (ptr.size) {
            .slice => containsNode(ptr.child),
            .one => ptr.child == Node, // *Node handled above; other *X: no
            else => false,
        },
        .optional => |opt| containsNode(opt.child),
        .array => |arr| containsNode(arr.child),
        .@"struct" => |st| blk: {
            inline for (st.fields) |f| {
                if (containsNode(f.type)) break :blk true;
            }
            break :blk false;
        },
        .@"union" => |un| unionContainsNode(un),
        else => false,
    };
}

/// Comptime: true when any field of the union `un` (transitively) contains a
/// node, as decided by `containsNode`.
fn unionContainsNode(comptime un: std.builtin.Type.Union) bool {
    inline for (un.fields) |f| {
        if (containsNode(f.type)) return true;
    }
    return false;
}

const LineCol = struct { line: u32, col: u32 };

/// Translate a byte `offset` into a 1-based line/column pair. Columns count
/// bytes since the last `\n`; an offset past the end of `source` is clamped to
/// the position just after the final byte.
fn lineCol(source: []const u8, offset: u32) LineCol {
    var line: u32 = 1;
    var col: u32 = 1;
    var i: usize = 0;
    while (i < offset and i < source.len) : (i += 1) {
        if (source[i] == '\n') {
            line += 1;
            col = 1;
        } else {
            col += 1;
        }
    }
    return .{ .line = line, .col = col };
}

/// Migrate a source string in memory. Parse-only; never resolves imports or
/// lowers. Returns the rewritten text + any ambiguous worklist entries.
///
/// `file_path` is used only for diagnostics labeling. A parse failure renders
/// the collected errors and returns `error.ParseFailed`.
pub fn migrateSource(
    allocator: std.mem.Allocator,
    io: std.Io,
    file_path: []const u8,
    source: [:0]const u8,
) !MigrationResult {
    var comp = core.Compilation.init(allocator, io, file_path, source, .{}, &.{});
    defer comp.deinit();
    comp.parse() catch {
        comp.renderErrors();
        return error.ParseFailed;
    };
    const root = comp.root orelse return error.ParseFailed;
    return migrateRoot(allocator, source, root);
}

/// Migrate from an already-parsed `root`. Split from `migrateSource` so unit
/// tests can parse in memory (via `Parser.init`) without an `std.Io`.
///
/// The caller owns `output` and `worklist` in the result; worklist `text`
/// slices alias `source`.
pub fn migrateRoot(
    allocator: std.mem.Allocator,
    source: []const u8,
    root: *const Node,
) !MigrationResult {
    var walker = Walker{ .allocator = allocator, .source = source };
    // The edit list is only needed until `applyEdits` has produced the output.
    defer walker.edits.deinit(allocator);
    // On failure nobody receives the worklist, so release it here.
    errdefer walker.worklist.deinit(allocator);

    for (root.data.root.decls) |decl| {
        try walker.walk(decl, false);
    }
    const output = try applyEdits(allocator, source, walker.edits.items);
    errdefer allocator.free(output);
    return .{
        .output = output,
        .worklist = try walker.worklist.toOwnedSlice(allocator),
    };
}

/// Apply edits to a COPY of the original source. Edits are sorted DESCENDING by
/// start so each splice leaves earlier offsets valid. Overlapping edits are a
/// hard error — the recursive rewrite must emit exactly one edit per outermost
/// tuple, so two edits sharing any byte is a bug.
///
/// Returns `error.EditOutOfBounds` when an edit has `start > end` or reaches
/// past `source.len`, and `error.OverlappingEdits` when two edits share a byte.
/// The caller owns the returned slice; `edits_in` is not modified.
pub fn applyEdits(allocator: std.mem.Allocator, source: []const u8, edits_in: []const Edit) ![]const u8 {
    // Sort a private copy so the caller's slice keeps its order.
    const edits = try allocator.dupe(Edit, edits_in);
    defer allocator.free(edits);
    std.mem.sort(Edit, edits, {}, struct {
        fn lessThan(_: void, a: Edit, b: Edit) bool {
            return a.start > b.start; // descending
        }
    }.lessThan);

    // Tripwire: after the descending sort, each edit's end must not exceed the
    // next (lower-start) edit's start. Any overlap means the recursive rewrite
    // double-emitted — refuse to produce corrupt output. Malformed spans are
    // rejected up front so the `end - start` length below cannot underflow and
    // the splice cannot run past the buffer.
    var prev_start: ?u32 = null;
    for (edits) |e| {
        if (e.start > e.end or e.end > source.len) return error.EditOutOfBounds;
        if (prev_start) |ps| {
            if (e.end > ps) return error.OverlappingEdits;
        }
        prev_start = e.start;
    }

    var out = try std.ArrayList(u8).initCapacity(allocator, source.len);
    errdefer out.deinit(allocator);
    try out.appendSlice(allocator, source);
    for (edits) |e| {
        // Splice source[e.start..e.end] -> e.replacement.
        try out.replaceRange(allocator, e.start, e.end - e.start, e.replacement);
    }
    return out.toOwnedSlice(allocator);
}

/// Apply edits whose `start`/`end` are ABSOLUTE source offsets to a `slice` that
/// begins at absolute offset `base`. Used by `migratedSubtree` to splice a
/// non-tuple subtree's inner tuple rewrites into its raw slice.
///
/// Returns `error.EditOutOfBounds` when an edit starts before `base` (its
/// rebased offset would underflow); the remaining checks are those of
/// `applyEdits`. The caller owns the returned slice.
fn applyEditsRebased(allocator: std.mem.Allocator, slice: []const u8, base: u32, edits_in: []const Edit) ![]const u8 {
    const rebased = try allocator.alloc(Edit, edits_in.len);
    defer allocator.free(rebased);
    for (edits_in, rebased) |e, *slot| {
        if (e.start < base or e.end < e.start) return error.EditOutOfBounds;
        slot.* = .{ .start = e.start - base, .end = e.end - base, .replacement = e.replacement };
    }
    return applyEdits(allocator, slice, rebased);
}