add sx migrate tuple-syntax migration tool

Temporary scaffolding for the tuple-syntax cutover. Parses old-grammar
.sx and rewrites tuple syntax to the new spelling:
  - tuple TYPES   `(A, B)`        -> `Tuple(A, B)`   (named keeps `:`)
  - tuple VALUES  `(a, b)`        -> `.(a, b)`        (named flips `:` -> `=`)
  - 1-tuples / empty / spread     -> `.(x)` / `.()` / `.(..xs)`, `Tuple(..Ts)`
  - failable returns: the `!` channel stays OUTSIDE Tuple
      `-> (T, !)`        -> `-> T !`
      `-> (T1, T2, !)`   -> `-> Tuple(T1, T2) !`

AST-walk based: rewrites only `tuple_literal` / `tuple_type_expr` nodes
(function types, param lists, match bindings, arrays, struct literals,
Closure sigs, groupings are left untouched). Nested tuples rewrite
recursively as a single non-overlapping edit per outermost tuple.

Value-vs-type ambiguity (call-arg tuples whose elements could be types,
e.g. `size_of((Box, i32))`, empty `()`) is never guessed: such sites go
to a worklist. A non-empty worklist exits nonzero and suppresses the
"looks-done" stdout output unless `--force` is passed.

`sx migrate <f>` prints migrated source; `--dry-run` prints only the
worklist. Built against the old grammar; removed after the cutover.
This commit is contained in:
agra
2026-06-25 15:23:18 +03:00
parent 820cd62fa1
commit c882c6c63e
4 changed files with 918 additions and 0 deletions

View File

@@ -23,6 +23,13 @@ pub fn main(init: std.process.Init) !void {
return;
}
// `migrate` has its own flag (`--dry-run`) the generic flag loop below would
// reject, so dispatch it here before that loop runs.
if (std.mem.eql(u8, command, "migrate")) {
runMigrate(allocator, io, args[2..]);
return;
}
// Parse flags and positional arguments
var input_path: ?[]const u8 = null;
var target_config = sx.target.TargetConfig{};
@@ -407,6 +414,7 @@ fn printUsage() void {
\\ ir Print LLVM IR to stdout
\\ asm Emit assembly (.s) file
\\ lsp Start language server (LSP)
\\ migrate Rewrite old tuple syntax to new (`(a,b)`->`.(a,b)`, type `(A,B)`->`Tuple(A,B)`); `--dry-run` prints only the worklist, `--force` emits output despite unmigrated ambiguous sites
\\
\\Options:
\\ --target <target> Target triple or shorthand: wasm, macos, linux, windows, ios, ios-sim (default: host)
@@ -517,6 +525,72 @@ fn compilePipeline(allocator: std.mem.Allocator, io: std.Io, input_path: []const
return comp;
}
/// `sx migrate [--dry-run] [--force] <file.sx>` — tuple-syntax migration tool.
///
/// Reads the file, runs `sx.migrate.migrateSource` on it and reports every
/// ambiguous site (the "worklist") on stderr as `path:line:col: reason: text`.
///
/// Modes:
///   * no flags    — print the migrated source to stdout, but ONLY when the
///                   worklist is empty. A half-migrated file is never printed,
///                   because stdout may be redirected over the input.
///   * `--dry-run` — never print the migrated source; only the worklist.
///   * `--force`   — print the migrated source even when the worklist is
///                   non-empty (ambiguous sites stay in the OLD syntax).
///
/// Exit status: returns normally on a clean migration; exits 1 on a usage,
/// read, migrate or stdout-write error; exits 2 whenever the worklist is
/// non-empty (with or without `--force`), so scripts can detect a partial
/// migration.
fn runMigrate(allocator: std.mem.Allocator, io: std.Io, sub_args: []const []const u8) void {
    var dry_run = false;
    var force = false;
    var input_path: ?[]const u8 = null;
    for (sub_args) |a| {
        if (std.mem.eql(u8, a, "--dry-run")) {
            dry_run = true;
        } else if (std.mem.eql(u8, a, "--force")) {
            force = true;
        } else if (std.mem.startsWith(u8, a, "-")) {
            std.debug.print("error: unknown flag '{s}' for migrate\n", .{a});
            std.process.exit(1);
        } else if (input_path) |first| {
            // Only one file is migrated per invocation. Silently keeping the
            // last path would make `sx migrate a.sx b.sx` look like it handled
            // both files.
            std.debug.print(
                "error: migrate takes exactly one input file (got '{s}' and '{s}')\n",
                .{ first, a },
            );
            std.process.exit(1);
        } else {
            input_path = a;
        }
    }
    const path = input_path orelse {
        std.debug.print("usage: sx migrate [--dry-run] [--force] <file.sx>\n", .{});
        std.process.exit(1);
    };
    const source = readSource(allocator, io, path) catch |err| {
        std.debug.print("error: cannot read '{s}': {}\n", .{ path, err });
        std.process.exit(1);
    };
    const result = sx.migrate.migrateSource(allocator, io, path, source) catch |err| {
        std.debug.print("error: migrate failed for '{s}': {}\n", .{ path, err });
        std.process.exit(1);
    };
    // Worklist (ambiguous sites) always goes to stderr.
    for (result.worklist) |w| {
        std.debug.print("{s}:{d}:{d}: {s}: {s}\n", .{ path, w.line, w.col, w.reason, w.text });
    }
    const has_worklist = result.worklist.len > 0;
    // Emit the rewritten source unless we'd be shipping a half-migrated file: a
    // non-empty worklist in non-dry-run mode suppresses output unless --force.
    if (!dry_run and (!has_worklist or force)) {
        // write(2) may accept fewer bytes than requested (pipes, large
        // outputs), so keep writing until the whole buffer is out. Any
        // non-positive return is treated as a hard failure rather than
        // silently truncating the migrated source.
        var pending: []const u8 = result.output;
        while (pending.len > 0) {
            const rc = std.c.write(1, pending.ptr, pending.len);
            if (rc <= 0) {
                std.debug.print("error: failed writing migrated source to stdout\n", .{});
                std.process.exit(1);
            }
            pending = pending[@intCast(rc)..];
        }
    }
    if (has_worklist) {
        std.debug.print(
            "{d} ambiguous site(s) unmigrated; resolve by hand or pass --force\n",
            .{result.worklist.len},
        );
        std.process.exit(2);
    }
}
fn dumpSxIR(allocator: std.mem.Allocator, io: std.Io, input_path: []const u8, stdlib_paths: []const []const u8) !void {
const source = try readSource(allocator, io, input_path);
var comp = sx.core.Compilation.init(allocator, io, input_path, source, .{}, stdlib_paths);

330
src/migrate.test.zig Normal file
View File

@@ -0,0 +1,330 @@
// Tests for migrate.zig — the `sx migrate` tuple-syntax rewriter.
//
// Each case parses an in-memory snippet (full decls, so it parses standalone),
// runs the AST-walk migrator, and asserts the rewritten text and/or worklist.
// The compiler grammar is UNCHANGED here: the migrator READS the old tuple
// syntax `(a, b)` / `(A, B)` and EMITS the new `.(a, b)` / `Tuple(A, B)` text.
const std = @import("std");
const Parser = @import("parser.zig").Parser;
const migrate = @import("migrate.zig");
/// Migrate `src` (valid old-syntax sx declarations) and hand back only the
/// rewritten text. Fails the test if the migrator left anything on the
/// worklist; ambiguous cases should call `runWith` and inspect the result.
fn run(alloc: std.mem.Allocator, src: [:0]const u8) ![]const u8 {
    const migrated = try runWith(alloc, src);
    const unresolved_sites: usize = migrated.worklist.len;
    try std.testing.expectEqual(@as(usize, 0), unresolved_sites);
    return migrated.output;
}
/// Parse `src` with the (old-grammar) parser and run the AST-walk migrator on
/// the resulting root, returning both the rewritten text and the worklist.
fn runWith(alloc: std.mem.Allocator, src: [:0]const u8) !migrate.MigrationResult {
    var old_grammar_parser = Parser.init(alloc, src);
    return migrate.migrateRoot(alloc, src, try old_grammar_parser.parse());
}
/// Succeeds when `needle` occurs somewhere in `haystack`. Otherwise prints
/// both the needle and the full migrated text to stderr and returns
/// `error.NotFound`.
fn expectContains(haystack: []const u8, needle: []const u8) !void {
    const found = std.mem.indexOf(u8, haystack, needle) != null;
    if (found) return;
    std.debug.print("\nexpected to find:\n {s}\nin migrated output:\n{s}\n", .{ needle, haystack });
    return error.NotFound;
}
/// Succeeds when `needle` does NOT occur in `haystack`. Otherwise prints both
/// the needle and the full migrated text to stderr and returns
/// `error.UnexpectedlyFound`.
fn expectNotContains(haystack: []const u8, needle: []const u8) !void {
    const absent = std.mem.indexOf(u8, haystack, needle) == null;
    if (absent) return;
    std.debug.print("\nexpected NOT to find:\n {s}\nin migrated output:\n{s}\n", .{ needle, haystack });
    return error.UnexpectedlyFound;
}
// ── VALUE tuples → .(...) ────────────────────────────────────────────────
test "migrate value: positional (40,2) -> .(40,2)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { x := (40, 2); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, ".(40, 2)");
    // The un-dotted spelling must no longer appear.
    try expectNotContains(migrated, " (40, 2)");
}
test "migrate value: named (x:1,y:2) -> .(x = 1, y = 2)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { x := (x: 1, y: 2); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, ".(x = 1, y = 2)");
}
test "migrate value: 1-tuple (x,) -> .(x)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { y := 9; x := (y,); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, ".(y)");
    try expectNotContains(migrated, "(y,)");
}
test "migrate value: spread (..xs) -> .(..xs)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: (xs: i32) { t := (..xs); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, ".(..xs)");
}
test "migrate value: operator operands (1,2)==(1,2)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { b := (1, 2) == (1, 2); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    // Each side of the comparison is rewritten.
    try expectContains(migrated, ".(1, 2) == .(1, 2)");
}
test "migrate value+type: return body -> Tuple(i64,i64){ .(b,a) }" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source =
        \\swap :: (a: i64, b: i64) -> (i64, i64) { (b, a) }
        \\
    ;
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "-> Tuple(i64, i64)");
    try expectContains(migrated, ".(b, a)");
}
test "migrate value: empty () value -> .()" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    // The right-hand side of `x := ()` is an empty tuple value.
    const old_source = "f :: () { x := (); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, ".()");
}
// ── TYPE tuples → Tuple(...) ─────────────────────────────────────────────
test "migrate type: annotation a:(i32,string) -> a:Tuple(i32,string)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { a : (i32, string) = ---; }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "Tuple(i32, string)");
}
test "migrate type: named (x:i32,y:string) -> Tuple(x: i32, y: string) keeps colon" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { a : (x: i32, y: string) = ---; }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "Tuple(x: i32, y: string)");
}
test "migrate type: struct field xs:(i32,i32) -> Tuple(i32,i32)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "S :: struct { xs: (i32, i32); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "Tuple(i32, i32)");
}
test "migrate type: pack (..Ts) -> Tuple(..Ts)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "S :: struct { xs: (..Ts); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "Tuple(..Ts)");
}
test "migrate type: 1-tuple (T,) -> Tuple(T) drops comma" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "S :: struct { xs: (i32,); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "Tuple(i32)");
    try expectNotContains(migrated, "(i32,)");
}
// ── Worklist: ambiguous value-vs-type call arg ──────────────────────────
test "migrate worklist: size_of((Box,i32)) is NOT rewritten, records worklist" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source =
        \\f :: () { n := size_of((Box, i32)); }
        \\
    ;
    const result = try runWith(arena_state.allocator(), old_source);
    // The ambiguous inner tuple keeps its old spelling: no `.(` prefix added.
    try expectNotContains(result.output, ".(Box, i32)");
    try expectContains(result.output, "(Box, i32)");
    // Exactly one site is reported, and it quotes the tuple text.
    try std.testing.expectEqual(@as(usize, 1), result.worklist.len);
    const reported = result.worklist[0];
    try expectContains(reported.text, "(Box, i32)");
}
test "migrate value: call arg with literal-only tuple IS rewritten" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    // In `take((1, 2))` every element is a literal, so the tuple can only be a
    // value and is safe to rewrite.
    const old_source = "f :: () { take((1, 2)); }\n";
    const result = try runWith(arena_state.allocator(), old_source);
    try expectContains(result.output, ".(1, 2)");
    try std.testing.expectEqual(@as(usize, 0), result.worklist.len);
}
// ── Nested tuples (recursive rewrite, ONE edit per outermost tuple) ──────
test "migrate nested value: ((1,2),(3,4)) -> .(.(1, 2), .(3, 4))" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const out = try run(arena.allocator(), "f :: () { x := ((1, 2), (3, 4)); }\n");
    try expectContains(out, ".(.(1, 2), .(3, 4))");
    // No stray un-migrated inner tuple: neither inner tuple may keep its bare
    // `(` (the first would show up as `((1, 2)`, the second as `, (3, 4)`).
    try expectNotContains(out, "((1, 2)");
    try expectNotContains(out, ", (3, 4)");
    // No trailing junk paren left behind by an overlapping edit.
    try expectNotContains(out, "(3, 4)))");
}
test "migrate nested value: ((1,2),3) -> .(.(1, 2), 3)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { x := ((1, 2), 3); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, ".(.(1, 2), 3)");
    // Guards against the previously broken output with a doubled closing paren.
    try expectNotContains(migrated, "(1, 2), 3))");
}
test "migrate nested named value: (a:(p:1,q:2),b:3) -> .(a = .(p = 1, q = 2), b = 3)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { n := (a: (p: 1, q: 2), b: 3); }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, ".(a = .(p = 1, q = 2), b = 3)");
}
test "migrate nested type: ((i32,i32),i64) -> Tuple(Tuple(i32, i32), i64)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { a : ((i32, i32), i64) = ---; }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "Tuple(Tuple(i32, i32), i64)");
}
// ── Failable multi-returns: `!` channel stays OUTSIDE Tuple(...) ─────────
test "migrate failable: -> (T, !) -> -> T !" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () -> (i32, !) { }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "-> i32 !");
    try expectNotContains(migrated, "Tuple(");
    try expectNotContains(migrated, ".(");
}
test "migrate failable: -> (T, !Named) keeps the named set" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source =
        \\E :: error { Bad }
        \\f :: () -> (i32, !E) { }
        \\
    ;
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "-> i32 !E");
    try expectNotContains(migrated, "Tuple(");
}
test "migrate failable: -> (T1, T2, !) -> -> Tuple(T1, T2) !" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () -> (i32, i64, !) { }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "-> Tuple(i32, i64) !");
}
test "migrate failable: bare -> ! unchanged" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () -> ! { }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "-> !");
    try expectNotContains(migrated, "Tuple");
}
// ── Inverted call-arg classification (conservative) ─────────────────────
test "migrate worklist: empty () call arg is worklisted (unit type ambiguity)" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { n := size_of(()); }\n";
    const result = try runWith(arena_state.allocator(), old_source);
    // The empty tuple must stay as written, never become `.()` on its own.
    try expectNotContains(result.output, "size_of(.())");
    try expectContains(result.output, "size_of(())");
    try std.testing.expectEqual(@as(usize, 1), result.worklist.len);
}
test "migrate worklist: Vec(3) call-arg element is worklisted" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { n := size_of((Vec(3), i32)); }\n";
    const result = try runWith(arena_state.allocator(), old_source);
    try expectNotContains(result.output, ".(Vec(3), i32)");
    try expectContains(result.output, "(Vec(3), i32)");
    try std.testing.expectEqual(@as(usize, 1), result.worklist.len);
}
test "migrate worklist: pkg.T qualified path call-arg element is worklisted" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { n := size_of((pkg.T, i32)); }\n";
    const result = try runWith(arena_state.allocator(), old_source);
    try expectNotContains(result.output, ".(pkg.T, i32)");
    try expectContains(result.output, "(pkg.T, i32)");
    try std.testing.expectEqual(@as(usize, 1), result.worklist.len);
}
// ── Negatives: distinct AST nodes must NOT be touched ────────────────────
test "migrate negative: function type (i32,i32)->i32 unchanged" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { g : (i32, i32) -> i32 = ---; }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "(i32, i32) -> i32");
    try expectNotContains(migrated, "Tuple(i32, i32)");
}
test "migrate negative: function param list (self:*T,x:i32) unchanged" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "S :: struct {}\nm :: (self: *S, x: i32) { }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "(self: *S, x: i32)");
    try expectNotContains(migrated, "Tuple(");
    try expectNotContains(migrated, ".(self");
}
test "migrate negative: array literal .[1,2,3] unchanged" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { a := .[1, 2, 3]; }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, ".[1, 2, 3]");
}
test "migrate negative: struct literal .{x=1} unchanged" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { a := .{ x = 1 }; }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, ".{ x = 1 }");
}
test "migrate negative: Closure(i32)->i32 type unchanged" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: () { c : Closure(i32) -> i32 = ---; }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "Closure(i32) -> i32");
    try expectNotContains(migrated, "Tuple(");
}
test "migrate negative: grouping (a+b)*c unchanged" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source = "f :: (a: i32, b: i32, c: i32) { x := (a + b) * c; }\n";
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "(a + b) * c");
    try expectNotContains(migrated, ".(a + b)");
}
test "migrate negative: match capture case .some: (val) unchanged" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const old_source =
        \\check :: (v: ?i32) -> i32 {
        \\ return if v == {
        \\ case .some: (val) { val }
        \\ case .none: { 0 }
        \\ };
        \\}
        \\
    ;
    const migrated = try run(arena_state.allocator(), old_source);
    try expectContains(migrated, "case .some: (val)");
    try expectNotContains(migrated, ".(val)");
}

512
src/migrate.zig Normal file
View File

@@ -0,0 +1,512 @@
//! Tuple-syntax migration tool (`sx migrate`).
//!
//! Reads OLD-syntax `.sx` source (tuple TYPES `(A, B)`, tuple VALUES `(a, b)`)
//! and emits NEW-syntax text (`Tuple(A, B)` / `.(a, b)`). The compiler grammar
//! is UNCHANGED — this tool only reads the old syntax and rewrites it as text.
//!
//! Strategy: parse-only (read -> Compilation -> parse), then walk the parsed
//! AST with a comptime-reflection child walker that recurses into every
//! `*Node`-bearing field of every node variant. Two node kinds drive a rewrite:
//!
//! * `tuple_type_expr` — produced by the parser in grammatically-forced TYPE
//! positions (`-> (...)`, `: (...)` annotations, struct-field/param types).
//! Rewritten to `Tuple(...)`. SPECIAL CASE: a failable multi-return whose
//! last element is the error-channel marker `!` keeps the channel OUTSIDE
//! the `Tuple(...)` (see `rewriteTupleType`).
//!
//! * `tuple_literal` — produced in VALUE positions. Rewritten to `.(...)`.
//! In CALL-ARG position the value/type distinction is ambiguous, so we only
//! auto-rewrite when EVERY element is a concrete value literal; anything
//! else (bare identifier, `Vec(3)`, `pkg.T`, empty `()`, ...) is recorded on
//! the worklist and left untouched — never guess (CLAUDE.md silent-fallback
//! rule).
//!
//! Nesting: the rewrite is RECURSIVE but emits exactly ONE edit per OUTERMOST
//! tuple. The replacement text for a tuple is built by recursively migrating its
//! nested tuple elements (and any non-tuple subexpressions, e.g. calls) directly
//! into that text. We never emit a separate, overlapping child edit for anything
//! inside a tuple's span — `applyEdits` asserts non-overlap as a tripwire.
//!
//! Edits are collected against the ORIGINAL source byte offsets and applied
//! DESCENDING by start offset so earlier offsets stay valid; comments and
//! formatting outside the edited spans are preserved verbatim.
const std = @import("std");
const ast = @import("ast.zig");
const core = @import("core.zig");
const Node = ast.Node;
/// A single text replacement against the original source: `source[start..end]`
/// becomes `replacement`.
///
/// The walker fills `start`/`end` from the span of the tuple node being
/// rewritten, so both are byte offsets into the ORIGINAL (un-edited) source.
pub const Edit = struct {
/// Byte offset where the replaced span begins.
start: u32,
/// Byte offset just past the replaced span (used as a slice end bound).
end: u32,
/// New text that takes the place of `source[start..end]`.
replacement: []const u8,
};
/// An ambiguous site we refused to rewrite. `line`/`col` are 1-based.
///
/// One entry is recorded per ambiguous call-arg tuple; the site itself is
/// left in the old syntax for a human to resolve.
pub const Worklist = struct {
/// 1-based line of the site's first byte.
line: u32,
/// 1-based column of the site's first byte (counted in bytes, not code points).
col: u32,
/// The site's original source text (a slice of the source, not a copy).
text: []const u8,
/// Human-readable explanation of why the site was not rewritten.
reason: []const u8,
};
/// Outcome of one migration run: the rewritten text plus every site the
/// migrator declined to touch.
pub const MigrationResult = struct {
/// The rewritten source (a fresh allocation owning its bytes).
output: []const u8,
/// Ambiguous sites left untouched, in source order.
worklist: []const Worklist,
};
/// Walk state: collects edits + worklist entries while recursing the AST.
const Walker = struct {
allocator: std.mem.Allocator,
source: []const u8,
edits: std.ArrayList(Edit) = .empty,
worklist: std.ArrayList(Worklist) = .empty,
/// Visit `node`, emitting edits for tuples and recursing into everything else.
///
/// `is_call_arg` is true only when `node` is a DIRECT argument of a `call` /
/// `ffi_intrinsic_call`; that is the one context where a `tuple_literal` may
/// be value-vs-type ambiguous. The flag never propagates past a non-call node.
///
/// An OUTERMOST tuple yields exactly one edit whose replacement text already
/// contains its migrated nested tuples / subexpressions. The edit-emitting
/// walk deliberately does not descend into that tuple's span, since that
/// would create overlapping edits. Ambiguous nested call-args inside it are
/// still put on the worklist by the recursive text builders.
fn walk(self: *Walker, node: *const Node, is_call_arg: bool) anyerror!void {
    switch (node.data) {
        .tuple_type_expr => |type_tuple| {
            // A null result means "leave as written" (only `-> !`, which has
            // no value elements). Element subtrees are already baked into the
            // text, so there is nothing further to walk either way.
            const maybe_text = try self.buildTupleTypeText(node, type_tuple);
            const text = maybe_text orelse return;
            try self.edits.append(self.allocator, .{
                .start = node.span.start,
                .end = node.span.end,
                .replacement = text,
            });
        },
        .tuple_literal => |value_tuple| {
            const ambiguous = is_call_arg and !tupleIsAllConcreteValues(value_tuple);
            if (!ambiguous) {
                const text = try self.buildTupleValueText(node, value_tuple);
                try self.edits.append(self.allocator, .{
                    .start = node.span.start,
                    .end = node.span.end,
                    .replacement = text,
                });
                return;
            }
            // Could be a type argument, a parameterized type, a qualified
            // path, the unit type `()`, ... — never guess. Record the site,
            // leave it untouched, and keep walking its elements so nested
            // unambiguous tuples still get migrated.
            try self.recordWorklist(node);
            for (value_tuple.elements) |element| try self.walk(element.value, false);
        },
        // Direct args of a call carry the call-arg flag; the callee does not.
        .call => |call| {
            try self.walk(call.callee, false);
            for (call.args) |arg| try self.walk(arg, true);
        },
        .ffi_intrinsic_call => |call| {
            try self.walk(call.return_type, false);
            for (call.args) |arg| try self.walk(arg, true);
        },
        // Every other node kind: reflect over the active payload and visit
        // each child `*Node` without the call-arg flag.
        else => try self.walkChildren(node),
    }
}
/// Dispatch on the active variant of `node.data` and hand its payload to
/// `walkValue`, which recurses into every `*Node` reachable through the
/// payload's fields (directly, through optionals, slices, and nested
/// aggregate structs/unions).
fn walkChildren(self: *Walker, node: *const Node) anyerror!void {
    return switch (node.data) {
        inline else => |payload| self.walkValue(@TypeOf(payload), payload),
    };
}
/// Recurse into any `Node` reachable from `value` of type `T`.
///
/// Handles exactly the shapes `containsNode` reports as node-bearing, so the
/// comptime pruning and the runtime walk cannot disagree: node pointers,
/// by-value nodes, slices, fixed-size arrays, optionals, structs and unions.
/// Every node found this way is walked with `is_call_arg = false`.
fn walkValue(self: *Walker, comptime T: type, value: T) anyerror!void {
    if (T == *Node or T == *const Node) {
        try self.walk(value, false);
        return;
    }
    if (T == Node) {
        // A node embedded by value must go through `walk` like any other
        // node; falling into the generic struct case below would visit its
        // children but skip the tuple rewrite for the node itself.
        try self.walk(&value, false);
        return;
    }
    switch (@typeInfo(T)) {
        .pointer => |ptr| {
            switch (ptr.size) {
                .slice => {
                    if (comptime containsNode(ptr.child)) {
                        for (value) |elem| try self.walkValue(ptr.child, elem);
                    }
                },
                // Non-slice pointers other than *Node (handled above) carry
                // no AST children we rewrite.
                else => {},
            }
        },
        .array => |arr| {
            // `containsNode` counts fixed-size arrays as node-bearing, so
            // their elements must be visited here too rather than silently
            // dropped by the catch-all below.
            if (comptime containsNode(arr.child)) {
                for (value) |elem| try self.walkValue(arr.child, elem);
            }
        },
        .optional => |opt| {
            if (comptime containsNode(opt.child)) {
                if (value) |inner| try self.walkValue(opt.child, inner);
            }
        },
        .@"struct" => |st| {
            inline for (st.fields) |f| {
                if (comptime containsNode(f.type)) {
                    try self.walkValue(f.type, @field(value, f.name));
                }
            }
        },
        .@"union" => |un| {
            if (comptime unionContainsNode(un)) {
                switch (value) {
                    inline else => |inner| try self.walkValue(@TypeOf(inner), inner),
                }
            }
        },
        else => {},
    }
}
/// Produce the replacement text for a `tuple_type_expr`, with nested tuples
/// already migrated inside it. `null` means "leave the node as written".
///
/// A trailing `error_type_expr` element is the failable-return error channel
/// and is kept OUTSIDE the `Tuple(...)`, copied verbatim from the source:
///   * `(!)`          → `null` (nothing to rewrite).
///   * `(T, !)`       → `T !` (single value: parens dropped, no wrapper).
///   * `(T1, T2, !)`  → `Tuple(T1, T2) !`.
/// Without such a marker the whole tuple becomes `Tuple(...)`.
fn buildTupleTypeText(self: *Walker, node: *const Node, tt: ast.TupleTypeExpr) !?[]const u8 {
    const total = tt.field_types.len;
    const ends_with_error_channel = total > 0 and tt.field_types[total - 1].data == .error_type_expr;

    if (!ends_with_error_channel) {
        // Ordinary type tuple; names keep their `:`.
        const parens = try self.buildTypeInner(node, tt, total);
        return try std.fmt.allocPrint(self.allocator, "Tuple{s}", .{parens});
    }

    // Raw marker text, e.g. `!` or `!JsonError`.
    const marker = tt.field_types[total - 1];
    const marker_text = self.source[marker.span.start..marker.span.end];
    const value_count = total - 1;

    // `-> !` carries no value tuple at all.
    if (value_count == 0) return null;

    if (value_count > 1) {
        // `(T1, T2, ..., !)` → `Tuple(T1, T2, ...) !`.
        const parens = try self.buildTypeInner(node, tt, value_count);
        return try std.fmt.allocPrint(self.allocator, "Tuple{s} {s}", .{ parens, marker_text });
    }

    // Exactly one value: `(T, !)` → `T !`.
    const only_value = try self.migratedTypeElement(tt.field_types[0]);
    return try std.fmt.allocPrint(self.allocator, "{s} {s}", .{ only_value, marker_text });
}
/// Render the parenthesized element list `(...)` of a type tuple from its
/// first `count` field types. Failable returns pass a `count` smaller than
/// `field_types.len` so the trailing `!` is left out. Real field names are
/// emitted as `name: `; synthetic `_<i>` names are omitted. Elements are
/// joined with `, `, so a 1-tuple loses its trailing comma.
fn buildTypeInner(self: *Walker, node: *const Node, tt: ast.TupleTypeExpr, count: usize) ![]const u8 {
    _ = node;
    var text = std.ArrayList(u8).empty;
    try text.append(self.allocator, '(');
    for (tt.field_types[0..count], 0..) |field_type, slot| {
        if (slot > 0) try text.appendSlice(self.allocator, ", ");
        // Synthetic `_<slot>` names mark positional slots: treat as unnamed.
        const explicit_name: ?[]const u8 = if (tt.field_names) |names|
            (if (isSyntheticName(names[slot], slot)) null else names[slot])
        else
            null;
        if (explicit_name) |name| {
            try text.appendSlice(self.allocator, name);
            try text.appendSlice(self.allocator, ": ");
        }
        const element_text = try self.migratedTypeElement(field_type);
        try text.appendSlice(self.allocator, element_text);
    }
    try text.append(self.allocator, ')');
    return text.toOwnedSlice(self.allocator);
}
/// Text for one TYPE element of a tuple. A nested tuple type is rebuilt
/// recursively (falling back to its raw source when the builder reports
/// "unchanged"); any other node goes through `migratedSubtree`, which copies
/// its source and splices in rewrites of tuples nested inside it.
fn migratedTypeElement(self: *Walker, ft: *const Node) anyerror![]const u8 {
    switch (ft.data) {
        .tuple_type_expr => |nested| {
            const rebuilt = try self.buildTupleTypeText(ft, nested);
            return rebuilt orelse self.source[ft.span.start..ft.span.end];
        },
        else => return self.migratedSubtree(ft, false),
    }
}
/// Produce the `.(...)` replacement text for a `tuple_literal`, with nested
/// tuples already migrated inside it. A named element `name: value` is
/// emitted as `name = value`; elements are joined with `, `.
fn buildTupleValueText(self: *Walker, node: *const Node, tl: ast.TupleLiteral) ![]const u8 {
    _ = node;
    var text = std.ArrayList(u8).empty;
    try text.appendSlice(self.allocator, ".(");
    for (tl.elements, 0..) |element, slot| {
        if (slot > 0) try text.appendSlice(self.allocator, ", ");
        if (element.name) |name| {
            try text.appendSlice(self.allocator, name);
            try text.appendSlice(self.allocator, " = ");
        }
        // Non-tuple elements (including spreads such as `..xs`) are copied
        // from source by `migratedValueElement`, with tuples nested inside
        // them migrated along the way.
        const element_text = try self.migratedValueElement(element.value);
        try text.appendSlice(self.allocator, element_text);
    }
    try text.append(self.allocator, ')');
    return text.toOwnedSlice(self.allocator);
}
/// Text for one VALUE element of a tuple. A nested tuple literal is always
/// rewritten: it sits in a value position and is never itself a direct call
/// argument, so it cannot be ambiguous. Any other node goes through
/// `migratedSubtree`, which copies its source and splices in rewrites of
/// tuples nested inside it.
fn migratedValueElement(self: *Walker, value: *const Node) anyerror![]const u8 {
    switch (value.data) {
        .tuple_literal => |nested| return self.buildTupleValueText(value, nested),
        else => return self.migratedSubtree(value, false),
    }
}
/// Return the migrated text for an arbitrary subtree.
///
/// Runs a sub-walk over `node` with a PRIVATE edit list, then splices those
/// edits into the node's raw source slice. This is how a NON-tuple element of
/// a tuple (e.g. a `call` with its own nested tuple args) gets its inner
/// tuples migrated while its surrounding formatting is preserved verbatim.
/// When the sub-walk produces no edits the raw source slice is returned as-is.
///
/// The worklist is SHARED: the sub-walker starts from the parent's list and
/// the parent takes it back afterwards, so ambiguous sites found inside the
/// subtree end up in the parent's worklist.
fn migratedSubtree(self: *Walker, node: *const Node, is_call_arg: bool) ![]const u8 {
    var sub = Walker{
        .allocator = self.allocator,
        .source = self.source,
        .worklist = self.worklist,
    };
    // Hand the list back on EVERY exit path. Appending may grow (and move)
    // the backing buffer, so if the sub-walk fails part-way the parent must
    // not be left holding its stale copy of the list header.
    defer self.worklist = sub.worklist;
    try sub.walk(node, is_call_arg);
    const base = node.span.start;
    const raw = self.source[node.span.start..node.span.end];
    if (sub.edits.items.len == 0) return raw;
    // Sub-edit offsets are absolute; `applyEditsRebased` rebases them to `raw`.
    return applyEditsRebased(self.allocator, raw, base, sub.edits.items);
}
/// Append a worklist entry for `node`: its 1-based line/column, its source
/// text (a slice of the source), and the fixed "ambiguous call arg" reason.
fn recordWorklist(self: *Walker, node: *const Node) !void {
    const begin = node.span.start;
    const where = lineCol(self.source, begin);
    const entry: Worklist = .{
        .line = where.line,
        .col = where.col,
        .text = self.source[begin..node.span.end],
        .reason = "ambiguous value-vs-type call arg; resolve to `Tuple(...)` or `.(...)` by hand",
    };
    try self.worklist.append(self.allocator, entry);
}
};
/// Reports whether `name` is the synthetic positional name for slot `i`,
/// i.e. exactly `_` followed by the decimal spelling of `i` (`_0`, `_1`, ...).
/// Callers treat such a name as "no name" so positional slots of a mixed
/// tuple stay positional.
fn isSyntheticName(name: []const u8, i: usize) bool {
    const could_match = name.len >= 2 and name[0] == '_';
    if (!could_match) return false;
    var scratch: [24]u8 = undefined;
    const digits = std.fmt.bufPrint(&scratch, "{d}", .{i}) catch return false;
    return std.mem.eql(u8, name[1..], digits);
}
/// Decides whether a call-arg `tuple_literal` may be auto-rewritten to
/// `.(...)`: true only when it has at least one element and every element
/// passes `nodeIsConcreteValue`. An empty `()` is rejected because in
/// call-arg position it could be the unit type as well as an empty value.
fn tupleIsAllConcreteValues(tl: ast.TupleLiteral) bool {
    return for (tl.elements) |element| {
        if (!nodeIsConcreteValue(element.value)) break false;
    } else tl.elements.len != 0; // all passed; still reject the empty tuple
}
/// Conservative test for "this node can only be a runtime value, never a
/// type". Accepted:
///   * int / float / bool / string / null / undef / enum literals;
///   * array and struct literals;
///   * binary ops, unary ops and chained comparisons whose operands are all
///     concrete values themselves;
///   * a nested tuple literal accepted by `tupleIsAllConcreteValues`.
/// Every other node kind (identifiers, calls, field access, ...) is rejected
/// because it could be, or contain, a type.
fn nodeIsConcreteValue(node: *const Node) bool {
    switch (node.data) {
        .tuple_literal => |nested| return tupleIsAllConcreteValues(nested),
        .unary_op => |unary| return nodeIsConcreteValue(unary.operand),
        .binary_op => |binary| {
            if (!nodeIsConcreteValue(binary.lhs)) return false;
            return nodeIsConcreteValue(binary.rhs);
        },
        .chained_comparison => |chain| {
            for (chain.operands) |operand| {
                if (!nodeIsConcreteValue(operand)) return false;
            }
            return true;
        },
        .int_literal,
        .float_literal,
        .bool_literal,
        .string_literal,
        .null_literal,
        .undef_literal,
        .enum_literal,
        .array_literal,
        .struct_literal,
        => return true,
        else => return false,
    }
}
/// Comptime predicate: can a value of type `T` (transitively) hold a `Node`
/// or a pointer to one? Intended to let a reflection walk skip payloads that
/// can never lead to a node.
///
/// Rules, as implemented:
///   - `Node`, `*Node`, `*const Node` -> true.
///   - slices, optionals, arrays -> whatever their child type answers.
///   - any other single-item pointer -> true only if it points at `Node`.
///   - many-item / C pointers -> false.
///   - structs and unions -> true if any field's type answers true.
///   - everything else -> false.
fn containsNode(comptime T: type) bool {
    if (T == Node) return true;
    if (T == *Node or T == *const Node) return true;

    return switch (@typeInfo(T)) {
        .optional => |info| containsNode(info.child),
        .array => |info| containsNode(info.child),
        .pointer => |info| switch (info.size) {
            // The plain `*Node` spellings were answered above; a pointer to
            // any other type is deliberately not followed.
            .one => info.child == Node,
            .slice => containsNode(info.child),
            else => false,
        },
        .@"union" => |info| unionContainsNode(info),
        .@"struct" => |info| blk: {
            inline for (info.fields) |field| {
                if (containsNode(field.type)) break :blk true;
            }
            break :blk false;
        },
        else => false,
    };
}
/// Union arm of `containsNode`: true as soon as any field's payload type can
/// hold a `Node`, false if none can (including a union with no fields).
fn unionContainsNode(comptime un: std.builtin.Type.Union) bool {
    return blk: {
        inline for (un.fields) |field| {
            if (containsNode(field.type)) break :blk true;
        }
        break :blk false;
    };
}
/// A 1-based source position. `col` counts bytes, not characters.
const LineCol = struct { line: u32, col: u32 };

/// Translate a byte `offset` into `source` to a 1-based line/column pair.
///
/// Scans the bytes before `offset`: each `'\n'` starts a new line and resets
/// the column to 1, every other byte advances the column by one. An `offset`
/// beyond the end of `source` is clamped, yielding the position just past the
/// last byte.
fn lineCol(source: []const u8, offset: u32) LineCol {
    const stop = @min(@as(usize, offset), source.len);
    var pos: LineCol = .{ .line = 1, .col = 1 };
    for (source[0..stop]) |byte| {
        if (byte == '\n') {
            pos.line += 1;
            pos.col = 1;
        } else {
            pos.col += 1;
        }
    }
    return pos;
}
/// Migrate a source string held in memory.
///
/// Builds a `core.Compilation` over `source`, runs only its `parse` step, and
/// hands the resulting root to `migrateRoot`. The compilation is torn down
/// before returning.
///
/// `file_path` is forwarded to the compilation (used for labeling
/// diagnostics). On a parse failure the collected errors are rendered and
/// `error.ParseFailed` is returned; the same error is returned if parsing
/// reports success but produced no root.
pub fn migrateSource(
    allocator: std.mem.Allocator,
    io: std.Io,
    file_path: []const u8,
    source: [:0]const u8,
) !MigrationResult {
    var compilation = core.Compilation.init(allocator, io, file_path, source, .{}, &.{});
    defer compilation.deinit();

    compilation.parse() catch {
        compilation.renderErrors();
        return error.ParseFailed;
    };

    if (compilation.root) |root| {
        return migrateRoot(allocator, source, root);
    }
    return error.ParseFailed;
}
/// Migrate starting from an already-parsed `root` node.
///
/// Kept separate from `migrateSource` so callers that already hold a parsed
/// tree (e.g. unit tests parsing in memory) do not need an `std.Io`.
///
/// Walks every top-level declaration with a fresh `Walker`, applies the edits
/// it collected to `source`, and returns the rewritten text together with the
/// walker's worklist as an owned slice.
pub fn migrateRoot(
    allocator: std.mem.Allocator,
    source: []const u8,
    root: *const Node,
) !MigrationResult {
    var w: Walker = .{ .allocator = allocator, .source = source };

    const decls = root.data.root.decls;
    for (decls) |decl| try w.walk(decl, false);

    const rewritten = try applyEdits(allocator, source, w.edits.items);
    const pending = try w.worklist.toOwnedSlice(allocator);
    return .{ .output = rewritten, .worklist = pending };
}
/// Apply `edits_in` to a COPY of `source` and return the rewritten text.
/// Caller owns the returned slice (free with `allocator`).
///
/// Edits are processed in DESCENDING `start` order so each splice leaves the
/// offsets of the not-yet-applied (earlier) edits valid. `edits_in` itself is
/// not modified; sorting happens on a scratch copy that is released before
/// returning.
///
/// Errors:
///   - `error.OverlappingEdits` if any two edits share a byte. The recursive
///     rewrite must emit exactly one edit per outermost tuple, so an overlap
///     is a bug and we refuse to produce corrupt output.
///   - allocation failures from `allocator`.
///
/// Precondition: every edit satisfies `start <= end <= source.len`.
pub fn applyEdits(allocator: std.mem.Allocator, source: []const u8, edits_in: []const Edit) ![]const u8 {
    // Scratch copy so the caller's slice keeps its order; freed on every exit
    // path (the replacement strings it points at are not owned by the copy).
    const edits = try allocator.dupe(Edit, edits_in);
    defer allocator.free(edits);

    std.mem.sort(Edit, edits, {}, struct {
        fn lessThan(_: void, a: Edit, b: Edit) bool {
            return a.start > b.start; // descending
        }
    }.lessThan);

    // Tripwire: after the descending sort, each edit's end must not exceed the
    // previous (higher-start) edit's start. Any overlap means the recursive
    // rewrite double-emitted.
    var prev_start: ?u32 = null;
    for (edits) |e| {
        if (prev_start) |ps| {
            if (e.end > ps) return error.OverlappingEdits;
        }
        prev_start = e.start;
    }

    var out = try std.ArrayList(u8).initCapacity(allocator, source.len);
    // Release the partially built buffer if a later splice or the final
    // shrink fails; on success ownership moves to the caller.
    errdefer out.deinit(allocator);
    try out.appendSlice(allocator, source);

    for (edits) |e| {
        // Splice source[e.start..e.end] -> e.replacement.
        try out.replaceRange(allocator, e.start, e.end - e.start, e.replacement);
    }
    return out.toOwnedSlice(allocator);
}
/// Apply edits whose `start`/`end` are ABSOLUTE source offsets to a `slice`
/// that begins at absolute offset `base`. Used by `migratedSubtree` to splice
/// a non-tuple subtree's inner tuple rewrites into its raw slice.
///
/// Each edit is shifted down by `base` and the shifted list is handed to
/// `applyEdits`; the temporary shifted list is released before returning.
/// Caller owns the returned slice. Errors are those of `applyEdits` plus
/// allocation failure for the temporary list.
///
/// Precondition: every edit lies inside the slice, i.e. `start >= base` and
/// `end - base <= slice.len`.
fn applyEditsRebased(allocator: std.mem.Allocator, slice: []const u8, base: u32, edits_in: []const Edit) ![]const u8 {
    const rebased = try allocator.alloc(Edit, edits_in.len);
    // `applyEdits` works on its own copy, so this scratch list is ours to free.
    defer allocator.free(rebased);

    for (edits_in, rebased) |e, *slot| {
        slot.* = .{ .start = e.start - base, .end = e.end - base, .replacement = e.replacement };
    }
    return applyEdits(allocator, slice, rebased);
}

View File

@@ -20,6 +20,8 @@ pub const core = @import("core.zig");
pub const c_import = @import("c_import.zig");
pub const c_import_tests = @import("c_import.test.zig");
pub const corpus_run_tests = @import("corpus_run.test.zig");
pub const migrate = @import("migrate.zig");
pub const migrate_tests = @import("migrate.test.zig");
pub const ir = @import("ir/ir.zig");
pub const lsp = struct {