add sx migrate tuple-syntax migration tool

Temporary scaffolding for the tuple-syntax cutover. Parses old-grammar
.sx and rewrites tuple syntax to the new spelling:
  - tuple TYPES   `(A, B)`        -> `Tuple(A, B)`   (named keeps `:`)
  - tuple VALUES  `(a, b)`        -> `.(a, b)`        (named flips `:` -> `=`)
  - 1-tuples / empty / spread     -> `.(x)` / `.()` / `.(..xs)`, `Tuple(..Ts)`
  - failable returns: the `!` channel stays OUTSIDE Tuple
      `-> (T, !)`        -> `-> T !`
      `-> (T1, T2, !)`   -> `-> Tuple(T1, T2) !`

AST-walk based: rewrites only `tuple_literal` / `tuple_type_expr` nodes
(function types, param lists, match bindings, arrays, struct literals,
Closure sigs, groupings are left untouched). Nested tuples rewrite
recursively as a single non-overlapping edit per outermost tuple.

Value-vs-type ambiguity (call-arg tuples whose elements could be types,
e.g. `size_of((Box, i32))`, empty `()`) is never guessed: such sites go
to a worklist. A non-empty worklist exits nonzero and suppresses the
"looks-done" stdout output unless `--force` is passed.

`sx migrate <f>` prints migrated source; `--dry-run` prints only the
worklist. Built against the old grammar; removed after the cutover.
This commit is contained in:
agra
2026-06-25 15:23:18 +03:00
parent 820cd62fa1
commit c882c6c63e
4 changed files with 918 additions and 0 deletions

View File

@@ -23,6 +23,13 @@ pub fn main(init: std.process.Init) !void {
return; return;
} }
// `migrate` has its own flag (`--dry-run`) the generic flag loop below would
// reject, so dispatch it here before that loop runs.
if (std.mem.eql(u8, command, "migrate")) {
runMigrate(allocator, io, args[2..]);
return;
}
// Parse flags and positional arguments // Parse flags and positional arguments
var input_path: ?[]const u8 = null; var input_path: ?[]const u8 = null;
var target_config = sx.target.TargetConfig{}; var target_config = sx.target.TargetConfig{};
@@ -407,6 +414,7 @@ fn printUsage() void {
\\ ir Print LLVM IR to stdout \\ ir Print LLVM IR to stdout
\\ asm Emit assembly (.s) file \\ asm Emit assembly (.s) file
\\ lsp Start language server (LSP) \\ lsp Start language server (LSP)
\\ migrate Rewrite old tuple syntax to new (`(a,b)`->`.(a,b)`, type `(A,B)`->`Tuple(A,B)`); `--dry-run` prints only the worklist, `--force` emits output despite unmigrated ambiguous sites
\\ \\
\\Options: \\Options:
\\ --target <target> Target triple or shorthand: wasm, macos, linux, windows, ios, ios-sim (default: host) \\ --target <target> Target triple or shorthand: wasm, macos, linux, windows, ios, ios-sim (default: host)
@@ -517,6 +525,72 @@ fn compilePipeline(allocator: std.mem.Allocator, io: std.Io, input_path: []const
return comp; return comp;
} }
/// `sx migrate [--dry-run] [--force] <file.sx>` — tuple-syntax migration tool.
///
/// Without flags: parse-only, rewrite the old tuple syntax, print the migrated
/// source to stdout and any ambiguous-site worklist entries to stderr. A
/// NON-EMPTY worklist is a hard failure (exit 2) — the migration is incomplete,
/// so we do NOT print the rewritten source (which could be redirected over the
/// input, silently shipping half-migrated code) unless `--force` is passed.
///
/// With `--dry-run`: print ONLY the worklist (to stderr), no rewritten source —
/// so ambiguous sites can be audited first. A non-empty worklist still exits 2.
///
/// With `--force`: print the rewritten source even when the worklist is
/// non-empty (the ambiguous sites are left in the OLD syntax). Exit is still 2
/// so a script can detect the partial migration.
///
/// Exit 1 is used for every other failure: an unknown flag, a missing or
/// duplicated input path, an unreadable input, a failed migration, or a failed
/// write of the migrated source to stdout.
fn runMigrate(allocator: std.mem.Allocator, io: std.Io, sub_args: []const []const u8) void {
    var dry_run = false;
    var force = false;
    var input_path: ?[]const u8 = null;
    for (sub_args) |a| {
        if (std.mem.eql(u8, a, "--dry-run")) {
            dry_run = true;
        } else if (std.mem.eql(u8, a, "--force")) {
            force = true;
        } else if (std.mem.startsWith(u8, a, "-")) {
            std.debug.print("error: unknown flag '{s}' for migrate\n", .{a});
            std.process.exit(1);
        } else if (input_path) |first| {
            // Exactly one file is migrated per invocation. Silently letting the
            // last path win would leave the earlier file(s) unmigrated without
            // any indication.
            std.debug.print(
                "error: migrate takes exactly one input file (got '{s}' and '{s}')\n",
                .{ first, a },
            );
            std.process.exit(1);
        } else {
            input_path = a;
        }
    }
    const path = input_path orelse {
        std.debug.print("usage: sx migrate [--dry-run] [--force] <file.sx>\n", .{});
        std.process.exit(1);
    };
    const source = readSource(allocator, io, path) catch |err| {
        std.debug.print("error: cannot read '{s}': {}\n", .{ path, err });
        std.process.exit(1);
    };
    const result = sx.migrate.migrateSource(allocator, io, path, source) catch |err| {
        std.debug.print("error: migrate failed for '{s}': {}\n", .{ path, err });
        std.process.exit(1);
    };
    // Worklist (ambiguous sites) always goes to stderr.
    for (result.worklist) |w| {
        std.debug.print("{s}:{d}:{d}: {s}: {s}\n", .{ path, w.line, w.col, w.reason, w.text });
    }
    const has_worklist = result.worklist.len > 0;
    // Emit the rewritten source unless we'd be shipping a half-migrated file: a
    // non-empty worklist in non-dry-run mode suppresses output unless --force.
    if (!dry_run and (!has_worklist or force)) {
        // write(2) may accept fewer bytes than requested (pipes, large files),
        // so keep writing until the whole buffer is out. A failed write is a
        // hard error: a truncated file must never look like a finished
        // migration.
        var pending: []const u8 = result.output;
        while (pending.len > 0) {
            const written = std.c.write(1, pending.ptr, pending.len);
            if (written <= 0) {
                std.debug.print("error: failed writing migrated source to stdout\n", .{});
                std.process.exit(1);
            }
            const advanced: usize = @intCast(written);
            pending = pending[advanced..];
        }
    }
    if (has_worklist) {
        std.debug.print(
            "{d} ambiguous site(s) unmigrated; resolve by hand or pass --force\n",
            .{result.worklist.len},
        );
        std.process.exit(2);
    }
}
fn dumpSxIR(allocator: std.mem.Allocator, io: std.Io, input_path: []const u8, stdlib_paths: []const []const u8) !void { fn dumpSxIR(allocator: std.mem.Allocator, io: std.Io, input_path: []const u8, stdlib_paths: []const []const u8) !void {
const source = try readSource(allocator, io, input_path); const source = try readSource(allocator, io, input_path);
var comp = sx.core.Compilation.init(allocator, io, input_path, source, .{}, stdlib_paths); var comp = sx.core.Compilation.init(allocator, io, input_path, source, .{}, stdlib_paths);

330
src/migrate.test.zig Normal file
View File

@@ -0,0 +1,330 @@
// Tests for migrate.zig — the `sx migrate` tuple-syntax rewriter.
//
// Each case parses an in-memory snippet (full decls, so it parses standalone),
// runs the AST-walk migrator, and asserts the rewritten text and/or worklist.
// The compiler grammar is UNCHANGED here: the migrator READS the old tuple
// syntax `(a, b)` / `(A, B)` and EMITS the new `.(a, b)` / `Tuple(A, B)` text.
const std = @import("std");
const Parser = @import("parser.zig").Parser;
const migrate = @import("migrate.zig");
/// Migrate the old-syntax decls in `src` and hand back just the rewritten
/// text. Fails the test if any site ended up on the worklist; ambiguous
/// inputs should go through `runWith` instead.
fn run(alloc: std.mem.Allocator, src: [:0]const u8) ![]const u8 {
    const migrated = try runWith(alloc, src);
    const pending_sites: usize = migrated.worklist.len;
    try std.testing.expectEqual(@as(usize, 0), pending_sites);
    return migrated.output;
}
/// Parse `src` with the (old-grammar) parser and run the migrator over the
/// resulting root, returning both the rewritten text and the worklist.
fn runWith(alloc: std.mem.Allocator, src: [:0]const u8) !migrate.MigrationResult {
    var old_grammar_parser = Parser.init(alloc, src);
    const tree = try old_grammar_parser.parse();
    return migrate.migrateRoot(alloc, src, tree);
}
/// Succeeds when `needle` occurs somewhere in `haystack`. Otherwise dumps both
/// the needle and the full migrated text to stderr and returns
/// `error.NotFound`.
fn expectContains(haystack: []const u8, needle: []const u8) !void {
    if (std.mem.indexOf(u8, haystack, needle)) |_| return;
    std.debug.print("\nexpected to find:\n {s}\nin migrated output:\n{s}\n", .{ needle, haystack });
    return error.NotFound;
}
/// Succeeds when `needle` does NOT occur in `haystack`. Otherwise dumps both
/// the needle and the full migrated text to stderr and returns
/// `error.UnexpectedlyFound`.
fn expectNotContains(haystack: []const u8, needle: []const u8) !void {
    _ = std.mem.indexOf(u8, haystack, needle) orelse return;
    std.debug.print("\nexpected NOT to find:\n {s}\nin migrated output:\n{s}\n", .{ needle, haystack });
    return error.UnexpectedlyFound;
}
// ── VALUE tuples → .(...) ────────────────────────────────────────────────
test "migrate value: positional (40,2) -> .(40,2)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { x := (40, 2); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, ".(40, 2)");
    // The bare (space-prefixed) old spelling must no longer be present.
    try expectNotContains(migrated, " (40, 2)");
}
test "migrate value: named (x:1,y:2) -> .(x = 1, y = 2)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { x := (x: 1, y: 2); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, ".(x = 1, y = 2)");
}
test "migrate value: 1-tuple (x,) -> .(x)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { y := 9; x := (y,); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, ".(y)");
    try expectNotContains(migrated, "(y,)");
}
test "migrate value: spread (..xs) -> .(..xs)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: (xs: i32) { t := (..xs); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, ".(..xs)");
}
test "migrate value: operator operands (1,2)==(1,2)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { b := (1, 2) == (1, 2); }\n";
    const migrated = try run(a, old_src);
    // Left and right operand are each rewritten.
    try expectContains(migrated, ".(1, 2) == .(1, 2)");
}
test "migrate value+type: return body -> Tuple(i64,i64){ .(b,a) }" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src =
        \\swap :: (a: i64, b: i64) -> (i64, i64) { (b, a) }
        \\
    ;
    const migrated = try run(a, old_src);
    try expectContains(migrated, "-> Tuple(i64, i64)");
    try expectContains(migrated, ".(b, a)");
}
test "migrate value: empty () value -> .()" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    // An empty tuple on the right-hand side of `:=` is a value.
    const old_src = "f :: () { x := (); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, ".()");
}
// ── TYPE tuples → Tuple(...) ─────────────────────────────────────────────
test "migrate type: annotation a:(i32,string) -> a:Tuple(i32,string)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { a : (i32, string) = ---; }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "Tuple(i32, string)");
}
test "migrate type: named (x:i32,y:string) -> Tuple(x: i32, y: string) keeps colon" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { a : (x: i32, y: string) = ---; }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "Tuple(x: i32, y: string)");
}
test "migrate type: struct field xs:(i32,i32) -> Tuple(i32,i32)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "S :: struct { xs: (i32, i32); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "Tuple(i32, i32)");
}
test "migrate type: pack (..Ts) -> Tuple(..Ts)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "S :: struct { xs: (..Ts); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "Tuple(..Ts)");
}
test "migrate type: 1-tuple (T,) -> Tuple(T) drops comma" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "S :: struct { xs: (i32,); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "Tuple(i32)");
    try expectNotContains(migrated, "(i32,)");
}
// ── Worklist: ambiguous value-vs-type call arg ──────────────────────────
test "migrate worklist: size_of((Box,i32)) is NOT rewritten, records worklist" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src =
        \\f :: () { n := size_of((Box, i32)); }
        \\
    ;
    const outcome = try runWith(a, old_src);
    // The ambiguous argument keeps its old spelling; no `.(` form is emitted.
    try expectNotContains(outcome.output, ".(Box, i32)");
    try expectContains(outcome.output, "(Box, i32)");
    // Exactly one site is reported, and it quotes the offending tuple.
    try std.testing.expectEqual(@as(usize, 1), outcome.worklist.len);
    try expectContains(outcome.worklist[0].text, "(Box, i32)");
}
test "migrate value: call arg with literal-only tuple IS rewritten" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    // Every element of `(1, 2)` is a literal, so the rewrite is safe.
    const old_src = "f :: () { take((1, 2)); }\n";
    const outcome = try runWith(a, old_src);
    try expectContains(outcome.output, ".(1, 2)");
    try std.testing.expectEqual(@as(usize, 0), outcome.worklist.len);
}
// ── Nested tuples (recursive rewrite, ONE edit per outermost tuple) ──────
test "migrate nested value: ((1,2),(3,4)) -> .(.(1, 2), .(3, 4))" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const out = try run(arena.allocator(), "f :: () { x := ((1, 2), (3, 4)); }\n");
    try expectContains(out, ".(.(1, 2), .(3, 4))");
    // No stray un-migrated tuple: neither the outer `((1, 2)` opener nor a
    // bare, un-dotted second element may survive.
    try expectNotContains(out, "((1, 2)");
    try expectNotContains(out, " (3, 4)");
    // No trailing junk paren after the outer tuple closes (the symptom of
    // overlapping parent/child edits).
    try expectNotContains(out, ".(3, 4)))");
}
test "migrate nested value: ((1,2),3) -> .(.(1, 2), 3)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { x := ((1, 2), 3); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, ".(.(1, 2), 3)");
    // Regression guard: this was the shape of the previously broken output.
    try expectNotContains(migrated, "(1, 2), 3))");
}
test "migrate nested named value: (a:(p:1,q:2),b:3) -> .(a = .(p = 1, q = 2), b = 3)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { n := (a: (p: 1, q: 2), b: 3); }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, ".(a = .(p = 1, q = 2), b = 3)");
}
test "migrate nested type: ((i32,i32),i64) -> Tuple(Tuple(i32, i32), i64)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { a : ((i32, i32), i64) = ---; }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "Tuple(Tuple(i32, i32), i64)");
}
// ── Failable multi-returns: `!` channel stays OUTSIDE Tuple(...) ─────────
test "migrate failable: -> (T, !) -> -> T !" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () -> (i32, !) { }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "-> i32 !");
    try expectNotContains(migrated, "Tuple(");
    try expectNotContains(migrated, ".(");
}
test "migrate failable: -> (T, !Named) keeps the named set" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src =
        \\E :: error { Bad }
        \\f :: () -> (i32, !E) { }
        \\
    ;
    const migrated = try run(a, old_src);
    try expectContains(migrated, "-> i32 !E");
    try expectNotContains(migrated, "Tuple(");
}
test "migrate failable: -> (T1, T2, !) -> -> Tuple(T1, T2) !" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () -> (i32, i64, !) { }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "-> Tuple(i32, i64) !");
}
test "migrate failable: bare -> ! unchanged" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () -> ! { }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "-> !");
    try expectNotContains(migrated, "Tuple");
}
// ── Inverted call-arg classification (conservative) ─────────────────────
test "migrate worklist: empty () call arg is worklisted (unit type ambiguity)" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { n := size_of(()); }\n";
    const outcome = try runWith(a, old_src);
    // The empty tuple must stay as written rather than becoming `.()`.
    try expectNotContains(outcome.output, "size_of(.())");
    try expectContains(outcome.output, "size_of(())");
    try std.testing.expectEqual(@as(usize, 1), outcome.worklist.len);
}
test "migrate worklist: Vec(3) call-arg element is worklisted" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { n := size_of((Vec(3), i32)); }\n";
    const outcome = try runWith(a, old_src);
    try expectNotContains(outcome.output, ".(Vec(3), i32)");
    try expectContains(outcome.output, "(Vec(3), i32)");
    try std.testing.expectEqual(@as(usize, 1), outcome.worklist.len);
}
test "migrate worklist: pkg.T qualified path call-arg element is worklisted" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { n := size_of((pkg.T, i32)); }\n";
    const outcome = try runWith(a, old_src);
    try expectNotContains(outcome.output, ".(pkg.T, i32)");
    try expectContains(outcome.output, "(pkg.T, i32)");
    try std.testing.expectEqual(@as(usize, 1), outcome.worklist.len);
}
// ── Negatives: distinct AST nodes must NOT be touched ────────────────────
test "migrate negative: function type (i32,i32)->i32 unchanged" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { g : (i32, i32) -> i32 = ---; }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "(i32, i32) -> i32");
    try expectNotContains(migrated, "Tuple(i32, i32)");
}
test "migrate negative: function param list (self:*T,x:i32) unchanged" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "S :: struct {}\nm :: (self: *S, x: i32) { }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "(self: *S, x: i32)");
    try expectNotContains(migrated, "Tuple(");
    try expectNotContains(migrated, ".(self");
}
test "migrate negative: array literal .[1,2,3] unchanged" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { a := .[1, 2, 3]; }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, ".[1, 2, 3]");
}
test "migrate negative: struct literal .{x=1} unchanged" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { a := .{ x = 1 }; }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, ".{ x = 1 }");
}
test "migrate negative: Closure(i32)->i32 type unchanged" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: () { c : Closure(i32) -> i32 = ---; }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "Closure(i32) -> i32");
    try expectNotContains(migrated, "Tuple(");
}
test "migrate negative: grouping (a+b)*c unchanged" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src = "f :: (a: i32, b: i32, c: i32) { x := (a + b) * c; }\n";
    const migrated = try run(a, old_src);
    try expectContains(migrated, "(a + b) * c");
    try expectNotContains(migrated, ".(a + b)");
}
test "migrate negative: match capture case .some: (val) unchanged" {
    var scratch = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer scratch.deinit();
    const a = scratch.allocator();
    const old_src =
        \\check :: (v: ?i32) -> i32 {
        \\ return if v == {
        \\ case .some: (val) { val }
        \\ case .none: { 0 }
        \\ };
        \\}
        \\
    ;
    const migrated = try run(a, old_src);
    try expectContains(migrated, "case .some: (val)");
    try expectNotContains(migrated, ".(val)");
}

512
src/migrate.zig Normal file
View File

@@ -0,0 +1,512 @@
//! Tuple-syntax migration tool (`sx migrate`).
//!
//! Reads OLD-syntax `.sx` source (tuple TYPES `(A, B)`, tuple VALUES `(a, b)`)
//! and emits NEW-syntax text (`Tuple(A, B)` / `.(a, b)`). The compiler grammar
//! is UNCHANGED — this tool only reads the old syntax and rewrites it as text.
//!
//! Strategy: parse-only (read -> Compilation -> parse), then walk the parsed
//! AST with a comptime-reflection child walker that recurses into every
//! `*Node`-bearing field of every node variant. Two node kinds drive a rewrite:
//!
//! * `tuple_type_expr` — produced by the parser in grammatically-forced TYPE
//! positions (`-> (...)`, `: (...)` annotations, struct-field/param types).
//! Rewritten to `Tuple(...)`. SPECIAL CASE: a failable multi-return whose
//! last element is the error-channel marker `!` keeps the channel OUTSIDE
//! the `Tuple(...)` (see `rewriteTupleType`).
//!
//! * `tuple_literal` — produced in VALUE positions. Rewritten to `.(...)`.
//! In CALL-ARG position the value/type distinction is ambiguous, so we only
//! auto-rewrite when EVERY element is a concrete value literal; anything
//! else (bare identifier, `Vec(3)`, `pkg.T`, empty `()`, ...) is recorded on
//! the worklist and left untouched — never guess (CLAUDE.md silent-fallback
//! rule).
//!
//! Nesting: the rewrite is RECURSIVE but emits exactly ONE edit per OUTERMOST
//! tuple. The replacement text for a tuple is built by recursively migrating its
//! nested tuple elements (and any non-tuple subexpressions, e.g. calls) directly
//! into that text. We never emit a separate, overlapping child edit for anything
//! inside a tuple's span — `applyEdits` asserts non-overlap as a tripwire.
//!
//! Edits are collected against the ORIGINAL source byte offsets and applied
//! DESCENDING by start offset so earlier offsets stay valid; comments and
//! formatting outside the edited spans are preserved verbatim.
const std = @import("std");
const ast = @import("ast.zig");
const core = @import("core.zig");
const Node = ast.Node;
/// A single text replacement against the original source: `source[start..end]`
/// becomes `replacement`.
pub const Edit = struct {
    /// Offset into the ORIGINAL source where the replaced span begins.
    start: u32,
    /// Offset into the ORIGINAL source where the replaced span ends; exclusive,
    /// since the span is the slice `source[start..end]`.
    end: u32,
    /// Text substituted for `source[start..end]`.
    replacement: []const u8,
};
/// An ambiguous site we refused to rewrite. `line`/`col` are 1-based.
pub const Worklist = struct {
    /// 1-based line of the site's first byte.
    line: u32,
    /// 1-based column of the site's first byte.
    col: u32,
    /// The site's original source text (a slice of the input, left untouched).
    text: []const u8,
    /// Human-readable explanation of why the site was not rewritten.
    reason: []const u8,
};
/// Outcome of one migration run: the rewritten text plus every site that was
/// deliberately left in the old syntax for manual resolution.
pub const MigrationResult = struct {
    /// The rewritten source (a fresh allocation owning its bytes).
    output: []const u8,
    /// Ambiguous sites left untouched, in source order.
    worklist: []const Worklist,
};
/// Walk state: collects edits + worklist entries while recursing the AST.
const Walker = struct {
allocator: std.mem.Allocator,
source: []const u8,
edits: std.ArrayList(Edit) = .empty,
worklist: std.ArrayList(Worklist) = .empty,
/// Recurse into `node`, collecting edits and worklist entries.
///
/// `is_call_arg` is true when `node` sits where a `tuple_literal` may be
/// value-vs-type ambiguous: it is a DIRECT argument of a `call` /
/// `ffi_intrinsic_call`, or a direct element of a tuple that was itself found
/// ambiguous in such a position (if the outer tuple might be a type, so might
/// the tuples nested directly inside it).
///
/// On hitting an OUTERMOST tuple we compute its full replacement (recursively
/// baking any nested tuples / subexprs into the text) and emit a SINGLE edit;
/// we do NOT continue the edit-emitting walk into the tuple's span (that would
/// produce overlapping edits). Worklist collection for ambiguous nested
/// call-args still happens, inside the recursive text builder.
fn walk(self: *Walker, node: *const Node, is_call_arg: bool) anyerror!void {
    switch (node.data) {
        .tuple_type_expr => |tt| {
            // A `null` replacement means "leave unchanged" (the `(!)` shape,
            // which has no value elements to rewrite).
            if (try self.buildTupleTypeText(node, tt)) |rep| {
                try self.edits.append(self.allocator, .{
                    .start = node.span.start,
                    .end = node.span.end,
                    .replacement = rep,
                });
            }
            // Do NOT recurse into the element subtrees here — they are
            // already baked into the replacement text.
            return;
        },
        .tuple_literal => |tl| {
            if (is_call_arg and !tupleIsAllConcreteValues(tl)) {
                // Ambiguous in call-arg position (could be a type argument,
                // a parameterized type, a qualified path, the unit type
                // `()`, ...). Refuse to guess — record + leave untouched.
                try self.recordWorklist(node);
                // Keep walking the elements so nested tuples made purely of
                // concrete values are still migrated. The ambiguity flag is
                // carried along: a nested `(A, B)` inside a possibly-type
                // tuple is just as ambiguous as its parent, so it must be
                // worklisted rather than rewritten to `.(A, B)`.
                for (tl.elements) |el| try self.walk(el.value, true);
            } else {
                const rep = try self.buildTupleValueText(node, tl);
                try self.edits.append(self.allocator, .{
                    .start = node.span.start,
                    .end = node.span.end,
                    .replacement = rep,
                });
            }
            return;
        },
        // A `call`'s direct args get the call-arg flag; the callee does not.
        .call => |c| {
            try self.walk(c.callee, false);
            for (c.args) |a| try self.walk(a, true);
            return;
        },
        // Same for an FFI intrinsic: its return type is walked as a plain
        // node, its args as call args.
        .ffi_intrinsic_call => |c| {
            try self.walk(c.return_type, false);
            for (c.args) |a| try self.walk(a, true);
            return;
        },
        else => {},
    }
    // Generic recursion for every other node: visit each child *Node found
    // by reflection over the active union payload. Call-arg context does NOT
    // propagate past a non-call node.
    try self.walkChildren(node);
}
/// Visit every child of `node`: whichever variant of `node.data` is active,
/// its payload is handed to `walkValue`, which finds the `*Node`s inside it
/// (directly, or through optionals, slices and nested structs/unions).
fn walkChildren(self: *Walker, node: *const Node) anyerror!void {
    switch (node.data) {
        inline else => |active| {
            const Payload = @TypeOf(active);
            try self.walkValue(Payload, active);
        },
    }
}
/// Recurse into any `Node` reachable from `value` of type `T`.
///
/// Handles exactly the shapes `containsNode` reports as node-bearing: node
/// pointers, by-value nodes, slices, fixed-size arrays, optionals, structs and
/// tagged unions. Children are always walked with `is_call_arg = false`;
/// call-arg context is only ever set by `walk` itself.
fn walkValue(self: *Walker, comptime T: type, value: T) anyerror!void {
    if (T == *Node or T == *const Node) {
        try self.walk(value, false);
        return;
    }
    if (T == Node) {
        // A node stored by value (e.g. an element of a `[]Node`): walk the
        // node itself so a tuple at this position is rewritten, rather than
        // skipping straight to its children.
        try self.walk(&value, false);
        return;
    }
    switch (@typeInfo(T)) {
        .pointer => |ptr| {
            switch (ptr.size) {
                .slice => {
                    if (comptime containsNode(ptr.child)) {
                        for (value) |elem| try self.walkValue(ptr.child, elem);
                    }
                },
                // Non-slice pointers other than *Node (handled above) carry
                // no AST children we rewrite.
                else => {},
            }
        },
        // Fixed-size arrays are node-bearing according to `containsNode`, so
        // they must be traversed here too or their children would be skipped.
        .array => |arr| {
            if (comptime containsNode(arr.child)) {
                for (value) |elem| try self.walkValue(arr.child, elem);
            }
        },
        .optional => |opt| {
            if (comptime containsNode(opt.child)) {
                if (value) |inner| try self.walkValue(opt.child, inner);
            }
        },
        .@"struct" => |st| {
            inline for (st.fields) |f| {
                if (comptime containsNode(f.type)) {
                    try self.walkValue(f.type, @field(value, f.name));
                }
            }
        },
        .@"union" => |un| {
            if (comptime unionContainsNode(un)) {
                switch (value) {
                    inline else => |inner| try self.walkValue(@TypeOf(inner), inner),
                }
            }
        },
        else => {},
    }
}
/// Produce the new-syntax text for a `tuple_type_expr`, with nested tuples
/// already migrated inside it. `null` means "leave the node as written".
///
/// A trailing `error_type_expr` element is the error channel of a failable
/// return and is emitted AFTER the value type, never inside `Tuple(...)`:
///   * `(!)`          → `null` (nothing to rewrite).
///   * `(T, !)`       → `T !` (one value: parentheses dropped).
///   * `(T1, T2, !)`  → `Tuple(T1, T2) !`.
/// Without an error channel the result is simply `Tuple(...)`.
fn buildTupleTypeText(self: *Walker, node: *const Node, tt: ast.TupleTypeExpr) !?[]const u8 {
    const fields = tt.field_types;
    const trailing_err: ?*const Node =
        if (fields.len > 0 and fields[fields.len - 1].data == .error_type_expr)
            fields[fields.len - 1]
        else
            null;

    const marker = trailing_err orelse {
        // Plain type tuple: every field goes inside `Tuple(...)`.
        const parens = try self.buildTypeInner(node, tt, fields.len);
        return try std.fmt.allocPrint(self.allocator, "Tuple{s}", .{parens});
    };

    // Source text of the channel marker, e.g. `!` or `!JsonError`.
    const channel = self.source[marker.span.start..marker.span.end];
    switch (fields.len - 1) {
        // Only the channel is present: leave unchanged.
        0 => return null,
        // A single value type stands on its own, without a Tuple wrapper.
        1 => {
            const only = try self.migratedTypeElement(fields[0]);
            return try std.fmt.allocPrint(self.allocator, "{s} {s}", .{ only, channel });
        },
        // Several value types are wrapped; the channel follows the wrapper.
        else => |value_count| {
            const parens = try self.buildTypeInner(node, tt, value_count);
            return try std.fmt.allocPrint(self.allocator, "Tuple{s} {s}", .{ parens, channel });
        },
    }
}
/// Build the parenthesized inner `(...)` for a type tuple covering the first
/// `count` field types (a failable return passes `count < field_types.len` to
/// exclude the trailing `!`). Names keep their `:`. A 1-tuple drops its
/// trailing comma.
///
/// `node` is unused; it is accepted so the signature stays parallel with the
/// other builders. The returned slice is allocated with `self.allocator`.
fn buildTypeInner(self: *Walker, node: *const Node, tt: ast.TupleTypeExpr, count: usize) ![]const u8 {
    _ = node;
    var out = std.ArrayList(u8).empty;
    // Release the partially built text if any append below fails.
    errdefer out.deinit(self.allocator);
    try out.append(self.allocator, '(');
    for (tt.field_types[0..count], 0..) |ft, i| {
        if (i != 0) try out.appendSlice(self.allocator, ", ");
        // Named type tuple keeps `name: ` verbatim.
        if (tt.field_names) |names| {
            // Synthetic `_<i>` names mark positional slots — emit nothing.
            if (!isSyntheticName(names[i], i)) {
                try out.appendSlice(self.allocator, names[i]);
                try out.appendSlice(self.allocator, ": ");
            }
        }
        const el_text = try self.migratedTypeElement(ft);
        try out.appendSlice(self.allocator, el_text);
    }
    try out.append(self.allocator, ')');
    return out.toOwnedSlice(self.allocator);
}
/// Text for one element of a type tuple. A nested `tuple_type_expr` is
/// rebuilt recursively (falling back to its source text when the builder says
/// "unchanged"); any other node is taken from source with the tuples inside
/// it migrated.
fn migratedTypeElement(self: *Walker, ft: *const Node) anyerror![]const u8 {
    switch (ft.data) {
        .tuple_type_expr => |nested| {
            if (try self.buildTupleTypeText(ft, nested)) |text| return text;
            return self.source[ft.span.start..ft.span.end];
        },
        else => return self.migratedSubtree(ft, false),
    }
}
/// Build the replacement text for a `tuple_literal`, baking nested tuples
/// recursively. Names flip `:` → ` = `. Elements are re-joined with `, `, so
/// a 1-tuple's trailing comma is dropped.
///
/// `node` is unused; it is accepted so the signature stays parallel with the
/// other builders. The returned slice is allocated with `self.allocator`.
fn buildTupleValueText(self: *Walker, node: *const Node, tl: ast.TupleLiteral) ![]const u8 {
    _ = node;
    var out = std.ArrayList(u8).empty;
    // Release the partially built text if any append below fails.
    errdefer out.deinit(self.allocator);
    try out.appendSlice(self.allocator, ".(");
    for (tl.elements, 0..) |el, i| {
        if (i != 0) try out.appendSlice(self.allocator, ", ");
        if (el.name) |name| {
            try out.appendSlice(self.allocator, name);
            try out.appendSlice(self.allocator, " = ");
        }
        // Every element takes the same path: a nested tuple literal is
        // rebuilt, anything else (including a `..xs` spread) is copied from
        // source with the tuples inside it migrated.
        const el_text = try self.migratedValueElement(el.value);
        try out.appendSlice(self.allocator, el_text);
    }
    try out.append(self.allocator, ')');
    return out.toOwnedSlice(self.allocator);
}
/// Text for one element of a value tuple. A nested `tuple_literal` is always
/// rebuilt as `.(...)` — sitting inside a value tuple, it is not a direct
/// call argument. Any other node is taken from source with the tuples inside
/// it migrated.
fn migratedValueElement(self: *Walker, value: *const Node) anyerror![]const u8 {
    switch (value.data) {
        .tuple_literal => |nested| return self.buildTupleValueText(value, nested),
        else => return self.migratedSubtree(value, false),
    }
}
/// Return the migrated text for an arbitrary subtree by collecting the edits
/// its descendants produce (relative to `node.span`) and splicing them into
/// the raw source slice. Worklist entries discovered inside are appended to
/// the shared worklist. This is how a NON-tuple element of a tuple (e.g. a
/// `call` with its own nested tuple args) gets its inner tuples migrated
/// while preserving its surrounding formatting verbatim.
///
/// When the subtree produces no edits the result is a slice of the original
/// source rather than a new allocation.
fn migratedSubtree(self: *Walker, node: *const Node, is_call_arg: bool) ![]const u8 {
    // Sub-walk with a private edit list but the SHARED worklist. The list
    // header is copied by value, so the sub-walker may grow (and reallocate)
    // the storage the parent's copy still points at.
    var sub = Walker{
        .allocator = self.allocator,
        .source = self.source,
        .worklist = self.worklist,
    };
    // Hand the list back on EVERY exit path, including errors, so the parent
    // never keeps a header describing storage the sub-walk has replaced.
    defer self.worklist = sub.worklist;
    try sub.walk(node, is_call_arg);
    const base = node.span.start;
    const raw = self.source[node.span.start..node.span.end];
    if (sub.edits.items.len == 0) return raw;
    // Splice sub-edits (offsets are absolute; rebase to the slice).
    return applyEditsRebased(self.allocator, raw, base, sub.edits.items);
}
/// Append `node` to the worklist as an ambiguous call-arg site, recording its
/// line/column (from `lineCol`) and its original source text.
fn recordWorklist(self: *Walker, node: *const Node) !void {
    const span = node.span;
    const where = lineCol(self.source, span.start);
    const entry: Worklist = .{
        .line = where.line,
        .col = where.col,
        .text = self.source[span.start..span.end],
        .reason = "ambiguous value-vs-type call arg; resolve to `Tuple(...)` or `.(...)` by hand",
    };
    try self.worklist.append(self.allocator, entry);
}
};
/// Reports whether `name` is the parser's placeholder for positional slot `i`,
/// i.e. exactly the text `_<i>` (for example `_0` for the first slot). Such a
/// name stands for "no name", which keeps the positional slots of a mixed
/// tuple positional.
fn isSyntheticName(name: []const u8, i: usize) bool {
    // Render the one spelling that counts as synthetic for this slot and
    // compare; anything shorter, unprefixed or differently numbered differs.
    var scratch: [24]u8 = undefined;
    const synthetic = std.fmt.bufPrint(&scratch, "_{d}", .{i}) catch return false;
    return std.mem.eql(u8, name, synthetic);
}
/// Decides whether a call-arg `tuple_literal` may be auto-rewritten to
/// `.(...)`: it must be non-empty and each element must satisfy
/// `nodeIsConcreteValue`. An empty `()` is rejected because in call-arg
/// position it could be the unit type as well as an empty value.
fn tupleIsAllConcreteValues(tl: ast.TupleLiteral) bool {
    const elements = tl.elements;
    var idx: usize = 0;
    while (idx < elements.len) : (idx += 1) {
        if (!nodeIsConcreteValue(elements[idx].value)) return false;
    }
    // Reaching here means no element was rejected; only a non-empty tuple
    // qualifies.
    return elements.len != 0;
}
/// Conservative test for "this node can only be a runtime value, never a
/// type". Accepted outright: int/float/bool/string literals, null/undef, enum
/// literals, array and struct literals. Accepted when all operands are
/// themselves concrete: unary ops, binary ops, chained comparisons, and nested
/// tuple literals. Every other node kind is rejected.
fn nodeIsConcreteValue(node: *const Node) bool {
    switch (node.data) {
        .int_literal,
        .float_literal,
        .bool_literal,
        .string_literal,
        .null_literal,
        .undef_literal,
        .enum_literal,
        .array_literal,
        .struct_literal,
        => return true,
        .unary_op => |un| return nodeIsConcreteValue(un.operand),
        .binary_op => |bin| {
            if (!nodeIsConcreteValue(bin.lhs)) return false;
            return nodeIsConcreteValue(bin.rhs);
        },
        .chained_comparison => |chain| {
            for (chain.operands) |operand| {
                if (!nodeIsConcreteValue(operand)) return false;
            }
            return true;
        },
        .tuple_literal => |nested| return tupleIsAllConcreteValues(nested),
        else => return false,
    }
}
/// Comptime predicate: can a value of type `T` (transitively) hold a `Node`
/// pointer worth recursing into? Used to prune the reflection walk so it
/// never descends into pure-scalar payloads.
fn containsNode(comptime T: type) bool {
    // Direct hits: the node type itself or a plain pointer to it.
    switch (T) {
        Node, *Node, *const Node => return true,
        else => {},
    }

    switch (@typeInfo(T)) {
        .optional => |info| return containsNode(info.child),
        .array => |info| return containsNode(info.child),
        .pointer => |info| {
            // Slices are looked through; a single-item pointer counts only
            // when it points straight at `Node`. Other pointer sizes never do.
            if (info.size == .slice) return containsNode(info.child);
            return info.size == .one and info.child == Node;
        },
        .@"struct" => |info| {
            inline for (info.fields) |field| {
                if (containsNode(field.type)) return true;
            }
            return false;
        },
        .@"union" => |info| return unionContainsNode(info),
        else => return false,
    }
}
/// Union arm of `containsNode`: true as soon as any variant's payload type
/// satisfies `containsNode`, false when none does.
fn unionContainsNode(comptime un: std.builtin.Type.Union) bool {
    inline for (un.fields) |variant| {
        const payload_has_node = containsNode(variant.type);
        if (payload_has_node) return true;
    }
    return false;
}
/// A 1-based line/column position within a source buffer.
const LineCol = struct { line: u32, col: u32 };

/// Translate a byte `offset` into `source` to a 1-based line/column pair.
/// Columns count bytes, not code points. An `offset` past the end of
/// `source` is clamped to the end.
fn lineCol(source: []const u8, offset: u32) LineCol {
    const limit = @min(@as(usize, offset), source.len);
    var pos: LineCol = .{ .line = 1, .col = 1 };
    for (source[0..limit]) |byte| {
        if (byte == '\n') {
            // A newline moves to the first column of the next line.
            pos.line += 1;
            pos.col = 1;
        } else {
            pos.col += 1;
        }
    }
    return pos;
}
/// Parse `source` and run the tuple-syntax migration over it. Only the parse
/// step of the compilation is invoked here; the resulting root is handed to
/// `migrateRoot`, which yields the rewritten text plus any ambiguous
/// worklist entries.
///
/// `file_path` is used only for diagnostics labeling.
///
/// Returns `error.ParseFailed` when parsing errors out (after rendering the
/// collected diagnostics) or when parsing produced no root node.
pub fn migrateSource(
    allocator: std.mem.Allocator,
    io: std.Io,
    file_path: []const u8,
    source: [:0]const u8,
) !MigrationResult {
    var compilation = core.Compilation.init(allocator, io, file_path, source, .{}, &.{});
    defer compilation.deinit();

    compilation.parse() catch {
        // Surface the parser's diagnostics before bailing out.
        compilation.renderErrors();
        return error.ParseFailed;
    };

    if (compilation.root) |root| {
        return migrateRoot(allocator, source, root);
    }
    return error.ParseFailed;
}
/// Run the migration over an already-parsed `root`. Kept separate from
/// `migrateSource` so unit tests can parse in memory (via `Parser.init`)
/// without an `std.Io`.
///
/// Walks every top-level declaration, applies the collected edits to
/// `source`, and returns the rewritten text together with the walker's
/// worklist.
pub fn migrateRoot(
    allocator: std.mem.Allocator,
    source: []const u8,
    root: *const Node,
) !MigrationResult {
    var w: Walker = .{ .allocator = allocator, .source = source };

    const top_level_decls = root.data.root.decls;
    for (top_level_decls) |decl| try w.walk(decl, false);

    const rewritten = try applyEdits(allocator, source, w.edits.items);
    const pending = try w.worklist.toOwnedSlice(allocator);
    return .{ .output = rewritten, .worklist = pending };
}
/// Apply `edits_in` to a COPY of `source` and return the rewritten text.
/// The caller owns the returned slice; `source` and `edits_in` are not
/// modified.
///
/// Edits are sorted DESCENDING by start so each splice leaves earlier offsets
/// valid. Overlapping edits are a hard error — the recursive rewrite must emit
/// exactly one edit per outermost tuple, so two edits sharing any byte is a
/// bug.
///
/// Errors:
///   - `error.OverlappingEdits` when two edits share any byte.
///   - `error.EditOutOfBounds` when an edit has `end < start` or reaches past
///     the end of `source`.
///   - any allocation failure from `allocator`.
pub fn applyEdits(allocator: std.mem.Allocator, source: []const u8, edits_in: []const Edit) ![]const u8 {
    // Sort a private copy so the caller's slice keeps its order. The copy is
    // scratch space only, so release it on every exit path.
    const edits = try allocator.dupe(Edit, edits_in);
    defer allocator.free(edits);
    std.mem.sort(Edit, edits, {}, struct {
        fn lessThan(_: void, a: Edit, b: Edit) bool {
            return a.start > b.start; // descending
        }
    }.lessThan);

    // Tripwire: after the descending sort, each edit's end must not exceed the
    // previous (higher-start) edit's start. Any overlap means the recursive
    // rewrite double-emitted — refuse to produce corrupt output.
    var prev_start: ?u32 = null;
    for (edits) |e| {
        if (prev_start) |ps| {
            if (e.end > ps) return error.OverlappingEdits;
        }
        prev_start = e.start;
    }

    // Range check, done after the overlap tripwire so an overlap is still
    // reported as such. Without it an inverted or out-of-range edit would
    // underflow `e.end - e.start` or splice past the buffer below.
    for (edits) |e| {
        if (e.start > e.end or e.end > source.len) return error.EditOutOfBounds;
    }

    var out = try std.ArrayList(u8).initCapacity(allocator, source.len);
    // If a later splice fails, do not leak the partially built buffer.
    errdefer out.deinit(allocator);
    try out.appendSlice(allocator, source);
    for (edits) |e| {
        // Splice source[e.start..e.end] -> e.replacement.
        try out.replaceRange(allocator, e.start, e.end - e.start, e.replacement);
    }
    return out.toOwnedSlice(allocator);
}
/// Apply edits whose `start`/`end` are ABSOLUTE source offsets to a `slice` that
/// begins at absolute offset `base`. Used by `migratedSubtree` to splice a
/// non-tuple subtree's inner tuple rewrites into its raw slice.
///
/// Each edit is shifted down by `base` and the result is handed to
/// `applyEdits`. The caller owns the returned slice. Returns
/// `error.EditOutOfBounds` when an edit starts before `base` or has
/// `end < start`; otherwise propagates whatever `applyEdits` returns.
fn applyEditsRebased(allocator: std.mem.Allocator, slice: []const u8, base: u32, edits_in: []const Edit) ![]const u8 {
    // Scratch copy of the edits with slice-relative offsets. `applyEdits`
    // takes its own copy, so this one can be released as soon as it returns.
    const rebased = try allocator.alloc(Edit, edits_in.len);
    defer allocator.free(rebased);

    for (edits_in, rebased) |e, *dst| {
        // Guard the subtractions: an edit outside the slice would underflow.
        if (e.start < base or e.end < e.start) return error.EditOutOfBounds;
        dst.* = .{ .start = e.start - base, .end = e.end - base, .replacement = e.replacement };
    }
    return applyEdits(allocator, slice, rebased);
}

View File

@@ -20,6 +20,8 @@ pub const core = @import("core.zig");
pub const c_import = @import("c_import.zig"); pub const c_import = @import("c_import.zig");
pub const c_import_tests = @import("c_import.test.zig"); pub const c_import_tests = @import("c_import.test.zig");
pub const corpus_run_tests = @import("corpus_run.test.zig"); pub const corpus_run_tests = @import("corpus_run.test.zig");
pub const migrate = @import("migrate.zig");
pub const migrate_tests = @import("migrate.test.zig");
pub const ir = @import("ir/ir.zig"); pub const ir = @import("ir/ir.zig");
pub const lsp = struct { pub const lsp = struct {