From 17b437ecfb8bf3a60f2a5662efc201887bda9ada Mon Sep 17 00:00:00 2001
From: agra
Date: Thu, 4 Jun 2026 06:13:09 +0300
Subject: [PATCH] F3.2: std.cli minimal subcommand + flag parser over explicit []string

Extend std/cli.sx with a zero-heap argument parser that the caller drives
over a logical argv ([]string), separate from the F3.1 os_args accessor.

Grammar: <group> <command> [--flag VALUE | --bool]... [--json] [-- rest...]
  - (group, command) dispatched against a caller-provided Command table;
    no match -> error.UnknownCommand.
  - value-taking vs boolean flags fixed by each command's FlagSpec list;
    --json is a reserved global boolean surfaced as parsed.json.
  - `--` or the first bare operand ends flag parsing; the remainder is
    parsed.rest (operand views).

Heap discipline (heap-discipline.md): zero heap, zero copy. group/command/
flag values/rest are all VIEWS into args. Parsed is a by-value stack
struct; flag presence/values live in a fixed [16]FlagValue inline array
indexed by spec position (no per-flag allocation, no context.allocator).
The flag-spec list and command table are caller storage passed as views.

Failure surfacing (no silent skip): unknown command, unknown flag, a
value-flag missing its value, and an absent required flag each raise a
specific CliError variant; a caller-owned Diag records the offending token
(index + view) before each raise, since error tags carry no data.

examples/0717 drives the parser over explicit []string vectors: a valid
group/command/--flag/--bool/--json case (asserting parsed values + that
values are views into argv), subcommand dispatch, `--`/bare-operand
separators, and four of the five CliError variants (TooManyFlags is not
exercised), each asserted via destructure + Diag.

zig build && zig build test && run_examples.sh green (385 passed).
---
 examples/0717-modules-cli-parse.sx            | 137 ++++++++++
 examples/expected/0717-modules-cli-parse.exit |   1 +
 .../expected/0717-modules-cli-parse.stderr    |   0
 .../expected/0717-modules-cli-parse.stdout    |  33 +++
 library/modules/std/cli.sx                    | 241 ++++++++++++++++++
 5 files changed, 412 insertions(+)
 create mode 100644 examples/0717-modules-cli-parse.sx
 create mode 100644 examples/expected/0717-modules-cli-parse.exit
 create mode 100644 examples/expected/0717-modules-cli-parse.stderr
 create mode 100644 examples/expected/0717-modules-cli-parse.stdout

diff --git a/examples/0717-modules-cli-parse.sx b/examples/0717-modules-cli-parse.sx
new file mode 100644
index 0000000..ba9c2a0
--- /dev/null
+++ b/examples/0717-modules-cli-parse.sx
@@ -0,0 +1,137 @@
+// CLI argument PARSER from `modules/std/cli.sx` (F3.2) — subcommand
+// dispatch + `--flag` parsing over an EXPLICIT logical argv (`[]string`).
+//
+// Every argv vector below is an explicit `[]string` literal (the caller's
+// logical args, program name already removed). The suite proves:
+//
+//   1. DISPATCH   — `<group> <command>` selects the right command in the
+//                   caller's table; group/command are VIEWS into argv.
+//   2. FLAGS      — `--out VALUE` (value-taking) binds a VIEW of the next
+//                   token; `--verbose` (boolean) records presence; the
+//                   reserved `--json` mode flag surfaces as `parsed.json`.
+//   3. SEPARATORS — `--` and the first bare operand both stop flag
+//                   parsing; the remainder is `parsed.rest` (operand VIEWS).
+//   4. HEAP       — flag values / group / command / rest all point INSIDE
+//                   the input argv (zero copy); `Parsed` is a stack value.
+//   5. FAILURES   — unknown command, unknown flag, missing required flag,
+//                   and a value-flag with no value each raise the specific
+//                   `CliError` variant on the error channel, and the
+//                   caller-owned `Diag` names the offending token.
+
+#import "modules/std.sx";
+#import "modules/std/cli.sx";
+
+report :: (label: string, ok: bool) {
+    if ok { print("{}: ok\n", label); } else { print("{}: FAIL\n", label); }
+}
+
+// Half-open containment [lo, hi) — used to prove a view points into argv.
+in_range :: (x: s64, lo: s64, hi: s64) -> bool {
+    return x >= lo and x < hi;
+}
+
+// True when `parse(args, cmds)` raised exactly `want`. Destructure binds
+// the error tag without `try`, so a bad vector never aborts the example;
+// the failing token is captured in a local `Diag` that is then discarded.
+raises :: (args: []string, cmds: []Command, want: CliError) -> bool {
+    d : Diag = .{};
+    _, e := parse(args, cmds, @d);
+    return e == want;
+}
+
+main :: () -> ! {
+    // ── Command table (caller storage; flag specs passed as views) ────
+    publish_flags : []FlagSpec = .[
+        FlagSpec.{ name = "out", takes_value = true, required = true },
+        FlagSpec.{ name = "verbose", takes_value = false, required = false },
+    ];
+    status_flags : []FlagSpec = .[
+        FlagSpec.{ name = "verbose", takes_value = false, required = false },
+    ];
+    cmds : []Command = .[
+        Command.{ group = "ci", command = "publish", flags = publish_flags },
+        Command.{ group = "ci", command = "status", flags = status_flags },
+    ];
+
+    // ── 1. Valid: <group> <command> --flag v --bool --json ────────────
+    d : Diag = .{};
+    argv : []string = .["ci", "publish", "--out", "dist", "--verbose", "--json"];
+    p := try parse(argv, cmds, @d);
+
+    report("dispatch-group", p.group == "ci");
+    report("dispatch-command", p.command == "publish");
+    report("dispatch-index", p.cmd_index == 0);
+    report("flag-value", p.value_of("out") == "dist");
+    report("flag-value-set", p.is_set("out"));
+    report("bool-set", p.is_set("verbose"));
+    report("json-set", p.json);
+    report("no-rest", p.rest.len == 0);
+
+    // ── 2. Heap discipline: flag value is a VIEW into argv ────────────
+    // "dist" is argv[3]; its bytes must lie inside that very element.
+    src : s64 = xx argv[3].ptr;
+    stop := src + argv[3].len;
+    pview : s64 = xx p.value_of("out").ptr;
+    report("value-is-view", in_range(pview, src, stop) or pview == src);
+    // group/command are argv[0]/argv[1] verbatim (same pointer, no copy).
+    g0 : s64 = xx argv[0].ptr;
+    gp : s64 = xx p.group.ptr;
+    report("group-is-view", gp == g0);
+
+    // ── 3. Dispatch to a different command in the table ──────────────
+    s_argv : []string = .["ci", "status", "--verbose"];
+    sp := try parse(s_argv, cmds, @d);
+    report("dispatch-2nd", sp.command == "status" and sp.cmd_index == 1);
+    report("2nd-bool", sp.is_set("verbose"));
+    report("2nd-json-unset", !sp.json);
+
+    // ── 4. `--` separator: rest are operand views, flags stop there ──
+    sep_argv : []string = .["ci", "publish", "--out", "dist", "--", "--raw", "x"];
+    spv := try parse(sep_argv, cmds, @d);
+    report("sep-value", spv.value_of("out") == "dist");
+    report("sep-rest-len", spv.rest.len == 2);
+    report("sep-rest-0", spv.rest.len == 2 and spv.rest[0] == "--raw");
+    report("sep-rest-1", spv.rest.len == 2 and spv.rest[1] == "x");
+    report("sep-no-bool", !spv.is_set("verbose"));
+
+    // ── 5. First bare operand also stops flag parsing ────────────────
+    bare_argv : []string = .["ci", "publish", "--out", "dist", "extra", "tail"];
+    bpv := try parse(bare_argv, cmds, @d);
+    report("bare-rest-len", bpv.rest.len == 2);
+    report("bare-rest-0", bpv.rest.len == 2 and bpv.rest[0] == "extra");
+
+    // ── 6. Value-flag accepts a single-dash value (not a long flag) ──
+    dash_argv : []string = .["ci", "publish", "--out", "-5", "--verbose"];
+    dpv := try parse(dash_argv, cmds, @d);
+    report("dash-value", dpv.value_of("out") == "-5" and dpv.is_set("verbose"));
+
+    // ── 7. Failures: each surfaces the specific variant (no TooManyFlags case) ──
+    a_unknown_cmd : []string = .["ci", "deploy", "--out", "x"];
+    a_unknown_group : []string = .["zz", "publish", "--out", "x"];
+    a_too_few : []string = .["ci"];
+    a_unknown_flag : []string = .["ci", "publish", "--out", "x", "--nope"];
+    a_missing_value : []string = .["ci", "publish", "--out"];
+    a_value_eats : []string = .["ci", "publish", "--out", "--verbose"];
+    a_missing_req : []string = .["ci", "publish", "--verbose"];
+    report("err-unknown-cmd", raises(a_unknown_cmd, cmds, error.UnknownCommand));
+    report("err-unknown-group", raises(a_unknown_group, cmds, error.UnknownCommand));
+    report("err-too-few", raises(a_too_few, cmds, error.UnknownCommand));
+    report("err-unknown-flag", raises(a_unknown_flag, cmds, error.UnknownFlag));
+    report("err-missing-value", raises(a_missing_value, cmds, error.MissingValue));
+    report("err-value-eats-flag", raises(a_value_eats, cmds, error.MissingValue));
+    report("err-missing-req", raises(a_missing_req, cmds, error.MissingRequired));
+
+    // ── 8. Diag names the offending token on the error path ──────────
+    de : Diag = .{};
+    _, ue := parse(a_unknown_flag, cmds, @de);
+    report("diag-flag-tag", ue == error.UnknownFlag);
+    report("diag-flag-token", de.token == "--nope" and de.index == 4);
+
+    dm : Diag = .{};
+    _, me := parse(a_missing_req, cmds, @dm);
+    report("diag-req-tag", me == error.MissingRequired);
+    report("diag-req-token", dm.token == "out");
+
+    print("=== DONE ===\n");
+    return;
+}
diff --git a/examples/expected/0717-modules-cli-parse.exit b/examples/expected/0717-modules-cli-parse.exit
new file mode 100644
index 0000000..573541a
--- /dev/null
+++ b/examples/expected/0717-modules-cli-parse.exit
@@ -0,0 +1 @@
+0
diff --git a/examples/expected/0717-modules-cli-parse.stderr b/examples/expected/0717-modules-cli-parse.stderr
new file mode 100644
index 0000000..e69de29
diff --git a/examples/expected/0717-modules-cli-parse.stdout b/examples/expected/0717-modules-cli-parse.stdout
new file mode 100644
index 0000000..ab06f38
--- /dev/null
+++ b/examples/expected/0717-modules-cli-parse.stdout
@@ -0,0 +1,33 @@
+dispatch-group: ok
+dispatch-command: ok
+dispatch-index: ok
+flag-value: ok
+flag-value-set: ok
+bool-set: ok
+json-set: ok
+no-rest: ok
+value-is-view: ok
+group-is-view: ok
+dispatch-2nd: ok
+2nd-bool: ok
+2nd-json-unset: ok
+sep-value: ok
+sep-rest-len: ok
+sep-rest-0: ok
+sep-rest-1: ok
+sep-no-bool: ok
+bare-rest-len: ok
+bare-rest-0: ok
+dash-value: ok
+err-unknown-cmd: ok
+err-unknown-group: ok
+err-too-few: ok
+err-unknown-flag: ok
+err-missing-value: ok
+err-value-eats-flag: ok
+err-missing-req: ok
+diag-flag-tag: ok
+diag-flag-token: ok
+diag-req-tag: ok
+diag-req-token: ok
+=== DONE ===
diff --git a/library/modules/std/cli.sx b/library/modules/std/cli.sx
index 304da5f..22057e3 100644
--- a/library/modules/std/cli.sx
+++ b/library/modules/std/cli.sx
@@ -92,3 +92,244 @@ os_args :: (buf: []string) -> []string {
         }
     }
 }
+
+// =====================================================================
+// Argument PARSER — subcommand dispatch + `--flag` over an EXPLICIT
+// logical argv (F3.2).
+//
+// `parse(args, commands, diag)` reads a caller-supplied logical argument
+// vector `[]string` — NOT the process argv. (Mapping process argv ->
+// logical args, e.g. dropping the `sx run prog.sx` prefix, is the
+// consumer's job; see the `os_args` note above and F3.1.) The grammar:
+//
+//   <group> <command> [--flag VALUE | --bool]... [--json] [-- rest...]
+//
+//   - args[0] is the GROUP, args[1] the COMMAND. The (group, command)
+//     pair is matched against the caller's `commands` table; no match is
+//     `error.UnknownCommand`.
+//   - `--name VALUE` sets a value-taking flag to a VIEW of the next token;
+//     `--name` alone records a boolean flag's presence. Which is which is
+//     fixed by the matched command's `FlagSpec` list.
+//   - `--json` is a RESERVED global boolean mode flag, always recognized
+//     (commands do not declare it); it surfaces as `parsed.json`.
+//   - `--` ends flag parsing; the tokens after it become `parsed.rest`
+//     (operand VIEWS). The first bare (non `--`-prefixed) token likewise
+//     ends flag parsing and, with the remainder, becomes `parsed.rest`.
+//   - Only long `--flags` are recognized; a single-dash token (`-v`, `-`)
+//     is treated as a bare operand. A value-taking flag accepts any next
+//     token that is not itself a long flag (so `--n -5` gives value "-5").
+//
+// FAILURE SURFACING (no silent skip): an unknown command, an unknown
+// flag, a value-flag missing its value, or an absent required flag each
+// raise a meaningful `CliError` on the error channel — never a silent
+// default. Because error tags carry no data, the caller-owned `diag`
+// records the offending token (its `args` index + a VIEW of it) before
+// the raise, so the caller can report exactly which token failed.
+//
+// HEAP DISCIPLINE (binding, see heap-discipline.md): zero heap, zero copy.
+//   - group / command / every flag value / every `rest` operand are VIEWS
+//     (slices) into the caller's `args` — never copied.
+//   - `Parsed` is a by-value STACK struct the caller binds (like
+//     `hash.init()`); flag presence/values live in a FIXED-capacity inline
+//     array `[16]FlagValue` (at most 16 flags per command), indexed
+//     positionally by the matched command's spec — no per-flag allocation.
+//   - The flag-spec list and the command table are caller storage passed
+//     as VIEWS. Nothing here touches `context.allocator`.
+//
+// Usage:
+//
+//   flags : []FlagSpec = .[
+//       FlagSpec.{ name = "out",     takes_value = true,  required = true  },
+//       FlagSpec.{ name = "verbose", takes_value = false, required = false },
+//   ];
+//   cmds : []Command = .[ Command.{ group = "ci", command = "publish", flags = flags } ];
+//   d : Diag = .{};
+//   p := try parse(args, cmds, @d);
+//   // p.group == "ci"; p.command == "publish";
+//   // p.value_of("out"); p.is_set("verbose"); p.json; p.rest
+// =====================================================================
+
+// The parser's failure contract. The first four are INPUT errors a caller
+// reacts to; `TooManyFlags` rejects a command that declares more flags than
+// the inline `Parsed.values` array holds (16) — never a silent truncation.
+CliError :: error { UnknownCommand, UnknownFlag, MissingValue, MissingRequired, TooManyFlags }
+
+// One flag's contract: its long name (without the `--`), whether it takes
+// a value, and whether it must be present. Caller-owned; passed as a view.
+FlagSpec :: struct {
+    name: string;       // compared with the token text after the leading `--`
+    takes_value: bool;  // true: `parse` binds the next token as this flag's value
+    required: bool;     // true: absence raises error.MissingRequired
+}
+
+// One command's contract: a (group, command) pair and its flag specs (a
+// VIEW into caller storage; `parse` rejects more than 16 with TooManyFlags).
+Command :: struct {
+    group: string;      // compared with args[0]
+    command: string;    // compared with args[1]
+    flags: []FlagSpec;  // slot order here is the index order of `Parsed.values`
+}
+
+// A parsed flag slot, positionally matched to a `FlagSpec`. `value` is a
+// VIEW into `args`, meaningful only for a value-taking flag that was set.
+// Defaults make a whole `Parsed` zero-initializable via `.{}`.
+FlagValue :: struct {
+    set: bool = false;   // the flag appeared in `args`
+    value: string = "";  // next-token view; stays "" for a boolean flag
+}
+
+// The offending token on the error path. The caller owns a stack `Diag`
+// and passes it by pointer; `parse` writes it before every raise because
+// error tags carry no data, and does not touch it on success. `index` is
+// the position in `args`, or -1 when no input token applies (empty `args`,
+// MissingRequired -> flag name, TooManyFlags -> command name in `token`).
+Diag :: struct {
+    index: s64 = -1;     // position of `token` in `args`, or -1 (see above)
+    token: string = "";  // the token, flag name, or command name at fault
+}
+
+// The parse result — a by-value stack struct. group / command / flag
+// values / rest are all VIEWS into `args`.
+Parsed :: struct {
+    group: string;          // args[0]
+    command: string;        // args[1]
+    cmd_index: s64;         // index of the matched entry in `commands`
+    json: bool;             // `--json` was seen before flag parsing stopped
+    rest: []string;         // operands after `--`, or from the first bare token on; empty otherwise
+    spec: []FlagSpec;       // view of the matched command's flag specs
+    values: [16]FlagValue;  // fixed inline storage, indexed by spec position
+
+    // Presence of a declared flag (boolean or value-taking). False for an
+    // undeclared name. Linear scan of `spec`; the first matching name wins.
+    is_set :: (self: *Parsed, name: string) -> bool {
+        j := 0;
+        while j < self.spec.len {
+            if self.spec[j].name == name { return self.values[j].set; }
+            j += 1;
+        }
+        return false;
+    }
+
+    // The VIEW value of a value-taking flag, or "" if absent/undeclared (a
+    // boolean flag also yields ""). Use `is_set` to tell "absent" from "present, empty value".
+    value_of :: (self: *Parsed, name: string) -> string {
+        j := 0;
+        while j < self.spec.len {
+            if self.spec[j].name == name { return self.values[j].value; }
+            j += 1;
+        }
+        return "";
+    }
+}
+
+// True for any token that starts with `--` — a bare `--` included, which is
+// why `parse` tests for the separator first. `-v` or a bare word is not a flag.
+is_long_flag :: (s: string) -> bool {
+    return s.len >= 2 and s[0] == 45 and s[1] == 45; // 45 = '-'
+}
+
+// Parse `args` (the logical argv) against the `commands` table, writing
+// the offending token into `diag` on the error path. See the section
+// header for grammar, failure contract, and heap discipline.
+parse :: (args: []string, commands: []Command, diag: *Diag) -> (Parsed, !CliError) {
+    // ── Dispatch: match (args[0], args[1]) against the command table ──
+    if args.len < 2 { // group and command are both mandatory
+        diag.index = if args.len == 0 then -1 else 0;
+        diag.token = if args.len == 0 then "" else args[0];
+        raise error.UnknownCommand;
+    }
+    group := args[0];
+    command := args[1];
+    ci := -1;
+    k := 0;
+    while k < commands.len { // first matching table entry wins
+        if commands[k].group == group and commands[k].command == command { ci = k; break; }
+        k += 1;
+    }
+    if ci < 0 {
+        diag.index = 1; // always names args[1], even when only the group failed to match
+        diag.token = command;
+        raise error.UnknownCommand;
+    }
+    spec := commands[ci].flags;
+    if spec.len > 16 { // more specs than `Parsed.values` has slots
+        diag.index = -1;
+        diag.token = command;
+        raise error.TooManyFlags;
+    }
+
+    // ── Result skeleton ──
+    // Clear ALL 16 `values` slots (not just the spec prefix): the whole
+    // struct must be live before it is returned by value. `rest` is an
+    // empty slice until a `--`/operand sets it.
+    result : Parsed = ---;
+    result.group = group;
+    result.command = command;
+    result.cmd_index = ci;
+    result.json = false;
+    result.spec = spec;
+    result.rest = args[args.len ..]; // zero-length view at the end of `args`
+    j := 0;
+    while j < 16 {
+        result.values[j].set = false;
+        result.values[j].value = "";
+        j += 1;
+    }
+
+    // ── Flags (a repeated flag overwrites its slot: the last one wins) ──
+    i := 2;
+    while i < args.len {
+        tok := args[i];
+        if tok == "--" { // explicit separator: rest follows
+            result.rest = args[i + 1 ..];
+            break;
+        }
+        if !is_long_flag(tok) { // first bare operand ends flag parsing
+            result.rest = args[i ..];
+            break;
+        }
+        name := tok[2 ..];
+        if name == "json" { // reserved global mode flag; checked before the spec lookup
+            result.json = true;
+            i += 1;
+            continue;
+        }
+        si := -1;
+        s := 0;
+        while s < spec.len {
+            if spec[s].name == name { si = s; break; }
+            s += 1;
+        }
+        if si < 0 {
+            diag.index = i;
+            diag.token = tok;
+            raise error.UnknownFlag;
+        }
+        if spec[si].takes_value {
+            if i + 1 >= args.len or is_long_flag(args[i + 1]) { // no next token, or it starts with `--` (a bare `--` included)
+                diag.index = i; // names the flag itself, not the missing value
+                diag.token = tok;
+                raise error.MissingValue;
+            }
+            result.values[si].set = true;
+            result.values[si].value = args[i + 1]; // VIEW into args
+            i += 2;
+        } else {
+            result.values[si].set = true;
+            i += 1;
+        }
+    }
+
+    // ── Required-flag check (reports the first missing one, in spec order) ──
+    r := 0;
+    while r < spec.len {
+        if spec[r].required and !result.values[r].set {
+            diag.index = -1;
+            diag.token = spec[r].name;
+            raise error.MissingRequired;
+        }
+        r += 1;
+    }
+
+    return result;
+}