F2.3: pin std.json round-trip + malformed-input suite (examples/0715)
Add 0715-modules-json-suite as the single comprehensive pinned suite for std.json (mirrors 0711 for std.hash), alongside the focused 0713/0714 demos: - ROUND-TRIP build->write->parse->write over a document covering EVERY value kind (a string with every escape form \" \\ \b \f \n \r \t plus a \u00XX control, integers 0 / negative / s64 MIN / s64 MAX, bool, null, array, nested object) with insertion-order assertions, exact writer bytes, and parse-then-rewrite idempotence. - DECODE positives: \/, the full named-escape set, \uXXXX (BMP 1- and 2-byte) plus a surrogate pair, the escaped control forms, and raw multi-byte UTF-8 round-tripping through writer + reader. - MALFORMED matrix: one assertion per JsonParseError variant and its key edges (UnexpectedToken, UnexpectedEnd, BadEscape, BadNumber incl. leading zero / lone '-' / fraction / exponent / overflow, TrailingGarbage, BadControlChar), each asserted to raise. Pure test work: src/ and library/ untouched, no json.sx change needed. Every model is built through an explicit Arena allocator (heap discipline).
This commit is contained in:
223
examples/0715-modules-json-suite.sx
Normal file
223
examples/0715-modules-json-suite.sx
Normal file
@@ -0,0 +1,223 @@
|
||||
// Comprehensive pinned suite for `modules/std/json.sx` (writer F2.1 +
|
||||
// reader F2.2). Mirrors what 0711 did for std.hash: it LOCKS IN the full
|
||||
// round-trip and the complete malformed-input matrix as one coherent
|
||||
// pinned example. (0713/0714 stay as the focused writer/reader demos with
|
||||
// their heap-discipline narrative; this file is the correctness lock-in.)
|
||||
//
|
||||
// PART A — ROUND-TRIP. Build a representative document covering EVERY
|
||||
// value kind (nested object + array, a string carrying every escape
|
||||
// kind `\" \\ \b \f \n \r \t` and a `\u00XX` control, integers 0 /
|
||||
// small-negative / s64 MIN (-9223372036854775808) / s64 MAX
|
||||
// (9223372036854775807), bool, null) through an explicit Arena, then
|
||||
// `build -> write -> parse -> write`: assert the writer's EXACT bytes,
|
||||
// assert `parse` then re-`write` reproduces them (idempotent), and
|
||||
// spot-check the parsed tree's STRUCTURE incl. INSERTION ORDER.
|
||||
// PART B — DECODE POSITIVES. `\/`, the full named-escape set, `\uXXXX`
|
||||
// (BMP 1- and 2-byte) and a SURROGATE PAIR, the escaped control forms,
|
||||
// and raw multi-byte UTF-8 round-tripping through writer + reader.
|
||||
// PART C — MALFORMED MATRIX. One assertion per `JsonParseError` variant
|
||||
// and its key edges, each asserted to RAISE (never crash, never accept).
|
||||
//
|
||||
// Every model is built through an explicit Arena allocator (heap
|
||||
// discipline): scalars carry no heap, string values are views, composites
|
||||
// and decoded strings go through `alloc`, and the writer allocates nothing.
|
||||
|
||||
#import "modules/std.sx";
|
||||
#import "modules/std/json.sx";
|
||||
|
||||
// The writer's EXACT output for the PART A document (insertion order,
|
||||
// canonical escaping). Hand-pinned so a writer regression fails loudly in
|
||||
// the example itself, not only in the captured golden.
|
||||
EXPECT :: "{\"esc\":\"\\\"\\\\\\b\\t\\n\\f\\r\\u0001\",\"zero\":0,\"neg\":-7,\"min\":-9223372036854775808,\"max\":9223372036854775807,\"ok\":true,\"nil\":null,\"xs\":[1,-2,3],\"nested\":{\"k\":\"v\"}}";
|
||||
|
||||
// Emit one result line for a named check: "<label>: ok" when `ok` holds,
// "<label>: FAIL" otherwise. The outcome is only printed; nothing is
// raised or returned, so the caller keeps running after a failed check.
report :: (label: string, ok: bool) {
    if ok {
        print("{}: ok\n", label);
    } else {
        print("{}: FAIL\n", label);
    }
}
|
||||
|
||||
// Half-open interval test: true exactly when lo <= x < hi (`lo` is
// included, `hi` is excluded).
// NOTE(review): no call to this helper appears in the part of the file
// visible here — confirm whether it is still needed.
in_range :: (x: s64, lo: s64, hi: s64) -> bool {
    return lo <= x and x < hi;
}
|
||||
|
||||
// Report whether `parse(src, alloc)` fails with exactly the error `want`.
// The call result is destructured into (value, error) rather than routed
// through `try`, so a rejected input is observed here instead of aborting
// the example.
raises :: (src: string, want: JsonParseError, alloc: Allocator) -> bool {
    _, got := parse(src, alloc);
    return got == want;
}
|
||||
|
||||
// Feed the parser the five bytes `"` `a` <b> `b` `"` — a JSON string whose
// middle byte is the RAW byte `b` — and report whether that input is
// rejected with BadControlChar. The input is assembled in a byte buffer
// because a raw control byte can't appear in an sx string literal.
ctrl_raises :: (b: u8, alloc: Allocator) -> bool {
    doc : [5]u8 = ---;
    doc[0] = 34;    // opening "
    doc[1] = 97;    // a
    doc[2] = b;     // the raw byte under test
    doc[3] = 98;    // b
    doc[4] = 34;    // closing "
    view := string.{ ptr = @doc[0], len = 5 };
    return raises(view, error.BadControlChar, alloc);
}
|
||||
|
||||
// Assemble the PART A document: one member per value kind, added in the
// exact order the writer is expected to emit them. The bytes behind the
// `esc` string come from `alloc` rather than a local buffer: string values
// are VIEWS, so their storage has to outlive this function.
build :: (alloc: Allocator) -> Value {
    // One raw byte per escape form, in the same order EXPECT spells them.
    raw : [*]u8 = xx alloc.alloc(8);
    raw[0] = 34;    // "   -> \"
    raw[1] = 92;    // \   -> \\
    raw[2] = 8;     // BS  -> \b
    raw[3] = 9;     // TAB -> \t
    raw[4] = 10;    // LF  -> \n
    raw[5] = 12;    // FF  -> \f
    raw[6] = 13;    // CR  -> \r
    raw[7] = 1;     // SOH -> \u0001 (control with no named shorthand)
    escaped := string.{ ptr = raw, len = 8 };

    // {"k":"v"}
    inner : Object = .{};
    inner.put("k", .str("v"), alloc);

    // [1,-2,3]
    list : Array = .{};
    list.add(.int_(1), alloc);
    list.add(.int_(0 - 2), alloc);
    list.add(.int_(3), alloc);

    // Top-level object; the order of these `put` calls is the order the
    // suite later asserts on.
    top : Object = .{};
    top.put("esc", .str(escaped), alloc);
    top.put("zero", .int_(0), alloc);
    top.put("neg", .int_(0 - 7), alloc);
    // s64 MIN: its magnitude does not fit in a positive s64 literal, so it
    // is spelled as (0 - s64 MAX) - 1.
    top.put("min", .int_(0 - 9223372036854775807 - 1), alloc);
    top.put("max", .int_(9223372036854775807), alloc);
    top.put("ok", .bool_(true), alloc);
    top.put("nil", .null_, alloc);
    top.put("xs", .array(list), alloc);
    top.put("nested", .object(inner), alloc);
    return .object(top);
}
|
||||
|
||||
// Entry point. Runs PART A (round-trip), PART B (decode positives) and
// PART C (malformed matrix) in that order, printing one `report` line per
// check and a final DONE marker.
main :: () -> ! {
    // Everything below allocates through one arena layered on the GPA;
    // the deferred `deinit` runs when main exits.
    gpa := GPA.init();
    arena := Arena.init(xx gpa, 16384);
    defer arena.deinit();
    mem : Allocator = xx arena;

    // ── PART A: build -> write -> parse -> write ──────────────────────────
    doc := build(mem);

    out1 : [512]u8 = ---;
    len1 := try write_to_buffer(doc, string.{ ptr = @out1[0], len = 512 });
    first := string.{ ptr = @out1[0], len = len1 };
    print("doc: {}\n", first);                 // the exact bytes are printed for the golden
    report("rt-exact", first == EXPECT);
    report("rt-len", len1 == EXPECT.len);

    // Parse what the writer produced and serialize it again: the second
    // pass has to match the first byte for byte.
    reparsed := try parse(first, mem);
    out2 : [512]u8 = ---;
    len2 := try write_to_buffer(reparsed, string.{ ptr = @out2[0], len = 512 });
    second := string.{ ptr = @out2[0], len = len2 };
    report("rt-idempotent", second == first);

    // Shape of the re-parsed tree: member count, key order, then one
    // check per value kind.
    top := reparsed.object;
    report("st-count", top.len == 9);
    report("st-order",
        top.items[0].key == "esc" and
        top.items[1].key == "zero" and
        top.items[2].key == "neg" and
        top.items[3].key == "min" and
        top.items[4].key == "max" and
        top.items[5].key == "ok" and
        top.items[6].key == "nil" and
        top.items[7].key == "xs" and
        top.items[8].key == "nested");

    // The escaped string must decode back to the same 8 raw bytes that
    // `build` stored.
    want_esc : [8]u8 = ---;
    want_esc[0] = 34; want_esc[1] = 92; want_esc[2] = 8;  want_esc[3] = 9;
    want_esc[4] = 10; want_esc[5] = 12; want_esc[6] = 13; want_esc[7] = 1;
    report("st-esc", top.items[0].val.str == string.{ ptr = @want_esc[0], len = 8 });
    report("st-zero", top.items[1].val.int_ == 0);
    report("st-neg", top.items[2].val.int_ == 0 - 7);
    report("st-min", top.items[3].val.int_ == 0 - 9223372036854775807 - 1);
    report("st-max", top.items[4].val.int_ == 9223372036854775807);
    report("st-bool", top.items[5].val.bool_ == true);
    null_seen := if top.items[6].val == { case .null_: true; else: false; };
    report("st-null", null_seen);
    nums := top.items[7].val.array;
    report("st-xs",
        nums.len == 3 and
        nums.items[0].int_ == 1 and
        nums.items[1].int_ == 0 - 2 and
        nums.items[2].int_ == 3);
    inner := top.items[8].val.object;
    report("st-nested",
        inner.len == 1 and
        inner.items[0].key == "k" and
        inner.items[0].val.str == "v");

    // ── PART B: decode positives ──────────────────────────────────────────
    // An escaped solidus parses to a plain "/". The writer emits "/"
    // unescaped, so this form is only ever seen on the parse side.
    solidus := try parse("\"\\/\"", mem);
    report("dec-slash", solidus.str == "/");

    // All eight named escapes in one string: \" \\ \/ \b \f \n \r \t.
    named := try parse("\"\\\"\\\\\\/\\b\\f\\n\\r\\t\"", mem);
    want_named : [8]u8 = ---;
    want_named[0] = 34; want_named[1] = 92; want_named[2] = 47; want_named[3] = 8;
    want_named[4] = 12; want_named[5] = 10; want_named[6] = 13; want_named[7] = 9;
    report("dec-escapes", named.str == string.{ ptr = @want_named[0], len = 8 });

    // \uXXXX forms: U+0041 (1 UTF-8 byte), U+00E9 (2 bytes) and the
    // surrogate pair D83D/DE00 (4 bytes) -> 41 | C3 A9 | F0 9F 98 80.
    unicode := try parse("\"\\u0041\\u00e9\\uD83D\\uDE00\"", mem);
    want_uni : [7]u8 = ---;
    want_uni[0] = 0x41;
    want_uni[1] = 0xC3; want_uni[2] = 0xA9;
    want_uni[3] = 0xF0; want_uni[4] = 0x9F; want_uni[5] = 0x98; want_uni[6] = 0x80;
    report("dec-surrogate", unicode.str == string.{ ptr = @want_uni[0], len = 7 });

    // Positive counterpart of the BadControlChar cases in PART C: control
    // characters written as escapes (\t, \n, \u0009) decode to 09 0A 09.
    escaped_ctrl := try parse("\"\\t\\n\\u0009\"", mem);
    want_ctrl : [3]u8 = ---;
    want_ctrl[0] = 9; want_ctrl[1] = 10; want_ctrl[2] = 9;
    report("dec-esc-ctrl", escaped_ctrl.str == string.{ ptr = @want_ctrl[0], len = 3 });

    // Raw multi-byte UTF-8 (bytes >= 0x80) goes writer -> reader and must
    // come back unchanged.
    utf8 : [*]u8 = xx mem.alloc(7);
    utf8[0] = 0x41;
    utf8[1] = 0xC3; utf8[2] = 0xA9;
    utf8[3] = 0xF0; utf8[4] = 0x9F; utf8[5] = 0x98; utf8[6] = 0x80;
    utf8_view := string.{ ptr = utf8, len = 7 };
    utf8_val : Value = .str(utf8_view);
    utf8_out : [64]u8 = ---;
    utf8_len := try write_to_buffer(utf8_val, string.{ ptr = @utf8_out[0], len = 64 });
    utf8_back := try parse(string.{ ptr = @utf8_out[0], len = utf8_len }, mem);
    report("rt-utf8", utf8_back.str == utf8_view);

    // ── PART C: malformed-input matrix ────────────────────────────────────
    // UnexpectedToken: unknown literal, non-string object key, missing
    // comma between array elements.
    report("err-token-literal", raises("xyz", error.UnexpectedToken, mem));
    report("err-token-key", raises("{1:2}", error.UnexpectedToken, mem));
    report("err-token-comma", raises("[1 2]", error.UnexpectedToken, mem));

    // UnexpectedEnd: input stops inside an object, an array, a string.
    report("err-end-object", raises("{\"a\":", error.UnexpectedEnd, mem));
    report("err-end-array", raises("[1,", error.UnexpectedEnd, mem));
    report("err-end-string", raises("\"abc", error.UnexpectedEnd, mem));

    // BadEscape: unknown escape letter, non-hex digits after \u, and a
    // high surrogate followed by something that is not a low surrogate.
    report("err-esc-unknown", raises("\"a\\xb\"", error.BadEscape, mem));
    report("err-esc-bad-hex", raises("\"\\uZZZZ\"", error.BadEscape, mem));
    report("err-esc-surrogate", raises("\"\\uD83D\\u0041\"", error.BadEscape, mem));

    // BadNumber: leading zero, lone minus sign, fraction, exponent, and
    // the first integer past s64 MAX.
    report("err-num-leadzero", raises("01", error.BadNumber, mem));
    report("err-num-lonedash", raises("-", error.BadNumber, mem));
    report("err-num-fraction", raises("1.5", error.BadNumber, mem));
    report("err-num-exponent", raises("1e9", error.BadNumber, mem));
    report("err-num-overflow", raises("9223372036854775808", error.BadNumber, mem));

    // TrailingGarbage: extra input after a complete value.
    report("err-trail-array", raises("[1,2] x", error.TrailingGarbage, mem));
    report("err-trail-scalar", raises("null x", error.TrailingGarbage, mem));

    // BadControlChar: a raw byte below 0x20 inside a string.
    report("err-ctrl-tab", ctrl_raises(9, mem));     // raw 0x09
    report("err-ctrl-lf", ctrl_raises(10, mem));     // raw 0x0A
    report("err-ctrl-nul", ctrl_raises(0, mem));     // raw 0x00

    print("=== DONE ===\n");
    return;
}
|
||||
1
examples/expected/0715-modules-json-suite.exit
Normal file
1
examples/expected/0715-modules-json-suite.exit
Normal file
@@ -0,0 +1 @@
|
||||
0
|
||||
0
examples/expected/0715-modules-json-suite.stderr
Normal file
0
examples/expected/0715-modules-json-suite.stderr
Normal file
40
examples/expected/0715-modules-json-suite.stdout
Normal file
40
examples/expected/0715-modules-json-suite.stdout
Normal file
@@ -0,0 +1,40 @@
|
||||
doc: {"esc":"\"\\\b\t\n\f\r\u0001","zero":0,"neg":-7,"min":-9223372036854775808,"max":9223372036854775807,"ok":true,"nil":null,"xs":[1,-2,3],"nested":{"k":"v"}}
|
||||
rt-exact: ok
|
||||
rt-len: ok
|
||||
rt-idempotent: ok
|
||||
st-count: ok
|
||||
st-order: ok
|
||||
st-esc: ok
|
||||
st-zero: ok
|
||||
st-neg: ok
|
||||
st-min: ok
|
||||
st-max: ok
|
||||
st-bool: ok
|
||||
st-null: ok
|
||||
st-xs: ok
|
||||
st-nested: ok
|
||||
dec-slash: ok
|
||||
dec-escapes: ok
|
||||
dec-surrogate: ok
|
||||
dec-esc-ctrl: ok
|
||||
rt-utf8: ok
|
||||
err-token-literal: ok
|
||||
err-token-key: ok
|
||||
err-token-comma: ok
|
||||
err-end-object: ok
|
||||
err-end-array: ok
|
||||
err-end-string: ok
|
||||
err-esc-unknown: ok
|
||||
err-esc-bad-hex: ok
|
||||
err-esc-surrogate: ok
|
||||
err-num-leadzero: ok
|
||||
err-num-lonedash: ok
|
||||
err-num-fraction: ok
|
||||
err-num-exponent: ok
|
||||
err-num-overflow: ok
|
||||
err-trail-array: ok
|
||||
err-trail-scalar: ok
|
||||
err-ctrl-tab: ok
|
||||
err-ctrl-lf: ok
|
||||
err-ctrl-nul: ok
|
||||
=== DONE ===
|
||||
Reference in New Issue
Block a user