F2.2: reject raw control bytes (U+0000..U+001F) in JSON strings
parse_string scanned for `"` and `\` but accepted every other byte, including raw control characters. RFC 8259 §7 requires those bytes to be escaped inside a string; an unescaped one is invalid JSON and must surface a parse error, not be silently accepted. Add `BadControlChar` to JsonParseError and reject any unescaped byte < 0x20 in the string body scan (which gates the decode path too, so escaped forms like `\t`/`\n`/`\u0009` still decode correctly; 0x20 and 0x7F are not over-rejected). Regression test in examples/0714: raw 0x09/0x0A/0x00 each raise BadControlChar via `?`/`!`; a positive case proves the escaped forms still decode to the right bytes. All prior assertions kept.
This commit is contained in:
@@ -42,6 +42,15 @@ raises :: (src: string, want: JsonParseError, alloc: Allocator) -> bool {
|
|||||||
e == want
|
e == want
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// True when parsing `"a<b>b"` (a string holding the RAW control byte `b`)
|
||||||
|
// raises BadControlChar. Built from a byte buffer because a raw control
|
||||||
|
// byte can't appear in an sx string literal.
|
||||||
|
// b: the byte stored at index 2, between 'a' and 'b'; alloc: forwarded unchanged to `raises`.
ctrl_raises :: (b: u8, alloc: Allocator) -> bool {
|
||||||
|
raw : [5]u8 = ---; // backing storage for the 5-byte document; all five elements are assigned on the next line
|
||||||
|
raw[0] = 34; raw[1] = 97; raw[2] = b; raw[3] = 98; raw[4] = 34; // 34 = '"', 97 = 'a', 98 = 'b' -> "a<b>b" with `b` stored as-is (not escaped)
|
||||||
|
return raises(string.{ ptr = @raw[0], len = 5 }, error.BadControlChar, alloc); // string view over raw[0..5]; result is whatever `raises` reports for want = BadControlChar
|
||||||
|
}
|
||||||
|
|
||||||
main :: () -> ! {
|
main :: () -> ! {
|
||||||
gpa := GPA.init();
|
gpa := GPA.init();
|
||||||
arena := Arena.init(xx gpa, 8192);
|
arena := Arena.init(xx gpa, 8192);
|
||||||
@@ -125,6 +134,21 @@ main :: () -> ! {
|
|||||||
report("err-overflow", raises("9223372036854775808", error.BadNumber, xx arena));
|
report("err-overflow", raises("9223372036854775808", error.BadNumber, xx arena));
|
||||||
report("err-unterminated", raises("\"abc", error.UnexpectedEnd, xx arena));
|
report("err-unterminated", raises("\"abc", error.UnexpectedEnd, xx arena));
|
||||||
|
|
||||||
|
// ── 7. RFC 8259 §7: unescaped control bytes (U+0000..U+001F) ──────
|
||||||
|
// A RAW control byte inside a string is invalid JSON -> BadControlChar.
|
||||||
|
report("err-raw-tab", ctrl_raises(9, xx arena)); // raw 0x09
|
||||||
|
report("err-raw-lf", ctrl_raises(10, xx arena)); // raw 0x0A
|
||||||
|
report("err-raw-nul", ctrl_raises(0, xx arena)); // raw 0x00
|
||||||
|
|
||||||
|
// POSITIVE: the ESCAPED control forms stay valid and decode to the
|
||||||
|
// exact bytes. JSON "\t\n\u0009" -> 0x09 0x0A 0x09 (3 bytes).
|
||||||
|
esc := try parse("\"\\t\\n\\u0009\"", xx arena);
|
||||||
|
es := esc.str;
|
||||||
|
report("esc-ctrl-len", es.len == 3);
|
||||||
|
report("esc-tab", es[0] == 0x09); // \t
|
||||||
|
report("esc-lf", es[1] == 0x0A); // \n
|
||||||
|
report("esc-u", es[2] == 0x09); // \u0009
|
||||||
|
|
||||||
print("=== DONE ===\n");
|
print("=== DONE ===\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,4 +33,11 @@ err-fraction: ok
|
|||||||
err-leading-zero: ok
|
err-leading-zero: ok
|
||||||
err-overflow: ok
|
err-overflow: ok
|
||||||
err-unterminated: ok
|
err-unterminated: ok
|
||||||
|
err-raw-tab: ok
|
||||||
|
err-raw-lf: ok
|
||||||
|
err-raw-nul: ok
|
||||||
|
esc-ctrl-len: ok
|
||||||
|
esc-tab: ok
|
||||||
|
esc-lf: ok
|
||||||
|
esc-u: ok
|
||||||
=== DONE ===
|
=== DONE ===
|
||||||
|
|||||||
@@ -349,9 +349,11 @@ write_to_file :: (v: Value, file: *File, staging: []u8) -> !JsonError {
|
|||||||
//
|
//
|
||||||
// NOT SUPPORTED (rejected, not silently accepted): a fraction or exponent
|
// NOT SUPPORTED (rejected, not silently accepted): a fraction or exponent
|
||||||
// in a number (`1.5`, `1e9`) → `BadNumber`; a number outside s64 →
|
// in a number (`1.5`, `1e9`) → `BadNumber`; a number outside s64 →
|
||||||
// `BadNumber`; a leading-zero integer (`01`) → `BadNumber`. UNESCAPED raw
|
// `BadNumber`; a leading-zero integer (`01`) → `BadNumber`. An UNESCAPED
|
||||||
// control bytes (< 0x20) inside a string are passed through verbatim (the
|
// raw control byte (U+0000..U+001F) inside a string → `BadControlChar`
|
||||||
// minimal-reader leniency the manifest / db.json never exercise).
|
// (RFC 8259 §7 requires those bytes to be escaped); the escaped forms
|
||||||
|
// (`\t`, `\n`, `\u0009`, …) stay valid and decode normally. Bytes >= 0x20,
|
||||||
|
// including 0x7F (DEL) and all non-ASCII UTF-8 bytes (>= 0x80), pass through.
|
||||||
//
|
//
|
||||||
// HEAP DISCIPLINE (binding, see heap-discipline.md). Exactly two kinds of
|
// HEAP DISCIPLINE (binding, see heap-discipline.md). Exactly two kinds of
|
||||||
// allocation happen, both through the EXPLICIT `alloc` parameter, never
|
// allocation happen, both through the EXPLICIT `alloc` parameter, never
|
||||||
@@ -377,7 +379,7 @@ write_to_file :: (v: Value, file: *File, staging: []u8) -> !JsonError {
|
|||||||
|
|
||||||
// The reader's failure contract. Meaningful variants so a caller can tell
|
// The reader's failure contract. Meaningful variants so a caller can tell
|
||||||
// a truncated document from a bad escape from trailing junk.
|
// a truncated document from a bad escape from trailing junk.
|
||||||
JsonParseError :: error { UnexpectedToken, UnexpectedEnd, BadEscape, BadNumber, TrailingGarbage }
|
JsonParseError :: error { UnexpectedToken, UnexpectedEnd, BadEscape, BadNumber, TrailingGarbage, BadControlChar }
|
||||||
|
|
||||||
// Lowercase/uppercase hex nibble value (0..15) of an ASCII byte; a non-hex
|
// Lowercase/uppercase hex nibble value (0..15) of an ASCII byte; a non-hex
|
||||||
// byte in a `\uXXXX` escape is a `BadEscape`.
|
// byte in a `\uXXXX` escape is a `BadEscape`.
|
||||||
@@ -518,6 +520,11 @@ Parser :: struct {
|
|||||||
has_escape = true;
|
has_escape = true;
|
||||||
i += 1;
|
i += 1;
|
||||||
if i >= self.src.len { raise error.UnexpectedEnd; }
|
if i >= self.src.len { raise error.UnexpectedEnd; }
|
||||||
|
} else if c < 32 {
|
||||||
|
// RFC 8259 §7: a raw control byte (U+0000..U+001F) must be
|
||||||
|
// escaped inside a string; an unescaped one is invalid JSON.
|
||||||
|
self.pos = i;
|
||||||
|
raise error.BadControlChar;
|
||||||
}
|
}
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user