url: strip invisible chars at decode + reject LRM/RLM at launch
NSDataDetector and android.util.Patterns occasionally drag invisible chars inside a URL when the source text uses them as soft separators — tab / LF / CR / NBSP / zero-width markers / BOM / word joiner. The OS URL parser then refuses the result and the tap silently fails. `_sanitizeUrl` strips that fixed set on the Dart side as matches come out of `_decode`, so the persisted V17 spans only carry openable bytes. Bidi controls (U+202A..E, U+2066..9) are left in — they're a spoofing primitive, not noise, and `_isLaunchable` rejects them outright. LRM / RLM (U+200E/F) are added to the same launch reject set so a legacy persisted URL that escaped the strip can't reach the OS handler.
This commit is contained in:
@@ -76,12 +76,14 @@ bool _isLaunchable(String url) {
|
||||
if (colon <= 0) return false;
|
||||
final scheme = url.substring(0, colon).toLowerCase();
|
||||
if (!_kLaunchSchemes.contains(scheme)) return false;
|
||||
// Bidi controls let a sender visually reverse the displayed URL while
|
||||
// the byte order — what the OS opens — points elsewhere. Reject any
|
||||
// URL containing them rather than guessing the user's intent.
|
||||
// Bidi controls + directional marks let a sender visually reverse the
|
||||
// displayed URL while the byte order — what the OS opens — points
|
||||
// elsewhere. Reject rather than guessing intent.
|
||||
for (int i = 0; i < url.length; i++) {
|
||||
final c = url.codeUnitAt(i);
|
||||
if ((c >= 0x202A && c <= 0x202E) || (c >= 0x2066 && c <= 0x2069)) {
|
||||
if (c == 0x200E || c == 0x200F /* LRM / RLM */ ||
|
||||
(c >= 0x202A && c <= 0x202E) ||
|
||||
(c >= 0x2066 && c <= 0x2069)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -93,6 +95,33 @@ bool _isLaunchable(String url) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Strips invisible / whitespace chars that NSDataDetector or
/// `Patterns.WEB_URL` sometimes drag inside a URL — typically when the
/// source text uses them as soft separators. Leaving them in produces a
/// URL the OS refuses to open later.
///
/// The stripped UTF-16 code units are: tab, LF, CR, NBSP, U+200B..U+200D
/// (ZWSP / ZWNJ / ZWJ), U+2060..U+2065 (word joiner + invisible operators),
/// U+206A..U+206F (deprecated format controls) and U+FEFF (BOM / ZWNBSP).
///
/// Bidi controls are deliberately NOT stripped: U+202A..U+202E and the
/// isolates U+2066..U+2069 fall outside every range above, as do LRM / RLM
/// (U+200E / U+200F). They survive into [_isLaunchable], which rejects any
/// URL containing them. Silently removing them here would turn a spoofed
/// URL into a launchable one instead of refusing it.
///
/// Returns [url] itself (no copy) when it contains nothing to strip.
String _sanitizeUrl(String url) {
  // Allocated lazily on the first stripped code unit, so the common
  // already-clean case does no copying at all.
  StringBuffer? out;
  for (int i = 0; i < url.length; i++) {
    final c = url.codeUnitAt(i);
    final strip =
        c == 0x09 /* tab */ ||
        c == 0x0A /* LF */ ||
        c == 0x0D /* CR */ ||
        c == 0xA0 /* NBSP */ ||
        (c >= 0x200B && c <= 0x200D) /* ZWSP / ZWNJ / ZWJ */ ||
        (c >= 0x2060 && c <= 0x2065) /* word joiner + invisible ops */ ||
        // U+2066..U+2069 (LRI / RLI / FSI / PDI) are skipped on purpose:
        // they are bidi isolates and must reach the launch-time reject.
        (c >= 0x206A && c <= 0x206F) /* deprecated format controls */ ||
        c == 0xFEFF /* BOM / ZWNBSP */;
    if (strip) {
      // Seed the buffer with the clean prefix seen so far.
      out ??= StringBuffer(url.substring(0, i));
      continue;
    }
    // No-op until the first strip has created the buffer.
    out?.writeCharCode(c);
  }
  return out?.toString() ?? url;
}
|
||||
|
||||
/// One detected span inside a source string.
|
||||
class UrlMatch {
|
||||
/// Creates a span with UTF-16 [start]/[end] offsets and a canonical [url].
|
||||
@@ -232,7 +261,9 @@ List<UrlMatch> _decode(Uint8List buf) {
|
||||
final urlLen = view.getUint32(p + 12, Endian.little);
|
||||
p += 16;
|
||||
if (p + urlLen > buf.length) break;
|
||||
final url = utf8.decode(Uint8List.sublistView(buf, p, p + urlLen));
|
||||
final url = _sanitizeUrl(
|
||||
utf8.decode(Uint8List.sublistView(buf, p, p + urlLen)),
|
||||
);
|
||||
p += urlLen;
|
||||
out.add(UrlMatch(
|
||||
start: start,
|
||||
|
||||
Reference in New Issue
Block a user