Files
ux/macos/Classes/url_detect.m
agra b4b5ee58a9 feat: UxUrl — native URL / phone / email detection + tap launcher
Sync FFI from Dart into platform detectors:
- iOS / macOS: NSDataDetector(.link | .phoneNumber) + a tight bare-domain
  pass that requires `/` or `?` (so `etc.` / `v1.2.3` don't false-positive
  while `example.com/path` does match). NFKD-fold the phone capture so
  full-width / Arabic-Indic digits collapse to ASCII; stop the digit run
  at the first letter so `+1 555 1234 ext.99` doesn't fuse the extension.
- Android: JNI into android.util.Patterns (WEB_URL / EMAIL_ADDRESS / PHONE)
  via a cached JavaVM, std::call_once for init, full per-call
  ExceptionCheck coverage. UTF-16→UTF-8 conversion is hand-rolled to dodge
  the Modified-UTF-8 / CESU-8 incompatibility with Dart's utf8.decode.

`UxUrl.launch(url)` is the matching tap action. Channel side dispatches via
UIApplication / NSWorkspace / Intent.ACTION_VIEW. Dart-side gates the URL
against a scheme allowlist (http, https, mailto, tel, sms, banlu, tg),
rejects bidi-override controls (U+202A..E / U+2066..9) to prevent visual
spoofs, and blocks USSD / MMI tel: codes containing `*` or `#`.

Library/native cleanup along the way:
- Renamed libux_keyboard.so to libux.so (also covers sensor + url).
- Collapsed three near-identical FFI loader stanzas across keyboard / sensor
  / url into a shared lib/src/_ffi.dart with `uxLib` + typed `uxLookupX`
  helpers.
2026-05-14 22:59:25 +03:00

212 lines
8.6 KiB
Objective-C

// Native data detection for UxUrl. Synchronous, callable via dart:ffi.
//
// Exports two symbols:
// uint8_t* ux_match_url(const uint16_t* utf16, int32_t len, int32_t* out_size);
// void ux_free(uint8_t* buf);
//
// Output buffer layout (little-endian on Apple ARM64 / x86_64):
// u32 count
// count * { i32 start, i32 end, u32 kind, u32 url_len, u8[url_len] url_utf8 }
// where kind ∈ { 0=web, 1=email, 2=phone }.
//
// Backed by NSDataDetector (.link | .phoneNumber). The detector itself
// won't flag bare domains like `google.com` (Apple's stock apps make
// the same trade-off), so we run a second pass with a tight regex that
// requires a `/` or `?` after the domain — that pulls in `example.com/path`
// without dragging in `etc.` / `v1.2.3` / `Mr.Smith` false positives.
#import <Foundation/Foundation.h>
#import <stdint.h>
#import <stdlib.h>
#import <string.h>
// Match classification codes written into the `kind` field of every record in
// the buffer returned by ux_match_url. The numeric values are part of the wire
// format described in the header comment (0=web, 1=email, 2=phone).
// They also act as the final tie-breaker when two matches cover the same
// range: the higher value wins.
static const uint32_t kKindWeb = 0;
static const uint32_t kKindEmail = 1;
static const uint32_t kKindPhone = 2;
// One candidate match collected during detection, before sorting and
// de-overlapping. Offsets are UTF-16 code-unit indices into the input text.
@interface UxUrlRawMatch : NSObject

// UTF-8 bytes of the URL that should be launched for this match.
@property (nonatomic, copy) NSData *urlUtf8;

// One of kKindWeb / kKindEmail / kKindPhone.
@property (nonatomic, assign) uint32_t kind;

// Index of the first code unit covered by the match (inclusive).
@property (nonatomic, assign) int32_t start;

// Index one past the last code unit covered by the match (exclusive).
@property (nonatomic, assign) int32_t end;

@end

@implementation UxUrlRawMatch
// All accessors are auto-synthesized; the class is a plain value holder.
@end
// Returns the shared NSDataDetector configured for links and phone numbers.
// The instance is created once, on first call, and reused afterwards.
// The result is nil if the detector could not be created.
static NSDataDetector *ux_url_data_detector(void) {
    static dispatch_once_t onceToken;
    static NSDataDetector *sharedDetector = nil;
    dispatch_once(&onceToken, ^{
        const NSTextCheckingTypes wanted =
            NSTextCheckingTypePhoneNumber | NSTextCheckingTypeLink;
        // The error detail is not inspected; a nil detector is the failure signal.
        sharedDetector = [NSDataDetector dataDetectorWithTypes:wanted error:NULL];
    });
    return sharedDetector;
}
// Returns the shared, lazily compiled regex for scheme-less URLs.
//
// Shape of a match: a hostname label, one or more `.label` segments, then a
// `/` or `?` followed by every non-whitespace character up to the next
// whitespace. Labels are ASCII letters, digits and `-` only (matched
// case-insensitively), so bare IDN domains are not detected.
// The result is nil if the pattern failed to compile.
static NSRegularExpression *ux_url_bare_domain_regex(void) {
    static dispatch_once_t onceToken;
    static NSRegularExpression *sharedRegex = nil;
    dispatch_once(&onceToken, ^{
        static NSString *const kBareDomainPattern =
            @"\\b[a-z0-9-]+(\\.[a-z0-9-]+)+([/?][^\\s]*)";
        // The error detail is not inspected; a nil regex is the failure signal.
        sharedRegex =
            [NSRegularExpression regularExpressionWithPattern:kBareDomainPattern
                                                      options:NSRegularExpressionCaseInsensitive
                                                        error:NULL];
    });
    return sharedRegex;
}
// Encodes `s` as UTF-8 bytes. Lossy conversion is enabled, so characters that
// cannot be encoded are substituted rather than failing the whole call.
static NSData *ux_utf8(NSString *s) {
    NSData *encoded = [s dataUsingEncoding:NSUTF8StringEncoding
                      allowLossyConversion:YES];
    return encoded;
}
// Builds the `tel:` URI for a phone number reported by NSDataDetector.
// Returns nil when no dialable digit survives, so callers never emit a bare
// "tel:" link.
static NSString *ux_url_tel_uri(NSString *raw) {
    // NFKD folds compatibility forms (e.g. full-width digits) to ASCII.
    // Arabic-Indic and Extended Arabic-Indic digits have no compatibility
    // decomposition, so they are mapped explicitly below.
    NSString *folded = [raw decomposedStringWithCompatibilityMapping];
    NSMutableString *dial = [NSMutableString stringWithCapacity:folded.length];
    BOOL sawDigit = NO;
    for (NSUInteger i = 0; i < folded.length; i++) {
        unichar c = [folded characterAtIndex:i];
        if (c >= 0x0660 && c <= 0x0669) {
            c = (unichar)('0' + (c - 0x0660));
        } else if (c >= 0x06F0 && c <= 0x06F9) {
            c = (unichar)('0' + (c - 0x06F0));
        }
        if (c >= '0' && c <= '9') {
            [dial appendFormat:@"%C", c];
            sawDigit = YES;
        } else if (c == '+') {
            // `+` is only meaningful as the leading international prefix.
            if (dial.length == 0) [dial appendString:@"+"];
        } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
            // First letter ends the number, so "ext.99" is never fused in.
            break;
        }
        // Anything else (spaces, dashes, parentheses, ...) is a separator.
    }
    if (!sawDigit) return nil;
    return [@"tel:" stringByAppendingString:dial];
}

// Returns how many leading UTF-16 units of `candidate` remain once trailing
// sentence punctuation and unbalanced closing brackets are removed.
// A closing bracket is only removed while the candidate holds more closers
// than openers of that type, so "wiki/Foo_(bar)" keeps its parenthesis.
static NSUInteger ux_url_trimmed_length(NSString *candidate) {
    NSUInteger end = candidate.length;
    // surplus[i] = closers minus openers for (), [] and {} respectively.
    NSInteger surplus[3] = {0, 0, 0};
    for (NSUInteger i = 0; i < end; i++) {
        switch ([candidate characterAtIndex:i]) {
            case '(': surplus[0]--; break;
            case ')': surplus[0]++; break;
            case '[': surplus[1]--; break;
            case ']': surplus[1]++; break;
            case '{': surplus[2]--; break;
            case '}': surplus[2]++; break;
            default: break;
        }
    }
    while (end > 0) {
        NSInteger *slot = NULL;
        switch ([candidate characterAtIndex:end - 1]) {
            case '.': case ',': case ';': case ':':
            case '!': case '?': case '\'': case '"':
                end--;
                continue;
            case ')': slot = &surplus[0]; break;
            case ']': slot = &surplus[1]; break;
            case '}': slot = &surplus[2]; break;
            default: break;
        }
        if (slot == NULL || *slot <= 0) break;
        (*slot)--;
        end--;
    }
    return end;
}

// Packs `matches` into a malloc'd buffer using the layout documented on
// ux_match_url. Returns NULL (leaving *out_size untouched) when the buffer
// cannot be allocated or its size does not fit in int32_t.
static uint8_t *ux_url_serialize(NSArray<UxUrlRawMatch *> *matches, int32_t *out_size) {
    NSUInteger total = 4;
    for (UxUrlRawMatch *m in matches) {
        total += 16 + (NSUInteger)m.urlUtf8.length;
    }
    // The size is reported through an int32_t; refuse anything larger
    // instead of handing back a truncated or negative length.
    if (total > (NSUInteger)INT32_MAX) return NULL;

    uint8_t *buf = (uint8_t *)malloc(total);
    if (buf == NULL) return NULL;

    const uint32_t count = (uint32_t)matches.count;
    memcpy(buf, &count, 4);
    NSUInteger off = 4;
    for (UxUrlRawMatch *m in matches) {
        const int32_t start = m.start;
        const int32_t end = m.end;
        const uint32_t kind = m.kind;
        const uint32_t urlLen = (uint32_t)m.urlUtf8.length;
        memcpy(buf + off + 0, &start, 4);
        memcpy(buf + off + 4, &end, 4);
        memcpy(buf + off + 8, &kind, 4);
        memcpy(buf + off + 12, &urlLen, 4);
        memcpy(buf + off + 16, m.urlUtf8.bytes, urlLen);
        off += 16 + urlLen;
    }
    if (out_size) *out_size = (int32_t)total;
    return buf;
}

// Detects web links, e-mail addresses and phone numbers in a UTF-16 string.
//
// Parameters:
//   utf16    - pointer to `len` UTF-16 code units (no terminator required).
//   len      - number of code units; values <= 0 yield NULL.
//   out_size - optional; receives the byte size of the returned buffer, or 0
//              when NULL is returned.
//
// Returns a malloc'd buffer that the caller must release with ux_free, or
// NULL when nothing was detected or the result could not be built. Layout:
//   u32 count
//   count * { i32 start, i32 end, u32 kind, u32 url_len, u8[url_len] url_utf8 }
// `start` / `end` are UTF-16 offsets into the input (end exclusive). Records
// are ordered by `start` and never overlap.
//
// Two passes feed the result: NSDataDetector (links + phone numbers) and a
// bare-domain regex for scheme-less `host/path` or `host?query` text, which
// is launched as `http://…`.
__attribute__((visibility("default")))
uint8_t *ux_match_url(const uint16_t *utf16, int32_t len, int32_t *out_size) {
    if (out_size) *out_size = 0;
    if (utf16 == NULL || len <= 0) return NULL;
    @autoreleasepool {
        NSString *text = [[NSString alloc] initWithCharacters:(const unichar *)utf16
                                                       length:(NSUInteger)len];
        if (text.length == 0) return NULL;
        const NSRange whole = NSMakeRange(0, text.length);
        NSMutableArray<UxUrlRawMatch *> *raws = [NSMutableArray array];

        // Pass 1: system detector for full links, e-mail and phone numbers.
        NSDataDetector *detector = ux_url_data_detector();
        if (detector != nil) {
            [detector enumerateMatchesInString:text
                                       options:0
                                         range:whole
                                    usingBlock:^(NSTextCheckingResult *result,
                                                 NSMatchingFlags flags,
                                                 BOOL *stop) {
                if (result == nil) return;
                const NSRange r = result.range;
                if (r.location == NSNotFound || r.length == 0) return;

                NSString *url = nil;
                uint32_t kind = kKindWeb;
                if (result.resultType == NSTextCheckingTypePhoneNumber) {
                    // nil when the capture holds no digit at all.
                    url = ux_url_tel_uri(result.phoneNumber ?: @"");
                    kind = kKindPhone;
                } else if (result.resultType == NSTextCheckingTypeLink) {
                    NSURL *u = result.URL;
                    if (u == nil) return;
                    NSString *scheme = u.scheme.lowercaseString ?: @"";
                    kind = [scheme isEqualToString:@"mailto"] ? kKindEmail : kKindWeb;
                    url = u.absoluteString;
                } else {
                    return;
                }
                if (url.length == 0) return;
                NSData *payload = ux_utf8(url);
                if (payload.length == 0) return;

                UxUrlRawMatch *m = [[UxUrlRawMatch alloc] init];
                m.start = (int32_t)r.location;
                m.end = (int32_t)(r.location + r.length);
                m.kind = kind;
                m.urlUtf8 = payload;
                [raws addObject:m];
            }];
        }

        // Pass 2: scheme-less domains that carry a path or query.
        NSRegularExpression *bareRe = ux_url_bare_domain_regex();
        if (bareRe != nil) {
            NSCharacterSet *suffixStart =
                [NSCharacterSet characterSetWithCharactersInString:@"/?"];
            [bareRe enumerateMatchesInString:text
                                     options:0
                                       range:whole
                                  usingBlock:^(NSTextCheckingResult *result,
                                               NSMatchingFlags flags,
                                               BOOL *stop) {
                if (result == nil) return;
                const NSRange r = result.range;
                if (r.location == NSNotFound || r.length == 0) return;

                NSString *candidate = [text substringWithRange:r];
                // The regex runs to the next whitespace, so it swallows the
                // punctuation that ends a sentence ("…/path." or "v1.2.3?").
                // Trim it, then insist that the `/` or `?` which qualified
                // the match is still present; otherwise this is just a bare
                // domain or version string followed by punctuation.
                const NSUInteger keep = ux_url_trimmed_length(candidate);
                const NSRange sep = [candidate rangeOfCharacterFromSet:suffixStart];
                if (sep.location == NSNotFound || keep <= sep.location) return;

                NSString *trimmed = [candidate substringToIndex:keep];
                NSData *payload = ux_utf8([@"http://" stringByAppendingString:trimmed]);
                if (payload.length == 0) return;

                UxUrlRawMatch *m = [[UxUrlRawMatch alloc] init];
                m.start = (int32_t)r.location;
                m.end = (int32_t)(r.location + keep);
                m.kind = kKindWeb;
                m.urlUtf8 = payload;
                [raws addObject:m];
            }];
        }
        if (raws.count == 0) return NULL;

        // Sort: start asc, then length desc, then kind desc
        // (phone > email > web on tie).
        [raws sortUsingComparator:^NSComparisonResult(UxUrlRawMatch *a, UxUrlRawMatch *b) {
            if (a.start != b.start) {
                return a.start < b.start ? NSOrderedAscending : NSOrderedDescending;
            }
            const int32_t la = a.end - a.start;
            const int32_t lb = b.end - b.start;
            if (la != lb) return la > lb ? NSOrderedAscending : NSOrderedDescending;
            if (a.kind != b.kind) {
                return a.kind > b.kind ? NSOrderedAscending : NSOrderedDescending;
            }
            return NSOrderedSame;
        }];

        // Greedy de-overlap: keep a match only if it starts at or after the
        // end of the previously kept one.
        NSMutableArray<UxUrlRawMatch *> *kept =
            [NSMutableArray arrayWithCapacity:raws.count];
        int32_t lastEnd = 0;
        for (UxUrlRawMatch *m in raws) {
            if (kept.count > 0 && m.start < lastEnd) continue;
            [kept addObject:m];
            lastEnd = m.end;
        }

        return ux_url_serialize(kept, out_size);
    }
}
// Releases a buffer previously returned by ux_match_url.
// Passing NULL is allowed and does nothing.
__attribute__((visibility("default")))
void ux_free(uint8_t *buf) {
    // free(NULL) is defined as a no-op, so no explicit guard is required.
    free(buf);
}