metal: pause step 3b pending sx-side fixes (filed 0024-0030)

Step 3b code is wired across UIRenderer + GlyphCache + UIPipeline +
chess game (gpu_mode = .metal on iOS, MetalGPU bound via the GPU
protocol). macOS GL chess, iOS-sim GLES chess, and iOS-sim Metal
triangle (63-metal-clear.sx) all still render.

iOS-sim Metal chess crashes inside replaceRegion uploading the 1MB
font atlas. Bisecting that crash exposed several sx-language issues
where mid-bisect tracers (NSLog inside if/else branch bodies) didn't
produce output, blocking further investigation.

Filing each finding as examples/issue-NNNN.sx rather than working
around piecemeal:

Bugs:
- 0024 NSLog/foreign-call inside if/else body not producing output
- 0025 C-ABI param coercion incomplete for composites >16B
       (combined direct-call abiCoerceParamType TODO + call_indirect
        path that doesn't apply C-ABI coercion at all)
- 0026 replaceRegion 1MB upload crash (likely downstream of 0025)

Features needed for step 4 + cleanup:
- 0027 Obj-C block bridge (^{...}) for animateWithDuration:
- 0028 Optional protocol box (?GPU = null) replaces T = ---; has_T: bool
- 0029 destroy_texture/buffer/shader on GPU protocol
- 0030 extern cross-file globals

Library-side: renderer.sx + glyph_cache.sx + pipeline.sx gain a
`gpu: GPU = ---; has_gpu: bool` field pair + branches that route every
GL touchpoint through the protocol when has_gpu. glyph_cache.init
saves/restores those fields around its memset. pipeline.set_gpu()
propagates to renderer + font. Renderer's MSL shader source added as
UI_MSL_SRC using packed_float2/packed_float4 to keep the 12-float
interleaved vertex layout tight (48 bytes).

metal.sx: dual-phase init (init(null, 0, 0) for eager device+queue,
re-init with the layer once UIKit installs the SxMetalView).
setStorageMode:.shared on every texture descriptor to ensure CPU-
writable atlas pixels on Apple Silicon iOS-sim.

Regression suite: 68 passing, 0 failed. The WASM chess build is currently
broken under the step 3b state (silent compiler crash); this is documented
in CHECKPOINT.md and is likely fallout from one of the filed issues
(probably 0028 — the verbose protocol-box pattern). Step 3b resumes after
0024-0030 land.
This commit is contained in:
agra
2026-05-17 21:17:17 +03:00
parent a938c4f900
commit a1647eab9b
11 changed files with 783 additions and 97 deletions

View File

@@ -28,6 +28,12 @@ MTL_PIXEL_FORMAT_R8_UNORM :u64: 10;
MTL_LOAD_ACTION_CLEAR :u64: 2;
MTL_STORE_ACTION_STORE :u64: 1;
// MTLStorageMode. For UI atlases + sprites the CPU needs to write pixels
// and the GPU needs to sample — `.shared` is the safe default. On iOS-sim
// under Apple Silicon the convenience class method's default storage
// isn't reliably shared, so we set it explicitly in metal_create_texture_ios.
MTL_STORAGE_MODE_SHARED :u64: 0;
// MTLPrimitiveType.
MTL_PRIMITIVE_TYPE_TRIANGLE :u64: 3;
@@ -84,11 +90,18 @@ MetalGPU :: struct {
}
impl GPU for MetalGPU {
// Two-phase init: callers can `init(null, 0, 0)` first to allocate
// device + queue eagerly (lets the UI pipeline compile shaders before
// UIKit hands us a layer), then re-call `init(layer, w, h)` once the
// CAMetalLayer is available. The second call only updates the layer
// ref + dims; device/queue are preserved.
init :: (self: *MetalGPU, target: *void, pixel_w: s32, pixel_h: s32) -> bool {
    // Non-iOS builds bail out here and never reach the Metal helper below.
    inline if OS != .ios { return false; }

    // Adopt the layer and drawable dimensions only when a layer was actually
    // supplied. A null-target call (the eager first phase) must leave any
    // previously stored layer / size untouched; assigning unconditionally
    // would overwrite them with null / 0.
    if target != null {
        self.layer = target;
        self.pixel_w = pixel_w;
        self.pixel_h = pixel_h;
    }

    // Shared helper performs the device / queue / layer setup for both phases.
    metal_init_ios(self);
}
@@ -200,12 +213,19 @@ impl GPU for MetalGPU {
// so non-iOS builds never reference the unresolved Metal symbols below.
// ───────────────────────────────────────────────────────────────────────────
// init() may be called twice: once with target==null to create device +
// queue eagerly (so the UI pipeline can compile shaders before UIKit
// has a layer for us), then again with target=CAMetalLayer once
// `-[SxAppDelegate didFinishLaunching:]` has installed the view.
// Both calls go through this helper; it's idempotent on the device/queue
// and only touches the layer when one's been supplied.
metal_init_ios :: (self: *MetalGPU) -> bool {
inline if OS != .ios { return false; }
if self.layer == null { return false; }
self.device = MTLCreateSystemDefaultDevice();
if self.device == null { return false; }
if self.device == null {
self.device = MTLCreateSystemDefaultDevice();
if self.device == null { return false; }
}
msg_oo : (*void, *void, *void) -> void = xx objc_msgSend;
msg_ou : (*void, *void, u64) -> void = xx objc_msgSend;
@@ -213,15 +233,19 @@ metal_init_ios :: (self: *MetalGPU) -> bool {
msg_osize : (*void, *void, CGSize) -> void = xx objc_msgSend;
msg_o : (*void, *void) -> *void = xx objc_msgSend;
msg_oo(self.layer, sel_registerName("setDevice:".ptr), self.device);
msg_ou(self.layer, sel_registerName("setPixelFormat:".ptr), MTL_PIXEL_FORMAT_BGRA8_UNORM);
msg_ob(self.layer, sel_registerName("setFramebufferOnly:".ptr), 1);
if self.queue == null {
self.queue = msg_o(self.device, sel_registerName("newCommandQueue".ptr));
if self.queue == null { return false; }
}
size := CGSize.{ width = xx self.pixel_w, height = xx self.pixel_h };
msg_osize(self.layer, sel_registerName("setDrawableSize:".ptr), size);
if self.layer != null {
msg_oo(self.layer, sel_registerName("setDevice:".ptr), self.device);
msg_ou(self.layer, sel_registerName("setPixelFormat:".ptr), MTL_PIXEL_FORMAT_BGRA8_UNORM);
msg_ob(self.layer, sel_registerName("setFramebufferOnly:".ptr), 1);
self.queue = msg_o(self.device, sel_registerName("newCommandQueue".ptr));
if self.queue == null { return false; }
size := CGSize.{ width = xx self.pixel_w, height = xx self.pixel_h };
msg_osize(self.layer, sel_registerName("setDrawableSize:".ptr), size);
}
true;
}
@@ -457,6 +481,12 @@ metal_create_texture_ios :: (self: *MetalGPU, w: s32, h: s32, format: TextureFor
pixel_format, xx w, xx h, 0);
if desc == null { return 0; }
// Force shared storage so the CPU can keep writing pixels (atlas updates,
// sprite uploads). On iOS-sim under Apple Silicon the convenience class
// method's default storage isn't reliably shared for every format.
msg_ou_void : (*void, *void, u64) -> void = xx objc_msgSend;
msg_ou_void(desc, sel_registerName("setStorageMode:".ptr), MTL_STORAGE_MODE_SHARED);
msg_oo : (*void, *void, *void) -> *void = xx objc_msgSend;
tex := msg_oo(self.device, sel_registerName("newTextureWithDescriptor:".ptr), desc);
if tex == null { return 0; }

View File

@@ -1,5 +1,7 @@
#import "modules/std.sx";
#import "modules/opengl.sx";
#import "modules/gpu/types.sx";
#import "modules/gpu/api.sx";
#import "modules/stb_truetype.sx";
#import "modules/ui/types.sx";
@@ -176,9 +178,20 @@ GlyphCache :: struct {
last_shape_len: s64;
last_shape_size_q: u16;
// GPU protocol backend. When `has_gpu`, atlas creation + dirty uploads
// route through `gpu` instead of raw GL.
gpu: GPU = ---;
has_gpu: bool = false;
init :: (self: *GlyphCache, path: [:0]u8, default_size: f32) {
// Preserve any pre-set GPU dispatch across the zero-out — the
// surrounding struct memset would otherwise wipe it.
saved_gpu := self.gpu;
saved_has_gpu := self.has_gpu;
// Zero out the entire struct first (parent may be uninitialized with = ---)
memset(self, 0, size_of(GlyphCache));
self.gpu = saved_gpu;
self.has_gpu = saved_has_gpu;
// Load font file
file_size : s32 = 0;
@@ -245,15 +258,25 @@ GlyphCache :: struct {
val_bytes : s64 = self.hash_cap * 8; // s64 per slot (s32 would suffice but alignment)
self.hash_vals = xx context.allocator.alloc(val_bytes);
// Create OpenGL texture
glGenTextures(1, @self.texture_id);
glBindTexture(GL_TEXTURE_2D, self.texture_id);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glTexImage2D(GL_TEXTURE_2D, 0, xx GL_R8, self.atlas_width, self.atlas_height, 0, GL_RED, GL_UNSIGNED_BYTE, self.bitmap);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, xx GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, xx GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, xx GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, xx GL_CLAMP_TO_EDGE);
// Create the atlas texture. In GPU-protocol mode we create empty and
// let the first `flush()` push the (zero-initialized) bitmap via
// update_texture_region — same result as the GL path's glTexImage2D
// with the zeroed bitmap, but works whether or not the backend
// accepts CPU pixel pointers at create time.
if self.has_gpu {
self.texture_id = self.gpu.create_texture(
self.atlas_width, self.atlas_height, .r8, null);
self.dirty = true;
} else {
glGenTextures(1, @self.texture_id);
glBindTexture(GL_TEXTURE_2D, self.texture_id);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glTexImage2D(GL_TEXTURE_2D, 0, xx GL_R8, self.atlas_width, self.atlas_height, 0, GL_RED, GL_UNSIGNED_BYTE, self.bitmap);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, xx GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, xx GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, xx GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, xx GL_CLAMP_TO_EDGE);
}
out("GlyphCache initialized: ");
out(path);
@@ -406,9 +429,14 @@ GlyphCache :: struct {
// Upload dirty atlas to GPU
flush :: (self: *GlyphCache) {
    // Nothing changed in the CPU-side bitmap since the last upload.
    if self.dirty == false { return; }

    // Exactly one backend receives the upload. Issuing the GL calls
    // unconditionally would hand a GPU-protocol texture handle to GL in
    // GPU mode and upload the atlas twice in GL mode.
    if self.has_gpu {
        // GPU protocol: push the whole atlas as a single region.
        self.gpu.update_texture_region(self.texture_id, 0, 0,
            self.atlas_width, self.atlas_height, xx self.bitmap);
    } else {
        glBindTexture(GL_TEXTURE_2D, self.texture_id);
        // Rows are uploaded as 1 byte per texel (GL_RED / GL_UNSIGNED_BYTE),
        // so drop the unpack alignment to 1.
        glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, self.atlas_width, self.atlas_height, GL_RED, GL_UNSIGNED_BYTE, self.bitmap);
    }

    self.dirty = false;
}
@@ -464,16 +492,23 @@ GlyphCache :: struct {
self.atlas_width = new_w;
self.atlas_height = new_h;
// Recreate GL texture
glDeleteTextures(1, @self.texture_id);
glGenTextures(1, @self.texture_id);
glBindTexture(GL_TEXTURE_2D, self.texture_id);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glTexImage2D(GL_TEXTURE_2D, 0, xx GL_R8, new_w, new_h, 0, GL_RED, GL_UNSIGNED_BYTE, new_bitmap);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, xx GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, xx GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, xx GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, xx GL_CLAMP_TO_EDGE);
// Recreate atlas at the new size.
if self.has_gpu {
// No destroy_texture in the GPU protocol yet — old atlas
// leaks in the backend table until process exit. Atlas grow
// is rare so this is acceptable for now.
self.texture_id = self.gpu.create_texture(new_w, new_h, .r8, xx new_bitmap);
} else {
glDeleteTextures(1, @self.texture_id);
glGenTextures(1, @self.texture_id);
glBindTexture(GL_TEXTURE_2D, self.texture_id);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glTexImage2D(GL_TEXTURE_2D, 0, xx GL_R8, new_w, new_h, 0, GL_RED, GL_UNSIGNED_BYTE, new_bitmap);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, xx GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, xx GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, xx GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, xx GL_CLAMP_TO_EDGE);
}
// Recompute UV coordinates for all cached glyphs
atlas_wf : f32 = xx new_w;

View File

@@ -1,6 +1,7 @@
#import "modules/std.sx";
#import "modules/allocators.sx";
#import "modules/opengl.sx";
#import "modules/gpu/api.sx";
#import "modules/ui/types.sx";
#import "modules/ui/render.sx";
#import "modules/ui/events.sx";
@@ -24,6 +25,23 @@ UIPipeline :: struct {
has_body: bool;
parent_allocator: Allocator;
// GPU protocol backend. When `has_gpu`, the pipeline propagates this
// to its renderer + font, and skips the per-frame GL state setup in
// commit_gpu (Metal bakes blend mode into the pipeline state).
gpu: GPU = ---;
has_gpu: bool = false;
// Set the GPU dispatch BEFORE calling init() / init_font() so the
// shaders + atlas land on the right backend.
set_gpu :: (self: *UIPipeline, gpu: GPU) {
    // Pipeline-level copy: commit_gpu consults `has_gpu` to decide whether
    // to touch GL state.
    self.has_gpu = true;
    self.gpu = gpu;

    // Renderer gets the same backend.
    self.renderer.has_gpu = true;
    self.renderer.gpu = gpu;

    // Font (glyph atlas) gets the same backend.
    self.font.has_gpu = true;
    self.font.gpu = gpu;
}
init :: (self: *UIPipeline, width: f32, height: f32) {
self.render_tree = RenderTree.init();
self.renderer.init();
@@ -149,14 +167,18 @@ UIPipeline :: struct {
}
commit_gpu :: (self: *UIPipeline) {
    // Per-frame GL state applies to the GL backend only. In GPU-protocol
    // mode these calls must be skipped entirely, so they live behind the
    // `has_gpu` check instead of running unconditionally.
    if !self.has_gpu {
        glEnable(GL_BLEND);
        glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
        glDisable(GL_DEPTH_TEST);
    }

    // Backend-agnostic: the renderer routes through GL or the GPU protocol
    // based on its own `has_gpu` flag.
    self.renderer.begin(self.screen_width, self.screen_height, self.font.texture_id);
    self.renderer.process(@self.render_tree);
    self.renderer.flush();

    // Restore the blend state that was enabled above (GL backend only).
    if !self.has_gpu {
        glDisable(GL_BLEND);
    }
}
}

View File

@@ -2,6 +2,8 @@
#import "modules/compiler.sx";
#import "modules/opengl.sx";
#import "modules/math";
#import "modules/gpu/types.sx";
#import "modules/gpu/api.sx";
#import "modules/ui/types.sx";
#import "modules/ui/render.sx";
#import "modules/ui/glyph_cache.sx";
@@ -13,62 +15,81 @@ UI_VERTEX_BYTES :s64: 48;
MAX_UI_VERTICES :s64: 16384;
UIRenderer :: struct {
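// GL-side handles. Used when `has_gpu` is false (every non-iOS target today).
// `gpu` itself is left uninitialized (`= ---`), so the flag — not a null
// check on `gpu` — is what selects the backend.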
vao: u32;
vbo: u32;
shader: u32;
proj_loc: s32;
tex_loc: s32;
// CPU-side vertex scratch buffer — same for both backends.
vertices: [*]f32;
vertex_count: s64;
screen_width: f32;
screen_height: f32;
dpi_scale: f32;
white_texture: u32;
white_texture: u32; // GL name OR TextureHandle (both are u32-shaped)
current_texture: u32;
draw_calls: s64;
init :: (self: *UIRenderer) {
// Create shader (ES for WASM/WebGL2 + iOS GLES3, Core for desktop GL 3.3)
inline if OS == .wasm or OS == .ios {
self.shader = create_program(UI_VERT_SRC_ES, UI_FRAG_SRC_ES);
} else {
self.shader = create_program(UI_VERT_SRC_CORE, UI_FRAG_SRC_CORE);
}
self.proj_loc = glGetUniformLocation(self.shader, "uProj");
self.tex_loc = glGetUniformLocation(self.shader, "uTex");
// GPU protocol backend. When `has_gpu`, the renderer routes shader /
// buffer / texture / draw calls through `gpu` instead of raw GL. The
// chess game sets this on iOS to a boxed `*MetalGPU`.
gpu: GPU = ---;
has_gpu: bool = false;
mtl_shader: ShaderHandle = 0;
mtl_vbuf: BufferHandle = 0;
// Allocate vertex buffer (CPU side)
init :: (self: *UIRenderer) {
// Allocate vertex scratch (CPU side) — same for both backends.
buf_size := MAX_UI_VERTICES * UI_VERTEX_BYTES;
self.vertices = xx context.allocator.alloc(buf_size);
memset(self.vertices, 0, buf_size);
self.vertex_count = 0;
// Create VAO/VBO
glGenVertexArrays(1, @self.vao);
glGenBuffers(1, @self.vbo);
glBindVertexArray(self.vao);
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
glBufferData(GL_ARRAY_BUFFER, xx buf_size, null, GL_DYNAMIC_DRAW);
// pos (2 floats)
glVertexAttribPointer(0, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 0);
glEnableVertexAttribArray(0);
// uv (2 floats)
glVertexAttribPointer(1, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 8);
glEnableVertexAttribArray(1);
// color (4 floats)
glVertexAttribPointer(2, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 16);
glEnableVertexAttribArray(2);
// params: corner_radius, border_width, rect_w, rect_h
glVertexAttribPointer(3, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 32);
glEnableVertexAttribArray(3);
glBindVertexArray(0);
self.dpi_scale = 1.0;
// 1x1 white texture for solid rects
self.white_texture = create_white_texture();
if self.has_gpu {
// ── Metal backend (via GPU protocol) ───────────────────────
self.mtl_shader = self.gpu.create_shader(UI_MSL_SRC, "");
self.mtl_vbuf = self.gpu.create_buffer(buf_size);
white_px : [4]u8 = .[255, 255, 255, 255];
self.white_texture = self.gpu.create_texture(1, 1, .rgba8, xx @white_px[0]);
} else {
// ── GL backend ─────────────────────────────────────────────
// Create shader (ES for WASM/WebGL2 + iOS GLES3, Core for desktop GL 3.3)
inline if OS == .wasm or OS == .ios {
self.shader = create_program(UI_VERT_SRC_ES, UI_FRAG_SRC_ES);
} else {
self.shader = create_program(UI_VERT_SRC_CORE, UI_FRAG_SRC_CORE);
}
self.proj_loc = glGetUniformLocation(self.shader, "uProj");
self.tex_loc = glGetUniformLocation(self.shader, "uTex");
// Create VAO/VBO
glGenVertexArrays(1, @self.vao);
glGenBuffers(1, @self.vbo);
glBindVertexArray(self.vao);
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
glBufferData(GL_ARRAY_BUFFER, xx buf_size, null, GL_DYNAMIC_DRAW);
// pos (2 floats)
glVertexAttribPointer(0, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 0);
glEnableVertexAttribArray(0);
// uv (2 floats)
glVertexAttribPointer(1, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 8);
glEnableVertexAttribArray(1);
// color (4 floats)
glVertexAttribPointer(2, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 16);
glEnableVertexAttribArray(2);
// params: corner_radius, border_width, rect_w, rect_h
glVertexAttribPointer(3, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 32);
glEnableVertexAttribArray(3);
glBindVertexArray(0);
// 1x1 white texture for solid rects
self.white_texture = create_white_texture();
}
}
begin :: (self: *UIRenderer, width: f32, height: f32, font_texture: u32) {
@@ -78,15 +99,26 @@ UIRenderer :: struct {
self.current_texture = font_texture;
self.draw_calls = 0;
// Set up GL state once for the entire frame
glUseProgram(self.shader);
proj := Mat4.ortho(0.0, width, height, 0.0, -1.0, 1.0);
glUniformMatrix4fv(self.proj_loc, 1, 0, proj.data);
glUniform1i(self.tex_loc, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, font_texture);
glBindVertexArray(self.vao);
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
if self.has_gpu {
// Pipeline state + vertex buffer + projection + initial texture.
// Metal blend mode + scissor-cleared defaults are baked into
// the pipeline state, so no per-frame glEnable/glDisable.
self.gpu.set_shader(self.mtl_shader);
self.gpu.set_vertex_buffer(self.mtl_vbuf);
self.gpu.set_vertex_constants(1, xx proj.data, 64);
self.gpu.set_texture(0, font_texture);
} else {
// GL: bind everything for the frame.
glUseProgram(self.shader);
glUniformMatrix4fv(self.proj_loc, 1, 0, proj.data);
glUniform1i(self.tex_loc, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, font_texture);
glBindVertexArray(self.vao);
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
}
}
bind_texture :: (self: *UIRenderer, tex: u32) {
@@ -202,18 +234,33 @@ UIRenderer :: struct {
}
case .clip_push: {
self.flush();
glEnable(GL_SCISSOR_TEST);
dpi := self.dpi_scale;
glScissor(
xx (node.frame.origin.x * dpi),
xx ((self.screen_height - node.frame.origin.y - node.frame.size.height) * dpi),
xx (node.frame.size.width * dpi),
xx (node.frame.size.height * dpi)
);
if self.has_gpu {
// Metal: pixel coords, top-left origin (no Y flip).
self.gpu.set_scissor(
xx (node.frame.origin.x * dpi),
xx (node.frame.origin.y * dpi),
xx (node.frame.size.width * dpi),
xx (node.frame.size.height * dpi),
);
} else {
// GL: pixel coords, bottom-left origin — flip Y.
glEnable(GL_SCISSOR_TEST);
glScissor(
xx (node.frame.origin.x * dpi),
xx ((self.screen_height - node.frame.origin.y - node.frame.size.height) * dpi),
xx (node.frame.size.width * dpi),
xx (node.frame.size.height * dpi)
);
}
}
case .clip_pop: {
self.flush();
glDisable(GL_SCISSOR_TEST);
if self.has_gpu {
self.gpu.disable_scissor();
} else {
glDisable(GL_SCISSOR_TEST);
}
}
case .opacity_push: {}
case .opacity_pop: {}
@@ -225,13 +272,22 @@ UIRenderer :: struct {
flush :: (self: *UIRenderer) {
if self.vertex_count == 0 { return; }
// Only bind the current texture (program, projection, VAO already bound in begin())
glBindTexture(GL_TEXTURE_2D, self.current_texture);
upload_size : s64 = self.vertex_count * UI_VERTEX_BYTES;
// Use glBufferData to orphan the old buffer and avoid GPU sync stalls
glBufferData(GL_ARRAY_BUFFER, xx upload_size, self.vertices, GL_DYNAMIC_DRAW);
glDrawArrays(GL_TRIANGLES, 0, xx self.vertex_count);
if self.has_gpu {
// Mirror the GL path: bind current texture before drawing.
// current_texture may have changed since the last flush.
self.gpu.set_texture(0, self.current_texture);
self.gpu.update_buffer(self.mtl_vbuf, xx self.vertices, upload_size);
self.gpu.draw_triangles(0, xx self.vertex_count);
} else {
// Only re-bind the current texture (program, projection, VAO
// already bound in begin()). glBufferData orphans the old buffer
// to avoid GPU sync stalls.
glBindTexture(GL_TEXTURE_2D, self.current_texture);
glBufferData(GL_ARRAY_BUFFER, xx upload_size, self.vertices, GL_DYNAMIC_DRAW);
glDrawArrays(GL_TRIANGLES, 0, xx self.vertex_count);
}
self.vertex_count = 0;
self.draw_calls += 1;
@@ -458,3 +514,87 @@ void main() {
}
}
GLSL;
// --- Metal (MSL) — single library with vmain/fmain entry points ---
//
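// `packed_float2 / packed_float4` pin the 12-float interleaved vertex
// layout (pos2 / uv2 / color4 / params4 = 48 bytes) to the CPU-side float
// array. Packed vectors are 4-byte aligned, so the compiler can never
// insert padding; MSL's default `float4` is 16-byte aligned and pads any
// layout whose float4 fields do not already start on a 16-byte boundary
// (see examples/63-metal-clear.sx for the gotcha).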
//
// Uniform passing: GL uses `glUniformMatrix4fv("uProj", proj)`; Metal
// receives the projection via `setVertexBytes:length:atIndex:1` (slot 0
// is the vertex buffer). Texture binding goes through
// `setFragmentTexture:atIndex:0`.
// Passed as the shader source to `gpu.create_shader(UI_MSL_SRC, "")` in
// UIRenderer.init when `has_gpu` is set. Everything between the MSL markers
// is shader text handed to the backend, so any edit in here (including
// comments) changes the string that gets compiled.
UI_MSL_SRC :: #string MSL
#include <metal_stdlib>
using namespace metal;
struct UIVertex {
packed_float2 pos;
packed_float2 uv;
packed_float4 color;
packed_float4 params;
};
struct VOut {
float4 position [[position]];
float2 uv;
float4 color;
float4 params;
};
vertex VOut vmain(uint vid [[vertex_id]],
constant UIVertex* verts [[buffer(0)]],
constant float4x4& proj [[buffer(1)]]) {
UIVertex v = verts[vid];
VOut o;
o.position = proj * float4(v.pos, 0.0, 1.0);
o.uv = float2(v.uv);
o.color = float4(v.color);
o.params = float4(v.params);
return o;
}
static float roundedBoxSDF(float2 center, float2 half_size, float radius) {
float2 q = abs(center) - half_size + float2(radius);
return length(max(q, float2(0.0))) + min(max(q.x, q.y), 0.0) - radius;
}
fragment float4 fmain(VOut in [[stage_in]],
texture2d<float> tex [[texture(0)]]) {
constexpr sampler s(coord::normalized, address::clamp_to_edge, filter::linear);
float mode = in.params.x;
float border = in.params.y;
float2 rectSize = in.params.zw;
if (mode < -1.5) {
// Image mode (mode == -2.0): sample texture
return tex.sample(s, in.uv) * in.color;
} else if (mode < 0.0) {
// Text mode (mode == -1.0): sample glyph atlas .r as alpha
float alpha = tex.sample(s, in.uv).r;
float ew = fwidth(alpha) * 0.7;
alpha = smoothstep(0.5 - ew, 0.5 + ew, alpha);
return float4(in.color.rgb, in.color.a * pow(alpha, 0.9));
} else if (mode > 0.0 || border > 0.0) {
// Rounded rect: SDF alpha, vertex color only
float2 half_size = rectSize * 0.5;
float2 center = (in.uv - float2(0.5)) * rectSize;
float dist = roundedBoxSDF(center, half_size, mode);
float aa = fwidth(dist);
float alpha = 1.0 - smoothstep(-aa, aa, dist);
if (border > 0.0) {
float inner = roundedBoxSDF(center, half_size - float2(border), max(mode - border, 0.0));
float border_alpha = smoothstep(-aa, aa, inner);
alpha = alpha * max(border_alpha, 0.0);
}
return float4(in.color.rgb, in.color.a * alpha);
} else {
// Plain rect: vertex color only
return in.color;
}
}
MSL;