metal: pause step 3b pending sx-side fixes (filed 0024-0030)
Step 3b code is wired across UIRenderer + GlyphCache + UIPipeline +
chess game (gpu_mode = .metal on iOS, MetalGPU bound via the GPU
protocol). macOS GL chess, iOS-sim GLES chess, and iOS-sim Metal
triangle (63-metal-clear.sx) all still render.
iOS-sim Metal chess crashes inside replaceRegion while uploading the 1MB
font atlas. Bisecting that crash exposed several sx-language issues:
mid-bisect tracers (NSLog calls inside if/else branch bodies) produced no
output, which blocked further investigation.
Filing each finding as examples/issue-NNNN.sx rather than working
around piecemeal:
Bugs:
- 0024 NSLog/foreign-call inside if/else body not producing output
- 0025 C-ABI param coercion incomplete for composites >16B
(combined direct-call abiCoerceParamType TODO + call_indirect
path that doesn't apply C-ABI coercion at all)
- 0026 replaceRegion 1MB upload crash (likely downstream of 0025)
Features needed for step 4 + cleanup:
- 0027 Obj-C block bridge (^{...}) for animateWithDuration:
- 0028 Optional protocol box (`?GPU = null`) to replace the `T = ---; has_T: bool` field pair
- 0029 destroy_texture/buffer/shader on GPU protocol
- 0030 extern cross-file globals
Library-side: renderer.sx + glyph_cache.sx + pipeline.sx gain a
`gpu: GPU = ---; has_gpu: bool` field pair + branches that route every
GL touchpoint through the protocol when has_gpu. glyph_cache.init
saves/restores those fields around its memset. pipeline.set_gpu()
propagates to renderer + font. Renderer's MSL shader source added as
UI_MSL_SRC using packed_float2/packed_float4 to keep the 12-float
interleaved vertex layout tight (48 bytes).
metal.sx: dual-phase init (init(null, 0, 0) for eager device+queue,
re-init with the layer once UIKit installs the SxMetalView).
setStorageMode:.shared on every texture descriptor to ensure CPU-
writable atlas pixels on Apple Silicon iOS-sim.
Regression suite: 68 passing, 0 failed. The WASM chess build is currently
broken under the step 3b state (silent compiler crash); this is documented
in CHECKPOINT.md and is likely fallout from one of the filed issues
(probably 0028 — the verbose protocol-box pattern). Step 3b resumes after
0024-0030 land.
This commit is contained in:
@@ -28,6 +28,12 @@ MTL_PIXEL_FORMAT_R8_UNORM :u64: 10;
|
||||
MTL_LOAD_ACTION_CLEAR :u64: 2;
|
||||
MTL_STORE_ACTION_STORE :u64: 1;
|
||||
|
||||
// MTLStorageMode. For UI atlases + sprites the CPU needs to write pixels
|
||||
// and the GPU needs to sample — `.shared` is the safe default. On iOS-sim
|
||||
// under Apple Silicon the convenience class method's default storage
|
||||
// isn't reliably shared, so we set it explicitly in metal_create_texture_ios.
|
||||
MTL_STORAGE_MODE_SHARED :u64: 0;
|
||||
|
||||
// MTLPrimitiveType.
|
||||
MTL_PRIMITIVE_TYPE_TRIANGLE :u64: 3;
|
||||
|
||||
@@ -84,11 +90,18 @@ MetalGPU :: struct {
|
||||
}
|
||||
|
||||
impl GPU for MetalGPU {
|
||||
// Two-phase init: callers can `init(null, 0, 0)` first to allocate
|
||||
// device + queue eagerly (lets the UI pipeline compile shaders before
|
||||
// UIKit hands us a layer), then re-call `init(layer, w, h)` once the
|
||||
// CAMetalLayer is available. The second call only updates the layer
|
||||
// ref + dims; device/queue are preserved.
|
||||
init :: (self: *MetalGPU, target: *void, pixel_w: s32, pixel_h: s32) -> bool {
|
||||
inline if OS != .ios { return false; }
|
||||
self.layer = target;
|
||||
self.pixel_w = pixel_w;
|
||||
self.pixel_h = pixel_h;
|
||||
if target != null {
|
||||
self.layer = target;
|
||||
self.pixel_w = pixel_w;
|
||||
self.pixel_h = pixel_h;
|
||||
}
|
||||
metal_init_ios(self);
|
||||
}
|
||||
|
||||
@@ -200,12 +213,19 @@ impl GPU for MetalGPU {
|
||||
// so non-iOS builds never reference the unresolved Metal symbols below.
|
||||
// ───────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// init() may be called twice: once with target==null to create device +
|
||||
// queue eagerly (so the UI pipeline can compile shaders before UIKit
|
||||
// has a layer for us), then again with target=CAMetalLayer once
|
||||
// `-[SxAppDelegate didFinishLaunching:]` has installed the view.
|
||||
// Both calls go through this helper; it's idempotent on the device/queue
|
||||
// and only touches the layer when one's been supplied.
|
||||
metal_init_ios :: (self: *MetalGPU) -> bool {
|
||||
inline if OS != .ios { return false; }
|
||||
if self.layer == null { return false; }
|
||||
|
||||
self.device = MTLCreateSystemDefaultDevice();
|
||||
if self.device == null { return false; }
|
||||
if self.device == null {
|
||||
self.device = MTLCreateSystemDefaultDevice();
|
||||
if self.device == null { return false; }
|
||||
}
|
||||
|
||||
msg_oo : (*void, *void, *void) -> void = xx objc_msgSend;
|
||||
msg_ou : (*void, *void, u64) -> void = xx objc_msgSend;
|
||||
@@ -213,15 +233,19 @@ metal_init_ios :: (self: *MetalGPU) -> bool {
|
||||
msg_osize : (*void, *void, CGSize) -> void = xx objc_msgSend;
|
||||
msg_o : (*void, *void) -> *void = xx objc_msgSend;
|
||||
|
||||
msg_oo(self.layer, sel_registerName("setDevice:".ptr), self.device);
|
||||
msg_ou(self.layer, sel_registerName("setPixelFormat:".ptr), MTL_PIXEL_FORMAT_BGRA8_UNORM);
|
||||
msg_ob(self.layer, sel_registerName("setFramebufferOnly:".ptr), 1);
|
||||
if self.queue == null {
|
||||
self.queue = msg_o(self.device, sel_registerName("newCommandQueue".ptr));
|
||||
if self.queue == null { return false; }
|
||||
}
|
||||
|
||||
size := CGSize.{ width = xx self.pixel_w, height = xx self.pixel_h };
|
||||
msg_osize(self.layer, sel_registerName("setDrawableSize:".ptr), size);
|
||||
if self.layer != null {
|
||||
msg_oo(self.layer, sel_registerName("setDevice:".ptr), self.device);
|
||||
msg_ou(self.layer, sel_registerName("setPixelFormat:".ptr), MTL_PIXEL_FORMAT_BGRA8_UNORM);
|
||||
msg_ob(self.layer, sel_registerName("setFramebufferOnly:".ptr), 1);
|
||||
|
||||
self.queue = msg_o(self.device, sel_registerName("newCommandQueue".ptr));
|
||||
if self.queue == null { return false; }
|
||||
size := CGSize.{ width = xx self.pixel_w, height = xx self.pixel_h };
|
||||
msg_osize(self.layer, sel_registerName("setDrawableSize:".ptr), size);
|
||||
}
|
||||
|
||||
true;
|
||||
}
|
||||
@@ -457,6 +481,12 @@ metal_create_texture_ios :: (self: *MetalGPU, w: s32, h: s32, format: TextureFor
|
||||
pixel_format, xx w, xx h, 0);
|
||||
if desc == null { return 0; }
|
||||
|
||||
// Force shared storage so the CPU can keep writing pixels (atlas updates,
|
||||
// sprite uploads). On iOS-sim under Apple Silicon the convenience class
|
||||
// method's default storage isn't reliably shared for every format.
|
||||
msg_ou_void : (*void, *void, u64) -> void = xx objc_msgSend;
|
||||
msg_ou_void(desc, sel_registerName("setStorageMode:".ptr), MTL_STORAGE_MODE_SHARED);
|
||||
|
||||
msg_oo : (*void, *void, *void) -> *void = xx objc_msgSend;
|
||||
tex := msg_oo(self.device, sel_registerName("newTextureWithDescriptor:".ptr), desc);
|
||||
if tex == null { return 0; }
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#import "modules/std.sx";
|
||||
#import "modules/opengl.sx";
|
||||
#import "modules/gpu/types.sx";
|
||||
#import "modules/gpu/api.sx";
|
||||
#import "modules/stb_truetype.sx";
|
||||
#import "modules/ui/types.sx";
|
||||
|
||||
@@ -176,9 +178,20 @@ GlyphCache :: struct {
|
||||
last_shape_len: s64;
|
||||
last_shape_size_q: u16;
|
||||
|
||||
// GPU protocol backend. When `has_gpu`, atlas creation + dirty uploads
|
||||
// route through `gpu` instead of raw GL.
|
||||
gpu: GPU = ---;
|
||||
has_gpu: bool = false;
|
||||
|
||||
init :: (self: *GlyphCache, path: [:0]u8, default_size: f32) {
|
||||
// Preserve any pre-set GPU dispatch across the zero-out — the
|
||||
// surrounding struct memset would otherwise wipe it.
|
||||
saved_gpu := self.gpu;
|
||||
saved_has_gpu := self.has_gpu;
|
||||
// Zero out the entire struct first (parent may be uninitialized with = ---)
|
||||
memset(self, 0, size_of(GlyphCache));
|
||||
self.gpu = saved_gpu;
|
||||
self.has_gpu = saved_has_gpu;
|
||||
|
||||
// Load font file
|
||||
file_size : s32 = 0;
|
||||
@@ -245,15 +258,25 @@ GlyphCache :: struct {
|
||||
val_bytes : s64 = self.hash_cap * 8; // s64 per slot (s32 would suffice but alignment)
|
||||
self.hash_vals = xx context.allocator.alloc(val_bytes);
|
||||
|
||||
// Create OpenGL texture
|
||||
glGenTextures(1, @self.texture_id);
|
||||
glBindTexture(GL_TEXTURE_2D, self.texture_id);
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, xx GL_R8, self.atlas_width, self.atlas_height, 0, GL_RED, GL_UNSIGNED_BYTE, self.bitmap);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, xx GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, xx GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, xx GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, xx GL_CLAMP_TO_EDGE);
|
||||
// Create the atlas texture. In GPU-protocol mode we create empty and
|
||||
// let the first `flush()` push the (zero-initialized) bitmap via
|
||||
// update_texture_region — same result as the GL path's glTexImage2D
|
||||
// with the zeroed bitmap, but works whether or not the backend
|
||||
// accepts CPU pixel pointers at create time.
|
||||
if self.has_gpu {
|
||||
self.texture_id = self.gpu.create_texture(
|
||||
self.atlas_width, self.atlas_height, .r8, null);
|
||||
self.dirty = true;
|
||||
} else {
|
||||
glGenTextures(1, @self.texture_id);
|
||||
glBindTexture(GL_TEXTURE_2D, self.texture_id);
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, xx GL_R8, self.atlas_width, self.atlas_height, 0, GL_RED, GL_UNSIGNED_BYTE, self.bitmap);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, xx GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, xx GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, xx GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, xx GL_CLAMP_TO_EDGE);
|
||||
}
|
||||
|
||||
out("GlyphCache initialized: ");
|
||||
out(path);
|
||||
@@ -406,9 +429,14 @@ GlyphCache :: struct {
|
||||
// Upload dirty atlas to GPU
|
||||
flush :: (self: *GlyphCache) {
|
||||
if self.dirty == false { return; }
|
||||
glBindTexture(GL_TEXTURE_2D, self.texture_id);
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, self.atlas_width, self.atlas_height, GL_RED, GL_UNSIGNED_BYTE, self.bitmap);
|
||||
if self.has_gpu {
|
||||
self.gpu.update_texture_region(self.texture_id, 0, 0,
|
||||
self.atlas_width, self.atlas_height, xx self.bitmap);
|
||||
} else {
|
||||
glBindTexture(GL_TEXTURE_2D, self.texture_id);
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, self.atlas_width, self.atlas_height, GL_RED, GL_UNSIGNED_BYTE, self.bitmap);
|
||||
}
|
||||
self.dirty = false;
|
||||
}
|
||||
|
||||
@@ -464,16 +492,23 @@ GlyphCache :: struct {
|
||||
self.atlas_width = new_w;
|
||||
self.atlas_height = new_h;
|
||||
|
||||
// Recreate GL texture
|
||||
glDeleteTextures(1, @self.texture_id);
|
||||
glGenTextures(1, @self.texture_id);
|
||||
glBindTexture(GL_TEXTURE_2D, self.texture_id);
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, xx GL_R8, new_w, new_h, 0, GL_RED, GL_UNSIGNED_BYTE, new_bitmap);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, xx GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, xx GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, xx GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, xx GL_CLAMP_TO_EDGE);
|
||||
// Recreate atlas at the new size.
|
||||
if self.has_gpu {
|
||||
// No destroy_texture in the GPU protocol yet — old atlas
|
||||
// leaks in the backend table until process exit. Atlas grow
|
||||
// is rare so this is acceptable for now.
|
||||
self.texture_id = self.gpu.create_texture(new_w, new_h, .r8, xx new_bitmap);
|
||||
} else {
|
||||
glDeleteTextures(1, @self.texture_id);
|
||||
glGenTextures(1, @self.texture_id);
|
||||
glBindTexture(GL_TEXTURE_2D, self.texture_id);
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, xx GL_R8, new_w, new_h, 0, GL_RED, GL_UNSIGNED_BYTE, new_bitmap);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, xx GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, xx GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, xx GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, xx GL_CLAMP_TO_EDGE);
|
||||
}
|
||||
|
||||
// Recompute UV coordinates for all cached glyphs
|
||||
atlas_wf : f32 = xx new_w;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#import "modules/std.sx";
|
||||
#import "modules/allocators.sx";
|
||||
#import "modules/opengl.sx";
|
||||
#import "modules/gpu/api.sx";
|
||||
#import "modules/ui/types.sx";
|
||||
#import "modules/ui/render.sx";
|
||||
#import "modules/ui/events.sx";
|
||||
@@ -24,6 +25,23 @@ UIPipeline :: struct {
|
||||
has_body: bool;
|
||||
parent_allocator: Allocator;
|
||||
|
||||
// GPU protocol backend. When `has_gpu`, the pipeline propagates this
|
||||
// to its renderer + font, and skips the per-frame GL state setup in
|
||||
// commit_gpu (Metal bakes blend mode into the pipeline state).
|
||||
gpu: GPU = ---;
|
||||
has_gpu: bool = false;
|
||||
|
||||
// Set the GPU dispatch BEFORE calling init() / init_font() so the
|
||||
// shaders + atlas land on the right backend.
|
||||
set_gpu :: (self: *UIPipeline, gpu: GPU) {
|
||||
self.gpu = gpu;
|
||||
self.has_gpu = true;
|
||||
self.renderer.gpu = gpu;
|
||||
self.renderer.has_gpu = true;
|
||||
self.font.gpu = gpu;
|
||||
self.font.has_gpu = true;
|
||||
}
|
||||
|
||||
init :: (self: *UIPipeline, width: f32, height: f32) {
|
||||
self.render_tree = RenderTree.init();
|
||||
self.renderer.init();
|
||||
@@ -149,14 +167,18 @@ UIPipeline :: struct {
|
||||
}
|
||||
|
||||
commit_gpu :: (self: *UIPipeline) {
|
||||
glEnable(GL_BLEND);
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
if !self.has_gpu {
|
||||
glEnable(GL_BLEND);
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
}
|
||||
|
||||
self.renderer.begin(self.screen_width, self.screen_height, self.font.texture_id);
|
||||
self.renderer.process(@self.render_tree);
|
||||
self.renderer.flush();
|
||||
|
||||
glDisable(GL_BLEND);
|
||||
if !self.has_gpu {
|
||||
glDisable(GL_BLEND);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
#import "modules/compiler.sx";
|
||||
#import "modules/opengl.sx";
|
||||
#import "modules/math";
|
||||
#import "modules/gpu/types.sx";
|
||||
#import "modules/gpu/api.sx";
|
||||
#import "modules/ui/types.sx";
|
||||
#import "modules/ui/render.sx";
|
||||
#import "modules/ui/glyph_cache.sx";
|
||||
@@ -13,62 +15,81 @@ UI_VERTEX_BYTES :s64: 48;
|
||||
MAX_UI_VERTICES :s64: 16384;
|
||||
|
||||
UIRenderer :: struct {
|
||||
// GL-side handles. Used when `has_gpu` is false (every non-iOS target today).
|
||||
vao: u32;
|
||||
vbo: u32;
|
||||
shader: u32;
|
||||
proj_loc: s32;
|
||||
tex_loc: s32;
|
||||
|
||||
// CPU-side vertex scratch buffer — same for both backends.
|
||||
vertices: [*]f32;
|
||||
vertex_count: s64;
|
||||
screen_width: f32;
|
||||
screen_height: f32;
|
||||
dpi_scale: f32;
|
||||
white_texture: u32;
|
||||
white_texture: u32; // GL name OR TextureHandle (both are u32-shaped)
|
||||
current_texture: u32;
|
||||
draw_calls: s64;
|
||||
|
||||
init :: (self: *UIRenderer) {
|
||||
// Create shader (ES for WASM/WebGL2 + iOS GLES3, Core for desktop GL 3.3)
|
||||
inline if OS == .wasm or OS == .ios {
|
||||
self.shader = create_program(UI_VERT_SRC_ES, UI_FRAG_SRC_ES);
|
||||
} else {
|
||||
self.shader = create_program(UI_VERT_SRC_CORE, UI_FRAG_SRC_CORE);
|
||||
}
|
||||
self.proj_loc = glGetUniformLocation(self.shader, "uProj");
|
||||
self.tex_loc = glGetUniformLocation(self.shader, "uTex");
|
||||
// GPU protocol backend. When `has_gpu`, the renderer routes shader /
|
||||
// buffer / texture / draw calls through `gpu` instead of raw GL. The
|
||||
// chess game sets this on iOS to a boxed `*MetalGPU`.
|
||||
gpu: GPU = ---;
|
||||
has_gpu: bool = false;
|
||||
mtl_shader: ShaderHandle = 0;
|
||||
mtl_vbuf: BufferHandle = 0;
|
||||
|
||||
// Allocate vertex buffer (CPU side)
|
||||
init :: (self: *UIRenderer) {
|
||||
// Allocate vertex scratch (CPU side) — same for both backends.
|
||||
buf_size := MAX_UI_VERTICES * UI_VERTEX_BYTES;
|
||||
self.vertices = xx context.allocator.alloc(buf_size);
|
||||
memset(self.vertices, 0, buf_size);
|
||||
self.vertex_count = 0;
|
||||
|
||||
// Create VAO/VBO
|
||||
glGenVertexArrays(1, @self.vao);
|
||||
glGenBuffers(1, @self.vbo);
|
||||
glBindVertexArray(self.vao);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, xx buf_size, null, GL_DYNAMIC_DRAW);
|
||||
|
||||
// pos (2 floats)
|
||||
glVertexAttribPointer(0, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 0);
|
||||
glEnableVertexAttribArray(0);
|
||||
// uv (2 floats)
|
||||
glVertexAttribPointer(1, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 8);
|
||||
glEnableVertexAttribArray(1);
|
||||
// color (4 floats)
|
||||
glVertexAttribPointer(2, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 16);
|
||||
glEnableVertexAttribArray(2);
|
||||
// params: corner_radius, border_width, rect_w, rect_h
|
||||
glVertexAttribPointer(3, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 32);
|
||||
glEnableVertexAttribArray(3);
|
||||
|
||||
glBindVertexArray(0);
|
||||
|
||||
self.dpi_scale = 1.0;
|
||||
|
||||
// 1x1 white texture for solid rects
|
||||
self.white_texture = create_white_texture();
|
||||
if self.has_gpu {
|
||||
// ── Metal backend (via GPU protocol) ───────────────────────
|
||||
self.mtl_shader = self.gpu.create_shader(UI_MSL_SRC, "");
|
||||
self.mtl_vbuf = self.gpu.create_buffer(buf_size);
|
||||
white_px : [4]u8 = .[255, 255, 255, 255];
|
||||
self.white_texture = self.gpu.create_texture(1, 1, .rgba8, xx @white_px[0]);
|
||||
} else {
|
||||
// ── GL backend ─────────────────────────────────────────────
|
||||
// Create shader (ES for WASM/WebGL2 + iOS GLES3, Core for desktop GL 3.3)
|
||||
inline if OS == .wasm or OS == .ios {
|
||||
self.shader = create_program(UI_VERT_SRC_ES, UI_FRAG_SRC_ES);
|
||||
} else {
|
||||
self.shader = create_program(UI_VERT_SRC_CORE, UI_FRAG_SRC_CORE);
|
||||
}
|
||||
self.proj_loc = glGetUniformLocation(self.shader, "uProj");
|
||||
self.tex_loc = glGetUniformLocation(self.shader, "uTex");
|
||||
|
||||
// Create VAO/VBO
|
||||
glGenVertexArrays(1, @self.vao);
|
||||
glGenBuffers(1, @self.vbo);
|
||||
glBindVertexArray(self.vao);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, xx buf_size, null, GL_DYNAMIC_DRAW);
|
||||
|
||||
// pos (2 floats)
|
||||
glVertexAttribPointer(0, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 0);
|
||||
glEnableVertexAttribArray(0);
|
||||
// uv (2 floats)
|
||||
glVertexAttribPointer(1, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 8);
|
||||
glEnableVertexAttribArray(1);
|
||||
// color (4 floats)
|
||||
glVertexAttribPointer(2, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 16);
|
||||
glEnableVertexAttribArray(2);
|
||||
// params: corner_radius, border_width, rect_w, rect_h
|
||||
glVertexAttribPointer(3, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 32);
|
||||
glEnableVertexAttribArray(3);
|
||||
|
||||
glBindVertexArray(0);
|
||||
|
||||
// 1x1 white texture for solid rects
|
||||
self.white_texture = create_white_texture();
|
||||
}
|
||||
}
|
||||
|
||||
begin :: (self: *UIRenderer, width: f32, height: f32, font_texture: u32) {
|
||||
@@ -78,15 +99,26 @@ UIRenderer :: struct {
|
||||
self.current_texture = font_texture;
|
||||
self.draw_calls = 0;
|
||||
|
||||
// Set up GL state once for the entire frame
|
||||
glUseProgram(self.shader);
|
||||
proj := Mat4.ortho(0.0, width, height, 0.0, -1.0, 1.0);
|
||||
glUniformMatrix4fv(self.proj_loc, 1, 0, proj.data);
|
||||
glUniform1i(self.tex_loc, 0);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, font_texture);
|
||||
glBindVertexArray(self.vao);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
|
||||
|
||||
if self.has_gpu {
|
||||
// Pipeline state + vertex buffer + projection + initial texture.
|
||||
// Metal blend mode + scissor-cleared defaults are baked into
|
||||
// the pipeline state, so no per-frame glEnable/glDisable.
|
||||
self.gpu.set_shader(self.mtl_shader);
|
||||
self.gpu.set_vertex_buffer(self.mtl_vbuf);
|
||||
self.gpu.set_vertex_constants(1, xx proj.data, 64);
|
||||
self.gpu.set_texture(0, font_texture);
|
||||
} else {
|
||||
// GL: bind everything for the frame.
|
||||
glUseProgram(self.shader);
|
||||
glUniformMatrix4fv(self.proj_loc, 1, 0, proj.data);
|
||||
glUniform1i(self.tex_loc, 0);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, font_texture);
|
||||
glBindVertexArray(self.vao);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
|
||||
}
|
||||
}
|
||||
|
||||
bind_texture :: (self: *UIRenderer, tex: u32) {
|
||||
@@ -202,18 +234,33 @@ UIRenderer :: struct {
|
||||
}
|
||||
case .clip_push: {
|
||||
self.flush();
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
dpi := self.dpi_scale;
|
||||
glScissor(
|
||||
xx (node.frame.origin.x * dpi),
|
||||
xx ((self.screen_height - node.frame.origin.y - node.frame.size.height) * dpi),
|
||||
xx (node.frame.size.width * dpi),
|
||||
xx (node.frame.size.height * dpi)
|
||||
);
|
||||
if self.has_gpu {
|
||||
// Metal: pixel coords, top-left origin (no Y flip).
|
||||
self.gpu.set_scissor(
|
||||
xx (node.frame.origin.x * dpi),
|
||||
xx (node.frame.origin.y * dpi),
|
||||
xx (node.frame.size.width * dpi),
|
||||
xx (node.frame.size.height * dpi),
|
||||
);
|
||||
} else {
|
||||
// GL: pixel coords, bottom-left origin — flip Y.
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
glScissor(
|
||||
xx (node.frame.origin.x * dpi),
|
||||
xx ((self.screen_height - node.frame.origin.y - node.frame.size.height) * dpi),
|
||||
xx (node.frame.size.width * dpi),
|
||||
xx (node.frame.size.height * dpi)
|
||||
);
|
||||
}
|
||||
}
|
||||
case .clip_pop: {
|
||||
self.flush();
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
if self.has_gpu {
|
||||
self.gpu.disable_scissor();
|
||||
} else {
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
}
|
||||
}
|
||||
case .opacity_push: {}
|
||||
case .opacity_pop: {}
|
||||
@@ -225,13 +272,22 @@ UIRenderer :: struct {
|
||||
flush :: (self: *UIRenderer) {
|
||||
if self.vertex_count == 0 { return; }
|
||||
|
||||
// Only bind the current texture (program, projection, VAO already bound in begin())
|
||||
glBindTexture(GL_TEXTURE_2D, self.current_texture);
|
||||
|
||||
upload_size : s64 = self.vertex_count * UI_VERTEX_BYTES;
|
||||
// Use glBufferData to orphan the old buffer and avoid GPU sync stalls
|
||||
glBufferData(GL_ARRAY_BUFFER, xx upload_size, self.vertices, GL_DYNAMIC_DRAW);
|
||||
glDrawArrays(GL_TRIANGLES, 0, xx self.vertex_count);
|
||||
|
||||
if self.has_gpu {
|
||||
// Mirror the GL path: bind current texture before drawing.
|
||||
// current_texture may have changed since the last flush.
|
||||
self.gpu.set_texture(0, self.current_texture);
|
||||
self.gpu.update_buffer(self.mtl_vbuf, xx self.vertices, upload_size);
|
||||
self.gpu.draw_triangles(0, xx self.vertex_count);
|
||||
} else {
|
||||
// Only re-bind the current texture (program, projection, VAO
|
||||
// already bound in begin()). glBufferData orphans the old buffer
|
||||
// to avoid GPU sync stalls.
|
||||
glBindTexture(GL_TEXTURE_2D, self.current_texture);
|
||||
glBufferData(GL_ARRAY_BUFFER, xx upload_size, self.vertices, GL_DYNAMIC_DRAW);
|
||||
glDrawArrays(GL_TRIANGLES, 0, xx self.vertex_count);
|
||||
}
|
||||
|
||||
self.vertex_count = 0;
|
||||
self.draw_calls += 1;
|
||||
@@ -458,3 +514,87 @@ void main() {
|
||||
}
|
||||
}
|
||||
GLSL;
|
||||
|
||||
// --- Metal (MSL) — single library with vmain/fmain entry points ---
|
||||
//
|
||||
// `packed_float2 / packed_float4` keep the 12-float interleaved vertex
|
||||
// layout (pos2 / uv2 / color4 / params4 = 48 bytes) without padding —
|
||||
// MSL's default `float4` has 16-byte alignment and would force a 64-byte
|
||||
// struct (see examples/63-metal-clear.sx for the gotcha).
|
||||
//
|
||||
// Uniform passing: GL uses `glUniformMatrix4fv("uProj", proj)`; Metal
|
||||
// receives the projection via `setVertexBytes:length:atIndex:1` (slot 0
|
||||
// is the vertex buffer). Texture binding goes through
|
||||
// `setFragmentTexture:atIndex:0`.
|
||||
|
||||
UI_MSL_SRC :: #string MSL
|
||||
#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
struct UIVertex {
|
||||
packed_float2 pos;
|
||||
packed_float2 uv;
|
||||
packed_float4 color;
|
||||
packed_float4 params;
|
||||
};
|
||||
|
||||
struct VOut {
|
||||
float4 position [[position]];
|
||||
float2 uv;
|
||||
float4 color;
|
||||
float4 params;
|
||||
};
|
||||
|
||||
vertex VOut vmain(uint vid [[vertex_id]],
|
||||
constant UIVertex* verts [[buffer(0)]],
|
||||
constant float4x4& proj [[buffer(1)]]) {
|
||||
UIVertex v = verts[vid];
|
||||
VOut o;
|
||||
o.position = proj * float4(v.pos, 0.0, 1.0);
|
||||
o.uv = float2(v.uv);
|
||||
o.color = float4(v.color);
|
||||
o.params = float4(v.params);
|
||||
return o;
|
||||
}
|
||||
|
||||
static float roundedBoxSDF(float2 center, float2 half_size, float radius) {
|
||||
float2 q = abs(center) - half_size + float2(radius);
|
||||
return length(max(q, float2(0.0))) + min(max(q.x, q.y), 0.0) - radius;
|
||||
}
|
||||
|
||||
fragment float4 fmain(VOut in [[stage_in]],
|
||||
texture2d<float> tex [[texture(0)]]) {
|
||||
constexpr sampler s(coord::normalized, address::clamp_to_edge, filter::linear);
|
||||
|
||||
float mode = in.params.x;
|
||||
float border = in.params.y;
|
||||
float2 rectSize = in.params.zw;
|
||||
|
||||
if (mode < -1.5) {
|
||||
// Image mode (mode == -2.0): sample texture
|
||||
return tex.sample(s, in.uv) * in.color;
|
||||
} else if (mode < 0.0) {
|
||||
// Text mode (mode == -1.0): sample glyph atlas .r as alpha
|
||||
float alpha = tex.sample(s, in.uv).r;
|
||||
float ew = fwidth(alpha) * 0.7;
|
||||
alpha = smoothstep(0.5 - ew, 0.5 + ew, alpha);
|
||||
return float4(in.color.rgb, in.color.a * pow(alpha, 0.9));
|
||||
} else if (mode > 0.0 || border > 0.0) {
|
||||
// Rounded rect: SDF alpha, vertex color only
|
||||
float2 half_size = rectSize * 0.5;
|
||||
float2 center = (in.uv - float2(0.5)) * rectSize;
|
||||
float dist = roundedBoxSDF(center, half_size, mode);
|
||||
float aa = fwidth(dist);
|
||||
float alpha = 1.0 - smoothstep(-aa, aa, dist);
|
||||
if (border > 0.0) {
|
||||
float inner = roundedBoxSDF(center, half_size - float2(border), max(mode - border, 0.0));
|
||||
float border_alpha = smoothstep(-aa, aa, inner);
|
||||
alpha = alpha * max(border_alpha, 0.0);
|
||||
}
|
||||
return float4(in.color.rgb, in.color.a * alpha);
|
||||
} else {
|
||||
// Plain rect: vertex color only
|
||||
return in.color;
|
||||
}
|
||||
}
|
||||
MSL;
|
||||
|
||||
Reference in New Issue
Block a user