diff --git a/library/modules/gpu/api.sx b/library/modules/gpu/api.sx
index ce7d82e..c25bb17 100644
--- a/library/modules/gpu/api.sx
+++ b/library/modules/gpu/api.sx
@@ -28,6 +28,13 @@ GPU :: protocol {
     create_shader :: (vsrc: string, fsrc: string) -> ShaderHandle;
     create_buffer :: (size_bytes: s64) -> BufferHandle;
     update_buffer :: (buf: BufferHandle, data: *void, size_bytes: s64);
+    // Sub-buffer write at a byte offset. Required for Metal where re-using
+    // the same buffer slice across multiple draws in a single command
+    // encoder is a race: the GPU executes draws asynchronously and reads
+    // shared-storage buffer contents at execution time, so the LAST writer
+    // wins if every flush targets offset 0. Renderers that issue more than
+    // one draw per frame must advance their write offset between flushes.
+    update_buffer_at :: (buf: BufferHandle, data: *void, size_bytes: s64, byte_offset: s64);
     create_texture :: (w: s32, h: s32, format: TextureFormat, pixels: *void) -> TextureHandle;
     update_texture_region :: (tex: TextureHandle, x: s32, y: s32, w: s32, h: s32, pixels: *void);
 
diff --git a/library/modules/gpu/metal.sx b/library/modules/gpu/metal.sx
index cfe8b6d..407c32b 100644
--- a/library/modules/gpu/metal.sx
+++ b/library/modules/gpu/metal.sx
@@ -149,6 +149,12 @@ impl GPU for MetalGPU {
         }
     }
 
+    update_buffer_at :: (self: *MetalGPU, buf: BufferHandle, data: *void, size_bytes: s64, byte_offset: s64) {
+        inline if OS == .ios {
+            metal_update_buffer_at_ios(self, buf, data, size_bytes, byte_offset);
+        }
+    }
+
     create_texture :: (self: *MetalGPU, w: s32, h: s32, format: TextureFormat, pixels: *void) -> TextureHandle {
         inline if OS != .ios { return 0; }
         metal_create_texture_ios(self, w, h, format, pixels);
@@ -445,6 +451,34 @@ metal_update_buffer_ios :: (self: *MetalGPU, handle: u32, data: *void, size_byte
     memcpy(dst, data, size_bytes);
 }
 
+// Copies `size_bytes` bytes from `data` into the Metal buffer behind
+// `handle`, starting `byte_offset` bytes into the buffer's contents.
+// Silently does nothing when the handle does not resolve, `data` is null,
+// the size is non-positive, the offset is negative, or the write would
+// not fit inside the buffer.
+metal_update_buffer_at_ios :: (self: *MetalGPU, handle: u32, data: *void, size_bytes: s64, byte_offset: s64) {
+    inline if OS != .ios { return; }
+    buf := metal_lookup_buffer(self, handle);
+    if buf == null { return; }
+    if data == null { return; }
+    if size_bytes <= 0 { return; }
+    if byte_offset < 0 { return; }
+
+    // memcpy does no bounds checking, so reject writes that would run past
+    // the end of the buffer. Comparing against the remaining space avoids
+    // overflowing `byte_offset + size_bytes`.
+    msg_len : (*void, *void) -> s64 = xx objc_msgSend;
+    buf_len := msg_len(buf, sel_registerName("length".ptr));
+    if byte_offset > buf_len { return; }
+    if size_bytes > buf_len - byte_offset { return; }
+
+    msg_o : (*void, *void) -> *void = xx objc_msgSend;
+    base := msg_o(buf, sel_registerName("contents".ptr));
+    if base == null { return; }
+    dst : [*]u8 = xx base;
+    memcpy(xx @dst[byte_offset], data, size_bytes);
+}
+
 metal_lookup_buffer :: (self: *MetalGPU, handle: u32) -> *void {
     inline if OS != .ios { return null; }
     if handle == 0 { return null; }
diff --git a/library/modules/ui/renderer.sx b/library/modules/ui/renderer.sx
index 9c7646d..ce98b53 100755
--- a/library/modules/ui/renderer.sx
+++ b/library/modules/ui/renderer.sx
@@ -39,6 +39,14 @@ UIRenderer :: struct {
     has_gpu: bool = false;
     mtl_shader: ShaderHandle = 0;
     mtl_vbuf: BufferHandle = 0;
+    // Per-frame byte offset into the Metal vertex buffer. Each flush writes
+    // to a fresh slice so concurrent in-flight draws don't trample each
+    // other's data — Metal's shared-storage buffer is read at GPU execution
+    // time, not at draw-call submission, so re-using offset 0 across flushes
+    // would let the last writer win and earlier batches would render as
+    // whatever was uploaded last. Reset to 0 in `begin()`.
+    mtl_buf_offset: s64 = 0;
+    mtl_buf_capacity: s64 = 0;
 
     init :: (self: *UIRenderer) {
         // Allocate vertex scratch (CPU side) — same for both backends.
@@ -50,8 +58,14 @@ UIRenderer :: struct {
 
         if self.has_gpu {
             // ── Metal backend (via GPU protocol) ───────────────────────
+            // Oversize the GPU buffer enough to hold many sub-batches per
+            // frame without wrapping. With per-flush offset advance, each
+            // draw reads from its own slice and can outlive earlier in-
+            // flight draws without corruption.
+            metal_buf_size := buf_size * 4;
             self.mtl_shader = self.gpu.create_shader(UI_MSL_SRC, "");
-            self.mtl_vbuf = self.gpu.create_buffer(buf_size);
+            self.mtl_vbuf = self.gpu.create_buffer(metal_buf_size);
+            self.mtl_buf_capacity = metal_buf_size;
             white_px : [4]u8 = .[255, 255, 255, 255];
             self.white_texture = self.gpu.create_texture(1, 1, .rgba8, xx @white_px[0]);
         } else {
@@ -102,6 +116,8 @@ UIRenderer :: struct {
         proj := Mat4.ortho(0.0, width, height, 0.0, -1.0, 1.0);
 
         if self.has_gpu {
+            // Reset the per-frame ring offset; this frame's flushes start at 0.
+            self.mtl_buf_offset = 0;
             // Pipeline state + vertex buffer + projection + initial texture.
             // Metal blend mode + scissor-cleared defaults are baked into
             // the pipeline state, so no per-frame glEnable/glDisable.
@@ -278,8 +294,30 @@ UIRenderer :: struct {
             // Mirror the GL path: bind current texture before drawing.
             // current_texture may have changed since the last flush.
             self.gpu.set_texture(0, self.current_texture);
-            self.gpu.update_buffer(self.mtl_vbuf, xx self.vertices, upload_size);
-            self.gpu.draw_triangles(0, xx self.vertex_count);
+
+            // Write this batch to a fresh slice of the GPU buffer and draw
+            // it from there. Re-using offset 0 would race against earlier
+            // still-in-flight draws (see `mtl_buf_offset` comment in the
+            // struct).
+            if self.mtl_buf_offset + upload_size > self.mtl_buf_capacity {
+                // Frame overflowed the GPU buffer; wrap to 0. Previous in-
+                // flight batches from this frame will likely render wrong,
+                // but the alternative (skipping the draw) would render
+                // even less. Practical UIs should never hit this.
+                self.mtl_buf_offset = 0;
+            }
+            byte_off := self.mtl_buf_offset;
+            self.gpu.update_buffer_at(self.mtl_vbuf, xx self.vertices, upload_size, byte_off);
+            vertex_off : s32 = xx (byte_off / UI_VERTEX_BYTES);
+            self.gpu.draw_triangles(vertex_off, xx self.vertex_count);
+            self.mtl_buf_offset = byte_off + upload_size;
+            // Start the next slice on a whole-vertex boundary. The draw
+            // addresses its slice by vertex index (byte_off /
+            // UI_VERTEX_BYTES), so an offset that is not a multiple of the
+            // vertex size would truncate and make the draw read bytes
+            // that precede the batch just uploaded.
+            rem := self.mtl_buf_offset % UI_VERTEX_BYTES;
+            if rem != 0 { self.mtl_buf_offset = self.mtl_buf_offset + (UI_VERTEX_BYTES - rem); }
         } else {
             // Only re-bind the current texture (program, projection, VAO
             // already bound in begin()). glBufferData orphans the old buffer