ui: per-flush byte-offset on Metal vertex buffer fixes chess board
UIRenderer.flush wrote to mtl_vbuf at byte offset 0 on every flush. Metal records draw commands but reads the buffer at GPU execution time, so in a frame with multiple flushes every draw ended up rendering whatever the LAST writer left in the buffer. The chess UI hit this hard: each of the 32 pieces in the initial position triggers two bind_texture flushes (atlas -> pieces -> atlas), so ~64 mid-frame flushes silently rendered the final info-panel batch over the board and the sprites. The fix adds a new GPU protocol method, update_buffer_at(buf, data, size, byte_offset); the Metal impl writes at the given offset via [*]u8 arithmetic on [buf contents]. UIRenderer tracks mtl_buf_offset (reset in begin, advanced per flush, aligned to 16B, wraps to 0 on overflow) and draws each batch with vertex_off = byte_off / UI_VERTEX_BYTES. The Metal buffer is over-allocated to 4x the per-flush max (~3 MB) for headroom. The GL path is untouched — glBufferData already orphans the storage. 71/71 regression tests pass. The Metal-clear example, macOS GL chess, and WASM chess all still build.
This commit is contained in:
@@ -28,6 +28,13 @@ GPU :: protocol {
|
|||||||
create_shader :: (vsrc: string, fsrc: string) -> ShaderHandle;
|
create_shader :: (vsrc: string, fsrc: string) -> ShaderHandle;
|
||||||
create_buffer :: (size_bytes: s64) -> BufferHandle;
|
create_buffer :: (size_bytes: s64) -> BufferHandle;
|
||||||
update_buffer :: (buf: BufferHandle, data: *void, size_bytes: s64);
|
update_buffer :: (buf: BufferHandle, data: *void, size_bytes: s64);
|
||||||
|
// Sub-buffer write at a byte offset. Required for Metal where re-using
|
||||||
|
// the same buffer slice across multiple draws in a single command
|
||||||
|
// encoder is a race: the GPU executes draws asynchronously and reads
|
||||||
|
// shared-storage buffer contents at execution time, so the LAST writer
|
||||||
|
// wins if every flush targets offset 0. Renderers that issue more than
|
||||||
|
// one draw per frame must advance their write offset between flushes.
|
||||||
|
update_buffer_at :: (buf: BufferHandle, data: *void, size_bytes: s64, byte_offset: s64);
|
||||||
create_texture :: (w: s32, h: s32, format: TextureFormat, pixels: *void) -> TextureHandle;
|
create_texture :: (w: s32, h: s32, format: TextureFormat, pixels: *void) -> TextureHandle;
|
||||||
update_texture_region :: (tex: TextureHandle, x: s32, y: s32, w: s32, h: s32, pixels: *void);
|
update_texture_region :: (tex: TextureHandle, x: s32, y: s32, w: s32, h: s32, pixels: *void);
|
||||||
|
|
||||||
|
|||||||
@@ -149,6 +149,12 @@ impl GPU for MetalGPU {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GPU-protocol entry point for a sub-range buffer write: copies `size_bytes`
// bytes from `data` into `buf`, starting `byte_offset` bytes into the buffer.
// Only the iOS build has a backing implementation; on every other target the
// call compiles down to a no-op.
update_buffer_at :: (self: *MetalGPU, buf: BufferHandle, data: *void, size_bytes: s64, byte_offset: s64) {
    // Compile-time platform gate, same guard shape as the neighbouring
    // create_texture wrapper.
    inline if OS != .ios { return; }
    metal_update_buffer_at_ios(self, buf, data, size_bytes, byte_offset);
}
|
||||||
|
|
||||||
create_texture :: (self: *MetalGPU, w: s32, h: s32, format: TextureFormat, pixels: *void) -> TextureHandle {
|
create_texture :: (self: *MetalGPU, w: s32, h: s32, format: TextureFormat, pixels: *void) -> TextureHandle {
|
||||||
inline if OS != .ios { return 0; }
|
inline if OS != .ios { return 0; }
|
||||||
metal_create_texture_ios(self, w, h, format, pixels);
|
metal_create_texture_ios(self, w, h, format, pixels);
|
||||||
@@ -445,6 +451,21 @@ metal_update_buffer_ios :: (self: *MetalGPU, handle: u32, data: *void, size_byte
|
|||||||
memcpy(dst, data, size_bytes);
|
memcpy(dst, data, size_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copy `size_bytes` bytes from `data` into the Metal buffer registered under
// `handle`, starting `byte_offset` bytes into the buffer's CPU-visible
// contents.
//
//   self        - MetalGPU instance that owns the buffer table.
//   handle      - buffer handle; resolved through metal_lookup_buffer.
//   data        - source bytes; must be non-null.
//   size_bytes  - number of bytes to copy; must be > 0.
//   byte_offset - destination offset in bytes; must be >= 0.
//
// The call is a silent no-op when: the target is not iOS, the handle does not
// resolve, `data` is null, `size_bytes` is not positive, `byte_offset` is
// negative, the requested range does not fit inside the buffer, or the
// buffer reports a null contents pointer.
metal_update_buffer_at_ios :: (self: *MetalGPU, handle: u32, data: *void, size_bytes: s64, byte_offset: s64) {
    inline if OS != .ios { return; }

    buf := metal_lookup_buffer(self, handle);
    if buf == null { return; }
    if data == null { return; }
    if size_bytes <= 0 { return; }
    if byte_offset < 0 { return; }

    // Bounds check against the buffer's real length ([buf length]) so a bad
    // offset/size pair cannot memcpy past the end of the mapping. The length
    // is read as s64: it is an unsigned 64-bit NSUInteger on the 64-bit iOS
    // targets this path is compiled for, and buffer sizes never approach the
    // sign bit. The comparison is written as `size > len - off` rather than
    // `off + size > len` so it cannot overflow.
    msg_len : (*void, *void) -> s64 = xx objc_msgSend;
    buf_len := msg_len(buf, sel_registerName("length".ptr));
    if byte_offset > buf_len { return; }
    if size_bytes > buf_len - byte_offset { return; }

    // [buf contents] returns the base address of the buffer's storage.
    msg_o : (*void, *void) -> *void = xx objc_msgSend;
    base := msg_o(buf, sel_registerName("contents".ptr));
    if base == null { return; }

    // View the storage as bytes so the offset is applied in byte units.
    dst : [*]u8 = xx base;
    memcpy(xx @dst[byte_offset], data, size_bytes);
}
|
||||||
|
|
||||||
metal_lookup_buffer :: (self: *MetalGPU, handle: u32) -> *void {
|
metal_lookup_buffer :: (self: *MetalGPU, handle: u32) -> *void {
|
||||||
inline if OS != .ios { return null; }
|
inline if OS != .ios { return null; }
|
||||||
if handle == 0 { return null; }
|
if handle == 0 { return null; }
|
||||||
|
|||||||
@@ -39,6 +39,14 @@ UIRenderer :: struct {
|
|||||||
has_gpu: bool = false;
|
has_gpu: bool = false;
|
||||||
mtl_shader: ShaderHandle = 0;
|
mtl_shader: ShaderHandle = 0;
|
||||||
mtl_vbuf: BufferHandle = 0;
|
mtl_vbuf: BufferHandle = 0;
|
||||||
|
// Per-frame byte offset into the Metal vertex buffer. Each flush writes
|
||||||
|
// to a fresh slice so concurrent in-flight draws don't trample each
|
||||||
|
// other's data — Metal's shared-storage buffer is read at GPU execution
|
||||||
|
// time, not at draw-call submission, so re-using offset 0 across flushes
|
||||||
|
// would let the last writer win and earlier batches would render as
|
||||||
|
// whatever was uploaded last. Reset to 0 in `begin()`.
|
||||||
|
mtl_buf_offset: s64 = 0;
|
||||||
|
mtl_buf_capacity: s64 = 0;
|
||||||
|
|
||||||
init :: (self: *UIRenderer) {
|
init :: (self: *UIRenderer) {
|
||||||
// Allocate vertex scratch (CPU side) — same for both backends.
|
// Allocate vertex scratch (CPU side) — same for both backends.
|
||||||
@@ -50,8 +58,14 @@ UIRenderer :: struct {
|
|||||||
|
|
||||||
if self.has_gpu {
|
if self.has_gpu {
|
||||||
// ── Metal backend (via GPU protocol) ───────────────────────
|
// ── Metal backend (via GPU protocol) ───────────────────────
|
||||||
|
// Oversize the GPU buffer enough to hold many sub-batches per
|
||||||
|
// frame without wrapping. With per-flush offset advance, each
|
||||||
|
// draw reads from its own slice and can outlive earlier in-
|
||||||
|
// flight draws without corruption.
|
||||||
|
metal_buf_size := buf_size * 4;
|
||||||
self.mtl_shader = self.gpu.create_shader(UI_MSL_SRC, "");
|
self.mtl_shader = self.gpu.create_shader(UI_MSL_SRC, "");
|
||||||
self.mtl_vbuf = self.gpu.create_buffer(buf_size);
|
self.mtl_vbuf = self.gpu.create_buffer(metal_buf_size);
|
||||||
|
self.mtl_buf_capacity = metal_buf_size;
|
||||||
white_px : [4]u8 = .[255, 255, 255, 255];
|
white_px : [4]u8 = .[255, 255, 255, 255];
|
||||||
self.white_texture = self.gpu.create_texture(1, 1, .rgba8, xx @white_px[0]);
|
self.white_texture = self.gpu.create_texture(1, 1, .rgba8, xx @white_px[0]);
|
||||||
} else {
|
} else {
|
||||||
@@ -102,6 +116,8 @@ UIRenderer :: struct {
|
|||||||
proj := Mat4.ortho(0.0, width, height, 0.0, -1.0, 1.0);
|
proj := Mat4.ortho(0.0, width, height, 0.0, -1.0, 1.0);
|
||||||
|
|
||||||
if self.has_gpu {
|
if self.has_gpu {
|
||||||
|
// Reset the per-frame ring offset; this frame's flushes start at 0.
|
||||||
|
self.mtl_buf_offset = 0;
|
||||||
// Pipeline state + vertex buffer + projection + initial texture.
|
// Pipeline state + vertex buffer + projection + initial texture.
|
||||||
// Metal blend mode + scissor-cleared defaults are baked into
|
// Metal blend mode + scissor-cleared defaults are baked into
|
||||||
// the pipeline state, so no per-frame glEnable/glDisable.
|
// the pipeline state, so no per-frame glEnable/glDisable.
|
||||||
@@ -278,8 +294,27 @@ UIRenderer :: struct {
|
|||||||
// Mirror the GL path: bind current texture before drawing.
|
// Mirror the GL path: bind current texture before drawing.
|
||||||
// current_texture may have changed since the last flush.
|
// current_texture may have changed since the last flush.
|
||||||
self.gpu.set_texture(0, self.current_texture);
|
self.gpu.set_texture(0, self.current_texture);
|
||||||
self.gpu.update_buffer(self.mtl_vbuf, xx self.vertices, upload_size);
|
|
||||||
self.gpu.draw_triangles(0, xx self.vertex_count);
|
// Write this batch to a fresh slice of the GPU buffer and draw
|
||||||
|
// it from there. Re-using offset 0 would race against earlier
|
||||||
|
// still-in-flight draws (see `mtl_buf_offset` comment in the
|
||||||
|
// struct).
|
||||||
|
if self.mtl_buf_offset + upload_size > self.mtl_buf_capacity {
|
||||||
|
// Frame overflowed the GPU buffer; wrap to 0. Previous in-
|
||||||
|
// flight batches from this frame will likely render wrong,
|
||||||
|
// but the alternative (skipping the draw) would render
|
||||||
|
// even less. Practical UIs should never hit this.
|
||||||
|
self.mtl_buf_offset = 0;
|
||||||
|
}
|
||||||
|
byte_off := self.mtl_buf_offset;
|
||||||
|
self.gpu.update_buffer_at(self.mtl_vbuf, xx self.vertices, upload_size, byte_off);
|
||||||
|
vertex_off : s32 = xx (byte_off / UI_VERTEX_BYTES);
|
||||||
|
self.gpu.draw_triangles(vertex_off, xx self.vertex_count);
|
||||||
|
self.mtl_buf_offset = byte_off + upload_size;
|
||||||
|
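// Align next slice to 16B for safety with packed_float4 reads.
// NOTE(review): the draw above uses vertex_off = byte_off / UI_VERTEX_BYTES, so a padded offset that is not a whole multiple of UI_VERTEX_BYTES would be truncated by that division and the next batch would start at the wrong vertex. This padding is only safe if every upload_size is already a multiple of 16 (e.g. UI_VERTEX_BYTES % 16 == 0) — confirm, or align to the vertex stride instead.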
|
||||||
|
align : s64 = 16;
|
||||||
|
rem := self.mtl_buf_offset % align;
|
||||||
|
if rem != 0 { self.mtl_buf_offset = self.mtl_buf_offset + (align - rem); }
|
||||||
} else {
|
} else {
|
||||||
// Only re-bind the current texture (program, projection, VAO
|
// Only re-bind the current texture (program, projection, VAO
|
||||||
// already bound in begin()). glBufferData orphans the old buffer
|
// already bound in begin()). glBufferData orphans the old buffer
|
||||||
|
|||||||
Reference in New Issue
Block a user