metal: pause step 3b pending sx-side fixes (filed 0024-0030)

Step 3b code is wired across UIRenderer + GlyphCache + UIPipeline +
chess game (gpu_mode = .metal on iOS, MetalGPU bound via the GPU
protocol). macOS GL chess, iOS-sim GLES chess, and iOS-sim Metal
triangle (63-metal-clear.sx) all still render.

iOS-sim Metal chess crashes inside replaceRegion while uploading the
1MB font atlas. Bisecting that crash exposed several sx-language
issues; in particular, tracers added mid-bisect (NSLog calls inside
if/else branch bodies) produced no output, blocking further investigation.

Filing each finding as examples/issue-NNNN.sx rather than working
around piecemeal:

Bugs:
- 0024 NSLog/foreign-call inside if/else body not producing output
- 0025 C-ABI param coercion incomplete for composites >16B
       (covers both the direct-call abiCoerceParamType TODO and the
        call_indirect path, which applies no C-ABI coercion at all)
- 0026 replaceRegion 1MB upload crash (likely downstream of 0025)

Features needed for step 4 + cleanup:
- 0027 Obj-C block bridge (^{...}) for animateWithDuration:
- 0028 Optional protocol box (?GPU = null) replaces T = ---; has_T: bool
- 0029 destroy_texture/buffer/shader on GPU protocol
- 0030 extern cross-file globals

Library-side: renderer.sx + glyph_cache.sx + pipeline.sx gain a
`gpu: GPU = ---; has_gpu: bool` field pair + branches that route every
GL touchpoint through the protocol when has_gpu. glyph_cache.init
saves/restores those fields around its memset. pipeline.set_gpu()
propagates to renderer + font. Renderer's MSL shader source added as
UI_MSL_SRC using packed_float2/packed_float4 to keep the 12-float
interleaved vertex layout tight (48 bytes).

metal.sx: dual-phase init (init(null, 0, 0) for eager device+queue,
re-init with the layer once UIKit installs the SxMetalView).
setStorageMode:.shared on every texture descriptor to ensure CPU-
writable atlas pixels on Apple Silicon iOS-sim.

Regression suite: 68 passed, 0 failed. The WASM chess build is
currently broken in the step 3b state (silent compiler crash); this is
documented in CHECKPOINT.md and is likely fallout from one of the
filed issues (probably 0028 — the verbose protocol-box pattern).
Step 3b resumes after 0024-0030 land.
This commit is contained in:
agra
2026-05-17 21:17:17 +03:00
parent a938c4f900
commit a1647eab9b
11 changed files with 783 additions and 97 deletions

View File

@@ -2,6 +2,8 @@
#import "modules/compiler.sx";
#import "modules/opengl.sx";
#import "modules/math";
#import "modules/gpu/types.sx";
#import "modules/gpu/api.sx";
#import "modules/ui/types.sx";
#import "modules/ui/render.sx";
#import "modules/ui/glyph_cache.sx";
@@ -13,62 +15,81 @@ UI_VERTEX_BYTES :s64: 48;
MAX_UI_VERTICES :s64: 16384;
UIRenderer :: struct {
// GL-side handles. Used when `has_gpu` is false (every non-iOS target today).
vao: u32;
vbo: u32;
shader: u32;
proj_loc: s32;
tex_loc: s32;
// CPU-side vertex scratch buffer — same for both backends.
vertices: [*]f32;
vertex_count: s64;
screen_width: f32;
screen_height: f32;
dpi_scale: f32;
white_texture: u32;
white_texture: u32; // GL name OR TextureHandle (both are u32-shaped)
current_texture: u32;
draw_calls: s64;
init :: (self: *UIRenderer) {
// Create shader (ES for WASM/WebGL2 + iOS GLES3, Core for desktop GL 3.3)
inline if OS == .wasm or OS == .ios {
self.shader = create_program(UI_VERT_SRC_ES, UI_FRAG_SRC_ES);
} else {
self.shader = create_program(UI_VERT_SRC_CORE, UI_FRAG_SRC_CORE);
}
self.proj_loc = glGetUniformLocation(self.shader, "uProj");
self.tex_loc = glGetUniformLocation(self.shader, "uTex");
// GPU protocol backend. When `has_gpu`, the renderer routes shader /
// buffer / texture / draw calls through `gpu` instead of raw GL. The
// chess game sets this on iOS to a boxed `*MetalGPU`.
gpu: GPU = ---;
has_gpu: bool = false;
mtl_shader: ShaderHandle = 0;
mtl_vbuf: BufferHandle = 0;
// Allocate vertex buffer (CPU side)
init :: (self: *UIRenderer) {
// Allocate vertex scratch (CPU side) — same for both backends.
buf_size := MAX_UI_VERTICES * UI_VERTEX_BYTES;
self.vertices = xx context.allocator.alloc(buf_size);
memset(self.vertices, 0, buf_size);
self.vertex_count = 0;
// Create VAO/VBO
glGenVertexArrays(1, @self.vao);
glGenBuffers(1, @self.vbo);
glBindVertexArray(self.vao);
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
glBufferData(GL_ARRAY_BUFFER, xx buf_size, null, GL_DYNAMIC_DRAW);
// pos (2 floats)
glVertexAttribPointer(0, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 0);
glEnableVertexAttribArray(0);
// uv (2 floats)
glVertexAttribPointer(1, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 8);
glEnableVertexAttribArray(1);
// color (4 floats)
glVertexAttribPointer(2, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 16);
glEnableVertexAttribArray(2);
// params: corner_radius, border_width, rect_w, rect_h
glVertexAttribPointer(3, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 32);
glEnableVertexAttribArray(3);
glBindVertexArray(0);
self.dpi_scale = 1.0;
// 1x1 white texture for solid rects
self.white_texture = create_white_texture();
// Create the backend-specific GPU resources. init() reads `has_gpu` here,
// so the Metal path is only taken if the flag (and `gpu`) were set before
// init() was called; otherwise the GL objects are created.
if self.has_gpu {
// ── Metal backend (via GPU protocol) ───────────────────────
// The whole MSL source goes in the first argument and the second is
// empty — presumably because one library carries both vmain and fmain;
// TODO confirm against GPU.create_shader.
self.mtl_shader = self.gpu.create_shader(UI_MSL_SRC, "");
// GPU vertex buffer sized to the full CPU scratch buffer.
self.mtl_vbuf = self.gpu.create_buffer(buf_size);
// 1x1 opaque white RGBA texel, used as the texture for solid rects.
white_px : [4]u8 = .[255, 255, 255, 255];
self.white_texture = self.gpu.create_texture(1, 1, .rgba8, xx @white_px[0]);
} else {
// ── GL backend ─────────────────────────────────────────────
// Create shader (ES for WASM/WebGL2 + iOS GLES3, Core for desktop GL 3.3)
inline if OS == .wasm or OS == .ios {
self.shader = create_program(UI_VERT_SRC_ES, UI_FRAG_SRC_ES);
} else {
self.shader = create_program(UI_VERT_SRC_CORE, UI_FRAG_SRC_CORE);
}
// Cache uniform locations once; begin() uses them every frame.
self.proj_loc = glGetUniformLocation(self.shader, "uProj");
self.tex_loc = glGetUniformLocation(self.shader, "uTex");
// Create VAO/VBO
glGenVertexArrays(1, @self.vao);
glGenBuffers(1, @self.vbo);
glBindVertexArray(self.vao);
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
// Reserve storage only (null data); flush() uploads the real vertices.
glBufferData(GL_ARRAY_BUFFER, xx buf_size, null, GL_DYNAMIC_DRAW);
// Interleaved attributes: stride UI_VERTEX_BYTES, byte offsets 0 / 8 / 16 / 32.
// pos (2 floats)
glVertexAttribPointer(0, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 0);
glEnableVertexAttribArray(0);
// uv (2 floats)
glVertexAttribPointer(1, 2, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 8);
glEnableVertexAttribArray(1);
// color (4 floats)
glVertexAttribPointer(2, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 16);
glEnableVertexAttribArray(2);
// params: corner_radius, border_width, rect_w, rect_h
glVertexAttribPointer(3, 4, GL_FLOAT, 0, xx UI_VERTEX_BYTES, xx 32);
glEnableVertexAttribArray(3);
glBindVertexArray(0);
// 1x1 white texture for solid rects
self.white_texture = create_white_texture();
}
}
begin :: (self: *UIRenderer, width: f32, height: f32, font_texture: u32) {
@@ -78,15 +99,26 @@ UIRenderer :: struct {
self.current_texture = font_texture;
self.draw_calls = 0;
// Set up GL state once for the entire frame
glUseProgram(self.shader);
proj := Mat4.ortho(0.0, width, height, 0.0, -1.0, 1.0);
glUniformMatrix4fv(self.proj_loc, 1, 0, proj.data);
glUniform1i(self.tex_loc, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, font_texture);
glBindVertexArray(self.vao);
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
// Bind the per-frame state once; flush() afterwards only re-binds the
// texture and uploads vertices.
if self.has_gpu {
// Pipeline state + vertex buffer + projection + initial texture.
// Metal blend mode + scissor-cleared defaults are baked into
// the pipeline state, so no per-frame glEnable/glDisable.
self.gpu.set_shader(self.mtl_shader);
self.gpu.set_vertex_buffer(self.mtl_vbuf);
// Constant slot 1 with 64 bytes: the projection matrix, matching the
// `constant float4x4& proj [[buffer(1)]]` parameter of vmain in UI_MSL_SRC.
self.gpu.set_vertex_constants(1, xx proj.data, 64);
// Texture slot 0 matches `[[texture(0)]]` in fmain.
self.gpu.set_texture(0, font_texture);
} else {
// GL: bind everything for the frame.
glUseProgram(self.shader);
glUniformMatrix4fv(self.proj_loc, 1, 0, proj.data);
// Sampler reads texture unit 0, which is activated and bound just below.
glUniform1i(self.tex_loc, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, font_texture);
glBindVertexArray(self.vao);
glBindBuffer(GL_ARRAY_BUFFER, self.vbo);
}
}
bind_texture :: (self: *UIRenderer, tex: u32) {
@@ -202,18 +234,33 @@ UIRenderer :: struct {
}
case .clip_push: {
self.flush();
glEnable(GL_SCISSOR_TEST);
dpi := self.dpi_scale;
glScissor(
xx (node.frame.origin.x * dpi),
xx ((self.screen_height - node.frame.origin.y - node.frame.size.height) * dpi),
xx (node.frame.size.width * dpi),
xx (node.frame.size.height * dpi)
);
// Apply the clip rectangle for this node, scaled from layout units to
// pixels by `dpi`. The two backends differ only in Y origin.
if self.has_gpu {
// Metal: pixel coords, top-left origin (no Y flip).
// NOTE(review): Metal requires the scissor rect to lie inside the
// current render target; nothing here clamps the frame, so confirm
// that set_scissor (or layout) guarantees in-bounds rectangles.
self.gpu.set_scissor(
xx (node.frame.origin.x * dpi),
xx (node.frame.origin.y * dpi),
xx (node.frame.size.width * dpi),
xx (node.frame.size.height * dpi),
);
} else {
// GL: pixel coords, bottom-left origin — flip Y.
glEnable(GL_SCISSOR_TEST);
glScissor(
xx (node.frame.origin.x * dpi),
xx ((self.screen_height - node.frame.origin.y - node.frame.size.height) * dpi),
xx (node.frame.size.width * dpi),
xx (node.frame.size.height * dpi)
);
}
}
case .clip_pop: {
self.flush();
glDisable(GL_SCISSOR_TEST);
if self.has_gpu {
self.gpu.disable_scissor();
} else {
glDisable(GL_SCISSOR_TEST);
}
}
case .opacity_push: {}
case .opacity_pop: {}
@@ -225,13 +272,22 @@ UIRenderer :: struct {
flush :: (self: *UIRenderer) {
if self.vertex_count == 0 { return; }
// Only bind the current texture (program, projection, VAO already bound in begin())
glBindTexture(GL_TEXTURE_2D, self.current_texture);
upload_size : s64 = self.vertex_count * UI_VERTEX_BYTES;
// Use glBufferData to orphan the old buffer and avoid GPU sync stalls
glBufferData(GL_ARRAY_BUFFER, xx upload_size, self.vertices, GL_DYNAMIC_DRAW);
glDrawArrays(GL_TRIANGLES, 0, xx self.vertex_count);
// Upload the pending vertices and issue one draw call on the active backend.
if self.has_gpu {
// Mirror the GL path: bind current texture before drawing.
// current_texture may have changed since the last flush.
self.gpu.set_texture(0, self.current_texture);
// NOTE(review): flush() can run several times per frame (clip push/pop
// also flush) and every call writes into the same mtl_vbuf from offset 0.
// The GL path relies on glBufferData orphaning; if update_buffer writes
// in place, earlier draws encoded this frame may read overwritten
// vertices once the GPU executes them — confirm against MetalGPU.update_buffer.
self.gpu.update_buffer(self.mtl_vbuf, xx self.vertices, upload_size);
self.gpu.draw_triangles(0, xx self.vertex_count);
} else {
// Only re-bind the current texture (program, projection, VAO
// already bound in begin()). glBufferData orphans the old buffer
// to avoid GPU sync stalls.
glBindTexture(GL_TEXTURE_2D, self.current_texture);
glBufferData(GL_ARRAY_BUFFER, xx upload_size, self.vertices, GL_DYNAMIC_DRAW);
glDrawArrays(GL_TRIANGLES, 0, xx self.vertex_count);
}
self.vertex_count = 0;
self.draw_calls += 1;
@@ -458,3 +514,87 @@ void main() {
}
}
GLSL;
// --- Metal (MSL) — single library with vmain/fmain entry points ---
//
// `packed_float2` / `packed_float4` keep the 12-float interleaved vertex
// layout (pos2 / uv2 / color4 / params4 = 48 bytes) at 4-byte alignment
// with no implicit padding, so it matches the CPU-side stride exactly.
// MSL's default `float4` is 16-byte aligned and can pad or misalign an
// interleaved struct (see examples/63-metal-clear.sx for the gotcha).
//
// Uniform passing: GL uses `glUniformMatrix4fv("uProj", proj)`; Metal
// receives the projection via `setVertexBytes:length:atIndex:1` (slot 0
// is the vertex buffer). Texture binding goes through
// `setFragmentTexture:atIndex:0`.
UI_MSL_SRC :: #string MSL
#include <metal_stdlib>
using namespace metal;
struct UIVertex {
packed_float2 pos;
packed_float2 uv;
packed_float4 color;
packed_float4 params;
};
struct VOut {
float4 position [[position]];
float2 uv;
float4 color;
float4 params;
};
vertex VOut vmain(uint vid [[vertex_id]],
constant UIVertex* verts [[buffer(0)]],
constant float4x4& proj [[buffer(1)]]) {
UIVertex v = verts[vid];
VOut o;
o.position = proj * float4(v.pos, 0.0, 1.0);
o.uv = float2(v.uv);
o.color = float4(v.color);
o.params = float4(v.params);
return o;
}
static float roundedBoxSDF(float2 center, float2 half_size, float radius) {
float2 q = abs(center) - half_size + float2(radius);
return length(max(q, float2(0.0))) + min(max(q.x, q.y), 0.0) - radius;
}
fragment float4 fmain(VOut in [[stage_in]],
texture2d<float> tex [[texture(0)]]) {
constexpr sampler s(coord::normalized, address::clamp_to_edge, filter::linear);
float mode = in.params.x;
float border = in.params.y;
float2 rectSize = in.params.zw;
if (mode < -1.5) {
// Image mode (mode == -2.0): sample texture
return tex.sample(s, in.uv) * in.color;
} else if (mode < 0.0) {
// Text mode (mode == -1.0): sample glyph atlas .r as alpha
float alpha = tex.sample(s, in.uv).r;
float ew = fwidth(alpha) * 0.7;
alpha = smoothstep(0.5 - ew, 0.5 + ew, alpha);
return float4(in.color.rgb, in.color.a * pow(alpha, 0.9));
} else if (mode > 0.0 || border > 0.0) {
// Rounded rect: SDF alpha, vertex color only
float2 half_size = rectSize * 0.5;
float2 center = (in.uv - float2(0.5)) * rectSize;
float dist = roundedBoxSDF(center, half_size, mode);
float aa = fwidth(dist);
float alpha = 1.0 - smoothstep(-aa, aa, dist);
if (border > 0.0) {
float inner = roundedBoxSDF(center, half_size - float2(border), max(mode - border, 0.0));
float border_alpha = smoothstep(-aa, aa, inner);
alpha = alpha * max(border_alpha, 0.0);
}
return float4(in.color.rgb, in.color.a * alpha);
} else {
// Plain rect: vertex color only
return in.color;
}
}
MSL;