1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "render_backend_gles.h"
16
17 #include <algorithm>
18
19 #include <base/containers/fixed_string.h>
20 #include <core/perf/intf_performance_data_manager.h>
21 #include <render/datastore/render_data_store_render_pods.h> // NodeGraphBackbufferConfiguration...
22 #include <render/namespace.h>
23
24 #if (RENDER_PERF_ENABLED == 1)
25 #include "perf/gpu_query.h"
26 #include "perf/gpu_query_manager.h"
27 #endif
28 #include "device/gpu_resource_manager.h"
29 #include "gles/device_gles.h"
30 #include "gles/gl_functions.h"
31 #include "gles/gpu_buffer_gles.h"
32 #include "gles/gpu_image_gles.h"
33 #include "gles/gpu_program_gles.h"
34 #include "gles/gpu_query_gles.h"
35 #include "gles/gpu_sampler_gles.h"
36 #include "gles/gpu_semaphore_gles.h"
37 #include "gles/node_context_descriptor_set_manager_gles.h"
38 #include "gles/node_context_pool_manager_gles.h"
39 #include "gles/pipeline_state_object_gles.h"
40 #include "gles/render_frame_sync_gles.h"
41 #include "gles/swapchain_gles.h"
42 #include "nodecontext/render_command_list.h"
43 #include "nodecontext/render_node_graph_node_store.h" // RenderCommandFrameData
44 #include "util/log.h"
45 #include "util/render_frame_util.h"
46
47 #define IS_BIT(value, bit) ((((value) & (bit)) == (bit)) ? true : false)
48 #define IS_BIT_GL(value, bit) ((((value) & (bit)) == (bit)) ? (GLboolean)GL_TRUE : (GLboolean)GL_FALSE)
49
50 using namespace BASE_NS;
51
52 // NOTE: implement missing commands, add state caching, and clean up a bit more.
53 RENDER_BEGIN_NAMESPACE()
54 namespace Gles {
55 // Indices to colorBlendConstants
56 static constexpr uint32_t RED_INDEX = 0;
57 static constexpr uint32_t GREEN_INDEX = 1;
58 static constexpr uint32_t BLUE_INDEX = 2;
59 static constexpr uint32_t ALPHA_INDEX = 3;
60 static constexpr uint32_t CUBEMAP_LAYERS = 6;
61 struct Bind {
62 DescriptorType descriptorType { CORE_DESCRIPTOR_TYPE_MAX_ENUM };
63 struct BufferType {
64 uint32_t bufferId;
65 uint32_t offset;
66 uint32_t size;
67 };
68 struct ImageType {
69 GpuImageGLES* image;
70 uint32_t mode;
71 uint32_t mipLevel;
72 };
73 struct SamplerType {
74 uint32_t samplerId;
75 };
76 struct Resource {
77 union {
78 Bind::BufferType buffer { 0, 0, 0 };
79 Bind::ImageType image;
80 };
81 SamplerType sampler { 0 };
82 };
83 vector<Resource> resources;
84 };
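// NOTE (descriptive, inferred from the struct above): Bind is one slot of the descriptor-set
// binding cache. descriptorType selects which member of the Resource union is meaningful
// (buffer for *_BUFFER descriptors, image for image / input-attachment descriptors), while
// sampler is kept outside the union so a combined image sampler can carry both an image and a sampler.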
85 } // namespace Gles
86 namespace {
87 constexpr RenderHandleType GetRenderHandleType(const DescriptorType descriptorType)
88 {
89 if (descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
90 return RenderHandleType::GPU_SAMPLER;
91 } else if (((descriptorType >= CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
92 (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE)) ||
93 (descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
94 return RenderHandleType::GPU_IMAGE;
95 } else if ((descriptorType >= CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) &&
96 (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
97 return RenderHandleType::GPU_BUFFER;
98 }
99 return RenderHandleType::UNDEFINED;
100 }
101
102 GLenum getCubeMapTarget(GLenum type, uint32_t layer)
103 {
104 if (type == GL_TEXTURE_CUBE_MAP) {
105 constexpr GLenum layerId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
106 GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
107 GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
108 PLUGIN_ASSERT_MSG(layer < Gles::CUBEMAP_LAYERS, "Invalid cubemap index %u", layer);
109 return layerId[layer];
110 }
111 PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
112 return GL_NONE;
113 }
114
115 GLenum getTarget(GLenum type, uint32_t layer, uint32_t sampleCount)
116 {
117 if (type == GL_TEXTURE_2D) {
118 if (sampleCount > 1) {
119 return GL_TEXTURE_2D_MULTISAMPLE;
120 }
121 return GL_TEXTURE_2D;
122 }
123 if (type == GL_TEXTURE_CUBE_MAP) {
124 PLUGIN_ASSERT_MSG(sampleCount == 1, "Cubemap texture can't have MSAA");
125 return getCubeMapTarget(type, layer);
126 }
127 PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
128 return GL_NONE;
129 }
130 struct BlitArgs {
131 uint32_t mipLevel {};
132 Size3D rect0 {};
133 Size3D rect1 {};
134 uint32_t height {};
135 };
136
137 void DoBlit(const Filter filter, const BlitArgs& src, const BlitArgs& dst)
138 {
139 // Handle top-left / bottom-left origin conversion
140 GLint sy = static_cast<GLint>(src.rect0.height);
141 const GLint sh = static_cast<const GLint>(src.rect1.height);
142 const GLint sfh = static_cast<GLint>(src.height >> src.mipLevel);
143 sy = sfh - (sy + sh);
144 GLint dy = static_cast<GLint>(dst.rect0.height);
145 const GLint dh = static_cast<const GLint>(dst.rect1.height);
146 const GLint dfh = static_cast<GLint>(dst.height >> dst.mipLevel);
147 dy = dfh - (dy + dh);
148 GLenum glfilter = GL_NEAREST;
149 if (filter == CORE_FILTER_NEAREST) {
150 glfilter = GL_NEAREST;
151 } else if (filter == CORE_FILTER_LINEAR) {
152 glfilter = GL_LINEAR;
153 } else {
154 PLUGIN_ASSERT_MSG(false, "RenderCommandBlitImage Invalid filter mode");
155 }
156 glBlitFramebuffer(static_cast<GLint>(src.rect0.width), sy, static_cast<GLint>(src.rect1.width), sfh,
157 static_cast<GLint>(dst.rect0.width), dy, static_cast<GLint>(dst.rect1.width), dfh, GL_COLOR_BUFFER_BIT,
158 glfilter);
159 }
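// The Y-flip above converts from a top-left origin (render pass coordinates) to GL's bottom-left
// framebuffer origin. For example, with a 256-pixel-high mip level (sfh == 256), a source rect
// starting at y == 10 with height 100 becomes sy = 256 - (10 + 100) = 146 before glBlitFramebuffer.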
160
161 GLenum GetPrimFromTopology(PrimitiveTopology op)
162 {
163 switch (op) {
164 case CORE_PRIMITIVE_TOPOLOGY_POINT_LIST:
165 return GL_POINTS;
166 case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST:
167 return GL_LINES;
168 case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP:
169 return GL_LINE_STRIP;
170 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
171 return GL_TRIANGLES;
172 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
173 return GL_TRIANGLE_STRIP;
174 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
175 return GL_TRIANGLE_FAN;
176 #if defined(GL_ES_VERSION_3_2) || defined(GL_VERSION_3_2)
177 // The following are valid from GLES 3.2 onward
178 case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
179 return GL_LINES_ADJACENCY;
180 case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
181 return GL_LINE_STRIP_ADJACENCY;
182 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
183 return GL_TRIANGLES_ADJACENCY;
184 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
185 return GL_TRIANGLE_STRIP_ADJACENCY;
186 case CORE_PRIMITIVE_TOPOLOGY_PATCH_LIST:
187 return GL_PATCHES;
188 #endif
189 default:
190 PLUGIN_ASSERT_MSG(false, "Unsupported primitive topology");
191 break;
192 }
193 return GL_POINTS;
194 }
195
196 GLenum GetBlendOp(BlendOp func)
197 {
198 switch (func) {
199 case CORE_BLEND_OP_ADD:
200 return GL_FUNC_ADD;
201 case CORE_BLEND_OP_SUBTRACT:
202 return GL_FUNC_SUBTRACT;
203 case CORE_BLEND_OP_REVERSE_SUBTRACT:
204 return GL_FUNC_REVERSE_SUBTRACT;
205 case CORE_BLEND_OP_MIN:
206 return GL_MIN;
207 case CORE_BLEND_OP_MAX:
208 return GL_MAX;
209 default:
210 break;
211 }
212 return GL_FUNC_ADD;
213 }
214
215 GLenum GetBlendFactor(BlendFactor factor)
216 {
217 switch (factor) {
218 case CORE_BLEND_FACTOR_ZERO:
219 return GL_ZERO;
220 case CORE_BLEND_FACTOR_ONE:
221 return GL_ONE;
222 case CORE_BLEND_FACTOR_SRC_COLOR:
223 return GL_SRC_COLOR;
224 case CORE_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
225 return GL_ONE_MINUS_SRC_COLOR;
226 case CORE_BLEND_FACTOR_DST_COLOR:
227 return GL_DST_COLOR;
228 case CORE_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
229 return GL_ONE_MINUS_DST_COLOR;
230 case CORE_BLEND_FACTOR_SRC_ALPHA:
231 return GL_SRC_ALPHA;
232 case CORE_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
233 return GL_ONE_MINUS_SRC_ALPHA;
234 case CORE_BLEND_FACTOR_DST_ALPHA:
235 return GL_DST_ALPHA;
236 case CORE_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
237 return GL_ONE_MINUS_DST_ALPHA;
238 case CORE_BLEND_FACTOR_CONSTANT_COLOR:
239 return GL_CONSTANT_COLOR;
240 case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
241 return GL_ONE_MINUS_CONSTANT_COLOR;
242 case CORE_BLEND_FACTOR_CONSTANT_ALPHA:
243 return GL_CONSTANT_ALPHA;
244 case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
245 return GL_ONE_MINUS_CONSTANT_ALPHA;
246 case CORE_BLEND_FACTOR_SRC_ALPHA_SATURATE:
247 return GL_SRC_ALPHA_SATURATE;
248 // NOTE: check the GLES3.2...
249 /* following requires EXT_blend_func_extended (dual source blending) */
250 case CORE_BLEND_FACTOR_SRC1_COLOR:
251 case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
252 case CORE_BLEND_FACTOR_SRC1_ALPHA:
253 case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
254 default:
255 break;
256 }
257 return GL_ONE;
258 }
259
260 GLenum GetCompareOp(CompareOp aOp)
261 {
262 switch (aOp) {
263 case CORE_COMPARE_OP_NEVER:
264 return GL_NEVER;
265 case CORE_COMPARE_OP_LESS:
266 return GL_LESS;
267 case CORE_COMPARE_OP_EQUAL:
268 return GL_EQUAL;
269 case CORE_COMPARE_OP_LESS_OR_EQUAL:
270 return GL_LEQUAL;
271 case CORE_COMPARE_OP_GREATER:
272 return GL_GREATER;
273 case CORE_COMPARE_OP_NOT_EQUAL:
274 return GL_NOTEQUAL;
275 case CORE_COMPARE_OP_GREATER_OR_EQUAL:
276 return GL_GEQUAL;
277 case CORE_COMPARE_OP_ALWAYS:
278 return GL_ALWAYS;
279 default:
280 break;
281 }
282 return GL_ALWAYS;
283 }
284
285 GLenum GetStencilOp(StencilOp aOp)
286 {
287 switch (aOp) {
288 case CORE_STENCIL_OP_KEEP:
289 return GL_KEEP;
290 case CORE_STENCIL_OP_ZERO:
291 return GL_ZERO;
292 case CORE_STENCIL_OP_REPLACE:
293 return GL_REPLACE;
294 case CORE_STENCIL_OP_INCREMENT_AND_CLAMP:
295 return GL_INCR;
296 case CORE_STENCIL_OP_DECREMENT_AND_CLAMP:
297 return GL_DECR;
298 case CORE_STENCIL_OP_INVERT:
299 return GL_INVERT;
300 case CORE_STENCIL_OP_INCREMENT_AND_WRAP:
301 return GL_INCR_WRAP;
302 case CORE_STENCIL_OP_DECREMENT_AND_WRAP:
303 return GL_DECR_WRAP;
304 default:
305 break;
306 }
307 return GL_KEEP;
308 }
309
310 void SetState(GLenum type, bool enabled)
311 {
312 if (enabled) {
313 glEnable(type);
314 } else {
315 glDisable(type);
316 }
317 }
318
319 void SetCullMode(const GraphicsState::RasterizationState& rs)
320 {
321 SetState(GL_CULL_FACE, (rs.cullModeFlags != CORE_CULL_MODE_NONE));
322
323 switch (rs.cullModeFlags) {
324 case CORE_CULL_MODE_FRONT_BIT:
325 glCullFace(GL_FRONT);
326 break;
327 case CORE_CULL_MODE_BACK_BIT:
328 glCullFace(GL_BACK);
329 break;
330 case CORE_CULL_MODE_FRONT_AND_BACK:
331 glCullFace(GL_FRONT_AND_BACK);
332 break;
333 case CORE_CULL_MODE_NONE:
334 default:
335 break;
336 }
337 }
338
339 void SetFrontFace(const GraphicsState::RasterizationState& rs)
340 {
341 switch (rs.frontFace) {
342 case CORE_FRONT_FACE_COUNTER_CLOCKWISE:
343 glFrontFace(GL_CCW);
344 break;
345 case CORE_FRONT_FACE_CLOCKWISE:
346 glFrontFace(GL_CW);
347 break;
348 default:
349 break;
350 }
351 }
352
353 #if RENDER_HAS_GL_BACKEND
354 void SetPolygonMode(const GraphicsState::RasterizationState& rs)
355 {
356 GLenum mode;
357 switch (rs.polygonMode) {
358 default:
359 case CORE_POLYGON_MODE_FILL:
360 mode = GL_FILL;
361 break;
362 case CORE_POLYGON_MODE_LINE:
363 mode = GL_LINE;
364 break;
365 case CORE_POLYGON_MODE_POINT:
366 mode = GL_POINT;
367 break;
368 }
369 glPolygonMode(GL_FRONT_AND_BACK, mode);
370 }
371 #endif
372
373 void Invalidate(GLenum framebuffer, int32_t count, const GLenum invalidate[], const RenderPassDesc& rpd,
374 const LowlevelFramebufferGL& frameBuffer)
375 {
376 if (count > 0) {
377 if ((frameBuffer.width == rpd.renderArea.extentWidth) && (frameBuffer.height == rpd.renderArea.extentHeight)) {
378 // Invalidate the whole buffer. (attachment sizes match render area)
379 glInvalidateFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate);
380 } else {
381 // invalidate only a part of the render target..
382 // NOTE: verify that this works, we might need to flip the Y axis the same way as scissors etc.
383 const GLint X = static_cast<const GLint>(rpd.renderArea.offsetX);
384 const GLint Y = static_cast<const GLint>(rpd.renderArea.offsetY);
385 const GLsizei W = static_cast<const GLsizei>(rpd.renderArea.extentWidth);
386 const GLsizei H = static_cast<const GLsizei>(rpd.renderArea.extentHeight);
387 glInvalidateSubFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate, X, Y, W, H);
388 }
389 }
390 }
391
392 struct BlitData {
393 const GpuImagePlatformDataGL& iPlat;
394 const GpuImageDesc& imageDesc;
395 const BufferImageCopy& bufferImageCopy;
396 uintptr_t data { 0 };
397 uint64_t size { 0 };
398 uint64_t sizeOfData { 0 };
399 bool compressed { false };
400 };
401
402 void BlitArray(DeviceGLES& device_, const BlitData& bd)
403 {
404 const auto& iPlat = bd.iPlat;
405 const auto& bufferImageCopy = bd.bufferImageCopy;
406 const auto& imageSubresource = bufferImageCopy.imageSubresource;
407 const auto& imageDesc = bd.imageDesc;
408 const uint32_t mip = imageSubresource.mipLevel;
409 const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
410 // NOTE: image offset depth is ignored
411 const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
412 const Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
413 Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height),
414 Math::min(imageSize.z, bufferImageCopy.imageExtent.depth) };
415 const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
416 if (valid) {
417 uintptr_t data = bd.data;
418 const uint32_t layerCount = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
419 for (uint32_t layer = imageSubresource.baseArrayLayer; layer < layerCount; layer++) {
420 const Math::UVec3 offset3D { offset.x, offset.y, layer };
421 if (bd.compressed) {
422 device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
423 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
424 } else {
425 device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
426 iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
427 }
428 data += static_cast<ptrdiff_t>(bd.sizeOfData);
429 }
430 }
431 }
432
433 void Blit2D(DeviceGLES& device_, const BlitData& bd)
434 {
435 const auto& iPlat = bd.iPlat;
436 const auto& bufferImageCopy = bd.bufferImageCopy;
437 const auto& imageSubresource = bufferImageCopy.imageSubresource;
438 const auto& imageDesc = bd.imageDesc;
439 const uint32_t mip = imageSubresource.mipLevel;
440 const Math::UVec2 imageSize { imageDesc.width >> mip, imageDesc.height >> mip };
441 const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
442 const Math::UVec2 extent { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
443 Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height) };
444 PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == 1,
445 "RenderCommandCopyBufferImage Texture2D with baseArrayLayer!=0 && layerCount!= 1");
446 const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
447 const uintptr_t data = bd.data;
448 if (valid && bd.compressed) {
449 device_.CompressedTexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent,
450 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
451 } else if (valid) {
452 device_.TexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent, iPlat.format,
453 iPlat.dataType, reinterpret_cast<const void*>(data));
454 }
455 }
456
457 void Blit3D(DeviceGLES& device_, const BlitData& bd)
458 {
459 const auto& iPlat = bd.iPlat;
460 const auto& bufferImageCopy = bd.bufferImageCopy;
461 const auto& imageSubresource = bufferImageCopy.imageSubresource;
462 const auto& imageDesc = bd.imageDesc;
463 const uint32_t mip = imageSubresource.mipLevel;
464 const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth >> mip };
465 const Math::UVec3 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height,
466 bufferImageCopy.imageOffset.depth };
467 Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
468 Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height), Math::min(imageSize.z - offset.z, 1U) };
469 const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
470 if (valid) {
471 uintptr_t data = bd.data;
472 for (uint32_t slice = 0U; slice < imageSize.z; ++slice) {
473 const Math::UVec3 offset3D { offset.x, offset.y, slice };
474 if (bd.compressed) {
475 device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
476 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
477 } else {
478 device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
479 iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
480 }
481 // advance the source pointer by one slice
482 data += static_cast<ptrdiff_t>(bd.sizeOfData);
483 }
484 }
485 }
486
487 void BlitCube(DeviceGLES& device_, const BlitData& bd)
488 {
489 const auto& iPlat = bd.iPlat;
490 const auto& bufferImageCopy = bd.bufferImageCopy;
491 const auto& imageSubresource = bufferImageCopy.imageSubresource;
492 const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
493 const Math::UVec2 extent { bufferImageCopy.imageExtent.width, bufferImageCopy.imageExtent.height };
494 constexpr GLenum faceId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
495 GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
496 GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
497 PLUGIN_UNUSED(Gles::CUBEMAP_LAYERS);
498 PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == Gles::CUBEMAP_LAYERS,
499 "RenderCommandCopyBufferImage Cubemap with baseArrayLayer!=0 && layerCount!= 6");
500 uintptr_t data = bd.data;
501 const uint32_t lastLayer = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
502 for (uint32_t i = imageSubresource.baseArrayLayer; i < lastLayer; i++) {
503 const GLenum face = faceId[i]; // convert layer index to cube map face id.
504 if (face == 0) {
505 // reached the end of cubemap faces (see faceId)
506 // so must stop copying.
507 break;
508 }
509 if (bd.compressed) {
510 device_.CompressedTexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent,
511 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
512 } else {
513 device_.TexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent, iPlat.format,
514 iPlat.dataType, reinterpret_cast<const void*>(data));
515 }
516 data += static_cast<ptrdiff_t>(bd.sizeOfData);
517 }
518 }
519 template<bool usePixelUnpackBuffer>
520
521 BlitData SetupBlit(DeviceGLES& device_, const BufferImageCopy& bufferImageCopy, GpuBufferGLES& srcGpuBuffer,
522 const GpuImageGLES& dstGpuImage)
523 {
524 const auto& iPlat = dstGpuImage.GetPlatformData();
525 const auto& imageOffset = bufferImageCopy.imageOffset;
526 PLUGIN_UNUSED(imageOffset);
527 const auto& imageExtent = bufferImageCopy.imageExtent;
528 // size is calculated for single layer / slice
529 const uint64_t size = static_cast<uint64_t>(iPlat.bytesperpixel) *
530 static_cast<uint64_t>(bufferImageCopy.bufferImageHeight) *
531 static_cast<uint64_t>(bufferImageCopy.bufferRowLength);
532 uintptr_t data = bufferImageCopy.bufferOffset;
533 if constexpr (usePixelUnpackBuffer) {
534 const auto& plat = srcGpuBuffer.GetPlatformData();
535 device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, plat.buffer);
536 } else {
537 // Use the mapped pointer for glTexSubImage2D; this works around GL_INVALID_OPERATION on the PVR GLES
538 // simulator and a crash with ETC2 textures on NVIDIA.
539 data += reinterpret_cast<uintptr_t>(srcGpuBuffer.Map());
540 }
541 uint64_t sizeOfData = size;
542 const auto& compinfo = iPlat.compression;
543 if (compinfo.compressed) {
544 // how many blocks in width
545 const int64_t blockW = (imageExtent.width + (compinfo.blockW - 1)) / compinfo.blockW;
546 // how many blocks in height
547 const int64_t blockH = (imageExtent.height + (compinfo.blockH - 1)) / compinfo.blockH;
548 // size in bytes..
549 sizeOfData = static_cast<uint64_t>(((blockW * blockH) * compinfo.bytesperblock));
550
551 // Warn for partial copies. we do not handle those at the moment.
552 if (bufferImageCopy.bufferRowLength != 0) {
553 if (bufferImageCopy.bufferRowLength != blockW * compinfo.blockW) {
554 PLUGIN_LOG_W("Partial copies of compressed texture data are not currently supported. "
555 "Stride must match image width (with block align). "
556 "bufferImageCopy.bufferRowLength(%d) "
557 "imageExtent.width(%d) ",
558 bufferImageCopy.bufferRowLength, imageExtent.width);
559 }
560 }
561 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
562 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0);
563 } else {
564 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(bufferImageCopy.bufferRowLength));
565 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, static_cast<GLint>(bufferImageCopy.bufferImageHeight));
566 }
567 glPixelStorei(GL_UNPACK_ALIGNMENT, 1); // Make sure the align is tight.
568 return { iPlat, dstGpuImage.GetDesc(), bufferImageCopy, data, size, sizeOfData, compinfo.compressed };
569 }
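// Compressed copies above are sized per block grid rather than per pixel. As a rough example
// (assuming an ETC2 RGB8 format with 4x4 blocks and 8 bytes per block), a 100x60 region maps to
// ceil(100/4) * ceil(60/4) = 25 * 15 = 375 blocks, i.e. sizeOfData = 3000 bytes per layer/slice.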
570
571 template<bool usePixelUnpackBuffer>
572 void FinishBlit(DeviceGLES& device_, const GpuBufferGLES& srcGpuBuffer)
573 {
574 if constexpr (usePixelUnpackBuffer) {
575 device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
576 } else {
577 srcGpuBuffer.Unmap();
578 }
579 }
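// Illustrative call sequence only (not a call site from this file); the buffer-to-image copy
// command is expected to drive these helpers roughly like this:
//   const BlitData bd = SetupBlit<false>(device_, copy, srcBuffer, dstImage);
//   Blit2D(device_, bd);            // or BlitArray / Blit3D / BlitCube depending on the image type
//   FinishBlit<false>(device_, srcBuffer);
// The template argument selects between a bound GL_PIXEL_UNPACK_BUFFER and a CPU-mapped pointer.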
580
581 template<typename T, size_t N>
582 constexpr size_t Compare(const T (&a)[N], const T (&b)[N])
583 {
584 for (size_t i = 0; i < N; i++) {
585 if (a[i] != b[i])
586 return false;
587 }
588 return true;
589 }
590
591 template<typename T, size_t N>
592
593 constexpr size_t Set(T (&a)[N], const T (&b)[N])
594 {
595 for (size_t i = 0; i < N; i++) {
596 a[i] = b[i];
597 }
598 return true;
599 }
600
601 bool CompareBlendFactors(
602 const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
603 {
604 return (a.srcColorBlendFactor == b.srcColorBlendFactor) && (a.srcAlphaBlendFactor == b.srcAlphaBlendFactor) &&
605 (a.dstColorBlendFactor == b.dstColorBlendFactor) && (a.dstAlphaBlendFactor == b.dstAlphaBlendFactor);
606 }
607
608 void SetBlendFactors(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
609 {
610 a.srcColorBlendFactor = b.srcColorBlendFactor;
611 a.srcAlphaBlendFactor = b.srcAlphaBlendFactor;
612 a.dstColorBlendFactor = b.dstColorBlendFactor;
613 a.dstAlphaBlendFactor = b.dstAlphaBlendFactor;
614 }
615
616 bool CompareBlendOps(
617 const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
618 {
619 return (a.colorBlendOp == b.colorBlendOp) && (a.alphaBlendOp == b.alphaBlendOp);
620 }
621
622 void SetBlendOps(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
623 {
624 a.colorBlendOp = b.colorBlendOp;
625 a.alphaBlendOp = b.alphaBlendOp;
626 }
627
628 bool CompareStencilOp(const GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
629 {
630 return (a.failOp == b.failOp) && (a.depthFailOp == b.depthFailOp) && (a.passOp == b.passOp);
631 }
632
633 void SetStencilOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
634 {
635 a.failOp = b.failOp;
636 a.depthFailOp = b.depthFailOp;
637 a.passOp = b.passOp;
638 }
639
640 void SetStencilCompareOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
641 {
642 a.compareOp = b.compareOp;
643 a.compareMask = b.compareMask;
644 a.reference = b.reference;
645 }
646
647 #if RENDER_VALIDATION_ENABLED
648 void ValidateCopyImage(const Offset3D& offset, const Size3D& extent, uint32_t mipLevel, const GpuImageDesc& imageDesc)
649 {
650 if (mipLevel >= imageDesc.mipCount) {
651 PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage mipLevel must be less than image mipCount.");
652 }
653 if ((offset.x < 0) || (offset.y < 0) || (offset.z < 0)) {
654 PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset must not be negative.");
655 }
656 if (((offset.x + extent.width) > imageDesc.width) || ((offset.y + extent.height) > imageDesc.height) ||
657 ((offset.z + extent.depth) > imageDesc.depth)) {
658 PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset + extent does not fit in image.");
659 }
660 }
661
662 void ValidateCopyImage(const ImageCopy& imageCopy, const GpuImageDesc& srcImageDesc, const GpuImageDesc& dstImageDesc)
663 {
664 ValidateCopyImage(imageCopy.srcOffset, imageCopy.extent, imageCopy.srcSubresource.mipLevel, srcImageDesc);
665 ValidateCopyImage(imageCopy.dstOffset, imageCopy.extent, imageCopy.dstSubresource.mipLevel, dstImageDesc);
666 }
667 #endif
668
669 constexpr void ClampOffset(int32_t& srcOffset, int32_t& dstOffset, uint32_t& size)
670 {
671 if (srcOffset < 0) {
672 size += srcOffset;
673 dstOffset -= srcOffset;
674 srcOffset = 0;
675 }
676 }
677
678 constexpr void ClampOffset(Offset3D& srcOffset, Offset3D& dstOffset, Size3D& size)
679 {
680 ClampOffset(srcOffset.x, dstOffset.x, size.width);
681 ClampOffset(srcOffset.y, dstOffset.y, size.height);
682 ClampOffset(srcOffset.z, dstOffset.z, size.depth);
683 }
684
685 constexpr void ClampSize(int32_t offset, uint32_t maxSize, uint32_t& size)
686 {
687 if (size > (maxSize - offset)) {
688 size = maxSize - offset;
689 }
690 }
691
692 constexpr void ClampSize(const Offset3D& offset, const GpuImageDesc& desc, Size3D& size)
693 {
694 ClampSize(offset.x, desc.width, size.width);
695 ClampSize(offset.y, desc.height, size.height);
696 ClampSize(offset.z, desc.depth, size.depth);
697 }
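// ClampOffset/ClampSize above keep a copy region inside both images: a negative source offset is
// moved to zero while the size shrinks and the destination offset advances by the same amount
// (e.g. srcOffset -4, size 16 -> srcOffset 0, size 12, dstOffset +4), and ClampSize then trims the
// size so offset + size never exceeds the image dimensions.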
698 } // namespace
699
700 RenderBackendGLES::RenderBackendGLES(Device& device, GpuResourceManager& gpuResourceManager)
701 : RenderBackend(), device_(static_cast<DeviceGLES&>(device)), gpuResourceMgr_(gpuResourceManager)
702 {
703 #if (RENDER_PERF_ENABLED == 1)
704 validGpuQueries_ = false;
705 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
706 gpuQueryMgr_ = make_unique<GpuQueryManager>();
707 #if RENDER_HAS_GL_BACKEND
708 if (device_.GetBackendType() == DeviceBackendType::OPENGL) {
709 validGpuQueries_ = true;
710 }
711 #endif
712 #if RENDER_HAS_GLES_BACKEND
713 if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
714 // Check if GL_EXT_disjoint_timer_query is available.
715 validGpuQueries_ = device_.HasExtension("GL_EXT_disjoint_timer_query");
716 }
717 #endif
718 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
719 #endif // RENDER_PERF_ENABLED
720 #if RENDER_HAS_GLES_BACKEND
721 if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
722 multisampledRenderToTexture_ = device_.HasExtension("GL_EXT_multisampled_render_to_texture2");
723 }
724 #endif
725 PLUGIN_ASSERT(device_.IsActive());
726 PrimeCache(GraphicsState {}); // Initializes cache.
727 glGenFramebuffers(1, &blitImageSourceFbo_);
728 glGenFramebuffers(1, &blitImageDestinationFbo_);
729 #if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
730 PLUGIN_LOG_D("fbo id >: %u", blitImageSourceFbo_);
731 PLUGIN_LOG_D("fbo id >: %u", blitImageDestinationFbo_);
732 #endif
733 #if !RENDER_HAS_GLES_BACKEND
734 glEnable(GL_PROGRAM_POINT_SIZE);
735 #endif
736 }
737
738 RenderBackendGLES::~RenderBackendGLES()
739 {
740 PLUGIN_ASSERT(device_.IsActive());
741 device_.DeleteFrameBuffer(blitImageSourceFbo_);
742 device_.DeleteFrameBuffer(blitImageDestinationFbo_);
743 }
744
745 void RenderBackendGLES::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
746 {
747 if (!backBufferConfig.swapchainData.empty()) {
748 if (device_.HasSwapchain()) {
749 #if (RENDER_PERF_ENABLED == 1)
750 commonCpuTimers_.present.Begin();
751 #endif
752 for (const auto& swapchainData : backBufferConfig.swapchainData) {
753 #if (RENDER_DEV_ENABLED == 1)
754 if (swapchainData.config.gpuSemaphoreHandle) {
755 // NOTE: not implemented
756 PLUGIN_LOG_E("NodeGraphBackBufferConfiguration semaphore not signaled");
757 }
758 #endif
759 const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapchainData.handle));
760 if (swp) {
761 #if RENDER_GL_FLIP_Y_SWAPCHAIN
762 // Blit and flip our swapchain frame to backbuffer..
763 const auto& sdesc = swp->GetDesc();
764 if (scissorEnabled_) {
765 glDisable(GL_SCISSOR_TEST);
766 scissorEnabled_ = false;
767 }
768 const auto& platSwapchain = swp->GetPlatformData();
769 device_.BindReadFrameBuffer(platSwapchain.fbos[presentationInfo_.swapchainImageIndex]);
770 device_.BindWriteFrameBuffer(0); // FBO 0 is the surface bound to current context..
771 glBlitFramebuffer(0, 0, (GLint)sdesc.width, (GLint)sdesc.height, 0, (GLint)sdesc.height,
772 (GLint)sdesc.width, 0, GL_COLOR_BUFFER_BIT, GL_NEAREST);
773 device_.BindReadFrameBuffer(0);
774 #endif
775 device_.SwapBuffers(*swp);
776 }
777 }
778 #if (RENDER_PERF_ENABLED == 1)
779 commonCpuTimers_.present.End();
780 #endif
781 }
782 }
783 }
784
785 void RenderBackendGLES::ResetState()
786 {
787 boundProgram_ = {};
788 boundIndexBuffer_ = {};
789 vertexAttribBinds_ = 0;
790 renderingToDefaultFbo_ = false;
791 boundComputePipeline_ = nullptr;
792 boundGraphicsPipeline_ = nullptr;
793 currentPsoHandle_ = {};
794 renderArea_ = {};
795 activeRenderPass_ = {};
796 currentSubPass_ = 0;
797 currentFrameBuffer_ = nullptr;
798 scissorBoxUpdated_ = viewportDepthRangeUpdated_ = viewportUpdated_ = true;
799 inRenderpass_ = 0;
800 }
801
802 void RenderBackendGLES::ResetBindings()
803 {
804 for (auto& b : boundObjects_) {
805 b.dirty = true;
806 }
807 boundComputePipeline_ = nullptr;
808 boundGraphicsPipeline_ = nullptr;
809 currentPsoHandle_ = {};
810 }
811
812 void RenderBackendGLES::Render(
813 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
814 {
815 // NOTE: all command lists are validated before entering here
816 PLUGIN_ASSERT(device_.IsActive());
817 #if (RENDER_PERF_ENABLED == 1)
818 commonCpuTimers_.full.Begin();
819 commonCpuTimers_.acquire.Begin();
820 #endif
821 presentationInfo_ = {};
822
823 if (device_.HasSwapchain() && (!backBufferConfig.swapchainData.empty())) {
824 for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
825 const auto& swapData = backBufferConfig.swapchainData[swapIdx];
826 if (const SwapchainGLES* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapData.handle))) {
827 presentationInfo_.swapchainImageIndex = swp->GetNextImage();
828 const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
829 if (presentationInfo_.swapchainImageIndex < swapchainData.imageViewCount) {
830 // remap image to backbuffer
831 const RenderHandle currentSwapchainHandle =
832 swapchainData.imageViews[presentationInfo_.swapchainImageIndex];
833 // special swapchain remapping
834 gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(swapData.handle, currentSwapchainHandle);
835 }
836 }
837 }
838 }
839 #if (RENDER_PERF_ENABLED == 1)
840 commonCpuTimers_.acquire.End();
841
842 StartFrameTimers(renderCommandFrameData);
843 commonCpuTimers_.execute.Begin();
844 #endif
845 // Reset bindings.
846 ResetState();
847 for (const auto& ref : renderCommandFrameData.renderCommandContexts) {
848 // Reset bindings between command lists..
849 ResetBindings();
850 RenderSingleCommandList(ref);
851 }
852 #if (RENDER_PERF_ENABLED == 1)
853 commonCpuTimers_.execute.End();
854 #endif
855 RenderProcessEndCommandLists(renderCommandFrameData, backBufferConfig);
856 #if (RENDER_PERF_ENABLED == 1)
857 commonCpuTimers_.full.End();
858 EndFrameTimers();
859 #endif
860 }
861
862 void RenderBackendGLES::RenderProcessEndCommandLists(
863 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
864 {
865 if (auto* frameSync = static_cast<RenderFrameSyncGLES*>(renderCommandFrameData.renderFrameSync); frameSync) {
866 frameSync->GetFrameFence();
867 }
868 // signal external GPU fences
869 if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
870 auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
871 const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
872 PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
873 if (externalSignals.size() == externalSemaphores.size()) {
874 for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
875 // only signal semaphores that have not been signaled yet
876 if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
877 if (const GpuSemaphoreGles* gs = (const GpuSemaphoreGles*)externalSemaphores[sigIdx].get(); gs) {
878 auto& plat = const_cast<GpuSemaphorePlatformDataGles&>(gs->GetPlatformData());
879 // NOTE: currently could create only one GPU sync
880 GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
881 plat.sync = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sync));
882 externalSignals[sigIdx].gpuSignalResourceHandle = plat.sync;
883 externalSignals[sigIdx].signaled = true;
884
885 // NOTE: client is expected to add code for the wait with glClientWaitSync(sync, X, 0)
886 }
887 }
888 }
889 }
890 }
891 }
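// As the NOTE above says, the client side is expected to wait on the exported fence. A minimal
// sketch (assuming the handle is the GLsync value stored in plat.sync) would be:
//   auto sync = reinterpret_cast<GLsync>(static_cast<uintptr_t>(gpuSignalResourceHandle));
//   glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, timeoutInNanoseconds);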
892
893 void RenderBackendGLES::RenderCommandUndefined(const RenderCommandWithType& renderCommand)
894 {
895 PLUGIN_ASSERT_MSG(false, "non-valid render command");
896 }
897
898 void RenderBackendGLES::RenderSingleCommandList(const RenderCommandContext& renderCommandCtx)
899 {
900 // these are validated in render graph
901 managers_ = { renderCommandCtx.nodeContextPsoMgr, renderCommandCtx.nodeContextPoolMgr,
902 renderCommandCtx.nodeContextDescriptorSetMgr, renderCommandCtx.renderBarrierList };
903
904 managers_.poolMgr->BeginBackendFrame();
905 managers_.psoMgr->BeginBackendFrame();
906 #if (RENDER_PERF_ENABLED == 1) || (RENDER_DEBUG_MARKERS_ENABLED == 1)
907 const auto& debugName = renderCommandCtx.debugName;
908 #endif
909 #if (RENDER_PERF_ENABLED == 1)
910 perfCounters_ = {};
911 PLUGIN_ASSERT(timers_.count(debugName) == 1);
912 PerfDataSet& perfDataSet = timers_[debugName];
913 perfDataSet.cpuTimer.Begin();
914 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
915 if (validGpuQueries_) {
916 #ifdef GL_GPU_DISJOINT_EXT
917 /* Clear disjoint error */
918 GLint disjointOccurred = 0;
919 glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
920 #endif
921 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
922 PLUGIN_ASSERT(gpuQuery);
923
924 const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
925 PLUGIN_ASSERT(platData.queryObject);
926 glBeginQuery(GL_TIME_ELAPSED_EXT, platData.queryObject);
927 }
928 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
929 #endif // RENDER_PERF_ENABLED
930 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
931 glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)debugName.data());
932 #endif
933 commandListValid_ = true;
934 for (const auto& ref : renderCommandCtx.renderCommandList->GetRenderCommands()) {
935 PLUGIN_ASSERT(ref.rc);
936 if (commandListValid_) {
937 #if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
938 glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1,
939 (const GLchar*)COMMAND_NAMES[static_cast<uint32_t>(ref.type)]);
940 #endif
941 (this->*(COMMAND_HANDLERS[static_cast<uint32_t>(ref.type)]))(ref);
942 #if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
943 glPopDebugGroup();
944 #endif
945 }
946 }
947 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
948 glPopDebugGroup();
949 #endif
950 #if (RENDER_PERF_ENABLED == 1)
951 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
952 if (validGpuQueries_) {
953 glEndQuery(GL_TIME_ELAPSED_EXT);
954 }
955 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
956 perfDataSet.cpuTimer.End();
957 CopyPerfTimeStamp(debugName, perfDataSet);
958 #endif // RENDER_PERF_ENABLED
959 }
960
961 void RenderBackendGLES::RenderCommandBindPipeline(const RenderCommandWithType& ref)
962 {
963 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_PIPELINE);
964 const auto& renderCmd = *static_cast<const struct RenderCommandBindPipeline*>(ref.rc);
965 boundProgram_ = {};
966 if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE) {
967 PLUGIN_ASSERT(currentFrameBuffer_ == nullptr);
968 BindComputePipeline(renderCmd);
969 } else if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
970 BindGraphicsPipeline(renderCmd);
971 }
972 currentPsoHandle_ = renderCmd.psoHandle;
973 }
974
975 void RenderBackendGLES::BindComputePipeline(const struct RenderCommandBindPipeline& renderCmd)
976 {
977 const auto* pso = static_cast<const ComputePipelineStateObjectGLES*>(
978 managers_.psoMgr->GetComputePso(renderCmd.psoHandle, nullptr));
979 if (pso) {
980 const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
981 // Setup descriptorset bind cache..
982 SetupCache(data.pipelineLayout);
983 }
984 boundComputePipeline_ = pso;
985 boundGraphicsPipeline_ = nullptr;
986 }
987
988 void RenderBackendGLES::SetupCache(const PipelineLayout& pipelineLayout)
989 {
990 // based on pipeline layout. (note that compatible sets should "save state")
991 for (uint32_t set = 0; set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++set) {
992 // mark unmatching sets dirty (all for now)
993 // resize the cache stuffs.
994 const auto& s = pipelineLayout.descriptorSetLayouts[set];
995 if (s.set == PipelineLayoutConstants::INVALID_INDEX) {
996 boundObjects_[set].dirty = true;
997 #if RENDER_HAS_GLES_BACKEND
998 boundObjects_[set].oesBinds.clear();
999 #endif
1000 boundObjects_[set].resources.clear();
1001 continue;
1002 }
1003 PLUGIN_ASSERT(s.set == set);
1004
1005 uint32_t maxB = 0;
1006 // NOTE: compatibility optimizations?
1007 // NOTE: we expect bindings to be sorted.
1008 if (s.bindings.back().binding == s.bindings.size() - 1U) {
1009 // since the last binding matches the size, expect it to be continuous.
1010 maxB = static_cast<uint32_t>(s.bindings.size());
1011 } else {
1012 // Sparse binding.
1013 // NOTE: sparse sets will waste memory here. (see notes in
1014 // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkDescriptorSetLayoutBinding.html)
1015 for (const auto& bind : s.bindings) {
1016 maxB = Math::max(maxB, bind.binding);
1017 }
1018 maxB += 1U; // zero based bindings..
1019 }
1020 if (boundObjects_[set].resources.size() != maxB) {
1021 // resource count change.. (so it's dirty then)
1022 boundObjects_[set].dirty = true;
1023 #if RENDER_HAS_GLES_BACKEND
1024 boundObjects_[set].oesBinds.clear();
1025 #endif
1026 boundObjects_[set].resources.clear(); // clear because we don't care what it had before.
1027 boundObjects_[set].resources.resize(maxB);
1028 }
1029
1030 for (const auto& b : s.bindings) {
1031 auto& o = boundObjects_[set].resources[b.binding];
1032 // ignore b.shaderStageFlags for now.
1033 if ((o.resources.size() != b.descriptorCount) || (o.descriptorType != b.descriptorType)) {
1034 // mark set dirty, since "not matching"
1035 o.resources.clear();
1036 o.resources.resize(b.descriptorCount);
1037 o.descriptorType = b.descriptorType;
1038 boundObjects_[set].dirty = true;
1039 #if RENDER_HAS_GLES_BACKEND
1040 boundObjects_[set].oesBinds.clear();
1041 #endif
1042 }
1043 }
1044 }
1045 }
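// SetupCache sizes boundObjects_[set].resources from the pipeline layout: when the last binding
// index equals bindingCount - 1 the set is treated as densely packed, otherwise the cache is sized
// to the highest binding index + 1 (so a sparse layout with bindings {0, 5} allocates 6 slots and
// leaves the unused ones empty). Any size or descriptor-type mismatch marks the whole set dirty.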
1046
1047 void RenderBackendGLES::BindGraphicsPipeline(const struct RenderCommandBindPipeline& renderCmd)
1048 {
1049 const auto* pso = static_cast<const GraphicsPipelineStateObjectGLES*>(
1050 managers_.psoMgr->GetGraphicsPso(renderCmd.psoHandle, activeRenderPass_.renderPassDesc,
1051 activeRenderPass_.subpasses, activeRenderPass_.subpassStartIndex, 0, nullptr, nullptr));
1052 if (pso) {
1053 const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
1054 dynamicStateFlags_ = data.dynamicStateFlags;
1055 DoGraphicsState(data.graphicsState);
1056 // NOTE: Deprecate (default viewport/scissor should be set from default targets at some point)
1057 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_VIEWPORT)) {
1058 SetViewport(renderArea_, ViewportDesc { 0.0f, 0.0f, static_cast<float>(renderArea_.extentWidth),
1059 static_cast<float>(renderArea_.extentHeight), 0.0f, 1.0f });
1060 }
1061 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_SCISSOR)) {
1062 SetScissor(renderArea_, ScissorDesc { 0, 0, renderArea_.extentWidth, renderArea_.extentHeight });
1063 }
1064 // Setup descriptorset bind cache..
1065 SetupCache(data.pipelineLayout);
1066 }
1067 boundComputePipeline_ = nullptr;
1068 boundGraphicsPipeline_ = pso;
1069 }
1070
1071 void RenderBackendGLES::RenderCommandDraw(const RenderCommandWithType& ref)
1072 {
1073 PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW);
1074 const auto& renderCmd = *static_cast<struct RenderCommandDraw*>(ref.rc);
1075 if (!boundGraphicsPipeline_) {
1076 return;
1077 }
1078 PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
1079 BindResources();
1080 const auto type = GetPrimFromTopology(topology_);
1081 const GLsizei firstVertex = static_cast<const GLsizei>(renderCmd.firstVertex);
1082 const GLsizei instanceCount = static_cast<GLsizei>(renderCmd.instanceCount);
1083 // firstInstance is not supported yet, need to set the SPIRV_Cross generated uniform
1084 // "SPIRV_Cross_BaseInstance" to renderCmd.firstInstance;
1085 if (renderCmd.indexCount) {
1086 uintptr_t offsetp = boundIndexBuffer_.offset;
1087 GLenum indexType = GL_UNSIGNED_SHORT;
1088 switch (boundIndexBuffer_.type) {
1089 case CORE_INDEX_TYPE_UINT16:
1090 offsetp += renderCmd.firstIndex * sizeof(uint16_t);
1091 indexType = GL_UNSIGNED_SHORT;
1092 break;
1093 case CORE_INDEX_TYPE_UINT32:
1094 offsetp += renderCmd.firstIndex * sizeof(uint32_t);
1095 indexType = GL_UNSIGNED_INT;
1096 break;
1097 default:
1098 PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
1099 break;
1100 }
1101 const GLsizei indexCount = static_cast<const GLsizei>(renderCmd.indexCount);
1102 const void* offset = reinterpret_cast<const void*>(offsetp);
1103 if (renderCmd.instanceCount > 1) {
1104 if (renderCmd.firstVertex) {
1105 glDrawElementsInstancedBaseVertex(type, indexCount, indexType, offset, instanceCount, firstVertex);
1106 } else {
1107 glDrawElementsInstanced(type, indexCount, indexType, offset, instanceCount);
1108 }
1109 } else {
1110 if (renderCmd.vertexOffset) {
1111 glDrawElementsBaseVertex(
1112 type, indexCount, indexType, offset, static_cast<GLint>(renderCmd.vertexOffset));
1113 } else {
1114 glDrawElements(type, indexCount, indexType, offset);
1115 }
1116 }
1117 #if (RENDER_PERF_ENABLED == 1)
1118 ++perfCounters_.drawCount;
1119 perfCounters_.instanceCount += renderCmd.instanceCount;
1120 perfCounters_.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
1121 #endif
1122 } else {
1123 const GLsizei vertexCount = static_cast<const GLsizei>(renderCmd.vertexCount);
1124 if (renderCmd.instanceCount > 1) {
1125 glDrawArraysInstanced(type, firstVertex, vertexCount, instanceCount);
1126 } else {
1127 glDrawArrays(type, firstVertex, vertexCount);
1128 }
1129 #if (RENDER_PERF_ENABLED == 1)
1130 ++perfCounters_.drawCount;
1131 perfCounters_.instanceCount += renderCmd.instanceCount;
1132 perfCounters_.triangleCount += (renderCmd.vertexCount * 3) * renderCmd.instanceCount; // 3: vertex dimension
1133 #endif
1134 }
1135 }
1136
1137 void RenderBackendGLES::RenderCommandDrawIndirect(const RenderCommandWithType& ref)
1138 {
1139 PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW_INDIRECT);
1140 const auto& renderCmd = *static_cast<const struct RenderCommandDrawIndirect*>(ref.rc);
1141 if (!boundGraphicsPipeline_) {
1142 return;
1143 }
1144 PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
1145 BindResources();
1146 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1147 const auto& plat = gpuBuffer->GetPlatformData();
1148 device_.BindBuffer(GL_DRAW_INDIRECT_BUFFER, plat.buffer);
1149 const auto type = GetPrimFromTopology(topology_);
1150 auto offset = static_cast<GLintptr>(renderCmd.offset);
1151 if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1152 GLenum indexType = GL_UNSIGNED_SHORT;
1153 switch (boundIndexBuffer_.type) {
1154 case CORE_INDEX_TYPE_UINT16:
1155 indexType = GL_UNSIGNED_SHORT;
1156 break;
1157 case CORE_INDEX_TYPE_UINT32:
1158 indexType = GL_UNSIGNED_INT;
1159 break;
1160 default:
1161 PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
1162 break;
1163 }
1164 for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1165 glDrawElementsIndirect(type, indexType, reinterpret_cast<const void*>(offset));
1166 offset += renderCmd.stride;
1167 }
1168 } else {
1169 for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1170 glDrawArraysIndirect(type, reinterpret_cast<const void*>(offset));
1171 offset += renderCmd.stride;
1172 }
1173 }
1174 #if (RENDER_PERF_ENABLED == 1)
1175 perfCounters_.drawIndirectCount += renderCmd.drawCount;
1176 #endif
1177 }
1178 }
1179
1180 void RenderBackendGLES::RenderCommandDispatch(const RenderCommandWithType& ref)
1181 {
1182 PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH);
1183 const auto& renderCmd = *static_cast<const struct RenderCommandDispatch*>(ref.rc);
1184 if (!boundComputePipeline_) {
1185 return;
1186 }
1187 PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1188 BindResources();
1189 glDispatchCompute(renderCmd.groupCountX, renderCmd.groupCountY, renderCmd.groupCountZ);
1190 #if (RENDER_PERF_ENABLED == 1)
1191 ++perfCounters_.dispatchCount;
1192 #endif
1193 }
1194
1195 void RenderBackendGLES::RenderCommandDispatchIndirect(const RenderCommandWithType& ref)
1196 {
1197 PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH_INDIRECT);
1198 const auto& renderCmd = *static_cast<const struct RenderCommandDispatchIndirect*>(ref.rc);
1199 if (!boundComputePipeline_) {
1200 return;
1201 }
1202 PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1203 BindResources();
1204 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1205 const auto& plat = gpuBuffer->GetPlatformData();
1206 device_.BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, plat.buffer);
1207 glDispatchComputeIndirect(static_cast<GLintptr>(renderCmd.offset));
1208 #if (RENDER_PERF_ENABLED == 1)
1209 ++perfCounters_.dispatchIndirectCount;
1210 #endif
1211 }
1212 }
1213
1214 void RenderBackendGLES::ClearScissorInit(const RenderPassDesc::RenderArea& aArea)
1215 {
1216 resetScissor_ = false; // need to reset scissor state after clear?
1217 clearScissorSet_ = true; // need to setup clear scissors before clear?
1218 clearScissor_ = aArea; // area to be cleared
1219 if (scissorPrimed_) { // have scissors been set yet?
1220 if ((!scissorBoxUpdated_) && // if there is a pending scissor change, ignore the scissorbox.
1221 (clearScissor_.offsetX == scissorBox_.offsetX) && (clearScissor_.offsetY == scissorBox_.offsetY) &&
1222 (clearScissor_.extentWidth == scissorBox_.extentWidth) &&
1223 (clearScissor_.extentHeight == scissorBox_.extentHeight)) {
1224 // Current scissors match clearscissor area, so no need to set it again.
1225 clearScissorSet_ = false;
1226 }
1227 }
1228 }
1229
1230 void RenderBackendGLES::ClearScissorSet()
1231 {
1232 if (clearScissorSet_) { // do we need to set clear scissors.
1233 clearScissorSet_ = false; // clear scissors have been set now.
1234 resetScissor_ = true; // we are modifying scissors, so remember to reset them afterwards.
1235 glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
1236 static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
1237 }
1238 }
1239
1240 void RenderBackendGLES::ClearScissorReset()
1241 {
1242 if (resetScissor_) { // need to reset correct scissors?
1243 if (!scissorPrimed_) {
1244 // scissors have not been set yet, so use clearbox as current cache state (and don't change scissor
1245 // setting)
1246 scissorPrimed_ = true;
1247 scissorBox_.offsetX = clearScissor_.offsetX;
1248 scissorBox_.offsetY = clearScissor_.offsetY;
1249 scissorBox_.extentHeight = clearScissor_.extentHeight;
1250 scissorBox_.extentWidth = clearScissor_.extentWidth;
1251 } else {
1252 // Restore scissor box to cached state. (update scissors when needed, since clearBox != scissorBox)
1253 scissorBoxUpdated_ = true; // ie. request to update scissor state.
1254 }
1255 }
1256 }
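// The three ClearScissor* helpers form a small state machine around attachment clears:
// ClearScissorInit records the clear area and checks whether the cached scissor already matches,
// ClearScissorSet applies the clear rectangle lazily just before the first glClearBuffer* call, and
// ClearScissorReset either primes the cached scissor box (on first use) or flags it for re-upload.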
1257
1258 void RenderBackendGLES::HandleColorAttachments(const array_view<const RenderPassDesc::AttachmentDesc*> colorAttachments)
1259 {
1260 constexpr ColorComponentFlags clearAll = CORE_COLOR_COMPONENT_R_BIT | CORE_COLOR_COMPONENT_G_BIT |
1261 CORE_COLOR_COMPONENT_B_BIT | CORE_COLOR_COMPONENT_A_BIT;
1262 const auto& cBlend = cacheState_.colorBlendState;
1263 for (uint32_t idx = 0; idx < colorAttachments.size(); ++idx) {
1264 if (colorAttachments[idx] == nullptr) {
1265 continue;
1266 }
1267 const auto& ref = *(colorAttachments[idx]);
1268 if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1269 const auto& cBlendState = cBlend.colorAttachments[idx];
1270 if (clearAll != cBlendState.colorWriteMask) {
1271 glColorMaski(idx, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1272 }
1273 ClearScissorSet();
1274 // glClearBufferfv only for float formats?
1275 // glClearBufferiv & glClearbufferuv only for integer formats?
1276 glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.float32);
1277 if (clearAll != cBlendState.colorWriteMask) {
1278 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1279 glColorMaski(idx, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
1280 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
1281 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
1282 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
1283 }
1284 }
1285 }
1286 }
1287
1288 void RenderBackendGLES::HandleDepthAttachment(const RenderPassDesc::AttachmentDesc& depthAttachment)
1289 {
1290 const GLuint allBits = 0xFFFFFFFFu;
1291 const auto& ref = depthAttachment;
1292 const bool clearDepth = (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1293 const bool clearStencil = (ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1294 // Change state if needed.
1295 if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1296 glDepthMask(GL_TRUE);
1297 }
1298 if (clearStencil) {
1299 if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1300 glStencilMaskSeparate(GL_FRONT, allBits);
1301 }
1302 if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1303 glStencilMaskSeparate(GL_BACK, allBits);
1304 }
1305 }
1306 if (clearDepth || clearStencil) {
1307 // Set the scissors for clear..
1308 ClearScissorSet();
1309 }
1310 // Do clears.
1311 if (clearDepth && clearStencil) {
1312 glClearBufferfi(GL_DEPTH_STENCIL, 0, ref.clearValue.depthStencil.depth,
1313 static_cast<GLint>(ref.clearValue.depthStencil.stencil));
1314 } else if (clearDepth) {
1315 glClearBufferfv(GL_DEPTH, 0, &ref.clearValue.depthStencil.depth);
1316 } else if (clearStencil) {
1317 glClearBufferiv(GL_STENCIL, 0, reinterpret_cast<const GLint*>(&ref.clearValue.depthStencil.stencil));
1318 }
1319
1320 // Restore cached state, if we touched the state.
1321 if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1322 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1323 glDepthMask(GL_FALSE);
1324 }
1325 if (clearStencil) {
1326 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1327 if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1328 glStencilMaskSeparate(GL_FRONT, cacheState_.depthStencilState.frontStencilOpState.writeMask);
1329 }
1330 if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1331 glStencilMaskSeparate(GL_BACK, cacheState_.depthStencilState.backStencilOpState.writeMask);
1332 }
1333 }
1334 }
1335
1336 void RenderBackendGLES::DoSubPass(uint32_t subPass)
1337 {
1338 if (currentFrameBuffer_ == nullptr) {
1339 // Completely invalid state in backend.
1340 return;
1341 }
1342 const auto& rpd = activeRenderPass_.renderPassDesc;
1343 const auto& sb = activeRenderPass_.subpasses[subPass];
1344
1345 // If there's no FBO, activate the swapchain handle so that drawing happens to the correct surface.
1346 if (!currentFrameBuffer_->fbos[subPass].fbo && (sb.colorAttachmentCount == 1U)) {
1347 auto color = rpd.attachmentHandles[sb.colorAttachmentIndices[0]];
1348 device_.Activate(color);
1349 }
1350 device_.BindFrameBuffer(currentFrameBuffer_->fbos[subPass].fbo);
1351 ClearScissorInit(renderArea_);
1352 if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1353 SetState(GL_RASTERIZER_DISCARD, GL_FALSE);
1354 }
1355 {
1356 // NOTE: clear is not yet optimal. depth, stencil and color should be cleared using ONE glClear call if
1357 // possible. (ie. all buffers at once)
1358 renderingToDefaultFbo_ = false;
1359 if (sb.colorAttachmentCount > 0) {
1360 // collect color attachment infos..
1361 const RenderPassDesc::AttachmentDesc*
1362 colorAttachments[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1363 for (uint32_t ci = 0; ci < sb.colorAttachmentCount; ci++) {
1364 uint32_t index = sb.colorAttachmentIndices[ci];
1365 if (resolveToBackbuffer_[index]) {
1366 // NOTE: this could fail with multiple color attachments....
1367 renderingToDefaultFbo_ = true;
1368 }
1369 if (!attachmentCleared_[index]) {
1370 attachmentCleared_[index] = true;
1371 colorAttachments[ci] = &rpd.attachments[index];
1372 } else {
1373 colorAttachments[ci] = nullptr;
1374 }
1375 }
1376 HandleColorAttachments(array_view(colorAttachments, sb.colorAttachmentCount));
1377 }
1378 if (sb.depthAttachmentCount) {
1379 if (!attachmentCleared_[sb.depthAttachmentIndex]) {
1380 attachmentCleared_[sb.depthAttachmentIndex] = true;
1381 HandleDepthAttachment(rpd.attachments[sb.depthAttachmentIndex]);
1382 }
1383 }
1384 }
1385 if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1386 // NOTE: We might not need to restore here.. (we need to peek into the command list to find out...)
1387 SetState(GL_RASTERIZER_DISCARD, GL_TRUE);
1388 }
1389 ClearScissorReset();
1390 }
1391
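// Records the first and last subpass that uses each attachment and flags attachments whose resolve target is
// the default framebuffer (image == 0 and renderBuffer == 0), i.e. a backbuffer-like surface.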
1392 void RenderBackendGLES::ScanPasses(const RenderPassDesc& rpd)
1393 {
1394 for (uint32_t sub = 0; sub < rpd.subpassCount; sub++) {
1395 const auto& currentSubPass = activeRenderPass_.subpasses[sub];
1396 for (uint32_t ci = 0; ci < currentSubPass.resolveAttachmentCount; ci++) {
1397 uint32_t resolveTo = currentSubPass.resolveAttachmentIndices[ci];
1398 if (attachmentFirstUse_[resolveTo] == 0xFFFFFFFF) {
1399 attachmentFirstUse_[resolveTo] = sub;
1400 }
1401 attachmentLastUse_[resolveTo] = sub;
1402 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[resolveTo]->GetPlatformData());
1403 if ((p.image == 0) && (p.renderBuffer == 0)) {
1404 // mark the "resolveFrom" (i.e. the color attachment) as "backbuffer-like", since we resolve to the
1405 // backbuffer...
1406 uint32_t resolveFrom = currentSubPass.colorAttachmentIndices[ci];
1407 resolveToBackbuffer_[resolveFrom] = true;
1408 }
1409 }
1410 for (uint32_t ci = 0; ci < currentSubPass.inputAttachmentCount; ci++) {
1411 uint32_t index = currentSubPass.inputAttachmentIndices[ci];
1412 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1413 attachmentFirstUse_[index] = sub;
1414 }
1415 attachmentLastUse_[index] = sub;
1416 }
1417 for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1418 uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1419 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1420 attachmentFirstUse_[index] = sub;
1421 }
1422 attachmentLastUse_[index] = sub;
1423 if (attachmentImage_[index]) {
1424 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[index]->GetPlatformData());
1425 if ((p.image == 0) && (p.renderBuffer == 0)) {
1426 resolveToBackbuffer_[index] = true;
1427 }
1428 }
1429 }
1430 if (currentSubPass.depthAttachmentCount > 0) {
1431 uint32_t index = currentSubPass.depthAttachmentIndex;
1432 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1433 attachmentFirstUse_[index] = sub;
1434 }
1435 attachmentLastUse_[index] = sub;
1436 }
1437 }
1438 }
1439
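// Begins a render pass (or one of its subpasses): resolves the framebuffer from the pool manager, resets the
// per-attachment bookkeeping, scans the subpasses and runs the first subpass.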
1440 void RenderBackendGLES::RenderCommandBeginRenderPass(const RenderCommandWithType& ref)
1441 {
1442 PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
1443 const auto& renderCmd = *static_cast<const struct RenderCommandBeginRenderPass*>(ref.rc);
1444 switch (renderCmd.beginType) {
1445 case RenderPassBeginType::RENDER_PASS_BEGIN: {
1446 ++inRenderpass_;
1447 PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES beginrenderpass mInRenderpass %u", inRenderpass_);
1448 activeRenderPass_ = renderCmd; // Store this because we need it later (in NextRenderPass)
1449
1450 const auto& rpd = activeRenderPass_.renderPassDesc;
1451 renderArea_ = rpd.renderArea; // can subpasses have different render areas?
1452 auto& cpm = *(static_cast<NodeContextPoolManagerGLES*>(managers_.poolMgr));
1453 if (multisampledRenderToTexture_) {
1454 cpm.FilterRenderPass(activeRenderPass_);
1455 }
1456 currentFrameBuffer_ = cpm.GetFramebuffer(cpm.GetFramebufferHandle(activeRenderPass_));
1457 if (currentFrameBuffer_ == nullptr) {
1458 // Completely invalid state in backend.
1459 commandListValid_ = false;
1460 --inRenderpass_;
1461 return;
1462 }
1463 PLUGIN_ASSERT_MSG(
1464 activeRenderPass_.subpassStartIndex == 0, "activeRenderPass_.subpassStartIndex != 0 not handled!");
1465 currentSubPass_ = 0;
1466 // find first and last use, reset the clear flags. (this could be cached in the low-level classes)
1467 for (uint32_t i = 0; i < rpd.attachmentCount; i++) {
1468 attachmentCleared_[i] = false;
1469 attachmentFirstUse_[i] = 0xFFFFFFFF;
1470 attachmentLastUse_[i] = 0;
1471 resolveToBackbuffer_[i] = false;
1472 attachmentImage_[i] =
1473 static_cast<const GpuImageGLES*>(gpuResourceMgr_.GetImage(rpd.attachmentHandles[i]));
1474 }
1475 ScanPasses(rpd);
1476 DoSubPass(0);
1477 #if (RENDER_PERF_ENABLED == 1)
1478 ++perfCounters_.renderPassCount;
1479 #endif
1480 } break;
1481
1482 case RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN: {
1483 ++currentSubPass_;
1484 PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1485 DoSubPass(activeRenderPass_.subpassStartIndex);
1486 } break;
1487
1488 default:
1489 break;
1490 }
1491 }
1492
1493 void RenderBackendGLES::RenderCommandNextSubpass(const RenderCommandWithType& ref)
1494 {
1495 PLUGIN_ASSERT(ref.type == RenderCommandType::NEXT_SUBPASS);
1496 const auto& renderCmd = *static_cast<const struct RenderCommandNextSubpass*>(ref.rc);
1497 PLUGIN_UNUSED(renderCmd);
1498 PLUGIN_ASSERT(renderCmd.subpassContents == SubpassContents::CORE_SUBPASS_CONTENTS_INLINE);
1499 ++currentSubPass_;
1500 PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1501 DoSubPass(currentSubPass_);
1502 }
1503
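// Collects the depth/stencil attachment into 'invalidateAttachment' when this subpass is its last use and the
// store ops are DONT_CARE; returns the number of entries written (0 or 1).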
1504 int32_t RenderBackendGLES::InvalidateDepthStencil(
1505 array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1506 {
1507 int32_t depthCount = 0;
1508 if (currentSubPass.depthAttachmentCount > 0) {
1509 const uint32_t index = currentSubPass.depthAttachmentIndex;
1510 if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1511 const auto& image = attachmentImage_[index];
1512 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1513 // NOTE: we expect the depth to be in the FBO in this case even if there is a depth target in the render pass
1514 if ((dplat.image || dplat.renderBuffer) && (!renderingToDefaultFbo_)) {
1515 bool depth = false;
1516 bool stencil = false;
1517 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1518 if ((dplat.format == GL_DEPTH_COMPONENT) || (dplat.format == GL_DEPTH_STENCIL)) {
1519 depth = true;
1520 }
1521 }
1522 if (rpd.attachments[index].stencilStoreOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1523 if ((dplat.format == GL_STENCIL) || (dplat.format == GL_DEPTH_STENCIL)) {
1524 stencil = true;
1525 }
1526 }
1527 if (depth && stencil) {
1528 invalidateAttachment[0] = GL_DEPTH_STENCIL_ATTACHMENT;
1529 depthCount++;
1530 } else if (stencil) {
1531 invalidateAttachment[0] = GL_STENCIL_ATTACHMENT;
1532 depthCount++;
1533 } else if (depth) {
1534 invalidateAttachment[0] = GL_DEPTH_ATTACHMENT;
1535 depthCount++;
1536 }
1537 }
1538 }
1539 }
1540 return depthCount;
1541 }
1542
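// Collects color attachments that can be invalidated (last use in this subpass and storeOp == DONT_CARE);
// returns the number of entries written to 'invalidateAttachment'.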
1543 int32_t RenderBackendGLES::InvalidateColor(
1544 array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1545 {
1546 int32_t colorCount = 0;
1547 // see which parts of the fbo can be invalidated...
1548 // collect color attachment infos..
1549 for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1550 const uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1551 if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1552 if (const auto* image = attachmentImage_[index]) {
1553 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1554 if (dplat.image || dplat.renderBuffer) {
1555 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1556 invalidateAttachment[static_cast<size_t>(colorCount)] = GL_COLOR_ATTACHMENT0 + ci;
1557 colorCount++;
1558 }
1559 }
1560 }
1561 }
1562 }
1563 return colorCount;
1564 }
1565
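// Resolves multisampled color/depth into the subpass resolve FBO with glBlitFramebuffer and returns the
// framebuffer target (GL_READ_FRAMEBUFFER or GL_FRAMEBUFFER) that the following invalidate should use.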
1566 uint32_t RenderBackendGLES::ResolveMSAA(const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1567 {
1568 const GLbitfield mask = ((currentSubPass.resolveAttachmentCount > 0u) ? GL_COLOR_BUFFER_BIT : 0u) |
1569 ((currentSubPass.depthResolveAttachmentCount > 0u) ? GL_DEPTH_BUFFER_BIT : 0u);
1570 if (mask) {
1571 // Resolve MSAA buffers.
1572 // NOTE: ARM recommends NOT to use glBlitFramebuffer here
1573 device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1574 device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1575 if (scissorEnabled_) {
1576 glDisable(GL_SCISSOR_TEST);
1577 scissorEnabled_ = false;
1578 }
1579 // FLIP_RESOLVE_DEFAULT_FBO not needed, since we render flipped if the end result will be resolved to the
1580 // default FBO. Hopefully it works now.
1581 #if defined(FLIP_RESOLVE_DEFAULT_FBO) && FLIP_RESOLVE_DEFAULT_FBO
1582 if (currentFrameBuffer_->resolveFbo[currentSubPass_] == 0) {
1583 // flip if resolving to default fbo. (NOTE: sample count of destination must be zero or equal to source)
1584 // and in mali devices src and dst rects MUST be equal. (which is not according to spec)
1585 // IE. can't flip and resolve at the same time on MALI based devices.
1586 // NEED A FIX HERE!
1587 glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1588 static_cast<GLint>(currentFrameBuffer_->height), 0, static_cast<GLint>(currentFrameBuffer_->height),
1589 static_cast<GLint>(currentFrameBuffer_->width), 0, mask, GL_NEAREST);
1590 return GL_READ_FRAMEBUFFER;
1591 }
1592 #endif
1593 glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1594 static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1595 static_cast<GLint>(currentFrameBuffer_->height), mask,
1596 GL_NEAREST); // no flip
1597 return GL_READ_FRAMEBUFFER;
1598 }
1599 return GL_FRAMEBUFFER;
1600 }
1601
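// Ends the render pass (or subpass): resolves MSAA targets and invalidates attachments whose contents are no
// longer needed after this subpass.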
1602 void RenderBackendGLES::RenderCommandEndRenderPass(const RenderCommandWithType& ref)
1603 {
1604 PLUGIN_ASSERT(ref.type == RenderCommandType::END_RENDER_PASS);
1605 const auto& renderCmd = *static_cast<const struct RenderCommandEndRenderPass*>(ref.rc);
1606 if (renderCmd.endType == RenderPassEndType::END_RENDER_PASS) {
1607 PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES endrenderpass mInRenderpass %u", inRenderpass_);
1608 inRenderpass_--;
1609 }
1610 if (currentFrameBuffer_ == nullptr) {
1611 // Completely invalid state in backend.
1612 return;
1613 }
1614 const auto& rpd = activeRenderPass_.renderPassDesc;
1615 const auto& currentSubPass = activeRenderPass_.subpasses[currentSubPass_];
1616
1617 // Resolve MSAA
1618 const uint32_t fbType = ResolveMSAA(rpd, currentSubPass);
1619
1620 // Finally invalidate color and depth..
1621 GLenum invalidate[PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT + 1] = {};
1622 int32_t invalidateCount = InvalidateColor(invalidate, rpd, currentSubPass);
1623 invalidateCount += InvalidateDepthStencil(
1624 array_view(invalidate + invalidateCount, countof(invalidate) - invalidateCount), rpd, currentSubPass);
1625
1626 // NOTE: all attachments should be the same size AND currentFrameBuffer_->width/height should match that!
1627 Invalidate(fbType, invalidateCount, invalidate, rpd, *currentFrameBuffer_);
1628
1629 if (inRenderpass_ == 0) {
1630 currentFrameBuffer_ = nullptr;
1631 }
1632 }
1633
1634 void RenderBackendGLES::RenderCommandBindVertexBuffers(const RenderCommandWithType& ref)
1635 {
1636 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_VERTEX_BUFFERS);
1637 const auto& renderCmd = *static_cast<const struct RenderCommandBindVertexBuffers*>(ref.rc);
1638 PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1639 PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1640 if (!boundGraphicsPipeline_) {
1641 return;
1642 }
1643 vertexAttribBinds_ = renderCmd.vertexBufferCount;
1644 for (size_t i = 0; i < renderCmd.vertexBufferCount; i++) {
1645 const auto& currVb = renderCmd.vertexBuffers[i];
1646 if (const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(currVb.bufferHandle); gpuBuffer) {
1647 const auto& plat = gpuBuffer->GetPlatformData();
1648 uintptr_t offset = currVb.bufferOffset;
1649 offset += plat.currentByteOffset;
1650 vertexAttribBindSlots_[i].id = plat.buffer;
1651 vertexAttribBindSlots_[i].offset = static_cast<intptr_t>(offset);
1652 } else {
1653 vertexAttribBindSlots_[i].id = 0;
1654 vertexAttribBindSlots_[i].offset = 0;
1655 }
1656 }
1657 }
1658
1659 void RenderBackendGLES::RenderCommandBindIndexBuffer(const RenderCommandWithType& ref)
1660 {
1661 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_INDEX_BUFFER);
1662 const auto& renderCmd = *static_cast<const struct RenderCommandBindIndexBuffer*>(ref.rc);
1663 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.indexBuffer.bufferHandle);
1664 gpuBuffer) {
1665 const auto& plat = gpuBuffer->GetPlatformData();
1666 boundIndexBuffer_.offset = renderCmd.indexBuffer.bufferOffset;
1667 boundIndexBuffer_.offset += plat.currentByteOffset;
1668 boundIndexBuffer_.type = renderCmd.indexBuffer.indexType;
1669 boundIndexBuffer_.id = plat.buffer;
1670 }
1671 }
1672
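// Blits between two images layer by layer: each layer/mip is attached to the blit FBOs and DoBlit performs
// the framebuffer-to-framebuffer copy with the requested filter.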
1673 void RenderBackendGLES::RenderCommandBlitImage(const RenderCommandWithType& ref)
1674 {
1675 PLUGIN_ASSERT(ref.type == RenderCommandType::BLIT_IMAGE);
1676 const auto& renderCmd = *static_cast<const struct RenderCommandBlitImage*>(ref.rc);
1677 const auto* srcImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1678 const auto* dstImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1679 if ((srcImage == nullptr) || (dstImage == nullptr)) {
1680 return;
1681 }
1682 const auto& srcDesc = srcImage->GetDesc();
1683 const auto& srcPlat = srcImage->GetPlatformData();
1684 const auto& dstDesc = dstImage->GetDesc();
1685 const auto& dstPlat = dstImage->GetPlatformData();
1686 const auto& srcRect = renderCmd.imageBlit.srcOffsets;
1687 const auto& dstRect = renderCmd.imageBlit.dstOffsets;
1688 const auto& src = renderCmd.imageBlit.srcSubresource;
1689 const auto& dst = renderCmd.imageBlit.dstSubresource;
1690 const GLint srcMipLevel = static_cast<GLint>(src.mipLevel);
1691 const GLint dstMipLevel = static_cast<GLint>(dst.mipLevel);
1692 const uint32_t srcSampleCount = static_cast<uint32_t>(srcDesc.sampleCountFlags);
1693 const uint32_t dstSampleCount = static_cast<uint32_t>(dstDesc.sampleCountFlags);
1694 PLUGIN_ASSERT_MSG(src.layerCount == dst.layerCount, "Source and Destination layercounts do not match!");
1695 PLUGIN_ASSERT_MSG(inRenderpass_ == 0, "RenderCommandBlitImage while inRenderPass");
1696 glDisable(GL_SCISSOR_TEST);
1697 scissorEnabled_ = false;
1698 // NOTE: LAYERS! (texture arrays)
1699 device_.BindReadFrameBuffer(blitImageSourceFbo_);
1700 device_.BindWriteFrameBuffer(blitImageDestinationFbo_);
1701 for (uint32_t layer = 0; layer < src.layerCount; layer++) {
1702 const GLenum srcType = getTarget(srcPlat.type, layer, srcSampleCount);
1703 const GLenum dstType = getTarget(dstPlat.type, layer, dstSampleCount);
1704 // glFramebufferTextureLayer for array textures....
1705 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, srcPlat.image, srcMipLevel);
1706 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, dstPlat.image, dstMipLevel);
1707 DoBlit(renderCmd.filter, { src.mipLevel, srcRect[0], srcRect[1], srcDesc.height },
1708 { dst.mipLevel, dstRect[0], dstRect[1], dstDesc.height });
1709 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, 0, 0);
1710 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, 0, 0);
1711 }
1712 }
1713
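// Buffer-to-buffer copy via glCopyBufferSubData using the COPY_READ/COPY_WRITE binding points; the previous
// bindings are restored afterwards.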
1714 void RenderBackendGLES::RenderCommandCopyBuffer(const RenderCommandWithType& ref)
1715 {
1716 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER);
1717 const auto& renderCmd = *static_cast<const struct RenderCommandCopyBuffer*>(ref.rc);
1718 const auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1719 const auto* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.dstHandle);
1720 if (srcGpuBuffer && dstGpuBuffer) {
1721 const auto& srcData = srcGpuBuffer->GetPlatformData();
1722 const auto& dstData = dstGpuBuffer->GetPlatformData();
1723 const auto oldBindR = device_.BoundBuffer(GL_COPY_READ_BUFFER);
1724 const auto oldBindW = device_.BoundBuffer(GL_COPY_WRITE_BUFFER);
1725 device_.BindBuffer(GL_COPY_READ_BUFFER, srcData.buffer);
1726 device_.BindBuffer(GL_COPY_WRITE_BUFFER, dstData.buffer);
1727 glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
1728 static_cast<GLintptr>(renderCmd.bufferCopy.srcOffset),
1729 static_cast<GLintptr>(renderCmd.bufferCopy.dstOffset), static_cast<GLsizeiptr>(renderCmd.bufferCopy.size));
1730 device_.BindBuffer(GL_COPY_READ_BUFFER, oldBindR);
1731 device_.BindBuffer(GL_COPY_WRITE_BUFFER, oldBindW);
1732 }
1733 }
1734
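// Buffer-to-image copy; SetupBlit/Blit* are expected to route the data through a pixel unpack buffer unless
// the workaround below disables it.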
1735 void RenderBackendGLES::BufferToImageCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1736 {
1737 #if (RENDER_HAS_GLES_BACKEND == 1) & defined(_WIN32)
1738 // use the workaround only for gles backend on windows. (pvr simulator bug)
1739 constexpr const bool usePixelUnpackBuffer = false;
1740 #else
1741 // expect this to work, and the nvidia bug to be fixed.
1742 constexpr const bool usePixelUnpackBuffer = true;
1743 #endif
1744 auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1745 auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1746 if ((srcGpuBuffer == nullptr) || (dstGpuImage == nullptr)) {
1747 return;
1748 }
1749 const auto info = SetupBlit<usePixelUnpackBuffer>(device_, renderCmd.bufferImageCopy, *srcGpuBuffer, *dstGpuImage);
1750 if (info.iPlat.type == GL_TEXTURE_CUBE_MAP) {
1751 BlitCube(device_, info);
1752 } else if (info.iPlat.type == GL_TEXTURE_2D) {
1753 Blit2D(device_, info);
1754 } else if (info.iPlat.type == GL_TEXTURE_2D_ARRAY) {
1755 BlitArray(device_, info);
1756 } else if (info.iPlat.type == GL_TEXTURE_3D) {
1757 Blit3D(device_, info);
1758 #if RENDER_HAS_GLES_BACKEND
1759 } else if (info.iPlat.type == GL_TEXTURE_EXTERNAL_OES) {
1760 PLUGIN_LOG_E("Tried to copy to GL_TEXTURE_EXTERNAL_OES. Ignored!");
1761 #endif
1762 } else {
1763 PLUGIN_ASSERT_MSG(false, "RenderCommandCopyBufferImage unhandled type");
1764 }
1765 FinishBlit<usePixelUnpackBuffer>(device_, *srcGpuBuffer);
1766 }
1767
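// Image-to-buffer copy: the source mip is attached to a read FBO and glReadnPixels writes into the destination
// buffer through the GL_PIXEL_PACK_BUFFER binding; only 2D and cube map sources are supported.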
1768 void RenderBackendGLES::ImageToBufferCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1769 {
1770 const auto& bc = renderCmd.bufferImageCopy;
1771 const auto* srcGpuImage = static_cast<GpuImageGLES*>(gpuResourceMgr_.GetImage(renderCmd.srcHandle));
1772 const auto* dstGpuBuffer = static_cast<GpuBufferGLES*>(gpuResourceMgr_.GetBuffer(renderCmd.dstHandle));
1773 PLUGIN_ASSERT(srcGpuImage);
1774 PLUGIN_ASSERT(dstGpuBuffer);
1775 if ((srcGpuImage == nullptr) || (dstGpuBuffer == nullptr)) {
1776 return;
1777 }
1778 const auto& iPlat = static_cast<const GpuImagePlatformDataGL&>(srcGpuImage->GetPlatformData());
1779 const auto& bPlat = static_cast<const GpuBufferPlatformDataGL&>(dstGpuBuffer->GetPlatformData());
1780 if ((iPlat.type != GL_TEXTURE_CUBE_MAP) && (iPlat.type != GL_TEXTURE_2D)) {
1781 PLUGIN_LOG_E("Unsupported texture type in ImageToBufferCopy %x", iPlat.type);
1782 return;
1783 }
1784 device_.BindReadFrameBuffer(blitImageSourceFbo_);
1785 PLUGIN_ASSERT(bc.imageSubresource.layerCount == 1);
1786 GLenum type = GL_TEXTURE_2D;
1787 if (iPlat.type == GL_TEXTURE_CUBE_MAP) {
1788 type = getCubeMapTarget(iPlat.type, bc.imageSubresource.baseArrayLayer);
1789 }
1790 // glFramebufferTextureLayer for array textures....
1791 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, static_cast<GLuint>(iPlat.image),
1792 static_cast<GLint>(bc.imageSubresource.mipLevel));
1793 const Math::UVec2 sPos { bc.imageOffset.width, bc.imageOffset.height };
1794 const Math::UVec2 sExt { bc.imageExtent.width, bc.imageExtent.height };
1795 device_.BindBuffer(GL_PIXEL_PACK_BUFFER, bPlat.buffer);
1796 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(bc.bufferRowLength));
1797 glPixelStorei(GL_PACK_ALIGNMENT, 1);
1798 uintptr_t dstOffset = bc.bufferOffset + bPlat.currentByteOffset;
1799 glReadnPixels(static_cast<GLint>(sPos.x), static_cast<GLint>(sPos.y), static_cast<GLsizei>(sExt.x),
1800 static_cast<GLsizei>(sExt.y), iPlat.format, static_cast<GLenum>(iPlat.dataType),
1801 static_cast<GLsizei>(bPlat.alignedByteSize), reinterpret_cast<void*>(dstOffset));
1802 device_.BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
1803 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, 0, 0);
1804 }
1805
1806 void RenderBackendGLES::RenderCommandCopyBufferImage(const RenderCommandWithType& ref)
1807 {
1808 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER_IMAGE);
1809 const auto& renderCmd = *static_cast<const struct RenderCommandCopyBufferImage*>(ref.rc);
1810 PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1811 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1812 BufferToImageCopy(renderCmd);
1813 } else if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1814 ImageToBufferCopy(renderCmd);
1815 }
1816 }
1817
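// Image-to-image copy via glCopyImageSubData; offsets and extent are clamped so the copy stays inside both the
// source and destination mips.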
1818 void RenderBackendGLES::RenderCommandCopyImage(const RenderCommandWithType& ref)
1819 {
1820 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_IMAGE);
1821 const auto& renderCmd = *static_cast<const struct RenderCommandCopyImage*>(ref.rc);
1822 PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1823 const auto* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1824 const auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1825 if ((srcGpuImage == nullptr) || (dstGpuImage == nullptr)) {
1826 return;
1827 }
1828 const auto& srcDesc = srcGpuImage->GetDesc();
1829 const auto& dstDesc = dstGpuImage->GetDesc();
1830 #if RENDER_VALIDATION_ENABLED
1831 ValidateCopyImage(renderCmd.imageCopy, srcDesc, dstDesc);
1832 #endif
1833 const auto srcMipLevel =
1834 static_cast<GLint>(Math::min(renderCmd.imageCopy.srcSubresource.mipLevel, srcDesc.mipCount - 1));
1835 const auto dstMipLevel =
1836 static_cast<GLint>(Math::min(renderCmd.imageCopy.dstSubresource.mipLevel, dstDesc.mipCount - 1));
1837
1838 auto sOffset = renderCmd.imageCopy.srcOffset;
1839 auto dOffset = renderCmd.imageCopy.dstOffset;
1840 auto size = renderCmd.imageCopy.extent;
1841
1842 // clamp negative offsets to zero and adjust extent and other offset accordingly
1843 ClampOffset(sOffset, dOffset, size);
1844 ClampOffset(dOffset, sOffset, size);
1845
1846 // clamp size to fit src and dst
1847 ClampSize(sOffset, srcDesc, size);
1848 ClampSize(dOffset, dstDesc, size);
1849
1850 const auto& srcPlatData = srcGpuImage->GetPlatformData();
1851 const auto& dstPlatData = dstGpuImage->GetPlatformData();
1852 glCopyImageSubData(srcPlatData.image, srcPlatData.type, srcMipLevel, sOffset.x, sOffset.y, sOffset.z,
1853 dstPlatData.image, dstPlatData.type, dstMipLevel, dOffset.x, dOffset.y, dOffset.z,
1854 static_cast<GLsizei>(size.width), static_cast<GLsizei>(size.height), static_cast<GLsizei>(size.depth));
1855 }
1856
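// Translates the barrier point's access/pipeline stage flags into glMemoryBarrier / glMemoryBarrierByRegion
// bitfields; fragment-to-fragment barriers use the ByRegion variant.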
1857 void RenderBackendGLES::RenderCommandBarrierPoint(const RenderCommandWithType& ref)
1858 {
1859 PLUGIN_ASSERT(ref.type == RenderCommandType::BARRIER_POINT);
1860 const auto& renderCmd = *static_cast<const struct RenderCommandBarrierPoint*>(ref.rc);
1861 const auto& rbList = *managers_.rbList;
1862 // NOTE: proper flagging of barriers.
1863 if (const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1864 rbList.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1865 barrierPointBarriers) {
1866 const uint32_t barrierListCount = barrierPointBarriers->barrierListCount;
1867 const auto* nextBarrierList = barrierPointBarriers->firstBarrierList;
1868 GLbitfield barriers = 0;
1869 GLbitfield barriersByRegion = 0;
1870 for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1871 if (nextBarrierList == nullptr) {
1872 // cannot be null, just a safety
1873 PLUGIN_ASSERT(false);
1874 return;
1875 }
1876 const auto& barrierListRef = *nextBarrierList;
1877 nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1878 const uint32_t barrierCount = barrierListRef.count;
1879 // helper which covers barriers supported by Barrier and BarrierByRegion
1880 auto commonBarrierBits = [](AccessFlags accessFlags, RenderHandleType resourceType) -> GLbitfield {
1881 GLbitfield barriers = 0;
1882 if (accessFlags & CORE_ACCESS_UNIFORM_READ_BIT) {
1883 barriers |= GL_UNIFORM_BARRIER_BIT;
1884 }
1885 if (accessFlags & CORE_ACCESS_SHADER_READ_BIT) {
1886 // shader read covers UBO, SSBO, storage image etc. use resource type to limit the options.
1887 if (resourceType == RenderHandleType::GPU_IMAGE) {
1888 barriers |= GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1889 } else if (resourceType == RenderHandleType::GPU_BUFFER) {
1890 barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
1891 } else {
1892 barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT |
1893 GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1894 }
1895 }
1896 if (accessFlags & CORE_ACCESS_SHADER_WRITE_BIT) {
1897 if (resourceType == RenderHandleType::GPU_IMAGE) {
1898 barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1899 } else if (resourceType == RenderHandleType::GPU_BUFFER) {
1900 barriers |= GL_SHADER_STORAGE_BARRIER_BIT;
1901 } else {
1902 barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
1903 }
1904 }
1905 if (accessFlags & (CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1906 CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)) {
1907 barriers |= GL_FRAMEBUFFER_BARRIER_BIT;
1908 }
1909 // GL_ATOMIC_COUNTER_BARRIER_BIT is not used at the moment
1910 return barriers;
1911 };
1912 for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1913 const auto& barrier = barrierListRef.commandBarriers[barrierIdx];
1914
1915 // check if written by a previous shader as an attachment or a storage image/buffer
1916 if (barrier.src.accessFlags & (CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
1917 CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
1918 const auto resourceHandle = barrier.resourceHandle;
1919 const auto handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1920
1921 // barrier by region is between fragment shaders and supports a subset of barriers.
1922 if ((barrier.src.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) &&
1923 (barrier.dst.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
1924 barriersByRegion |= commonBarrierBits(barrier.dst.accessFlags, handleType);
1925 } else {
1926 // check the barriers shared with ByRegion
1927 barriers |= commonBarrierBits(barrier.dst.accessFlags, handleType);
1928
1929 // the rest are invalid for ByRegion
1930 if (barrier.dst.accessFlags & CORE_ACCESS_INDIRECT_COMMAND_READ_BIT) {
1931 barriers |= GL_COMMAND_BARRIER_BIT;
1932 }
1933 if (barrier.dst.accessFlags & CORE_ACCESS_INDEX_READ_BIT) {
1934 barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
1935 }
1936 if (barrier.dst.accessFlags & CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
1937 barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
1938 }
1939 // which are the correct accessFlags?
1940 // GL_PIXEL_BUFFER_BARRIER_BIT:
1941 // - buffer objects via the GL_PIXEL_PACK_BUFFER and GL_PIXEL_UNPACK_BUFFER bindings (via
1942 // glReadPixels, glTexSubImage1D, etc.)
1943 // GL_TEXTURE_UPDATE_BARRIER_BIT:
1944 // - texture writes via glTex(Sub)Image*, glCopyTex(Sub)Image*, glCompressedTex(Sub)Image*, and
1945 // reads via glGetTexImage
1946 // GL_BUFFER_UPDATE_BARRIER_BIT:
1947 // - glBufferSubData, glCopyBufferSubData, or glGetBufferSubData, or buffer object memory mapped
1948 // by glMapBuffer or glMapBufferRange
1949 // Do these two cover all memory access (CORE_ACCESS_MEMORY_READ_BIT, CORE_ACCESS_MEMORY_WRITE_BIT)?
1951 if (barrier.dst.accessFlags & (CORE_ACCESS_TRANSFER_READ_BIT | CORE_ACCESS_TRANSFER_WRITE_BIT |
1952 CORE_ACCESS_HOST_READ_BIT | CORE_ACCESS_HOST_WRITE_BIT)) {
1953 if (handleType == RenderHandleType::GPU_IMAGE) {
1954 barriers |= GL_TEXTURE_UPDATE_BARRIER_BIT;
1955 } else if (handleType == RenderHandleType::GPU_BUFFER) {
1956 barriers |= GL_BUFFER_UPDATE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT;
1957 }
1958 }
1959 // GL_TRANSFORM_FEEDBACK_BARRIER_BIT is not used at the moment
1960 }
1961 }
1962 }
1963 }
1964 if (barriers) {
1965 glMemoryBarrier(barriers);
1966 }
1967 if (barriersByRegion) {
1968 // only for fragment-fragment
1969 glMemoryBarrierByRegion(barriersByRegion);
1970 }
1971 }
1972 }
1973
1974 Gles::Bind& RenderBackendGLES::SetupBind(const DescriptorSetLayoutBinding& binding, vector<Gles::Bind>& resources)
1975 {
1976 PLUGIN_ASSERT(binding.binding < resources.size());
1977 auto& obj = resources[binding.binding];
1978 PLUGIN_ASSERT(obj.resources.size() == binding.descriptorCount);
1979 PLUGIN_ASSERT(obj.descriptorType == binding.descriptorType);
1980 return obj;
1981 }
1982
1983 void RenderBackendGLES::BindSampler(const BindableSampler& res, Gles::Bind& obj, uint32_t index)
1984 {
1985 const auto* gpuSampler = gpuResourceMgr_.GetSampler<GpuSamplerGLES>(res.handle);
1986 if (gpuSampler) {
1987 const auto& plat = gpuSampler->GetPlatformData();
1988 obj.resources[index].sampler.samplerId = plat.sampler;
1989 } else {
1990 obj.resources[index].sampler.samplerId = 0;
1991 }
1992 }
1993
1994 void RenderBackendGLES::BindImage(
1995 const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
1996 {
1997 const AccessFlags accessFlags = resState.accessFlags;
1998 auto* gpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(res.handle);
1999 auto& ref = obj.resources[index];
2000 ref.image.image = gpuImage;
2001 const bool read = IS_BIT(accessFlags, CORE_ACCESS_SHADER_READ_BIT);
2002 const bool write = IS_BIT(accessFlags, CORE_ACCESS_SHADER_WRITE_BIT);
2003 if (read && write) {
2004 ref.image.mode = GL_READ_WRITE;
2005 } else if (read) {
2006 ref.image.mode = GL_READ_ONLY;
2007 } else if (write) {
2008 ref.image.mode = GL_WRITE_ONLY;
2009 } else {
2010 // no read and no write?
2011 ref.image.mode = GL_READ_WRITE;
2012 }
2013 ref.image.mipLevel = res.mip;
2014 }
2015
2016 void RenderBackendGLES::BindImageSampler(
2017 const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
2018 {
2019 BindImage(res, resState, obj, index);
2020 BindSampler(BindableSampler { res.samplerHandle }, obj, index);
2021 }
2022
2023 void RenderBackendGLES::BindBuffer(const BindableBuffer& res, Gles::Bind& obj, uint32_t dynamicOffset, uint32_t index)
2024 {
2025 const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(res.handle);
2026 if (gpuBuffer) {
2027 const auto& plat = gpuBuffer->GetPlatformData();
2028 const uint32_t baseOffset = res.byteOffset;
2029 obj.resources[index].buffer.offset = baseOffset + plat.currentByteOffset + dynamicOffset;
2030 obj.resources[index].buffer.size = std::min(plat.bindMemoryByteSize - baseOffset, res.byteSize);
2031 obj.resources[index].buffer.bufferId = plat.buffer;
2032 } else {
2033 obj.resources[index].buffer.offset = 0;
2034 obj.resources[index].buffer.size = 0;
2035 obj.resources[index].buffer.bufferId = 0;
2036 }
2037 }
2038
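// Updates the bind cache for one descriptor set: resolves each binding to GL buffer/texture/sampler ids and
// applies dynamic offsets; nothing is bound to the GL state yet (that happens later in BindResources).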
2039 void RenderBackendGLES::ProcessBindings(const struct RenderCommandBindDescriptorSets& renderCmd,
2040 const DescriptorSetLayoutBindingResources& data, uint32_t set)
2041 {
2042 BindState& bind = boundObjects_[set];
2043 vector<Gles::Bind>& resources = bind.resources;
2044 #if RENDER_HAS_GLES_BACKEND
2045 bind.oesBinds.clear();
2046 #endif
2047 const auto& dynamicOffsets = renderCmd.descriptorSetDynamicOffsets[set];
2048 const auto& buffers = data.buffers;
2049 const auto& images = data.images;
2050 const auto& samplers = data.samplers;
2051 uint32_t currDynamic = 0U;
2052 for (const auto& res : data.bindings) {
2053 auto& obj = SetupBind(res.binding, resources);
2054 #if RENDER_HAS_GLES_BACKEND
2055 bool hasOes = false;
2056 #endif
2057 auto GetArrayOffset = [](const auto& data, const auto& res) {
2058 const RenderHandleType type = GetRenderHandleType(res.binding.descriptorType);
2059 if (type == RenderHandleType::GPU_BUFFER) {
2060 return data.buffers[res.resourceIndex].arrayOffset;
2061 } else if (type == RenderHandleType::GPU_IMAGE) {
2062 return data.images[res.resourceIndex].arrayOffset;
2063 } else if (type == RenderHandleType::GPU_SAMPLER) {
2064 return data.samplers[res.resourceIndex].arrayOffset;
2065 }
2066 return 0u;
2067 };
2068 const bool hasArrOffset = (res.binding.descriptorCount > 1);
2069 const uint32_t arrayOffset = hasArrOffset ? GetArrayOffset(data, res) : 0;
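// index 0 uses the binding's own resourceIndex; the remaining array elements are assumed to be stored
// contiguously starting at arrayOffset.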
2070 for (uint8_t index = 0; index < res.binding.descriptorCount; index++) {
2071 const uint32_t resIdx = (index == 0) ? res.resourceIndex : (arrayOffset + index - 1);
2072 GpuImageGLES* image = nullptr;
2073 switch (res.binding.descriptorType) {
2074 case CORE_DESCRIPTOR_TYPE_SAMPLER: {
2075 const auto& bRes = samplers[resIdx];
2076 BindSampler(bRes.resource, obj, index);
2077 break;
2078 }
2079 case CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2080 case CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2081 case CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
2082 const auto& bRes = images[resIdx];
2083 BindImage(bRes.resource, bRes.state, obj, index);
2084 image = obj.resources[index].image.image;
2085 break;
2086 }
2087 case CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
2088 const auto& bRes = images[resIdx];
2089 BindImageSampler(bRes.resource, bRes.state, obj, index);
2090 image = obj.resources[index].image.image;
2091 break;
2092 }
2093 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2094 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
2095 const auto& bRes = buffers[resIdx];
2096 uint32_t dynamicOffset = 0;
2097 if (currDynamic < dynamicOffsets.dynamicOffsetCount) {
2098 dynamicOffset = dynamicOffsets.dynamicOffsets[currDynamic];
2099 currDynamic++;
2100 }
2101 BindBuffer(bRes.resource, obj, dynamicOffset, index);
2102 break;
2103 }
2104 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2105 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
2106 const auto& bRes = buffers[resIdx];
2107 BindBuffer(bRes.resource, obj, 0, index);
2108 break;
2109 }
2110 case CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2111 case CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2112 case CORE_DESCRIPTOR_TYPE_MAX_ENUM:
2113 default:
2114 PLUGIN_ASSERT_MSG(false, "Unhandled descriptor type");
2115 break;
2116 }
2117 #if RENDER_HAS_GLES_BACKEND
2118 if ((image) && (image->GetPlatformData().type == GL_TEXTURE_EXTERNAL_OES)) {
2119 hasOes = true;
2120 }
2121 #endif
2122 }
2123 #if RENDER_HAS_GLES_BACKEND
2124 if (hasOes) {
2125 bind.oesBinds.push_back(OES_Bind { (uint8_t)set, (uint8_t)res.binding.binding });
2126 }
2127 #endif
2128 }
2129 }
2130
2131 void RenderBackendGLES::RenderCommandBindDescriptorSets(const RenderCommandWithType& ref)
2132 {
2133 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_DESCRIPTOR_SETS);
2134 if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2135 return;
2136 }
2137 const auto& renderCmd = *static_cast<const struct RenderCommandBindDescriptorSets*>(ref.rc);
2138 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2139
2140 const auto& aNcdsm = *managers_.descriptorSetMgr;
2141 for (uint32_t idx = renderCmd.firstSet; idx < renderCmd.firstSet + renderCmd.setCount; ++idx) {
2142 PLUGIN_ASSERT_MSG(idx < Gles::ResourceLimits::MAX_SETS, "Invalid descriptorset index");
2143 const auto descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2144 PLUGIN_ASSERT(RenderHandleUtil::IsValid(descriptorSetHandle));
2145 const auto& data = aNcdsm.GetCpuDescriptorSetData(descriptorSetHandle);
2146 boundObjects_[idx].dirty = true; // mark the set as "changed"
2147 ProcessBindings(renderCmd, data, idx);
2148 // (note, nothing actually gets bound yet.. just the bind cache is updated)
2149 }
2150 }
2151
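// Push constants are emulated with plain uniforms: the reflected location and type select the matching
// glProgramUniform* call.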
2152 void RenderBackendGLES::SetPushConstant(uint32_t program, const Gles::PushConstantReflection& pc, const void* data)
2153 {
2154 const GLint location = static_cast<GLint>(pc.location);
2155 // the consts list has been filtered and cleared of unused uniforms.
2156 PLUGIN_ASSERT(location != Gles::INVALID_LOCATION);
2157 GLint count = Math::max(static_cast<GLint>(pc.arraySize), 1);
2158 switch (pc.type) {
2159 case GL_UNSIGNED_INT: {
2160 glProgramUniform1uiv(program, location, count, static_cast<const GLuint*>(data));
2161 break;
2162 }
2163 case GL_FLOAT: {
2164 glProgramUniform1fv(program, location, count, static_cast<const GLfloat*>(data));
2165 break;
2166 }
2167 case GL_FLOAT_VEC2: {
2168 glProgramUniform2fv(program, location, count, static_cast<const GLfloat*>(data));
2169 break;
2170 }
2171 case GL_FLOAT_VEC4: {
2172 glProgramUniform4fv(program, location, count, static_cast<const GLfloat*>(data));
2173 break;
2174 }
2175 case GL_FLOAT_MAT4: {
2176 glProgramUniformMatrix4fv(program, location, count, false, static_cast<const GLfloat*>(data));
2177 break;
2178 }
2179 case GL_UNSIGNED_INT_VEC4: {
2180 glProgramUniform4uiv(program, location, count, static_cast<const GLuint*>(data));
2181 break;
2182 }
2183 default:
2184 PLUGIN_ASSERT_MSG(false, "Unhandled pushconstant variable type");
2185 }
2186 }
2187
2188 void RenderBackendGLES::SetPushConstants(uint32_t program, const array_view<Gles::PushConstantReflection>& consts)
2189 {
2190 if (boundProgram_.setPushConstants) {
2191 boundProgram_.setPushConstants = false;
2192 const auto& renderCmd = boundProgram_.pushConstants;
2193 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2194 PLUGIN_ASSERT_MSG(renderCmd.pushConstant.byteSize > 0, "PushConstant byteSize is zero!");
2195 PLUGIN_ASSERT_MSG(renderCmd.data, "PushConstant data is nullptr!");
2196 if ((renderCmd.data == nullptr) || (renderCmd.pushConstant.byteSize == 0))
2197 return;
2198 // ASSERT: expecting data is valid
2199 // NOTE: handle rest of the types
2200 for (const auto& pc : consts) {
2201 const size_t offs = pc.offset;
2202 if ((offs + pc.size) > renderCmd.pushConstant.byteSize) {
2203 PLUGIN_LOG_E(
2204 "pushConstant data invalid (data for %s is missing [offset:%zu size:%zu] byteSize of data:%u)",
2205 pc.name.c_str(), pc.offset, pc.size, renderCmd.pushConstant.byteSize);
2206 continue;
2207 }
2208 /*
2209 NOTE: handle the strides....
2210 consts[i].array_stride;
2211 consts[i].matrix_stride; */
2212 SetPushConstant(program, pc, &renderCmd.data[offs]);
2213 }
2214 }
2215 }
2216
2217 void RenderBackendGLES::RenderCommandPushConstant(const RenderCommandWithType& ref)
2218 {
2219 PLUGIN_ASSERT(ref.type == RenderCommandType::PUSH_CONSTANT);
2220 if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2221 return;
2222 }
2223 const auto& renderCmd = *static_cast<const struct RenderCommandPushConstant*>(ref.rc);
2224 if (renderCmd.pushConstant.byteSize > 0) {
2225 PLUGIN_ASSERT(renderCmd.data);
2226 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2227 boundProgram_.setPushConstants = true;
2228 boundProgram_.pushConstants = renderCmd;
2229 }
2230 }
2231
2232 void RenderBackendGLES::RenderCommandClearColorImage(const RenderCommandWithType& ref)
2233 {
2234 PLUGIN_ASSERT(ref.type == RenderCommandType::CLEAR_COLOR_IMAGE);
2235 #if RENDER_HAS_GLES_BACKEND
2236 #if (RENDER_VALIDATION_ENABLED == 1)
2237 PLUGIN_LOG_ONCE_E("RenderBackendGLES::RenderCommandClearColorImage",
2238 "Render command clear color image not support with GLES. One should implement higher level path for "
2239 "clearing.");
2240 #endif
2241 #else
2242 const auto& renderCmd = *static_cast<const struct RenderCommandClearColorImage*>(ref.rc);
2243
2244 const GpuImageGLES* imagePtr = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.handle);
2245 if (imagePtr) {
2246 const GpuImagePlatformDataGL& platImage = imagePtr->GetPlatformData();
2247 // NOTE: mip levels and array layers should be handled separately
2248 for (const auto& subresRef : renderCmd.ranges) {
2249 glClearTexImage(platImage.image, // texture
2250 (int32_t)subresRef.baseMipLevel, // level
2251 platImage.format, // format
2252 platImage.dataType, // type
2253 &renderCmd.color); // data
2254 }
2255 }
2256 #endif
2257 }
2258
2259 // dynamic states
2260 void RenderBackendGLES::RenderCommandDynamicStateViewport(const RenderCommandWithType& ref)
2261 {
2262 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_VIEWPORT);
2263 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateViewport*>(ref.rc);
2264 const ViewportDesc& vd = renderCmd.viewportDesc;
2265 SetViewport(renderArea_, vd);
2266 }
2267
2268 void RenderBackendGLES::RenderCommandDynamicStateScissor(const RenderCommandWithType& ref)
2269 {
2270 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_SCISSOR);
2271 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateScissor*>(ref.rc);
2272 const ScissorDesc& sd = renderCmd.scissorDesc;
2273 SetScissor(renderArea_, sd);
2274 }
2275
2276 void RenderBackendGLES::RenderCommandDynamicStateLineWidth(const RenderCommandWithType& ref)
2277 {
2278 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_LINE_WIDTH);
2279 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateLineWidth*>(ref.rc);
2280 if (renderCmd.lineWidth != cacheState_.rasterizationState.lineWidth) {
2281 cacheState_.rasterizationState.lineWidth = renderCmd.lineWidth;
2282 glLineWidth(renderCmd.lineWidth);
2283 }
2284 }
2285
2286 void RenderBackendGLES::RenderCommandDynamicStateDepthBias(const RenderCommandWithType& ref)
2287 {
2288 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS);
2289 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBias not implemented");
2290 }
2291
2292 void RenderBackendGLES::RenderCommandDynamicStateBlendConstants(const RenderCommandWithType& ref)
2293 {
2294 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS);
2295 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateBlendConstants not implemented");
2296 }
2297
2298 void RenderBackendGLES::RenderCommandDynamicStateDepthBounds(const RenderCommandWithType& ref)
2299 {
2300 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS);
2301 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBounds not implemented");
2302 }
2303
2304 void RenderBackendGLES::SetStencilState(const uint32_t frontFlags, const GraphicsState::StencilOpState& front,
2305 const uint32_t backFlags, const GraphicsState::StencilOpState& back)
2306 {
2307 auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2308 auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2309 const uint32_t FUNCMASK =
2310 (StencilSetFlags::SETCOMPAREOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETREFERENCE);
2311 if (frontFlags & StencilSetFlags::SETWRITEMASK) {
2312 cFront.writeMask = front.writeMask;
2313 glStencilMaskSeparate(GL_FRONT, cFront.writeMask);
2314 }
2315 if (frontFlags & FUNCMASK) {
2316 SetStencilCompareOp(cFront, front);
2317 glStencilFuncSeparate(
2318 GL_FRONT, GetCompareOp(cFront.compareOp), static_cast<GLint>(cFront.reference), cFront.compareMask);
2319 }
2320 if (frontFlags & StencilSetFlags::SETOP) {
2321 SetStencilOp(cFront, front);
2322 glStencilOpSeparate(
2323 GL_FRONT, GetStencilOp(cFront.failOp), GetStencilOp(cFront.depthFailOp), GetStencilOp(cFront.passOp));
2324 }
2325 if (backFlags & StencilSetFlags::SETWRITEMASK) {
2326 cBack.writeMask = back.writeMask;
2327 glStencilMaskSeparate(GL_BACK, cBack.writeMask);
2328 }
2329 if (backFlags & FUNCMASK) {
2330 SetStencilCompareOp(cBack, back);
2331 glStencilFuncSeparate(
2332 GL_BACK, GetCompareOp(cBack.compareOp), static_cast<GLint>(cBack.reference), cBack.compareMask);
2333 }
2334 if (backFlags & StencilSetFlags::SETOP) {
2335 SetStencilOp(cBack, back);
2336 glStencilOpSeparate(
2337 GL_BACK, GetStencilOp(cBack.failOp), GetStencilOp(cBack.depthFailOp), GetStencilOp(cBack.passOp));
2338 }
2339 }
2340
2341 void RenderBackendGLES::RenderCommandDynamicStateStencil(const RenderCommandWithType& ref)
2342 {
2343 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_STENCIL);
2344 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateStencil*>(ref.rc);
2345 auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2346 auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2347 uint32_t setFront = 0;
2348 uint32_t setBack = 0;
2349 if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_FRONT_BIT) {
2350 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2351 if (renderCmd.mask != cFront.compareMask) {
2352 cFront.compareMask = renderCmd.mask;
2353 setFront |= StencilSetFlags::SETCOMPAREMASK;
2354 }
2355 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2356 if (renderCmd.mask != cFront.writeMask) {
2357 cFront.writeMask = renderCmd.mask;
2358 setFront |= StencilSetFlags::SETWRITEMASK;
2359 }
2360 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2361 if (renderCmd.mask != cFront.reference) {
2362 cFront.reference = renderCmd.mask;
2363 setFront |= StencilSetFlags::SETREFERENCE;
2364 }
2365 }
2366 }
2367 if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_BACK_BIT) {
2368 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2369 if (renderCmd.mask != cBack.compareMask) {
2370 cBack.compareMask = renderCmd.mask;
2371 setBack |= StencilSetFlags::SETCOMPAREMASK;
2372 }
2373 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2374 if (renderCmd.mask != cBack.writeMask) {
2375 cBack.writeMask = renderCmd.mask;
2376 setBack |= StencilSetFlags::SETWRITEMASK;
2377 }
2378 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2379 if (renderCmd.mask != cBack.reference) {
2380 cBack.reference = renderCmd.mask;
2381 setBack |= StencilSetFlags::SETREFERENCE;
2382 }
2383 }
2384 }
2385 SetStencilState(setFront, cFront, setBack, cBack);
2386 }
2387
2388 void RenderBackendGLES::RenderCommandFragmentShadingRate(const RenderCommandWithType& renderCmd)
2389 {
2390 #if (RENDER_VALIDATION_ENABLED == 1)
2391 PLUGIN_LOG_ONCE_I("gles_RenderCommandFragmentShadingRate",
2392 "RENDER_VALIDATION: Fragment shading rate not available with GL(ES) backend.");
2393 #endif
2394 }
2395
2396 void RenderBackendGLES::RenderCommandExecuteBackendFramePosition(const RenderCommandWithType& renderCmd)
2397 {
2398 PLUGIN_ASSERT_MSG(false, "RenderCommandExecuteBackendFramePosition not implemented");
2399 }
2400
2401 void RenderBackendGLES::RenderCommandWriteTimestamp(const RenderCommandWithType& renderCmd)
2402 {
2403 PLUGIN_ASSERT_MSG(false, "RenderCommandWriteTimestamp not implemented");
2404 }
2405
2406 void RenderBackendGLES::BindVertexInputs(
2407 const VertexInputDeclarationData& decldata, const array_view<const int32_t>& vertexInputs)
2408 {
2409 // update bindings for the VAO.
2410 // process with attribute descriptions to only bind the needed vertex buffers
2411 // NOTE: there may be extra bindings in decldata.bindingDescriptions,
2412 // but we only bind the ones needed for the shader
2413 const uint32_t minBinding = Math::min(vertexAttribBinds_, decldata.attributeDescriptionCount);
2414 for (uint32_t i = 0; i < minBinding; ++i) {
2415 const auto& attributeRef = decldata.attributeDescriptions[i];
2416 const uint32_t location = attributeRef.location;
2417 const uint32_t binding = attributeRef.binding;
2418 // NOTE: we need to bind all the buffers to the correct bindings.
2419 // shader optimized check (vertexInputs, some locations are not in use)
2420 if ((location != ~0u) && (binding != ~0u) && (vertexInputs[location] != Gles::INVALID_LOCATION)) {
2421 const auto& slot = vertexAttribBindSlots_[binding];
2422 const auto& bindingRef = decldata.bindingDescriptions[binding];
2423 PLUGIN_ASSERT(bindingRef.binding == binding);
2424 // buffer bound to slot, and it's used by the shader.
2425 device_.BindVertexBuffer(binding, slot.id, slot.offset, static_cast<intptr_t>(bindingRef.stride));
2426 /*
2427 core/vulkan
2428 bindingRef.vertexInputRate = CORE_VERTEX_INPUT_RATE_VERTEX (0) attribute index advances per vertex
2429 bindingRef.vertexInputRate = CORE_VERTEX_INPUT_RATE_INSTANCE (1) attribute index advances per instance
2430
2431 gl/gles
2432 If divisor is 0, the attributes using the buffer bound to bindingindex advance once per vertex.
2433 If divisor is >0, the attributes advance once per divisor instances of the set(s) of vertices being
2434 rendered.
2435
2436 so we can directly pass the inputRate as VertexBindingDivisor. (ie. advance once per instance)
2437 ie. enum happens to match and can simply cast.
2438 */
2439 static_assert(CORE_VERTEX_INPUT_RATE_VERTEX == 0 && CORE_VERTEX_INPUT_RATE_INSTANCE == 1);
2440 device_.VertexBindingDivisor(binding, static_cast<uint32_t>(bindingRef.vertexInputRate));
2441 }
2442 }
2443 }
2444
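// Flushes the cached state to GL before a draw/dispatch: selects the program (an OES-compatible variant if
// needed), binds the VAO, vertex/index buffers, push constants, the flip uniform and the descriptor set
// resources from the bind cache.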
2445 void RenderBackendGLES::BindResources()
2446 {
2447 #if RENDER_HAS_GLES_BACKEND
2448 // scan all sets here to see if any of the sets has OES textures bound.
2449 // we don't actually need to rebuild this info every time.
2450 // should "emulate" the gpu descriptor sets better. (and store this information along with the other bind cache
2451 // data there)
2452 oesBinds_.clear();
2453 for (const auto& state : boundObjects_) {
2454 const auto& oes = state.oesBinds;
2455 if (!oes.empty()) {
2456 oesBinds_.insert(oesBinds_.end(), oes.begin(), oes.end());
2457 }
2458 }
2459 #endif
2460 const array_view<Binder>* resourceList = nullptr;
2461 const array_view<Gles::PushConstantReflection>* pushConstants = nullptr;
2462 int32_t flipLocation = Gles::INVALID_LOCATION;
2463 uint32_t program = 0;
2464 // Push constants and the "fliplocation" uniform (ie. uniform state) should only be updated if changed...
2465 if (currentFrameBuffer_) { // currentFrameBuffer_ is only set if a graphics pipeline is bound..
2466 PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
2467 PLUGIN_ASSERT(boundGraphicsPipeline_);
2468 if (!boundGraphicsPipeline_) {
2469 return;
2470 }
2471 array_view<const int32_t> vertexInputs;
2472 const auto& pipelineData =
2473 static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
2474 const GpuShaderProgramGLES* shader = pipelineData.graphicsShader;
2475 #if RENDER_HAS_GLES_BACKEND
2476 if (!oesBinds_.empty()) {
2477 // okay, the oes vector contains the set/bind points to which an OES texture is bound
2478 // ask for a compatible program from the boundGraphicsPipeline_
2479 shader = boundGraphicsPipeline_->GetOESProgram(oesBinds_);
2480 }
2481 #endif
2482 const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
2483 program = sd.program;
2484 vertexInputs = { sd.inputs, countof(sd.inputs) };
2485 FlushViewportScissors();
2486 if (!scissorEnabled_) {
2487 scissorEnabled_ = true;
2488 glEnable(GL_SCISSOR_TEST); // Always enabled
2489 }
2490 #if (RENDER_PERF_ENABLED == 1)
2491 if (device_.BoundProgram() != program) {
2492 ++perfCounters_.bindProgram;
2493 }
2494 #endif
2495 device_.UseProgram(program);
2496 device_.BindVertexArray(pipelineData.vao);
2497 BindVertexInputs(pipelineData.vertexInputDeclaration, vertexInputs);
2498 device_.BindElementBuffer(boundIndexBuffer_.id);
2499 resourceList = &sd.resourceList;
2500 flipLocation = sd.flipLocation;
2501 pushConstants = &sd.pushConstants;
2502 } else {
2503 PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
2504 PLUGIN_ASSERT(boundComputePipeline_);
2505 if (!boundComputePipeline_) {
2506 return;
2507 }
2508 const auto& pipelineData =
2509 static_cast<const PipelineStateObjectPlatformDataGL&>(boundComputePipeline_->GetPlatformData());
2510 if (pipelineData.computeShader) {
2511 const auto& sd =
2512 static_cast<const GpuComputeProgramPlatformDataGL&>(pipelineData.computeShader->GetPlatformData());
2513 program = sd.program;
2514 #if (RENDER_PERF_ENABLED == 1)
2515 if (device_.BoundProgram() != program) {
2516 ++perfCounters_.bindProgram;
2517 }
2518 #endif
2519 device_.UseProgram(program);
2520 resourceList = &sd.resourceList;
2521 flipLocation = sd.flipLocation;
2522 pushConstants = &sd.pushConstants;
2523 }
2524 }

    // Guard against a missing resource list / push constants (e.g. a compute pipeline without a compute shader).
    if ((!resourceList) || (!pushConstants)) {
        return;
    }
    SetPushConstants(program, *pushConstants);
    if (flipLocation != Gles::INVALID_LOCATION) {
        const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
        glProgramUniform1fv(program, flipLocation, 1, &flip);
    }

    for (const auto& r : *resourceList) {
        PLUGIN_ASSERT(r.set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
        if (r.bind >= static_cast<uint32_t>(boundObjects_[r.set].resources.size())) {
            continue;
        }
        const auto& res = boundObjects_[r.set].resources[r.bind];
        PLUGIN_ASSERT(res.resources.size() == r.id.size());
        auto resType = res.descriptorType;
        if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
            resType = CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
        } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
            resType = CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        }

        // a few helpers for updating perf counters and binding the sampler/texture/buffer
        auto bindSampler = [this](uint32_t textureUnit, uint32_t samplerId) {
#if (RENDER_PERF_ENABLED == 1)
            if (device_.BoundSampler(textureUnit) != samplerId) {
                ++perfCounters_.bindSampler;
            }
#endif
            device_.BindSampler(textureUnit, samplerId);
        };
        auto bindTexture = [this](uint32_t textureUnit, const GpuImagePlatformDataGL& dplat) {
#if (RENDER_PERF_ENABLED == 1)
            if (device_.BoundTexture(textureUnit, dplat.type) != dplat.image) {
                ++perfCounters_.bindTexture;
            }
#endif
            device_.BindTexture(textureUnit, dplat.type, dplat.image);
        };
        auto bindTextureImage = [this](uint32_t textureUnit, const Gles::Bind::ImageType& image,
                                    const GpuImagePlatformDataGL& dplat) {
            uint32_t level = (image.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? image.mipLevel : 0U;
            device_.BindImageTexture(textureUnit, dplat.image, level, false, 0, image.mode, dplat.internalFormat);
        };
        auto bindBuffer = [this](uint32_t target, uint32_t binding, const Gles::Bind::BufferType& buffer) {
#if (RENDER_PERF_ENABLED == 1)
            if (device_.BoundBuffer(target) != buffer.bufferId) {
                ++perfCounters_.bindBuffer;
            }
#endif
            device_.BindBufferRange(target, binding, buffer.bufferId, buffer.offset, buffer.size);
        };
        auto setMipLevel = [](const uint32_t type, const uint32_t mipLevel) {
            // either force the defined mip level or use the defaults.
            glTexParameteri(type, GL_TEXTURE_BASE_LEVEL,
                static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 0U));
            glTexParameteri(type, GL_TEXTURE_MAX_LEVEL,
                static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 1000U));
        };

#if (RENDER_VALIDATION_ENABLED == 1)
        if (resType != r.type) {
            PLUGIN_LOG_ONCE_E(
                "backend_desc_type_mismatch_gles", "RENDER_VALIDATION: shader / pipeline descriptor type mismatch");
        }
#endif

        for (uint32_t index = 0; index < res.resources.size(); index++) {
            const auto& obj = res.resources[index];
            for (const auto& id : r.id[index]) {
                const auto binding = index + id;
                if (resType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
                    bindSampler(binding, obj.sampler.samplerId);
                } else if ((resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
                           (resType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                           (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
                    if (resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
                        bindSampler(binding, obj.sampler.samplerId);
                    } else if (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
                        bindSampler(binding, 0U);
                    }
                    if (obj.image.image) {
                        auto& dplat = obj.image.image->GetPlatformData();
                        bindTexture(binding, dplat);

                        // NOTE: the last setting is the active one; different mip levels cannot be bound
                        // from a single resource.
                        // Check and update (if needed) the forced mip level.
                        if (dplat.mipLevel != obj.image.mipLevel) {
                            // NOTE: we are actually modifying the texture object bound above
                            const_cast<GpuImagePlatformDataGL&>(dplat).mipLevel = obj.image.mipLevel;
                            setMipLevel(dplat.type, dplat.mipLevel);
                        }
                    }
                } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
                    if (obj.image.image) {
                        auto& dplat = obj.image.image->GetPlatformData();
                        bindTextureImage(binding, obj.image, dplat);
                    }
                } else if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
                    bindBuffer(GL_UNIFORM_BUFFER, binding, obj.buffer);
                } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
                    bindBuffer(GL_SHADER_STORAGE_BUFFER, binding, obj.buffer);
                }
            }
        }
    }
    // mark all sets as bound.
    for (auto& b : boundObjects_) {
        b.dirty = false;
    }
}

#if (RENDER_PERF_ENABLED == 1)
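// Creates a timer entry (and a GPU timestamp query when RENDER_GPU_TIMESTAMP_QUERIES_ENABLED is set) for every
// render command context that does not have one yet, keyed by the context debug name.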
void RenderBackendGLES::StartFrameTimers(const RenderCommandFrameData& renderCommandFrameData)
{
    for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
        const string_view& debugName = renderCommandContext.debugName;
        if (timers_.count(debugName) == 0) { // new timers
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            PerfDataSet& perfDataSet = timers_[debugName];
            constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
            perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryGLES(device_, desc));
            perfDataSet.counter = 0u;
#else
            timers_.insert({ debugName, {} });
#endif
        }
    }
}

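// Reports the accumulated backend CPU timers and the summed GPU time for the frame to the "Renderer"
// performance data manager.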
void RenderBackendGLES::EndFrameTimers()
{
    int64_t fullGpuTime = 0;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // already in microseconds
    fullGpuTime = fullGpuCounter_;
    fullGpuCounter_ = 0;
#endif
    if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
            CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
        perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
    }
}

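// Reads back the GPU timestamp query for the given node (skipping frames where a disjoint event occurred or
// where the result has not been buffered long enough), then reports per-node CPU/GPU times and backend counters
// to the "RenderNode" performance data manager.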
void RenderBackendGLES::CopyPerfTimeStamp(const string_view name, PerfDataSet& perfDataSet)
{
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    int64_t gpuMicroSeconds = 0;
    if (validGpuQueries_) {
        GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
        PLUGIN_ASSERT(gpuQuery);

        gpuQuery->NextQueryIndex();

        const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
        PLUGIN_ASSERT(platData.queryObject);

        GLint disjointOccurred = 0;
#ifdef GL_GPU_DISJOINT_EXT
        // Clear the disjoint error
        glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
#endif
        if (!disjointOccurred && (++perfDataSet.counter) > device_.GetCommandBufferingCount()) {
            GLuint64 gpuNanoSeconds = 0U;
#ifdef GL_GPU_DISJOINT_EXT
            glGetQueryObjectui64vEXT(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
#else
            glGetQueryObjectui64v(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
#endif
            static constexpr uint64_t NANOSECONDS_TO_MICROSECONDS = 1000;
            gpuMicroSeconds = static_cast<int64_t>(gpuNanoSeconds / NANOSECONDS_TO_MICROSECONDS);
            if (gpuMicroSeconds > UINT32_MAX) {
                gpuMicroSeconds = 0;
            }
            fullGpuCounter_ += gpuMicroSeconds;
        } else if (disjointOccurred) {
            PLUGIN_LOG_V("GL_GPU_DISJOINT_EXT disjoint occurred.");
        }
    }
#endif
    const int64_t cpuMicroSeconds = perfDataSet.cpuTimer.GetMicroseconds();

    if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
            CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");

        perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
#endif
        perfData->UpdateData(name, "Backend_Count_Triangle", perfCounters_.triangleCount);
        perfData->UpdateData(name, "Backend_Count_InstanceCount", perfCounters_.instanceCount);
        perfData->UpdateData(name, "Backend_Count_Draw", perfCounters_.drawCount);
        perfData->UpdateData(name, "Backend_Count_DrawIndirect", perfCounters_.drawIndirectCount);
        perfData->UpdateData(name, "Backend_Count_Dispatch", perfCounters_.dispatchCount);
        perfData->UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters_.dispatchIndirectCount);
        perfData->UpdateData(name, "Backend_Count_RenderPass", perfCounters_.renderPassCount);
        perfData->UpdateData(name, "Backend_Count_BindProgram", perfCounters_.bindProgram);
        perfData->UpdateData(name, "Backend_Count_BindSample", perfCounters_.bindSampler);
        perfData->UpdateData(name, "Backend_Count_BindTexture", perfCounters_.bindTexture);
        perfData->UpdateData(name, "Backend_Count_BindBuffer", perfCounters_.bindBuffer);
    }
}
#endif

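// Forces the cached depth / stencil state to match the given graphics state and applies it to GL.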
void RenderBackendGLES::PrimeDepthStencilState(const GraphicsState& graphicsState)
{
    auto& cDepth = cacheState_.depthStencilState;
    cDepth = graphicsState.depthStencilState;
    // CORE_DYNAMIC_STATE_DEPTH_BOUNDS NOT SUPPORTED ON GLES (and not implemented on GL either).
    SetState(GL_DEPTH_TEST, cDepth.enableDepthTest);
    SetState(GL_STENCIL_TEST, cDepth.enableStencilTest);
    glDepthFunc(GetCompareOp(cDepth.depthCompareOp));
    glDepthMask((cDepth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE)));
    const uint32_t updateAllFlags =
        (StencilSetFlags::SETOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETCOMPAREOP |
            StencilSetFlags::SETREFERENCE | StencilSetFlags::SETWRITEMASK);
    SetStencilState(updateAllFlags, cDepth.frontStencilOpState, updateAllFlags, cDepth.backStencilOpState);
}

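// Forces the cached color blend state to match the given graphics state: blend constants, per-attachment write
// masks, blend enables, factors and equations.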
void RenderBackendGLES::PrimeBlendState(const GraphicsState& graphicsState)
{
    auto& cBlend = cacheState_.colorBlendState;
    cBlend = graphicsState.colorBlendState;
    glBlendColor(cBlend.colorBlendConstants[Gles::RED_INDEX], cBlend.colorBlendConstants[Gles::GREEN_INDEX],
        cBlend.colorBlendConstants[Gles::BLUE_INDEX], cBlend.colorBlendConstants[Gles::ALPHA_INDEX]);
    GLuint maxColorAttachments = 0;
    glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS, reinterpret_cast<GLint*>(&maxColorAttachments));
    maxColorAttachments = BASE_NS::Math::min(PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT, maxColorAttachments);
    for (GLuint i = 0; i < maxColorAttachments; i++) {
        const auto& cBlendState = cBlend.colorAttachments[i];
        glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
            IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
            IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
            IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
        if (cBlendState.enableBlend) {
            glEnablei(GL_BLEND, i);
        } else {
            glDisablei(GL_BLEND, i);
        }
        glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
            GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
            GetBlendFactor(cBlendState.dstAlphaBlendFactor));
        glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
    }
    // logic ops are not supported on GLES
}

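// Primes the whole graphics state cache once: input assembly, rasterization, depth / stencil and blend state are
// forced to known values so that later delta updates can rely on the cache.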
void RenderBackendGLES::PrimeCache(const GraphicsState& graphicsState) // Forces the graphics state.
{
    if (cachePrimed_) {
        return;
    }
    cachePrimed_ = true;
    /// GRAPHICSSTATE inputAssembly
    const auto& ia = graphicsState.inputAssembly;
    auto& cia = cacheState_.inputAssembly;
    cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
    SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
    topology_ = ia.primitiveTopology;
    /// GRAPHICSSTATE rasterizationState
    const auto& rs = graphicsState.rasterizationState;
    auto& crs = cacheState_.rasterizationState;
    // save the polygon mode, since non-fill modes need to be handled manually (lines may need shader help).
    polygonMode_ = rs.polygonMode;
    // GL_DEPTH_CLAMP (rs.enableDepthClamp) NOT SUPPORTED; check GLES 3.2
    crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
    SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
    crs.enableDepthBias = rs.enableDepthBias;
    SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
    crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
    crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
    glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
    // depthBiasClamp NOT SUPPORTED; check GLES 3.2
    // If the cull mode flags change...
    crs.cullModeFlags = rs.cullModeFlags;
    SetCullMode(crs);
    crs.frontFace = rs.frontFace;
    SetFrontFace(crs);
    crs.lineWidth = rs.lineWidth;
    glLineWidth(rs.lineWidth);
    PrimeDepthStencilState(graphicsState);
    PrimeBlendState(graphicsState);
}

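// Applies only the depth test / compare / write changes relative to the cached depth state.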
void RenderBackendGLES::UpdateDepthState(const GraphicsState& graphicsState)
{
    const auto& depth = graphicsState.depthStencilState;
    auto& cDepth = cacheState_.depthStencilState;
    if (depth.enableDepthTest != cDepth.enableDepthTest) {
        cDepth.enableDepthTest = depth.enableDepthTest;
        SetState(GL_DEPTH_TEST, depth.enableDepthTest);
    }
    if (depth.depthCompareOp != cDepth.depthCompareOp) {
        cDepth.depthCompareOp = depth.depthCompareOp;
        glDepthFunc(GetCompareOp(depth.depthCompareOp));
    }
    if (depth.enableDepthWrite != cDepth.enableDepthWrite) {
        cDepth.enableDepthWrite = depth.enableDepthWrite;
        glDepthMask(depth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE));
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BOUNDS)) {
        // CORE_DYNAMIC_STATE_DEPTH_BOUNDS not supported on GLES.
    }
}

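// Collects which stencil parameters (reference, masks, compare op, ops) differ from the cache, honoring dynamic
// state flags, and applies them for the front and back faces in a single SetStencilState call.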
void RenderBackendGLES::UpdateStencilState(const GraphicsState& graphicsState)
{
    const auto& depth = graphicsState.depthStencilState;
    auto& cDepth = cacheState_.depthStencilState;
    if (depth.enableStencilTest != cDepth.enableStencilTest) {
        cDepth.enableStencilTest = depth.enableStencilTest;
        SetState(GL_STENCIL_TEST, depth.enableStencilTest);
    }
    uint32_t setFront = 0;
    uint32_t setBack = 0;
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_REFERENCE)) {
        if (cDepth.frontStencilOpState.reference != depth.frontStencilOpState.reference) {
            setFront |= StencilSetFlags::SETREFERENCE;
        }
        if (cDepth.backStencilOpState.reference != depth.backStencilOpState.reference) {
            setBack |= StencilSetFlags::SETREFERENCE;
        }
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
        if (cDepth.frontStencilOpState.compareMask != depth.frontStencilOpState.compareMask) {
            setFront |= StencilSetFlags::SETCOMPAREMASK;
        }
        if (cDepth.backStencilOpState.compareMask != depth.backStencilOpState.compareMask) {
            setBack |= StencilSetFlags::SETCOMPAREMASK;
        }
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
        if (cDepth.frontStencilOpState.writeMask != depth.frontStencilOpState.writeMask) {
            setFront |= StencilSetFlags::SETWRITEMASK;
        }
        if (cDepth.backStencilOpState.writeMask != depth.backStencilOpState.writeMask) {
            setBack |= StencilSetFlags::SETWRITEMASK;
        }
    }
    if (cDepth.frontStencilOpState.compareOp != depth.frontStencilOpState.compareOp) {
        setFront |= StencilSetFlags::SETCOMPAREOP;
    }
    if (cDepth.backStencilOpState.compareOp != depth.backStencilOpState.compareOp) {
        setBack |= StencilSetFlags::SETCOMPAREOP;
    }
    if (!CompareStencilOp(cDepth.frontStencilOpState, depth.frontStencilOpState)) {
        setFront |= StencilSetFlags::SETOP;
    }
    if (!CompareStencilOp(cDepth.backStencilOpState, depth.backStencilOpState)) {
        setBack |= StencilSetFlags::SETOP;
    }
    SetStencilState(setFront, depth.frontStencilOpState, setBack, depth.backStencilOpState);
}

void RenderBackendGLES::UpdateDepthStencilState(const GraphicsState& graphicsState)
{
    UpdateDepthState(graphicsState);
    UpdateStencilState(graphicsState);
}

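// Applies per-attachment blend changes (write mask, enable, factors, equations) and the blend constants relative
// to the cached blend state.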
void RenderBackendGLES::UpdateBlendState(const GraphicsState& graphicsState)
{
    const auto& blend = graphicsState.colorBlendState;
    auto& cBlend = cacheState_.colorBlendState;
    for (GLuint i = 0; i < blend.colorAttachmentCount; i++) {
        const auto& blendState = blend.colorAttachments[i];
        auto& cBlendState = cBlend.colorAttachments[i];
        if (blendState.colorWriteMask != cBlendState.colorWriteMask) {
            cBlendState.colorWriteMask = blendState.colorWriteMask;
            glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
                IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
                IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
                IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
        }

        // Check if blend state has changed
        bool factorsChanged = false;
        bool opsChanged = false;

        if (blendState.enableBlend) {
            factorsChanged = !CompareBlendFactors(cBlendState, blendState);
            opsChanged = !CompareBlendOps(cBlendState, blendState);
        }

        if (blendState.enableBlend != cBlendState.enableBlend || factorsChanged || opsChanged) {
            cBlendState.enableBlend = blendState.enableBlend;
            if (blendState.enableBlend) {
                glEnablei(GL_BLEND, i);
                if (factorsChanged) {
                    SetBlendFactors(cBlendState, blendState);
                    glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
                        GetBlendFactor(cBlendState.dstColorBlendFactor),
                        GetBlendFactor(cBlendState.srcAlphaBlendFactor),
                        GetBlendFactor(cBlendState.dstAlphaBlendFactor));
                }
                if (opsChanged) {
                    SetBlendOps(cBlendState, blendState);
                    glBlendEquationSeparatei(
                        i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
                }
            } else {
                glDisablei(GL_BLEND, i);
            }
        }
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_BLEND_CONSTANTS)) {
        if (!Compare(cBlend.colorBlendConstants, blend.colorBlendConstants)) {
            Set(cBlend.colorBlendConstants, blend.colorBlendConstants);
            glBlendColor(blend.colorBlendConstants[Gles::RED_INDEX], blend.colorBlendConstants[Gles::GREEN_INDEX],
                blend.colorBlendConstants[Gles::BLUE_INDEX], blend.colorBlendConstants[Gles::ALPHA_INDEX]);
        }
    }
    // logicOps in blend not supported on GLES
}

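// Applies rasterization state changes relative to the cache: polygon mode (GL only), rasterizer discard, depth
// bias, cull mode, front face (winding flipped for non-default framebuffers) and line width.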
void RenderBackendGLES::UpdateRasterizationState(const GraphicsState& graphicsState)
{
    const auto& rs = graphicsState.rasterizationState;
    auto& crs = cacheState_.rasterizationState;
    // save the polygon mode, since non-fill modes need to be handled manually (lines may need shader help).
    polygonMode_ = rs.polygonMode;
#if RENDER_HAS_GL_BACKEND
    if (rs.polygonMode != crs.polygonMode) {
        crs.polygonMode = rs.polygonMode;
        SetPolygonMode(rs);
    }
#endif
    if (rs.enableDepthClamp != crs.enableDepthClamp) {
        crs.enableDepthClamp = rs.enableDepthClamp;
        // NOT SUPPORTED (needs an extension)
    }
    if (rs.enableRasterizerDiscard != crs.enableRasterizerDiscard) {
        crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
        SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
    }
    if (rs.enableDepthBias != crs.enableDepthBias) {
        crs.enableDepthBias = rs.enableDepthBias;
        SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BIAS)) {
        if ((rs.depthBiasConstantFactor != crs.depthBiasConstantFactor) ||
            (rs.depthBiasSlopeFactor != crs.depthBiasSlopeFactor)) {
            crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
            crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
            glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
        }
        // depthBiasClamp NOT SUPPORTED (needs an extension)
    }
    // If the cull mode flags change...
    if (rs.cullModeFlags != crs.cullModeFlags) {
        crs.cullModeFlags = rs.cullModeFlags;
        SetCullMode(crs);
    }
    auto frontFace = rs.frontFace;
    if (!renderingToDefaultFbo_) {
        // Flip the winding when rendering to a non-default fbo.
        if (frontFace == FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE) {
            frontFace = FrontFace::CORE_FRONT_FACE_CLOCKWISE;
        } else if (frontFace == FrontFace::CORE_FRONT_FACE_CLOCKWISE) {
            frontFace = FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE;
        }
    }
    if (frontFace != crs.frontFace) {
        crs.frontFace = frontFace;
        SetFrontFace(crs);
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_LINE_WIDTH)) {
        if (rs.lineWidth != crs.lineWidth) {
            crs.lineWidth = rs.lineWidth;
            glLineWidth(rs.lineWidth);
        }
    }
}

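// Applies a full graphics state by delegating to the cached delta updates above.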
void RenderBackendGLES::DoGraphicsState(const GraphicsState& graphicsState)
{
    /// GRAPHICSSTATE inputAssembly
    const auto& ia = graphicsState.inputAssembly;
    if (ia.enablePrimitiveRestart != cacheState_.inputAssembly.enablePrimitiveRestart) {
        auto& cia = cacheState_.inputAssembly;
        cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
        SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
    }
    topology_ = ia.primitiveTopology;
    UpdateRasterizationState(graphicsState);
    UpdateDepthStencilState(graphicsState);
    UpdateBlendState(graphicsState);
}

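// Caches the requested viewport and marks it (and the depth range) for flushing if anything changed.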
void RenderBackendGLES::SetViewport(const RenderPassDesc::RenderArea& ra, const ViewportDesc& vd)
{
    // NOTE: ViewportDesc is in floats.
    bool forceV = false;
    bool forceD = false;
    if (!viewportPrimed_) {
        viewportPrimed_ = true;
        forceV = true;
        forceD = true;
    }
    if ((vd.x != viewport_.x) || (vd.y != viewport_.y) || (vd.width != viewport_.width) ||
        (vd.height != viewport_.height)) {
        forceV = true;
    }
    if ((vd.minDepth != viewport_.minDepth) || (vd.maxDepth != viewport_.maxDepth)) {
        forceD = true;
    }

    if (forceV) {
        viewport_.x = vd.x;
        viewport_.y = vd.y;
        viewport_.width = vd.width;
        viewport_.height = vd.height;
        viewportUpdated_ = true;
    }
    if (forceD) {
        viewport_.minDepth = vd.minDepth;
        viewport_.maxDepth = vd.maxDepth;
        viewportDepthRangeUpdated_ = true;
    }
}

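// Caches the requested scissor box and marks it for flushing if anything changed.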
void RenderBackendGLES::SetScissor(const RenderPassDesc::RenderArea& ra, const ScissorDesc& sd)
{
    // NOTE: ScissorDesc is in floats.
    bool force = false;
    if (!scissorPrimed_) {
        scissorPrimed_ = true;
        force = true;
    }
    if ((sd.offsetX != scissorBox_.offsetX) || (sd.offsetY != scissorBox_.offsetY) ||
        (sd.extentWidth != scissorBox_.extentWidth) || (sd.extentHeight != scissorBox_.extentHeight)) {
        force = true;
    }
    if (force) {
        scissorBox_ = sd;
        scissorBoxUpdated_ = true;
    }
}

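// Flushes the cached viewport, scissor box and depth range to GL, converting from top-left to bottom-left origin
// when rendering to the default framebuffer.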
void RenderBackendGLES::FlushViewportScissors()
{
    if (!currentFrameBuffer_) {
        return;
    }
    bool force = false;
    if (scissorViewportSetDefaultFbo_ != renderingToDefaultFbo_) {
        force = true;
        scissorViewportSetDefaultFbo_ = renderingToDefaultFbo_;
    }
    if ((viewportUpdated_) || (force)) {
        viewportUpdated_ = false;
        // Handle top-left / bottom-left origin conversion
        PLUGIN_ASSERT(currentFrameBuffer_);
        GLint y = static_cast<GLint>(viewport_.y);
        const GLsizei h = static_cast<GLsizei>(viewport_.height);
        if (renderingToDefaultFbo_) {
            const GLsizei fh = static_cast<GLsizei>(currentFrameBuffer_->height);
            y = fh - (y + h);
        }
        glViewport(static_cast<GLint>(viewport_.x), y, static_cast<GLsizei>(viewport_.width), h);
    }
    if ((scissorBoxUpdated_) || (force)) {
        scissorBoxUpdated_ = false;
        // Handle top-left / bottom-left origin conversion
        GLint y = static_cast<GLint>(scissorBox_.offsetY);
        const GLsizei h = static_cast<GLsizei>(scissorBox_.extentHeight);
        if (renderingToDefaultFbo_) {
            const GLsizei fh = static_cast<GLsizei>(currentFrameBuffer_->height);
            y = fh - (y + h);
        }
        glScissor(static_cast<GLint>(scissorBox_.offsetX), y, static_cast<GLsizei>(scissorBox_.extentWidth), h);
    }
    if (viewportDepthRangeUpdated_) {
        viewportDepthRangeUpdated_ = false;
        glDepthRangef(viewport_.minDepth, viewport_.maxDepth);
    }
}
RENDER_END_NAMESPACE()
