1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "render_backend_gles.h"
16 
17 #include <algorithm>
18 
19 #include <base/containers/fixed_string.h>
20 #include <core/perf/intf_performance_data_manager.h>
21 #include <render/datastore/render_data_store_render_pods.h> // NodeGraphBackbufferConfiguration...
22 #include <render/namespace.h>
23 
24 #if (RENDER_PERF_ENABLED == 1)
25 #include "perf/gpu_query.h"
26 #include "perf/gpu_query_manager.h"
27 #endif
28 #include "device/gpu_resource_manager.h"
29 #include "gles/device_gles.h"
30 #include "gles/gl_functions.h"
31 #include "gles/gpu_buffer_gles.h"
32 #include "gles/gpu_image_gles.h"
33 #include "gles/gpu_program_gles.h"
34 #include "gles/gpu_query_gles.h"
35 #include "gles/gpu_sampler_gles.h"
36 #include "gles/gpu_semaphore_gles.h"
37 #include "gles/node_context_descriptor_set_manager_gles.h"
38 #include "gles/node_context_pool_manager_gles.h"
39 #include "gles/pipeline_state_object_gles.h"
40 #include "gles/render_frame_sync_gles.h"
41 #include "gles/swapchain_gles.h"
42 #include "nodecontext/render_command_list.h"
43 #include "nodecontext/render_node_graph_node_store.h" // RenderCommandFrameData
44 #include "util/log.h"
45 #include "util/render_frame_util.h"
46 
47 #define IS_BIT(value, bit) ((((value) & (bit)) == (bit)) ? true : false)
48 #define IS_BIT_GL(value, bit) ((((value) & (bit)) == (bit)) ? (GLboolean)GL_TRUE : (GLboolean)GL_FALSE)
49 
50 using namespace BASE_NS;
51 
52 // NOTE: implement missing commands, add state caching and clean up a bit more.
53 RENDER_BEGIN_NAMESPACE()
54 namespace Gles {
55 // Indices to colorBlendConstants
56 static constexpr uint32_t RED_INDEX = 0;
57 static constexpr uint32_t GREEN_INDEX = 1;
58 static constexpr uint32_t BLUE_INDEX = 2;
59 static constexpr uint32_t ALPHA_INDEX = 3;
60 static constexpr uint32_t CUBEMAP_LAYERS = 6;
61 struct Bind {
62     DescriptorType descriptorType { CORE_DESCRIPTOR_TYPE_MAX_ENUM };
63     struct BufferType {
64         uint32_t bufferId;
65         uint32_t offset;
66         uint32_t size;
67     };
68     struct ImageType {
69         GpuImageGLES* image;
70         uint32_t mode;
71         uint32_t mipLevel;
72     };
73     struct SamplerType {
74         uint32_t samplerId;
75     };
76     struct Resource {
77         union {
78             Bind::BufferType buffer { 0, 0, 0 };
79             Bind::ImageType image;
80         };
81         SamplerType sampler { 0 };
82     };
83     vector<Resource> resources;
84 };
85 } // namespace Gles
86 namespace {
87 constexpr RenderHandleType GetRenderHandleType(const DescriptorType descriptorType)
88 {
89     if (descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
90         return RenderHandleType::GPU_SAMPLER;
91     } else if (((descriptorType >= CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
92                    (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE)) ||
93                (descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
94         return RenderHandleType::GPU_IMAGE;
95     } else if ((descriptorType >= CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) &&
96                (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
97         return RenderHandleType::GPU_BUFFER;
98     }
99     return RenderHandleType::UNDEFINED;
100 }
101 
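// Maps a cube map layer index (0..5) to the matching GL_TEXTURE_CUBE_MAP_* face target.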
102 GLenum getCubeMapTarget(GLenum type, uint32_t layer)
103 {
104     if (type == GL_TEXTURE_CUBE_MAP) {
105         constexpr GLenum layerId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
106             GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
107             GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
108         PLUGIN_ASSERT_MSG(layer < Gles::CUBEMAP_LAYERS, "Invalid cubemap index %u", layer);
109         return layerId[layer];
110     }
111     PLUGIN_ASSERT_MSG(false, "Unhandled type in getCubeMapTarget! %x", type);
112     return GL_NONE;
113 }
114 
115 GLenum getTarget(GLenum type, uint32_t layer, uint32_t sampleCount)
116 {
117     if (type == GL_TEXTURE_2D) {
118         if (sampleCount > 1) {
119             return GL_TEXTURE_2D_MULTISAMPLE;
120         }
121         return GL_TEXTURE_2D;
122     }
123     if (type == GL_TEXTURE_CUBE_MAP) {
124         PLUGIN_ASSERT_MSG(sampleCount == 1, "Cubemap texture can't have MSAA");
125         return getCubeMapTarget(type, layer);
126     }
127     PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
128     return GL_NONE;
129 }
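// One side of a framebuffer blit: mip level, two corner coordinates packed into Size3D, and the
// full image height used by DoBlit to convert between top-left and bottom-left origins.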
130 struct BlitArgs {
131     uint32_t mipLevel {};
132     Size3D rect0 {};
133     Size3D rect1 {};
134     uint32_t height {};
135 };
136 
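// Blits the color rectangle between the currently bound read and draw framebuffers, flipping the
// Y coordinates to account for GL's bottom-left origin.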
137 void DoBlit(const Filter filter, const BlitArgs& src, const BlitArgs& dst)
138 {
139     // Handle top-left / bottom-left origin conversion
140     GLint sy = static_cast<GLint>(src.rect0.height);
141     const GLint sh = static_cast<const GLint>(src.rect1.height);
142     const GLint sfh = static_cast<GLint>(src.height >> src.mipLevel);
143     sy = sfh - (sy + sh);
144     GLint dy = static_cast<GLint>(dst.rect0.height);
145     const GLint dh = static_cast<const GLint>(dst.rect1.height);
146     const GLint dfh = static_cast<GLint>(dst.height >> dst.mipLevel);
147     dy = dfh - (dy + dh);
148     GLenum glfilter = GL_NEAREST;
149     if (filter == CORE_FILTER_NEAREST) {
150         glfilter = GL_NEAREST;
151     } else if (filter == CORE_FILTER_LINEAR) {
152         glfilter = GL_LINEAR;
153     } else {
154         PLUGIN_ASSERT_MSG(false, "RenderCommandBlitImage Invalid filter mode");
155     }
156     glBlitFramebuffer(static_cast<GLint>(src.rect0.width), sy, static_cast<GLint>(src.rect1.width), sfh,
157         static_cast<GLint>(dst.rect0.width), dy, static_cast<GLint>(dst.rect1.width), dfh, GL_COLOR_BUFFER_BIT,
158         glfilter);
159 }
160 
161 GLenum GetPrimFromTopology(PrimitiveTopology op)
162 {
163     switch (op) {
164         case CORE_PRIMITIVE_TOPOLOGY_POINT_LIST:
165             return GL_POINTS;
166         case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST:
167             return GL_LINES;
168         case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP:
169             return GL_LINE_STRIP;
170         case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
171             return GL_TRIANGLES;
172         case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
173             return GL_TRIANGLE_STRIP;
174         case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
175             return GL_TRIANGLE_FAN;
176 #if defined(GL_ES_VERSION_3_2) || defined(GL_VERSION_3_2)
177             // The following are valid after gles 3.2
178         case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
179             return GL_LINES_ADJACENCY;
180         case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
181             return GL_LINE_STRIP_ADJACENCY;
182         case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
183             return GL_TRIANGLES_ADJACENCY;
184         case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
185             return GL_TRIANGLE_STRIP_ADJACENCY;
186         case CORE_PRIMITIVE_TOPOLOGY_PATCH_LIST:
187             return GL_PATCHES;
188 #endif
189         default:
190             PLUGIN_ASSERT_MSG(false, "Unsupported primitive topology");
191             break;
192     }
193     return GL_POINTS;
194 }
195 
196 GLenum GetBlendOp(BlendOp func)
197 {
198     switch (func) {
199         case CORE_BLEND_OP_ADD:
200             return GL_FUNC_ADD;
201         case CORE_BLEND_OP_SUBTRACT:
202             return GL_FUNC_SUBTRACT;
203         case CORE_BLEND_OP_REVERSE_SUBTRACT:
204             return GL_FUNC_REVERSE_SUBTRACT;
205         case CORE_BLEND_OP_MIN:
206             return GL_MIN;
207         case CORE_BLEND_OP_MAX:
208             return GL_MAX;
209         default:
210             break;
211     }
212     return GL_FUNC_ADD;
213 }
214 
215 GLenum GetBlendFactor(BlendFactor factor)
216 {
217     switch (factor) {
218         case CORE_BLEND_FACTOR_ZERO:
219             return GL_ZERO;
220         case CORE_BLEND_FACTOR_ONE:
221             return GL_ONE;
222         case CORE_BLEND_FACTOR_SRC_COLOR:
223             return GL_SRC_COLOR;
224         case CORE_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
225             return GL_ONE_MINUS_SRC_COLOR;
226         case CORE_BLEND_FACTOR_DST_COLOR:
227             return GL_DST_COLOR;
228         case CORE_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
229             return GL_ONE_MINUS_DST_COLOR;
230         case CORE_BLEND_FACTOR_SRC_ALPHA:
231             return GL_SRC_ALPHA;
232         case CORE_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
233             return GL_ONE_MINUS_SRC_ALPHA;
234         case CORE_BLEND_FACTOR_DST_ALPHA:
235             return GL_DST_ALPHA;
236         case CORE_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
237             return GL_ONE_MINUS_DST_ALPHA;
238         case CORE_BLEND_FACTOR_CONSTANT_COLOR:
239             return GL_CONSTANT_COLOR;
240         case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
241             return GL_ONE_MINUS_CONSTANT_COLOR;
242         case CORE_BLEND_FACTOR_CONSTANT_ALPHA:
243             return GL_CONSTANT_ALPHA;
244         case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
245             return GL_ONE_MINUS_CONSTANT_ALPHA;
246         case CORE_BLEND_FACTOR_SRC_ALPHA_SATURATE:
247             return GL_SRC_ALPHA_SATURATE;
248             // NOTE: check the GLES3.2...
249             /* following requires EXT_blend_func_extended (dual source blending) */
250         case CORE_BLEND_FACTOR_SRC1_COLOR:
251         case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
252         case CORE_BLEND_FACTOR_SRC1_ALPHA:
253         case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
254         default:
255             break;
256     }
257     return GL_ONE;
258 }
259 
260 GLenum GetCompareOp(CompareOp aOp)
261 {
262     switch (aOp) {
263         case CORE_COMPARE_OP_NEVER:
264             return GL_NEVER;
265         case CORE_COMPARE_OP_LESS:
266             return GL_LESS;
267         case CORE_COMPARE_OP_EQUAL:
268             return GL_EQUAL;
269         case CORE_COMPARE_OP_LESS_OR_EQUAL:
270             return GL_LEQUAL;
271         case CORE_COMPARE_OP_GREATER:
272             return GL_GREATER;
273         case CORE_COMPARE_OP_NOT_EQUAL:
274             return GL_NOTEQUAL;
275         case CORE_COMPARE_OP_GREATER_OR_EQUAL:
276             return GL_GEQUAL;
277         case CORE_COMPARE_OP_ALWAYS:
278             return GL_ALWAYS;
279         default:
280             break;
281     }
282     return GL_ALWAYS;
283 }
284 
285 GLenum GetStencilOp(StencilOp aOp)
286 {
287     switch (aOp) {
288         case CORE_STENCIL_OP_KEEP:
289             return GL_KEEP;
290         case CORE_STENCIL_OP_ZERO:
291             return GL_ZERO;
292         case CORE_STENCIL_OP_REPLACE:
293             return GL_REPLACE;
294         case CORE_STENCIL_OP_INCREMENT_AND_CLAMP:
295             return GL_INCR;
296         case CORE_STENCIL_OP_DECREMENT_AND_CLAMP:
297             return GL_DECR;
298         case CORE_STENCIL_OP_INVERT:
299             return GL_INVERT;
300         case CORE_STENCIL_OP_INCREMENT_AND_WRAP:
301             return GL_INCR_WRAP;
302         case CORE_STENCIL_OP_DECREMENT_AND_WRAP:
303             return GL_DECR_WRAP;
304         default:
305             break;
306     }
307     return GL_KEEP;
308 }
309 
310 void SetState(GLenum type, bool enabled)
311 {
312     if (enabled) {
313         glEnable(type);
314     } else {
315         glDisable(type);
316     }
317 }
318 
319 void SetCullMode(const GraphicsState::RasterizationState& rs)
320 {
321     SetState(GL_CULL_FACE, (rs.cullModeFlags != CORE_CULL_MODE_NONE));
322 
323     switch (rs.cullModeFlags) {
324         case CORE_CULL_MODE_FRONT_BIT:
325             glCullFace(GL_FRONT);
326             break;
327         case CORE_CULL_MODE_BACK_BIT:
328             glCullFace(GL_BACK);
329             break;
330         case CORE_CULL_MODE_FRONT_AND_BACK:
331             glCullFace(GL_FRONT_AND_BACK);
332             break;
333         case CORE_CULL_MODE_NONE:
334         default:
335             break;
336     }
337 }
338 
339 void SetFrontFace(const GraphicsState::RasterizationState& rs)
340 {
341     switch (rs.frontFace) {
342         case CORE_FRONT_FACE_COUNTER_CLOCKWISE:
343             glFrontFace(GL_CCW);
344             break;
345         case CORE_FRONT_FACE_CLOCKWISE:
346             glFrontFace(GL_CW);
347             break;
348         default:
349             break;
350     }
351 }
352 
353 #if RENDER_HAS_GL_BACKEND
354 void SetPolygonMode(const GraphicsState::RasterizationState& rs)
355 {
356     GLenum mode;
357     switch (rs.polygonMode) {
358         default:
359         case CORE_POLYGON_MODE_FILL:
360             mode = GL_FILL;
361             break;
362         case CORE_POLYGON_MODE_LINE:
363             mode = GL_LINE;
364             break;
365         case CORE_POLYGON_MODE_POINT:
366             mode = GL_POINT;
367             break;
368     }
369     glPolygonMode(GL_FRONT_AND_BACK, mode);
370 }
371 #endif
372 
373 void Invalidate(GLenum framebuffer, int32_t count, const GLenum invalidate[], const RenderPassDesc& rpd,
374     const LowlevelFramebufferGL& frameBuffer)
375 {
376     if (count > 0) {
377         if ((frameBuffer.width == rpd.renderArea.extentWidth) && (frameBuffer.height == rpd.renderArea.extentHeight)) {
378             // Invalidate the whole buffer.  (attachment sizes match render area)
379             glInvalidateFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate);
380         } else {
381             // invalidate only a part of the render target..
382             // NOTE: verify that this works, we might need to flip the Y axis the same way as scissors etc.
383             const GLint X = static_cast<const GLint>(rpd.renderArea.offsetX);
384             const GLint Y = static_cast<const GLint>(rpd.renderArea.offsetY);
385             const GLsizei W = static_cast<const GLsizei>(rpd.renderArea.extentWidth);
386             const GLsizei H = static_cast<const GLsizei>(rpd.renderArea.extentHeight);
387             glInvalidateSubFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate, X, Y, W, H);
388         }
389     }
390 }
391 
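// Parameters gathered by SetupBlit for a buffer-to-image copy: destination image platform data and
// descriptor, the copy region, the source data pointer/offset and the per-layer (slice) data size.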
392 struct BlitData {
393     const GpuImagePlatformDataGL& iPlat;
394     const GpuImageDesc& imageDesc;
395     const BufferImageCopy& bufferImageCopy;
396     uintptr_t data { 0 };
397     uint64_t size { 0 };
398     uint64_t sizeOfData { 0 };
399     bool compressed { false };
400 };
401 
402 void BlitArray(DeviceGLES& device_, const BlitData& bd)
403 {
404     const auto& iPlat = bd.iPlat;
405     const auto& bufferImageCopy = bd.bufferImageCopy;
406     const auto& imageSubresource = bufferImageCopy.imageSubresource;
407     const auto& imageDesc = bd.imageDesc;
408     const uint32_t mip = imageSubresource.mipLevel;
409     const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
410     // NOTE: image offset depth is ignored
411     const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
412     const Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
413         Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height),
414         Math::min(imageSize.z, bufferImageCopy.imageExtent.depth) };
415     const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
416     if (valid) {
417         uintptr_t data = bd.data;
418         const uint32_t layerCount = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
419         for (uint32_t layer = imageSubresource.baseArrayLayer; layer < layerCount; layer++) {
420             const Math::UVec3 offset3D { offset.x, offset.y, layer };
421             if (bd.compressed) {
422                 device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
423                     iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
424             } else {
425                 device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
426                     iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
427             }
428             data += static_cast<ptrdiff_t>(bd.sizeOfData);
429         }
430     }
431 }
432 
433 void Blit2D(DeviceGLES& device_, const BlitData& bd)
434 {
435     const auto& iPlat = bd.iPlat;
436     const auto& bufferImageCopy = bd.bufferImageCopy;
437     const auto& imageSubresource = bufferImageCopy.imageSubresource;
438     const auto& imageDesc = bd.imageDesc;
439     const uint32_t mip = imageSubresource.mipLevel;
440     const Math::UVec2 imageSize { imageDesc.width >> mip, imageDesc.height >> mip };
441     const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
442     const Math::UVec2 extent { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
443         Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height) };
444     PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == 1,
445         "RenderCommandCopyBufferImage Texture2D with baseArrayLayer!=0 && layerCount!= 1");
446     const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
447     const uintptr_t data = bd.data;
448     if (valid && bd.compressed) {
449         device_.CompressedTexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent,
450             iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
451     } else if (valid) {
452         device_.TexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent, iPlat.format,
453             iPlat.dataType, reinterpret_cast<const void*>(data));
454     }
455 }
456 
457 void Blit3D(DeviceGLES& device_, const BlitData& bd)
458 {
459     const auto& iPlat = bd.iPlat;
460     const auto& bufferImageCopy = bd.bufferImageCopy;
461     const auto& imageSubresource = bufferImageCopy.imageSubresource;
462     const auto& imageDesc = bd.imageDesc;
463     const uint32_t mip = imageSubresource.mipLevel;
464     const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth >> mip };
465     const Math::UVec3 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height,
466         bufferImageCopy.imageOffset.depth };
467     Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
468         Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height), Math::min(imageSize.z - offset.z, 1U) };
469     const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
470     if (valid) {
471         uintptr_t data = bd.data;
472         for (uint32_t slice = 0U; slice < imageSize.z; ++slice) {
473             const Math::UVec3 offset3D { offset.x, offset.y, slice };
474             if (bd.compressed) {
475                 device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
476                     iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
477             } else {
478                 device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
479                     iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
480             }
481             // offsets one slice
482             data += static_cast<ptrdiff_t>(bd.sizeOfData);
483         }
484     }
485 }
486 
487 void BlitCube(DeviceGLES& device_, const BlitData& bd)
488 {
489     const auto& iPlat = bd.iPlat;
490     const auto& bufferImageCopy = bd.bufferImageCopy;
491     const auto& imageSubresource = bufferImageCopy.imageSubresource;
492     const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
493     const Math::UVec2 extent { bufferImageCopy.imageExtent.width, bufferImageCopy.imageExtent.height };
494     constexpr GLenum faceId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
495         GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
496         GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
497     PLUGIN_UNUSED(Gles::CUBEMAP_LAYERS);
498     PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == Gles::CUBEMAP_LAYERS,
499         "RenderCommandCopyBufferImage Cubemap with baseArrayLayer!=0 && layerCount!= 6");
500     uintptr_t data = bd.data;
501     const uint32_t lastLayer = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
502     for (uint32_t i = imageSubresource.baseArrayLayer; i < lastLayer; i++) {
503         const GLenum face = faceId[i]; // convert layer index to cube map face id.
504         if (face == 0) {
505             // reached the end of cubemap faces (see faceId)
506             // so must stop copying.
507             break;
508         }
509         if (bd.compressed) {
510             device_.CompressedTexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent,
511                 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
512         } else {
513             device_.TexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent, iPlat.format,
514                 iPlat.dataType, reinterpret_cast<const void*>(data));
515         }
516         data += static_cast<ptrdiff_t>(bd.sizeOfData);
517     }
518 }
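// Prepares GL unpack state for a buffer-to-image copy and returns the data needed by the Blit*
// helpers. Either binds the source buffer as GL_PIXEL_UNPACK_BUFFER or maps it, depending on
// usePixelUnpackBuffer.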
519 template<bool usePixelUnpackBuffer>
520 
521 BlitData SetupBlit(DeviceGLES& device_, const BufferImageCopy& bufferImageCopy, GpuBufferGLES& srcGpuBuffer,
522     const GpuImageGLES& dstGpuImage)
523 {
524     const auto& iPlat = dstGpuImage.GetPlatformData();
525     const auto& imageOffset = bufferImageCopy.imageOffset;
526     PLUGIN_UNUSED(imageOffset);
527     const auto& imageExtent = bufferImageCopy.imageExtent;
528     // size is calculated for single layer / slice
529     const uint64_t size = static_cast<uint64_t>(iPlat.bytesperpixel) *
530                           static_cast<uint64_t>(bufferImageCopy.bufferImageHeight) *
531                           static_cast<uint64_t>(bufferImageCopy.bufferRowLength);
532     uintptr_t data = bufferImageCopy.bufferOffset;
533     if constexpr (usePixelUnpackBuffer) {
534         const auto& plat = srcGpuBuffer.GetPlatformData();
535         device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, plat.buffer);
536     } else {
537         // Use the mapped pointer for glTexSubImage2D, this is a workaround on GL_INVALID_OPERATION on PVR GLES
538         // simulator and crash with ETC2 textures on NVIDIA..
539         data += reinterpret_cast<uintptr_t>(srcGpuBuffer.Map());
540     }
541     uint64_t sizeOfData = size;
542     const auto& compinfo = iPlat.compression;
543     if (compinfo.compressed) {
544         // how many blocks in width
545         const int64_t blockW = (imageExtent.width + (compinfo.blockW - 1)) / compinfo.blockW;
546         // how many blocks in height
547         const int64_t blockH = (imageExtent.height + (compinfo.blockH - 1)) / compinfo.blockH;
548         // size in bytes..
549         sizeOfData = static_cast<uint64_t>(((blockW * blockH) * compinfo.bytesperblock));
550 
551         // Warn for partial copies. we do not handle those at the moment.
552         if (bufferImageCopy.bufferRowLength != 0) {
553             if (bufferImageCopy.bufferRowLength != blockW * compinfo.blockW) {
554                 PLUGIN_LOG_W("Partial copies of compressed texture data are not currently supported. "
555                              "Stride must match image width (with block align). "
556                              "bufferImageCopy.bufferRowLength(%d) "
557                              "imageExtent.width(%d) ",
558                     bufferImageCopy.bufferRowLength, imageExtent.width);
559             }
560         }
561         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
562         glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0);
563     } else {
564         glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(bufferImageCopy.bufferRowLength));
565         glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, static_cast<GLint>(bufferImageCopy.bufferImageHeight));
566     }
567     glPixelStorei(GL_UNPACK_ALIGNMENT, 1); // Make sure the align is tight.
568     return { iPlat, dstGpuImage.GetDesc(), bufferImageCopy, data, size, sizeOfData, compinfo.compressed };
569 }
570 
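// Undoes SetupBlit: unbinds the pixel unpack buffer or unmaps the source buffer.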
571 template<bool usePixelUnpackBuffer>
572 void FinishBlit(DeviceGLES& device_, const GpuBufferGLES& srcGpuBuffer)
573 {
574     if constexpr (usePixelUnpackBuffer) {
575         device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
576     } else {
577         srcGpuBuffer.Unmap();
578     }
579 }
580 
581 template<typename T, size_t N>
582 constexpr size_t Compare(const T (&a)[N], const T (&b)[N])
583 {
584     for (size_t i = 0; i < N; i++) {
585         if (a[i] != b[i])
586             return false;
587     }
588     return true;
589 }
590 
591 template<typename T, size_t N>
592 
593 constexpr size_t Set(T (&a)[N], const T (&b)[N])
594 {
595     for (size_t i = 0; i < N; i++) {
596         a[i] = b[i];
597     }
598     return true;
599 }
600 
601 bool CompareBlendFactors(
602     const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
603 {
604     return (a.srcColorBlendFactor == b.srcColorBlendFactor) && (a.srcAlphaBlendFactor == b.srcAlphaBlendFactor) &&
605            (a.dstColorBlendFactor == b.dstColorBlendFactor) && (a.dstAlphaBlendFactor == b.dstAlphaBlendFactor);
606 }
607 
608 void SetBlendFactors(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
609 {
610     a.srcColorBlendFactor = b.srcColorBlendFactor;
611     a.srcAlphaBlendFactor = b.srcAlphaBlendFactor;
612     a.dstColorBlendFactor = b.dstColorBlendFactor;
613     a.dstAlphaBlendFactor = b.dstAlphaBlendFactor;
614 }
615 
616 bool CompareBlendOps(
617     const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
618 {
619     return (a.colorBlendOp == b.colorBlendOp) && (a.alphaBlendOp == b.alphaBlendOp);
620 }
621 
622 void SetBlendOps(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
623 {
624     a.colorBlendOp = b.colorBlendOp;
625     a.alphaBlendOp = b.alphaBlendOp;
626 }
627 
628 bool CompareStencilOp(const GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
629 {
630     return (a.failOp == b.failOp) && (a.depthFailOp == b.depthFailOp) && (a.passOp == b.passOp);
631 }
632 
633 void SetStencilOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
634 {
635     a.failOp = b.failOp;
636     a.depthFailOp = b.depthFailOp;
637     a.passOp = b.passOp;
638 }
639 
640 void SetStencilCompareOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
641 {
642     a.compareOp = b.compareOp;
643     a.compareMask = b.compareMask;
644     a.reference = b.reference;
645 }
646 
647 #if RENDER_VALIDATION_ENABLED
648 void ValidateCopyImage(const Offset3D& offset, const Size3D& extent, uint32_t mipLevel, const GpuImageDesc& imageDesc)
649 {
650     if (mipLevel >= imageDesc.mipCount) {
651         PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage mipLevel must be less than image mipCount.");
652     }
653     if ((offset.x < 0) || (offset.y < 0) || (offset.z < 0)) {
654         PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset must not be negative.");
655     }
656     if (((offset.x + extent.width) > imageDesc.width) || ((offset.y + extent.height) > imageDesc.height) ||
657         ((offset.z + extent.depth) > imageDesc.depth)) {
658         PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset + extent does not fit in image.");
659     }
660 }
661 
662 void ValidateCopyImage(const ImageCopy& imageCopy, const GpuImageDesc& srcImageDesc, const GpuImageDesc& dstImageDesc)
663 {
664     ValidateCopyImage(imageCopy.srcOffset, imageCopy.extent, imageCopy.srcSubresource.mipLevel, srcImageDesc);
665     ValidateCopyImage(imageCopy.dstOffset, imageCopy.extent, imageCopy.dstSubresource.mipLevel, dstImageDesc);
666 }
667 #endif
668 
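// Clamps a negative source offset to zero, shifting the destination offset and shrinking the copy
// size by the same amount.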
669 constexpr void ClampOffset(int32_t& srcOffset, int32_t& dstOffset, uint32_t& size)
670 {
671     if (srcOffset < 0) {
672         size += srcOffset;
673         dstOffset -= srcOffset;
674         srcOffset = 0;
675     }
676 }
677 
678 constexpr void ClampOffset(Offset3D& srcOffset, Offset3D& dstOffset, Size3D& size)
679 {
680     ClampOffset(srcOffset.x, dstOffset.x, size.width);
681     ClampOffset(srcOffset.y, dstOffset.y, size.height);
682     ClampOffset(srcOffset.z, dstOffset.z, size.depth);
683 }
684 
685 constexpr void ClampSize(int32_t offset, uint32_t maxSize, uint32_t& size)
686 {
687     if (size > (maxSize - offset)) {
688         size = maxSize - offset;
689     }
690 }
691 
692 constexpr void ClampSize(const Offset3D& offset, const GpuImageDesc& desc, Size3D& size)
693 {
694     ClampSize(offset.x, desc.width, size.width);
695     ClampSize(offset.y, desc.height, size.height);
696     ClampSize(offset.z, desc.depth, size.depth);
697 }
698 } // namespace
699 
700 RenderBackendGLES::RenderBackendGLES(Device& device, GpuResourceManager& gpuResourceManager)
701     : RenderBackend(), device_(static_cast<DeviceGLES&>(device)), gpuResourceMgr_(gpuResourceManager)
702 {
703 #if (RENDER_PERF_ENABLED == 1)
704     validGpuQueries_ = false;
705 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
706     gpuQueryMgr_ = make_unique<GpuQueryManager>();
707 #if RENDER_HAS_GL_BACKEND
708     if (device_.GetBackendType() == DeviceBackendType::OPENGL) {
709         validGpuQueries_ = true;
710     }
711 #endif
712 #if RENDER_HAS_GLES_BACKEND
713     if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
714         // Check if GL_EXT_disjoint_timer_query is available.
715         validGpuQueries_ = device_.HasExtension("GL_EXT_disjoint_timer_query");
716     }
717 #endif
718 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
719 #endif // RENDER_PERF_ENABLED
720 #if RENDER_HAS_GLES_BACKEND
721     if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
722         multisampledRenderToTexture_ = device_.HasExtension("GL_EXT_multisampled_render_to_texture2");
723     }
724 #endif
725     PLUGIN_ASSERT(device_.IsActive());
726     PrimeCache(GraphicsState {}); // Initializes cache.
727     glGenFramebuffers(1, &blitImageSourceFbo_);
728     glGenFramebuffers(1, &blitImageDestinationFbo_);
729 #if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
730     PLUGIN_LOG_D("fbo id >: %u", blitImageSourceFbo_);
731     PLUGIN_LOG_D("fbo id >: %u", blitImageDestinationFbo_);
732 #endif
733 #if !RENDER_HAS_GLES_BACKEND
734     glEnable(GL_PROGRAM_POINT_SIZE);
735 #endif
736 }
737 
738 RenderBackendGLES::~RenderBackendGLES()
739 {
740     PLUGIN_ASSERT(device_.IsActive());
741     device_.DeleteFrameBuffer(blitImageSourceFbo_);
742     device_.DeleteFrameBuffer(blitImageDestinationFbo_);
743 }
744 
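// Presents every configured swapchain; with RENDER_GL_FLIP_Y_SWAPCHAIN the swapchain FBO is first
// blitted (Y-flipped) to the default framebuffer before SwapBuffers.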
745 void RenderBackendGLES::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
746 {
747     if (!backBufferConfig.swapchainData.empty()) {
748         if (device_.HasSwapchain()) {
749 #if (RENDER_PERF_ENABLED == 1)
750             commonCpuTimers_.present.Begin();
751 #endif
752             for (const auto& swapchainData : backBufferConfig.swapchainData) {
753 #if (RENDER_DEV_ENABLED == 1)
754                 if (swapchainData.config.gpuSemaphoreHandle) {
755                     // NOTE: not implemented
756                     PLUGIN_LOG_E("NodeGraphBackBufferConfiguration semaphore not signaled");
757                 }
758 #endif
759                 const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapchainData.handle));
760                 if (swp) {
761 #if RENDER_GL_FLIP_Y_SWAPCHAIN
762                     // Blit and flip our swapchain frame to backbuffer..
763                     const auto& sdesc = swp->GetDesc();
764                     if (scissorEnabled_) {
765                         glDisable(GL_SCISSOR_TEST);
766                         scissorEnabled_ = false;
767                     }
768                     const auto& platSwapchain = swp->GetPlatformData();
769                     device_.BindReadFrameBuffer(platSwapchain.fbos[presentationInfo_.swapchainImageIndex]);
770                     device_.BindWriteFrameBuffer(0); // FBO 0  is the surface bound to current context..
771                     glBlitFramebuffer(0, 0, (GLint)sdesc.width, (GLint)sdesc.height, 0, (GLint)sdesc.height,
772                         (GLint)sdesc.width, 0, GL_COLOR_BUFFER_BIT, GL_NEAREST);
773                     device_.BindReadFrameBuffer(0);
774 #endif
775                     device_.SwapBuffers(*swp);
776                 }
777             }
778 #if (RENDER_PERF_ENABLED == 1)
779             commonCpuTimers_.present.End();
780 #endif
781         }
782     }
783 }
784 
785 void RenderBackendGLES::ResetState()
786 {
787     boundProgram_ = {};
788     boundIndexBuffer_ = {};
789     vertexAttribBinds_ = 0;
790     renderingToDefaultFbo_ = false;
791     boundComputePipeline_ = nullptr;
792     boundGraphicsPipeline_ = nullptr;
793     currentPsoHandle_ = {};
794     renderArea_ = {};
795     activeRenderPass_ = {};
796     currentSubPass_ = 0;
797     currentFrameBuffer_ = nullptr;
798     scissorBoxUpdated_ = viewportDepthRangeUpdated_ = viewportUpdated_ = true;
799     inRenderpass_ = 0;
800 }
801 
802 void RenderBackendGLES::ResetBindings()
803 {
804     for (auto& b : boundObjects_) {
805         b.dirty = true;
806     }
807     boundComputePipeline_ = nullptr;
808     boundGraphicsPipeline_ = nullptr;
809     currentPsoHandle_ = {};
810 }
811 
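// Backend entry point for a frame: acquires and remaps swapchain images, executes each recorded
// command list and finally processes frame sync and external GPU signals.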
812 void RenderBackendGLES::Render(
813     RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
814 {
815     // NOTE: all command lists are validated before entering here
816     PLUGIN_ASSERT(device_.IsActive());
817 #if (RENDER_PERF_ENABLED == 1)
818     commonCpuTimers_.full.Begin();
819     commonCpuTimers_.acquire.Begin();
820 #endif
821     presentationInfo_ = {};
822 
823     if (device_.HasSwapchain() && (!backBufferConfig.swapchainData.empty())) {
824         for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
825             const auto& swapData = backBufferConfig.swapchainData[swapIdx];
826             if (const SwapchainGLES* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapData.handle))) {
827                 presentationInfo_.swapchainImageIndex = swp->GetNextImage();
828                 const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
829                 if (presentationInfo_.swapchainImageIndex < swapchainData.imageViewCount) {
830                     // remap image to backbuffer
831                     const RenderHandle currentSwapchainHandle =
832                         swapchainData.imageViews[presentationInfo_.swapchainImageIndex];
833                     // special swapchain remapping
834                     gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(swapData.handle, currentSwapchainHandle);
835                 }
836             }
837         }
838     }
839 #if (RENDER_PERF_ENABLED == 1)
840     commonCpuTimers_.acquire.End();
841 
842     StartFrameTimers(renderCommandFrameData);
843     commonCpuTimers_.execute.Begin();
844 #endif
845     // Reset bindings.
846     ResetState();
847     for (const auto& ref : renderCommandFrameData.renderCommandContexts) {
848         // Reset bindings between command lists..
849         ResetBindings();
850         RenderSingleCommandList(ref);
851     }
852 #if (RENDER_PERF_ENABLED == 1)
853     commonCpuTimers_.execute.End();
854 #endif
855     RenderProcessEndCommandLists(renderCommandFrameData, backBufferConfig);
856 #if (RENDER_PERF_ENABLED == 1)
857     commonCpuTimers_.full.End();
858     EndFrameTimers();
859 #endif
860 }
861 
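// Waits on the per-frame fence and creates GL fence syncs for external GPU semaphores that still
// need to be signaled.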
862 void RenderBackendGLES::RenderProcessEndCommandLists(
863     RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
864 {
865     if (auto* frameSync = static_cast<RenderFrameSyncGLES*>(renderCommandFrameData.renderFrameSync); frameSync) {
866         frameSync->GetFrameFence();
867     }
868     // signal external GPU fences
869     if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
870         auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
871         const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
872         PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
873         if (externalSignals.size() == externalSemaphores.size()) {
874             for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
875                 // needs to be false
876                 if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
877                     if (const GpuSemaphoreGles* gs = (const GpuSemaphoreGles*)externalSemaphores[sigIdx].get(); gs) {
878                         auto& plat = const_cast<GpuSemaphorePlatformDataGles&>(gs->GetPlatformData());
879                         // NOTE: currently could create only one GPU sync
880                         GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
881                         plat.sync = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sync));
882                         externalSignals[sigIdx].gpuSignalResourceHandle = plat.sync;
883                         externalSignals[sigIdx].signaled = true;
884 
885                         // NOTE: client is expected to add code for the wait with glClientWaitSync(sync, X, 0)
886                     }
887                 }
888             }
889         }
890     }
891 }
892 
893 void RenderBackendGLES::RenderCommandUndefined(const RenderCommandWithType& renderCommand)
894 {
895     PLUGIN_ASSERT_MSG(false, "non-valid render command");
896 }
897 
898 void RenderBackendGLES::RenderSingleCommandList(const RenderCommandContext& renderCommandCtx)
899 {
900     // these are validated in render graph
901     managers_ = { renderCommandCtx.nodeContextPsoMgr, renderCommandCtx.nodeContextPoolMgr,
902         renderCommandCtx.nodeContextDescriptorSetMgr, renderCommandCtx.renderBarrierList };
903 
904     managers_.poolMgr->BeginBackendFrame();
905     managers_.psoMgr->BeginBackendFrame();
906 #if (RENDER_PERF_ENABLED == 1) || (RENDER_DEBUG_MARKERS_ENABLED == 1)
907     const auto& debugName = renderCommandCtx.debugName;
908 #endif
909 #if (RENDER_PERF_ENABLED == 1)
910     perfCounters_ = {};
911     PLUGIN_ASSERT(timers_.count(debugName) == 1);
912     PerfDataSet& perfDataSet = timers_[debugName];
913     perfDataSet.cpuTimer.Begin();
914 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
915     if (validGpuQueries_) {
916 #ifdef GL_GPU_DISJOINT_EXT
917         /* Clear disjoint error */
918         GLint disjointOccurred = 0;
919         glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
920 #endif
921         GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
922         PLUGIN_ASSERT(gpuQuery);
923 
924         const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
925         PLUGIN_ASSERT(platData.queryObject);
926         glBeginQuery(GL_TIME_ELAPSED_EXT, platData.queryObject);
927     }
928 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
929 #endif // RENDER_PERF_ENABLED
930 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
931     glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)debugName.data());
932 #endif
933     commandListValid_ = true;
934     for (const auto& ref : renderCommandCtx.renderCommandList->GetRenderCommands()) {
935         PLUGIN_ASSERT(ref.rc);
936         if (commandListValid_) {
937 #if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
938             glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1,
939                 (const GLchar*)COMMAND_NAMES[static_cast<uint32_t>(ref.type)]);
940 #endif
941             (this->*(COMMAND_HANDLERS[static_cast<uint32_t>(ref.type)]))(ref);
942 #if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
943             glPopDebugGroup();
944 #endif
945         }
946     }
947 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
948     glPopDebugGroup();
949 #endif
950 #if (RENDER_PERF_ENABLED == 1)
951 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
952     if (validGpuQueries_) {
953         glEndQuery(GL_TIME_ELAPSED_EXT);
954     }
955 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
956     perfDataSet.cpuTimer.End();
957     CopyPerfTimeStamp(debugName, perfDataSet);
958 #endif // RENDER_PERF_ENABLED
959 }
960 
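// Binds either a compute or a graphics PSO based on the pipeline bind point and stores the PSO
// handle for later descriptor set binding.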
961 void RenderBackendGLES::RenderCommandBindPipeline(const RenderCommandWithType& ref)
962 {
963     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_PIPELINE);
964     const auto& renderCmd = *static_cast<const struct RenderCommandBindPipeline*>(ref.rc);
965     boundProgram_ = {};
966     if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE) {
967         PLUGIN_ASSERT(currentFrameBuffer_ == nullptr);
968         BindComputePipeline(renderCmd);
969     } else if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
970         BindGraphicsPipeline(renderCmd);
971     }
972     currentPsoHandle_ = renderCmd.psoHandle;
973 }
974 
975 void RenderBackendGLES::BindComputePipeline(const struct RenderCommandBindPipeline& renderCmd)
976 {
977     const auto* pso = static_cast<const ComputePipelineStateObjectGLES*>(
978         managers_.psoMgr->GetComputePso(renderCmd.psoHandle, nullptr));
979     if (pso) {
980         const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
981         // Setup descriptorset bind cache..
982         SetupCache(data.pipelineLayout);
983     }
984     boundComputePipeline_ = pso;
985     boundGraphicsPipeline_ = nullptr;
986 }
987 
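// Resizes the per-set descriptor bind cache to match the given pipeline layout and marks sets
// dirty whenever their expected binding layout changes.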
988 void RenderBackendGLES::SetupCache(const PipelineLayout& pipelineLayout)
989 {
990     // based on pipeline layout. (note that compatible sets should "save state")
991     for (uint32_t set = 0; set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++set) {
992         // mark unmatching sets dirty (all for now)
993         // resize the cache stuffs.
994         const auto& s = pipelineLayout.descriptorSetLayouts[set];
995         if (s.set == PipelineLayoutConstants::INVALID_INDEX) {
996             boundObjects_[set].dirty = true;
997 #if RENDER_HAS_GLES_BACKEND
998             boundObjects_[set].oesBinds.clear();
999 #endif
1000             boundObjects_[set].resources.clear();
1001             continue;
1002         }
1003         PLUGIN_ASSERT(s.set == set);
1004 
1005         uint32_t maxB = 0;
1006         // NOTE: compatibility optimizations?
1007         // NOTE: we expect bindings to be sorted.
1008         if (s.bindings.back().binding == s.bindings.size() - 1U) {
1009             // since the last binding matches the size, expect it to be continuous.
1010             maxB = static_cast<uint32_t>(s.bindings.size());
1011         } else {
1012             // Sparse binding.
1013             // NOTE: note sparse sets will waste memory here. (see notes in
1014             // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkDescriptorSetLayoutBinding.html)
1015             for (const auto& bind : s.bindings) {
1016                 maxB = Math::max(maxB, bind.binding);
1017             }
1018             maxB += 1U; // zero based bindings..
1019         }
1020         if (boundObjects_[set].resources.size() != maxB) {
1021             // resource count change.. (so it's dirty then)
1022             boundObjects_[set].dirty = true;
1023 #if RENDER_HAS_GLES_BACKEND
1024             boundObjects_[set].oesBinds.clear();
1025 #endif
1026             boundObjects_[set].resources.clear(); // clear because we don't care what it had before.
1027             boundObjects_[set].resources.resize(maxB);
1028         }
1029 
1030         for (const auto& b : s.bindings) {
1031             auto& o = boundObjects_[set].resources[b.binding];
1032             // ignore b.shaderStageFlags for now.
1033             if ((o.resources.size() != b.descriptorCount) || (o.descriptorType != b.descriptorType)) {
1034                 // mark set dirty, since "not matching"
1035                 o.resources.clear();
1036                 o.resources.resize(b.descriptorCount);
1037                 o.descriptorType = b.descriptorType;
1038                 boundObjects_[set].dirty = true;
1039 #if RENDER_HAS_GLES_BACKEND
1040                 boundObjects_[set].oesBinds.clear();
1041 #endif
1042             }
1043         }
1044     }
1045 }
1046 
1047 void RenderBackendGLES::BindGraphicsPipeline(const struct RenderCommandBindPipeline& renderCmd)
1048 {
1049     const auto* pso = static_cast<const GraphicsPipelineStateObjectGLES*>(
1050         managers_.psoMgr->GetGraphicsPso(renderCmd.psoHandle, activeRenderPass_.renderPassDesc,
1051             activeRenderPass_.subpasses, activeRenderPass_.subpassStartIndex, 0, nullptr, nullptr));
1052     if (pso) {
1053         const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
1054         dynamicStateFlags_ = data.dynamicStateFlags;
1055         DoGraphicsState(data.graphicsState);
1056         // NOTE: Deprecate (default viewport/scissor should be set from default targets at some point)
1057         if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_VIEWPORT)) {
1058             SetViewport(renderArea_, ViewportDesc { 0.0f, 0.0f, static_cast<float>(renderArea_.extentWidth),
1059                                          static_cast<float>(renderArea_.extentHeight), 0.0f, 1.0f });
1060         }
1061         if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_SCISSOR)) {
1062             SetScissor(renderArea_, ScissorDesc { 0, 0, renderArea_.extentWidth, renderArea_.extentHeight });
1063         }
1064         // Setup descriptorset bind cache..
1065         SetupCache(data.pipelineLayout);
1066     }
1067     boundComputePipeline_ = nullptr;
1068     boundGraphicsPipeline_ = pso;
1069 }
1070 
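// Issues the matching glDrawElements*/glDrawArrays* variant based on indexing, instancing and
// vertex offset; firstInstance is not supported (see the note below).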
1071 void RenderBackendGLES::RenderCommandDraw(const RenderCommandWithType& ref)
1072 {
1073     PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW);
1074     const auto& renderCmd = *static_cast<struct RenderCommandDraw*>(ref.rc);
1075     if (!boundGraphicsPipeline_) {
1076         return;
1077     }
1078     PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
1079     BindResources();
1080     const auto type = GetPrimFromTopology(topology_);
1081     const GLsizei firstVertex = static_cast<const GLsizei>(renderCmd.firstVertex);
1082     const GLsizei instanceCount = static_cast<GLsizei>(renderCmd.instanceCount);
1083     // firstInstance is not supported yet, need to set the SPIRV_Cross generated uniform
1084     // "SPIRV_Cross_BaseInstance" to renderCmd.firstInstance;
1085     if (renderCmd.indexCount) {
1086         uintptr_t offsetp = boundIndexBuffer_.offset;
1087         GLenum indexType = GL_UNSIGNED_SHORT;
1088         switch (boundIndexBuffer_.type) {
1089             case CORE_INDEX_TYPE_UINT16:
1090                 offsetp += renderCmd.firstIndex * sizeof(uint16_t);
1091                 indexType = GL_UNSIGNED_SHORT;
1092                 break;
1093             case CORE_INDEX_TYPE_UINT32:
1094                 offsetp += renderCmd.firstIndex * sizeof(uint32_t);
1095                 indexType = GL_UNSIGNED_INT;
1096                 break;
1097             default:
1098                 PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
1099                 break;
1100         }
1101         const GLsizei indexCount = static_cast<const GLsizei>(renderCmd.indexCount);
1102         const void* offset = reinterpret_cast<const void*>(offsetp);
1103         if (renderCmd.instanceCount > 1) {
1104             if (renderCmd.firstVertex) {
1105                 glDrawElementsInstancedBaseVertex(type, indexCount, indexType, offset, instanceCount, firstVertex);
1106             } else {
1107                 glDrawElementsInstanced(type, indexCount, indexType, offset, instanceCount);
1108             }
1109         } else {
1110             if (renderCmd.vertexOffset) {
1111                 glDrawElementsBaseVertex(
1112                     type, indexCount, indexType, offset, static_cast<GLint>(renderCmd.vertexOffset));
1113             } else {
1114                 glDrawElements(type, indexCount, indexType, offset);
1115             }
1116         }
1117 #if (RENDER_PERF_ENABLED == 1)
1118         ++perfCounters_.drawCount;
1119         perfCounters_.instanceCount += renderCmd.instanceCount;
1120         perfCounters_.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
1121 #endif
1122     } else {
1123         const GLsizei vertexCount = static_cast<const GLsizei>(renderCmd.vertexCount);
1124         if (renderCmd.instanceCount > 1) {
1125             glDrawArraysInstanced(type, firstVertex, vertexCount, instanceCount);
1126         } else {
1127             glDrawArrays(type, firstVertex, vertexCount);
1128         }
1129 #if (RENDER_PERF_ENABLED == 1)
1130         ++perfCounters_.drawCount;
1131         perfCounters_.instanceCount += renderCmd.instanceCount;
1132         perfCounters_.triangleCount += (renderCmd.vertexCount * 3) * renderCmd.instanceCount; // 3: vertex dimension
1133 #endif
1134     }
1135 }
1136 
1137 void RenderBackendGLES::RenderCommandDrawIndirect(const RenderCommandWithType& ref)
1138 {
1139     PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW_INDIRECT);
1140     const auto& renderCmd = *static_cast<const struct RenderCommandDrawIndirect*>(ref.rc);
1141     if (!boundGraphicsPipeline_) {
1142         return;
1143     }
1144     PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
1145     BindResources();
1146     if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1147         const auto& plat = gpuBuffer->GetPlatformData();
1148         device_.BindBuffer(GL_DRAW_INDIRECT_BUFFER, plat.buffer);
1149         const auto type = GetPrimFromTopology(topology_);
1150         auto offset = static_cast<GLintptr>(renderCmd.offset);
1151         if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1152             GLenum indexType = GL_UNSIGNED_SHORT;
1153             switch (boundIndexBuffer_.type) {
1154                 case CORE_INDEX_TYPE_UINT16:
1155                     indexType = GL_UNSIGNED_SHORT;
1156                     break;
1157                 case CORE_INDEX_TYPE_UINT32:
1158                     indexType = GL_UNSIGNED_INT;
1159                     break;
1160                 default:
1161                     PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
1162                     break;
1163             }
1164             for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1165                 glDrawElementsIndirect(type, indexType, reinterpret_cast<const void*>(offset));
1166                 offset += renderCmd.stride;
1167             }
1168         } else {
1169             for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1170                 glDrawArraysIndirect(type, reinterpret_cast<const void*>(offset));
1171                 offset += renderCmd.stride;
1172             }
1173         }
1174 #if (RENDER_PERF_ENABLED == 1)
1175         perfCounters_.drawIndirectCount += renderCmd.drawCount;
1176 #endif
1177     }
1178 }
1179 
1180 void RenderBackendGLES::RenderCommandDispatch(const RenderCommandWithType& ref)
1181 {
1182     PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH);
1183     const auto& renderCmd = *static_cast<const struct RenderCommandDispatch*>(ref.rc);
1184     if (!boundComputePipeline_) {
1185         return;
1186     }
1187     PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1188     BindResources();
1189     glDispatchCompute(renderCmd.groupCountX, renderCmd.groupCountY, renderCmd.groupCountZ);
1190 #if (RENDER_PERF_ENABLED == 1)
1191     ++perfCounters_.dispatchCount;
1192 #endif
1193 }
1194 
1195 void RenderBackendGLES::RenderCommandDispatchIndirect(const RenderCommandWithType& ref)
1196 {
1197     PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH_INDIRECT);
1198     const auto& renderCmd = *static_cast<const struct RenderCommandDispatchIndirect*>(ref.rc);
1199     if (!boundComputePipeline_) {
1200         return;
1201     }
1202     PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1203     BindResources();
1204     if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1205         const auto& plat = gpuBuffer->GetPlatformData();
1206         device_.BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, plat.buffer);
1207         glDispatchComputeIndirect(static_cast<GLintptr>(renderCmd.offset));
1208 #if (RENDER_PERF_ENABLED == 1)
1209         ++perfCounters_.dispatchIndirectCount;
1210 #endif
1211     }
1212 }
1213 
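// The ClearScissor* helpers exist because glClearBuffer* honors the scissor test: the scissor is
// temporarily set to the render area for attachment clears, and the cached scissor state is restored
// (or primed) afterwards.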
1214 void RenderBackendGLES::ClearScissorInit(const RenderPassDesc::RenderArea& aArea)
1215 {
1216     resetScissor_ = false;           // need to reset scissor state after clear?
1217     clearScissorSet_ = true;         // need to setup clear scissors before clear?
1218     clearScissor_ = aArea;           // area to be cleared
1219     if (scissorPrimed_) {            // have scissors been set yet?
1220         if ((!scissorBoxUpdated_) && // if there is a pending scissor change, ignore the scissorbox.
1221             (clearScissor_.offsetX == scissorBox_.offsetX) && (clearScissor_.offsetY == scissorBox_.offsetY) &&
1222             (clearScissor_.extentWidth == scissorBox_.extentWidth) &&
1223             (clearScissor_.extentHeight == scissorBox_.extentHeight)) {
1224             // Current scissors match clearscissor area, so no need to set it again.
1225             clearScissorSet_ = false;
1226         }
1227     }
1228 }
1229 
1230 void RenderBackendGLES::ClearScissorSet()
1231 {
1232     if (clearScissorSet_) {       // do we need to set clear scissors.
1233         clearScissorSet_ = false; // clear scissors have been set now.
1234         resetScissor_ = true;     // we are modifying scissors, so remember to reset them afterwards.
1235         glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
1236             static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
1237     }
1238 }
1239 
1240 void RenderBackendGLES::ClearScissorReset()
1241 {
1242     if (resetScissor_) { // need to reset correct scissors?
1243         if (!scissorPrimed_) {
1244             // scissors have not been set yet, so use clearbox as current cache state (and don't change scissor
1245             // setting)
1246             scissorPrimed_ = true;
1247             scissorBox_.offsetX = clearScissor_.offsetX;
1248             scissorBox_.offsetY = clearScissor_.offsetY;
1249             scissorBox_.extentHeight = clearScissor_.extentHeight;
1250             scissorBox_.extentWidth = clearScissor_.extentWidth;
1251         } else {
1252             // Restore scissor box to cached state. (update scissors when needed, since clearBox != scissorBox)
1253             scissorBoxUpdated_ = true; // ie. request to update scissor state.
1254         }
1255     }
1256 }
1257 
1258 void RenderBackendGLES::HandleColorAttachments(const array_view<const RenderPassDesc::AttachmentDesc*> colorAttachments)
1259 {
1260     constexpr ColorComponentFlags clearAll = CORE_COLOR_COMPONENT_R_BIT | CORE_COLOR_COMPONENT_G_BIT |
1261                                              CORE_COLOR_COMPONENT_B_BIT | CORE_COLOR_COMPONENT_A_BIT;
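    // glClearBufferfv honors the color write mask, so a reduced mask is widened for the clear and then
    // restored from the cached color blend state.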
1262     const auto& cBlend = cacheState_.colorBlendState;
1263     for (uint32_t idx = 0; idx < colorAttachments.size(); ++idx) {
1264         if (colorAttachments[idx] == nullptr) {
1265             continue;
1266         }
1267         const auto& ref = *(colorAttachments[idx]);
1268         if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1269             const auto& cBlendState = cBlend.colorAttachments[idx];
1270             if (clearAll != cBlendState.colorWriteMask) {
1271                 glColorMaski(idx, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1272             }
1273             ClearScissorSet();
1274             // glClearBufferfv only for float formats?
1275             // glClearBufferiv & glClearBufferuiv only for integer formats?
1276             glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.float32);
1277             if (clearAll != cBlendState.colorWriteMask) {
1278                 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1279                 glColorMaski(idx, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
1280                     IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
1281                     IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
1282                     IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
1283             }
1284         }
1285     }
1286 }
1287 
1288 void RenderBackendGLES::HandleDepthAttachment(const RenderPassDesc::AttachmentDesc& depthAttachment)
1289 {
1290     const GLuint allBits = 0xFFFFFFFFu;
1291     const auto& ref = depthAttachment;
1292     const bool clearDepth = (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1293     const bool clearStencil = (ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1294     // Change state if needed.
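    // Depth and stencil clears honor glDepthMask/glStencilMask, so the masks are widened here and the
    // cached depth-stencil state is restored after the clear.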
1295     if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1296         glDepthMask(GL_TRUE);
1297     }
1298     if (clearStencil) {
1299         if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1300             glStencilMaskSeparate(GL_FRONT, allBits);
1301         }
1302         if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1303             glStencilMaskSeparate(GL_BACK, allBits);
1304         }
1305     }
1306     if (clearDepth || clearStencil) {
1307         // Set the scissors for clear..
1308         ClearScissorSet();
1309     }
1310     // Do clears.
1311     if (clearDepth && clearStencil) {
1312         glClearBufferfi(GL_DEPTH_STENCIL, 0, ref.clearValue.depthStencil.depth,
1313             static_cast<GLint>(ref.clearValue.depthStencil.stencil));
1314     } else if (clearDepth) {
1315         glClearBufferfv(GL_DEPTH, 0, &ref.clearValue.depthStencil.depth);
1316     } else if (clearStencil) {
1317         glClearBufferiv(GL_STENCIL, 0, reinterpret_cast<const GLint*>(&ref.clearValue.depthStencil.stencil));
1318     }
1319 
1320     // Restore cached state, if we touched the state.
1321     if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1322         // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1323         glDepthMask(GL_FALSE);
1324     }
1325     if (clearStencil) {
1326         // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1327         if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1328             glStencilMaskSeparate(GL_FRONT, cacheState_.depthStencilState.frontStencilOpState.writeMask);
1329         }
1330         if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1331             glStencilMaskSeparate(GL_BACK, cacheState_.depthStencilState.backStencilOpState.writeMask);
1332         }
1333     }
1334 }
1335 
1336 void RenderBackendGLES::DoSubPass(uint32_t subPass)
1337 {
1338     if (currentFrameBuffer_ == nullptr) {
1339         // Completely invalid state in backend.
1340         return;
1341     }
1342     const auto& rpd = activeRenderPass_.renderPassDesc;
1343     const auto& sb = activeRenderPass_.subpasses[subPass];
1344 
1345     // If there's no FBO, activate with the swapchain handle so that drawing happens to the correct surface.
1346     if (!currentFrameBuffer_->fbos[subPass].fbo && (sb.colorAttachmentCount == 1U)) {
1347         auto color = rpd.attachmentHandles[sb.colorAttachmentIndices[0]];
1348         device_.Activate(color);
1349     }
1350     device_.BindFrameBuffer(currentFrameBuffer_->fbos[subPass].fbo);
1351     ClearScissorInit(renderArea_);
1352     if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1353         SetState(GL_RASTERIZER_DISCARD, GL_FALSE);
1354     }
1355     {
1356         // NOTE: clear is not yet optimal. depth, stencil and color should be cleared using ONE glClear call if
1357         // possible. (ie. all buffers at once)
1358         renderingToDefaultFbo_ = false;
1359         if (sb.colorAttachmentCount > 0) {
1360             // collect color attachment infos..
1361             const RenderPassDesc::AttachmentDesc*
1362                 colorAttachments[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1363             for (uint32_t ci = 0; ci < sb.colorAttachmentCount; ci++) {
1364                 uint32_t index = sb.colorAttachmentIndices[ci];
1365                 if (resolveToBackbuffer_[index]) {
1366                     // NOTE: this could fail with multiple color attachments....
1367                     renderingToDefaultFbo_ = true;
1368                 }
1369                 if (!attachmentCleared_[index]) {
1370                     attachmentCleared_[index] = true;
1371                     colorAttachments[ci] = &rpd.attachments[index];
1372                 } else {
1373                     colorAttachments[ci] = nullptr;
1374                 }
1375             }
1376             HandleColorAttachments(array_view(colorAttachments, sb.colorAttachmentCount));
1377         }
1378         if (sb.depthAttachmentCount) {
1379             if (!attachmentCleared_[sb.depthAttachmentIndex]) {
1380                 attachmentCleared_[sb.depthAttachmentIndex] = true;
1381                 HandleDepthAttachment(rpd.attachments[sb.depthAttachmentIndex]);
1382             }
1383         }
1384     }
1385     if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1386         // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1387         SetState(GL_RASTERIZER_DISCARD, GL_TRUE);
1388     }
1389     ClearScissorReset();
1390 }
1391 
1392 void RenderBackendGLES::ScanPasses(const RenderPassDesc& rpd)
1393 {
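    // Records, per attachment, the first and last subpass that touches it (used later to decide when an
    // attachment can be invalidated) and flags color attachments that end up resolved to the default
    // framebuffer as "backbuffer-like".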
1394     for (uint32_t sub = 0; sub < rpd.subpassCount; sub++) {
1395         const auto& currentSubPass = activeRenderPass_.subpasses[sub];
1396         for (uint32_t ci = 0; ci < currentSubPass.resolveAttachmentCount; ci++) {
1397             uint32_t resolveTo = currentSubPass.resolveAttachmentIndices[ci];
1398             if (attachmentFirstUse_[resolveTo] == 0xFFFFFFFF) {
1399                 attachmentFirstUse_[resolveTo] = sub;
1400             }
1401             attachmentLastUse_[resolveTo] = sub;
1402             const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[resolveTo]->GetPlatformData());
1403             if ((p.image == 0) && (p.renderBuffer == 0)) {
1404                 // mark the "resolveFrom" (ie. the colorattachment) as "backbuffer-like", since we resolve to
1405                 // backbuffer...
1406                 uint32_t resolveFrom = currentSubPass.colorAttachmentIndices[ci];
1407                 resolveToBackbuffer_[resolveFrom] = true;
1408             }
1409         }
1410         for (uint32_t ci = 0; ci < currentSubPass.inputAttachmentCount; ci++) {
1411             uint32_t index = currentSubPass.inputAttachmentIndices[ci];
1412             if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1413                 attachmentFirstUse_[index] = sub;
1414             }
1415             attachmentLastUse_[index] = sub;
1416         }
1417         for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1418             uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1419             if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1420                 attachmentFirstUse_[index] = sub;
1421             }
1422             attachmentLastUse_[index] = sub;
1423             if (attachmentImage_[index]) {
1424                 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[index]->GetPlatformData());
1425                 if ((p.image == 0) && (p.renderBuffer == 0)) {
1426                     resolveToBackbuffer_[index] = true;
1427                 }
1428             }
1429         }
1430         if (currentSubPass.depthAttachmentCount > 0) {
1431             uint32_t index = currentSubPass.depthAttachmentIndex;
1432             if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1433                 attachmentFirstUse_[index] = sub;
1434             }
1435             attachmentLastUse_[index] = sub;
1436         }
1437     }
1438 }
1439 
1440 void RenderBackendGLES::RenderCommandBeginRenderPass(const RenderCommandWithType& ref)
1441 {
1442     PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
1443     const auto& renderCmd = *static_cast<const struct RenderCommandBeginRenderPass*>(ref.rc);
1444     switch (renderCmd.beginType) {
1445         case RenderPassBeginType::RENDER_PASS_BEGIN: {
1446             ++inRenderpass_;
1447             PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES beginrenderpass inRenderpass_ %u", inRenderpass_);
1448             activeRenderPass_ = renderCmd; // Store this because we need it later (in NextRenderPass)
1449 
1450             const auto& rpd = activeRenderPass_.renderPassDesc;
1451             renderArea_ = rpd.renderArea; // can subpasses have different render areas?
1452             auto& cpm = *(static_cast<NodeContextPoolManagerGLES*>(managers_.poolMgr));
1453             if (multisampledRenderToTexture_) {
1454                 cpm.FilterRenderPass(activeRenderPass_);
1455             }
1456             currentFrameBuffer_ = cpm.GetFramebuffer(cpm.GetFramebufferHandle(activeRenderPass_));
1457             if (currentFrameBuffer_ == nullptr) {
1458                 // Completely invalid state in backend.
1459                 commandListValid_ = false;
1460                 --inRenderpass_;
1461                 return;
1462             }
1463             PLUGIN_ASSERT_MSG(
1464                 activeRenderPass_.subpassStartIndex == 0, "activeRenderPass_.subpassStartIndex != 0 not handled!");
1465             currentSubPass_ = 0;
1466             // find first and last use, clear clearflags. (this could be cached in the lowlevel classes)
1467             for (uint32_t i = 0; i < rpd.attachmentCount; i++) {
1468                 attachmentCleared_[i] = false;
1469                 attachmentFirstUse_[i] = 0xFFFFFFFF;
1470                 attachmentLastUse_[i] = 0;
1471                 resolveToBackbuffer_[i] = false;
1472                 attachmentImage_[i] =
1473                     static_cast<const GpuImageGLES*>(gpuResourceMgr_.GetImage(rpd.attachmentHandles[i]));
1474             }
1475             ScanPasses(rpd);
1476             DoSubPass(0);
1477 #if (RENDER_PERF_ENABLED == 1)
1478             ++perfCounters_.renderPassCount;
1479 #endif
1480         } break;
1481 
1482         case RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN: {
1483             ++currentSubPass_;
1484             PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1485             DoSubPass(activeRenderPass_.subpassStartIndex);
1486         } break;
1487 
1488         default:
1489             break;
1490     }
1491 }
1492 
1493 void RenderBackendGLES::RenderCommandNextSubpass(const RenderCommandWithType& ref)
1494 {
1495     PLUGIN_ASSERT(ref.type == RenderCommandType::NEXT_SUBPASS);
1496     const auto& renderCmd = *static_cast<const struct RenderCommandNextSubpass*>(ref.rc);
1497     PLUGIN_UNUSED(renderCmd);
1498     PLUGIN_ASSERT(renderCmd.subpassContents == SubpassContents::CORE_SUBPASS_CONTENTS_INLINE);
1499     ++currentSubPass_;
1500     PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1501     DoSubPass(currentSubPass_);
1502 }
1503 
1504 int32_t RenderBackendGLES::InvalidateDepthStencil(
1505     array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1506 {
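    // Maps DONT_CARE store/stencilStore ops on the depth attachment (at its last use) to the matching
    // GL_*_ATTACHMENT token so the end-of-render-pass invalidation can discard its contents.
    // Returns the number of entries written to invalidateAttachment (0 or 1).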
1507     int32_t depthCount = 0;
1508     if (currentSubPass.depthAttachmentCount > 0) {
1509         const uint32_t index = currentSubPass.depthAttachmentIndex;
1510         if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1511             const auto& image = attachmentImage_[index];
1512             const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1513             // NOTE: we expect the depth to be in FBO in this case even if there would be a depth target in render pass
1514             if ((dplat.image || dplat.renderBuffer) && (!renderingToDefaultFbo_)) {
1515                 bool depth = false;
1516                 bool stencil = false;
1517                 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1518                     if ((dplat.format == GL_DEPTH_COMPONENT) || (dplat.format == GL_DEPTH_STENCIL)) {
1519                         depth = true;
1520                     }
1521                 }
1522                 if (rpd.attachments[index].stencilStoreOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1523                     if ((dplat.format == GL_STENCIL_INDEX) || (dplat.format == GL_DEPTH_STENCIL)) {
1524                         stencil = true;
1525                     }
1526                 }
1527                 if (depth && stencil) {
1528                     invalidateAttachment[0] = GL_DEPTH_STENCIL_ATTACHMENT;
1529                     depthCount++;
1530                 } else if (stencil) {
1531                     invalidateAttachment[0] = GL_STENCIL_ATTACHMENT;
1532                     depthCount++;
1533                 } else if (depth) {
1534                     invalidateAttachment[0] = GL_DEPTH_ATTACHMENT;
1535                     depthCount++;
1536                 }
1537             }
1538         }
1539     }
1540     return depthCount;
1541 }
1542 
1543 int32_t RenderBackendGLES::InvalidateColor(
1544     array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1545 {
1546     int32_t colorCount = 0;
1547     // see which parts of the fbo can be invalidated...
1548     // collect color attachment infos..
1549     for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1550         const uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1551         if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1552             if (const auto* image = attachmentImage_[index]) {
1553                 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1554                 if (dplat.image || dplat.renderBuffer) {
1555                     if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1556                         invalidateAttachment[static_cast<size_t>(colorCount)] = GL_COLOR_ATTACHMENT0 + ci;
1557                         colorCount++;
1558                     }
1559                 }
1560             }
1561         }
1562     }
1563     return colorCount;
1564 }
1565 
1566 uint32_t RenderBackendGLES::ResolveMSAA(const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1567 {
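    // Resolves the multisampled subpass FBO into its resolve FBO with a framebuffer blit and returns
    // which framebuffer target (GL_READ_FRAMEBUFFER after a blit, otherwise GL_FRAMEBUFFER) the caller
    // should use for the invalidation that follows.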
1568     const GLbitfield mask = ((currentSubPass.resolveAttachmentCount > 0u) ? GL_COLOR_BUFFER_BIT : 0u) |
1569                             ((currentSubPass.depthResolveAttachmentCount > 0u) ? GL_DEPTH_BUFFER_BIT : 0u);
1570     if (mask) {
1571         // Resolve MSAA buffers.
1572         // NOTE: ARM recommends NOT to use glBlitFramebuffer here
1573         device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1574         device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1575         if (scissorEnabled_) {
1576             glDisable(GL_SCISSOR_TEST);
1577             scissorEnabled_ = false;
1578         }
1579         // FLIP_RESOLVE_DEFAULT_FBO not needed, since we render flipped if end result will be resolved to fbo..
1580         // hopefully it works now.
1581 #if defined(FLIP_RESOLVE_DEFAULT_FBO) && FLIP_RESOLVE_DEFAULT_FBO
1582         if (currentFrameBuffer_->resolveFbo[currentSubPass_] == 0) {
1583             // flip if resolving to default fbo. (NOTE: sample count of destination must be zero or equal to source)
1584             // and in mali devices src and dst rects MUST be equal. (which is not according to spec)
1585             // IE. can't flip and resolve at the same time on MALI based devices.
1586             // NEED A FIX HERE!
1587             glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1588                 static_cast<GLint>(currentFrameBuffer_->height), 0, static_cast<GLint>(currentFrameBuffer_->height),
1589                 static_cast<GLint>(currentFrameBuffer_->width), 0, mask, GL_NEAREST);
1590             return GL_READ_FRAMEBUFFER;
1591         }
1592 #endif
1593         glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1594             static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1595             static_cast<GLint>(currentFrameBuffer_->height), mask,
1596             GL_NEAREST); // no flip
1597         return GL_READ_FRAMEBUFFER;
1598     }
1599     return GL_FRAMEBUFFER;
1600 }
1601 
1602 void RenderBackendGLES::RenderCommandEndRenderPass(const RenderCommandWithType& ref)
1603 {
1604     PLUGIN_ASSERT(ref.type == RenderCommandType::END_RENDER_PASS);
1605     const auto& renderCmd = *static_cast<const struct RenderCommandEndRenderPass*>(ref.rc);
1606     if (renderCmd.endType == RenderPassEndType::END_RENDER_PASS) {
1607         PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES endrenderpass inRenderpass_ %u", inRenderpass_);
1608         inRenderpass_--;
1609     }
1610     if (currentFrameBuffer_ == nullptr) {
1611         // Completely invalid state in backend.
1612         return;
1613     }
1614     const auto& rpd = activeRenderPass_.renderPassDesc;
1615     const auto& currentSubPass = activeRenderPass_.subpasses[currentSubPass_];
1616 
1617     // Resolve MSAA
1618     const uint32_t fbType = ResolveMSAA(rpd, currentSubPass);
1619 
1620     // Finally invalidate color and depth..
1621     GLenum invalidate[PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT + 1] = {};
1622     int32_t invalidateCount = InvalidateColor(invalidate, rpd, currentSubPass);
1623     invalidateCount += InvalidateDepthStencil(
1624         array_view(invalidate + invalidateCount, countof(invalidate) - invalidateCount), rpd, currentSubPass);
1625 
1626     // NOTE: all attachments should be the same size AND mCurrentFrameBuffer->width/height should match that!
1627     Invalidate(fbType, invalidateCount, invalidate, rpd, *currentFrameBuffer_);
1628 
1629     if (inRenderpass_ == 0) {
1630         currentFrameBuffer_ = nullptr;
1631     }
1632 }
1633 
1634 void RenderBackendGLES::RenderCommandBindVertexBuffers(const RenderCommandWithType& ref)
1635 {
1636     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_VERTEX_BUFFERS);
1637     const auto& renderCmd = *static_cast<const struct RenderCommandBindVertexBuffers*>(ref.rc);
1638     PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1639     PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1640     if (!boundGraphicsPipeline_) {
1641         return;
1642     }
1643     vertexAttribBinds_ = renderCmd.vertexBufferCount;
1644     for (size_t i = 0; i < renderCmd.vertexBufferCount; i++) {
1645         const auto& currVb = renderCmd.vertexBuffers[i];
1646         if (const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(currVb.bufferHandle); gpuBuffer) {
1647             const auto& plat = gpuBuffer->GetPlatformData();
1648             uintptr_t offset = currVb.bufferOffset;
1649             offset += plat.currentByteOffset;
1650             vertexAttribBindSlots_[i].id = plat.buffer;
1651             vertexAttribBindSlots_[i].offset = static_cast<intptr_t>(offset);
1652         } else {
1653             vertexAttribBindSlots_[i].id = 0;
1654             vertexAttribBindSlots_[i].offset = 0;
1655         }
1656     }
1657 }
1658 
1659 void RenderBackendGLES::RenderCommandBindIndexBuffer(const RenderCommandWithType& ref)
1660 {
1661     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_INDEX_BUFFER);
1662     const auto& renderCmd = *static_cast<const struct RenderCommandBindIndexBuffer*>(ref.rc);
1663     if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.indexBuffer.bufferHandle);
1664         gpuBuffer) {
1665         const auto& plat = gpuBuffer->GetPlatformData();
1666         boundIndexBuffer_.offset = renderCmd.indexBuffer.bufferOffset;
1667         boundIndexBuffer_.offset += plat.currentByteOffset;
1668         boundIndexBuffer_.type = renderCmd.indexBuffer.indexType;
1669         boundIndexBuffer_.id = plat.buffer;
1670     }
1671 }
1672 
1673 void RenderBackendGLES::RenderCommandBlitImage(const RenderCommandWithType& ref)
1674 {
1675     PLUGIN_ASSERT(ref.type == RenderCommandType::BLIT_IMAGE);
1676     const auto& renderCmd = *static_cast<const struct RenderCommandBlitImage*>(ref.rc);
1677     const auto* srcImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1678     const auto* dstImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1679     if ((srcImage == nullptr) || (dstImage == nullptr)) {
1680         return;
1681     }
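    // The blit is done one layer at a time by attaching the source and destination mip levels to the
    // two scratch framebuffers (blitImageSourceFbo_ / blitImageDestinationFbo_) and blitting between them.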
1682     const auto& srcDesc = srcImage->GetDesc();
1683     const auto& srcPlat = srcImage->GetPlatformData();
1684     const auto& dstDesc = dstImage->GetDesc();
1685     const auto& dstPlat = dstImage->GetPlatformData();
1686     const auto& srcRect = renderCmd.imageBlit.srcOffsets;
1687     const auto& dstRect = renderCmd.imageBlit.dstOffsets;
1688     const auto& src = renderCmd.imageBlit.srcSubresource;
1689     const auto& dst = renderCmd.imageBlit.dstSubresource;
1690     const GLint srcMipLevel = static_cast<GLint>(src.mipLevel);
1691     const GLint dstMipLevel = static_cast<GLint>(dst.mipLevel);
1692     const uint32_t srcSampleCount = static_cast<uint32_t>(srcDesc.sampleCountFlags);
1693     const uint32_t dstSampleCount = static_cast<uint32_t>(dstDesc.sampleCountFlags);
1694     PLUGIN_ASSERT_MSG(src.layerCount == dst.layerCount, "Source and Destination layercounts do not match!");
1695     PLUGIN_ASSERT_MSG(inRenderpass_ == 0, "RenderCommandBlitImage while inRenderPass");
1696     glDisable(GL_SCISSOR_TEST);
1697     scissorEnabled_ = false;
1698     // NOTE: LAYERS! (texture arrays)
1699     device_.BindReadFrameBuffer(blitImageSourceFbo_);
1700     device_.BindWriteFrameBuffer(blitImageDestinationFbo_);
1701     for (uint32_t layer = 0; layer < src.layerCount; layer++) {
1702         const GLenum srcType = getTarget(srcPlat.type, layer, srcSampleCount);
1703         const GLenum dstType = getTarget(dstPlat.type, layer, dstSampleCount);
1704         // glFramebufferTextureLayer for array textures....
1705         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, srcPlat.image, srcMipLevel);
1706         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, dstPlat.image, dstMipLevel);
1707         DoBlit(renderCmd.filter, { src.mipLevel, srcRect[0], srcRect[1], srcDesc.height },
1708             { dst.mipLevel, dstRect[0], dstRect[1], dstDesc.height });
1709         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, 0, 0);
1710         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, 0, 0);
1711     }
1712 }
1713 
1714 void RenderBackendGLES::RenderCommandCopyBuffer(const RenderCommandWithType& ref)
1715 {
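    // Buffer-to-buffer copies go through the GL_COPY_READ_BUFFER / GL_COPY_WRITE_BUFFER targets; the
    // previous bindings are restored afterwards so the device's cached binding state stays valid.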
1716     PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER);
1717     const auto& renderCmd = *static_cast<const struct RenderCommandCopyBuffer*>(ref.rc);
1718     const auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1719     const auto* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.dstHandle);
1720     if (srcGpuBuffer && dstGpuBuffer) {
1721         const auto& srcData = srcGpuBuffer->GetPlatformData();
1722         const auto& dstData = dstGpuBuffer->GetPlatformData();
1723         const auto oldBindR = device_.BoundBuffer(GL_COPY_READ_BUFFER);
1724         const auto oldBindW = device_.BoundBuffer(GL_COPY_WRITE_BUFFER);
1725         device_.BindBuffer(GL_COPY_READ_BUFFER, srcData.buffer);
1726         device_.BindBuffer(GL_COPY_WRITE_BUFFER, dstData.buffer);
1727         glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
1728             static_cast<GLintptr>(renderCmd.bufferCopy.srcOffset),
1729             static_cast<GLintptr>(renderCmd.bufferCopy.dstOffset), static_cast<GLsizeiptr>(renderCmd.bufferCopy.size));
1730         device_.BindBuffer(GL_COPY_READ_BUFFER, oldBindR);
1731         device_.BindBuffer(GL_COPY_WRITE_BUFFER, oldBindW);
1732     }
1733 }
1734 
1735 void RenderBackendGLES::BufferToImageCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1736 {
1737 #if (RENDER_HAS_GLES_BACKEND == 1) & defined(_WIN32)
1738     // use the workaround only for gles backend on windows. (pvr simulator bug)
1739     constexpr const bool usePixelUnpackBuffer = false;
1740 #else
1741     // expect this to work, and the nvidia bug to be fixed.
1742     constexpr const bool usePixelUnpackBuffer = true;
1743 #endif
1744     auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1745     auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1746     if ((srcGpuBuffer == nullptr) || (dstGpuImage == nullptr)) {
1747         return;
1748     }
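    // SetupBlit/Blit*/FinishBlit upload the buffer contents into the destination image, staging the
    // data through a GL_PIXEL_UNPACK_BUFFER binding when usePixelUnpackBuffer is enabled.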
1749     const auto info = SetupBlit<usePixelUnpackBuffer>(device_, renderCmd.bufferImageCopy, *srcGpuBuffer, *dstGpuImage);
1750     if (info.iPlat.type == GL_TEXTURE_CUBE_MAP) {
1751         BlitCube(device_, info);
1752     } else if (info.iPlat.type == GL_TEXTURE_2D) {
1753         Blit2D(device_, info);
1754     } else if (info.iPlat.type == GL_TEXTURE_2D_ARRAY) {
1755         BlitArray(device_, info);
1756     } else if (info.iPlat.type == GL_TEXTURE_3D) {
1757         Blit3D(device_, info);
1758 #if RENDER_HAS_GLES_BACKEND
1759     } else if (info.iPlat.type == GL_TEXTURE_EXTERNAL_OES) {
1760         PLUGIN_LOG_E("Tried to copy to GL_TEXTURE_EXTERNAL_OES. Ignored!");
1761 #endif
1762     } else {
1763         PLUGIN_ASSERT_MSG(false, "RenderCommandCopyBufferImage unhandled type");
1764     }
1765     FinishBlit<usePixelUnpackBuffer>(device_, *srcGpuBuffer);
1766 }
1767 
1768 void RenderBackendGLES::ImageToBufferCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1769 {
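    // Image-to-buffer copies are implemented by attaching the source level to the read framebuffer and
    // reading the pixels into the destination buffer bound as GL_PIXEL_PACK_BUFFER.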
1770     const auto& bc = renderCmd.bufferImageCopy;
1771     const auto* srcGpuImage = static_cast<GpuImageGLES*>(gpuResourceMgr_.GetImage(renderCmd.srcHandle));
1772     const auto* dstGpuBuffer = static_cast<GpuBufferGLES*>(gpuResourceMgr_.GetBuffer(renderCmd.dstHandle));
1773     PLUGIN_ASSERT(srcGpuImage);
1774     PLUGIN_ASSERT(dstGpuBuffer);
1775     if ((srcGpuImage == nullptr) || (dstGpuBuffer == nullptr)) {
1776         return;
1777     }
1778     const auto& iPlat = static_cast<const GpuImagePlatformDataGL&>(srcGpuImage->GetPlatformData());
1779     const auto& bPlat = static_cast<const GpuBufferPlatformDataGL&>(dstGpuBuffer->GetPlatformData());
1780     if ((iPlat.type != GL_TEXTURE_CUBE_MAP) && (iPlat.type != GL_TEXTURE_2D)) {
1781         PLUGIN_LOG_E("Unsupported texture type in ImageToBufferCopy %x", iPlat.type);
1782         return;
1783     }
1784     device_.BindReadFrameBuffer(blitImageSourceFbo_);
1785     PLUGIN_ASSERT(bc.imageSubresource.layerCount == 1);
1786     GLenum type = GL_TEXTURE_2D;
1787     if (iPlat.type == GL_TEXTURE_CUBE_MAP) {
1788         type = getCubeMapTarget(iPlat.type, bc.imageSubresource.baseArrayLayer);
1789     }
1790     // glFramebufferTextureLayer for array textures....
1791     glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, static_cast<GLuint>(iPlat.image),
1792         static_cast<GLint>(bc.imageSubresource.mipLevel));
1793     const Math::UVec2 sPos { bc.imageOffset.width, bc.imageOffset.height };
1794     const Math::UVec2 sExt { bc.imageExtent.width, bc.imageExtent.height };
1795     device_.BindBuffer(GL_PIXEL_PACK_BUFFER, bPlat.buffer);
1796     glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(bc.bufferRowLength));
1797     glPixelStorei(GL_PACK_ALIGNMENT, 1);
1798     uintptr_t dstOffset = bc.bufferOffset + bPlat.currentByteOffset;
1799     glReadnPixels(static_cast<GLint>(sPos.x), static_cast<GLint>(sPos.y), static_cast<GLsizei>(sExt.x),
1800         static_cast<GLsizei>(sExt.y), iPlat.format, static_cast<GLenum>(iPlat.dataType),
1801         static_cast<GLsizei>(bPlat.alignedByteSize), reinterpret_cast<void*>(dstOffset));
1802     device_.BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
1803     glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, 0, 0);
1804 }
1805 
1806 void RenderBackendGLES::RenderCommandCopyBufferImage(const RenderCommandWithType& ref)
1807 {
1808     PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER_IMAGE);
1809     const auto& renderCmd = *static_cast<const struct RenderCommandCopyBufferImage*>(ref.rc);
1810     PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1811     if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1812         BufferToImageCopy(renderCmd);
1813     } else if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1814         ImageToBufferCopy(renderCmd);
1815     }
1816 }
1817 
1818 void RenderBackendGLES::RenderCommandCopyImage(const RenderCommandWithType& ref)
1819 {
1820     PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_IMAGE);
1821     const auto& renderCmd = *static_cast<const struct RenderCommandCopyImage*>(ref.rc);
1822     PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1823     const auto* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1824     const auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1825     if ((srcGpuImage == nullptr) || (dstGpuImage == nullptr)) {
1826         return;
1827     }
1828     const auto& srcDesc = srcGpuImage->GetDesc();
1829     const auto& dstDesc = dstGpuImage->GetDesc();
1830 #if RENDER_VALIDATION_ENABLED
1831     ValidateCopyImage(renderCmd.imageCopy, srcDesc, dstDesc);
1832 #endif
1833     const auto srcMipLevel =
1834         static_cast<GLint>(Math::min(renderCmd.imageCopy.srcSubresource.mipLevel, srcDesc.mipCount - 1));
1835     const auto dstMipLevel =
1836         static_cast<GLint>(Math::min(renderCmd.imageCopy.dstSubresource.mipLevel, dstDesc.mipCount - 1));
1837 
1838     auto sOffset = renderCmd.imageCopy.srcOffset;
1839     auto dOffset = renderCmd.imageCopy.dstOffset;
1840     auto size = renderCmd.imageCopy.extent;
1841 
1842     // clamp negative offsets to zero and adjust extent and other offset accordingly
1843     ClampOffset(sOffset, dOffset, size);
1844     ClampOffset(dOffset, sOffset, size);
1845 
1846     // clamp size to fit src and dst
1847     ClampSize(sOffset, srcDesc, size);
1848     ClampSize(dOffset, dstDesc, size);
1849 
1850     const auto& srcPlatData = srcGpuImage->GetPlatformData();
1851     const auto& dstPlatData = dstGpuImage->GetPlatformData();
1852     glCopyImageSubData(srcPlatData.image, srcPlatData.type, srcMipLevel, sOffset.x, sOffset.y, sOffset.z,
1853         dstPlatData.image, dstPlatData.type, dstMipLevel, dOffset.x, dOffset.y, dOffset.z,
1854         static_cast<GLsizei>(size.width), static_cast<GLsizei>(size.height), static_cast<GLsizei>(size.depth));
1855 }
1856 
1857 void RenderBackendGLES::RenderCommandBarrierPoint(const RenderCommandWithType& ref)
1858 {
1859     PLUGIN_ASSERT(ref.type == RenderCommandType::BARRIER_POINT);
1860     const auto& renderCmd = *static_cast<const struct RenderCommandBarrierPoint*>(ref.rc);
1861     const auto& rbList = *managers_.rbList;
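    // Translates the recorded src/dst access masks into glMemoryBarrier bits; dependencies that stay
    // within fragment shading are routed to glMemoryBarrierByRegion instead.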
1862     // NOTE: proper flagging of barriers.
1863     if (const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1864             rbList.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1865         barrierPointBarriers) {
1866         const uint32_t barrierListCount = barrierPointBarriers->barrierListCount;
1867         const auto* nextBarrierList = barrierPointBarriers->firstBarrierList;
1868         GLbitfield barriers = 0;
1869         GLbitfield barriersByRegion = 0;
1870         for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1871             if (nextBarrierList == nullptr) {
1872                 // cannot be null, just a safety
1873                 PLUGIN_ASSERT(false);
1874                 return;
1875             }
1876             const auto& barrierListRef = *nextBarrierList;
1877             nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1878             const uint32_t barrierCount = barrierListRef.count;
1879             // helper which covers barriers supported by Barrier and BarrierByRegion
1880             auto commonBarrierBits = [](AccessFlags accessFlags, RenderHandleType resourceType) -> GLbitfield {
1881                 GLbitfield barriers = 0;
1882                 if (accessFlags & CORE_ACCESS_UNIFORM_READ_BIT) {
1883                     barriers |= GL_UNIFORM_BARRIER_BIT;
1884                 }
1885                 if (accessFlags & CORE_ACCESS_SHADER_READ_BIT) {
1886                     // shader read covers UBO, SSBO, storage image etc. use resource type to limit the options.
1887                     if (resourceType == RenderHandleType::GPU_IMAGE) {
1888                         barriers |= GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1889                     } else if (resourceType == RenderHandleType::GPU_BUFFER) {
1890                         barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
1891                     } else {
1892                         barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT |
1893                                     GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1894                     }
1895                 }
1896                 if (accessFlags & CORE_ACCESS_SHADER_WRITE_BIT) {
1897                     if (resourceType == RenderHandleType::GPU_IMAGE) {
1898                         barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1899                     } else if (resourceType == RenderHandleType::GPU_BUFFER) {
1900                         barriers |= GL_SHADER_STORAGE_BARRIER_BIT;
1901                     } else {
1902                         barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
1903                     }
1904                 }
1905                 if (accessFlags & (CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1906                                       CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)) {
1907                     barriers |= GL_FRAMEBUFFER_BARRIER_BIT;
1908                 }
1909                 // GL_ATOMIC_COUNTER_BARRIER_BIT is not used at the moment
1910                 return barriers;
1911             };
1912             for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1913                 const auto& barrier = barrierListRef.commandBarriers[barrierIdx];
1914 
1915                 // check if written by previous shader as an attachment or storage/ image buffer
1916                 if (barrier.src.accessFlags & (CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
1917                                                   CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
1918                     const auto resourceHandle = barrier.resourceHandle;
1919                     const auto handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1920 
1921                     // barrier by region is between fragment shaders and supports a subset of barriers.
1922                     if ((barrier.src.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) &&
1923                         (barrier.dst.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
1924                         barriersByRegion |= commonBarrierBits(barrier.dst.accessFlags, handleType);
1925                     } else {
1926                         // check the barriers shared with ByRegion
1927                         barriers |= commonBarrierBits(barrier.dst.accessFlags, handleType);
1928 
1929                         // the rest are invalid for ByRegion
1930                         if (barrier.dst.accessFlags & CORE_ACCESS_INDIRECT_COMMAND_READ_BIT) {
1931                             barriers |= GL_COMMAND_BARRIER_BIT;
1932                         }
1933                         if (barrier.dst.accessFlags & CORE_ACCESS_INDEX_READ_BIT) {
1934                             barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
1935                         }
1936                         if (barrier.dst.accessFlags & CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
1937                             barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
1938                         }
1939                         // which are the correct accessFlags?
1940                         // GL_PIXEL_BUFFER_BARRIER_BIT:
1941                         // - buffer objects via the GL_PIXEL_PACK_BUFFER and GL_PIXEL_UNPACK_BUFFER bindings
1942                         //   (via glReadPixels, glTexSubImage*, etc.)
1943                         // GL_TEXTURE_UPDATE_BARRIER_BIT:
1944                         // - texture writes via glTex(Sub)Image*, glCopyTex(Sub)Image*, glCompressedTex(Sub)Image*,
1945                         //   and reads via glGetTexImage
1946                         // GL_BUFFER_UPDATE_BARRIER_BIT:
1947                         // - glBufferSubData, glCopyBufferSubData, or glGetBufferSubData, or buffer object memory
1948                         //   mapped by glMapBuffer or glMapBufferRange
1949                         // Do these two cover all memory access, i.e. CORE_ACCESS_MEMORY_READ_BIT and
1950                         // CORE_ACCESS_MEMORY_WRITE_BIT?
1951                         if (barrier.dst.accessFlags & (CORE_ACCESS_TRANSFER_READ_BIT | CORE_ACCESS_TRANSFER_WRITE_BIT |
1952                                                           CORE_ACCESS_HOST_READ_BIT | CORE_ACCESS_HOST_WRITE_BIT)) {
1953                             if (handleType == RenderHandleType::GPU_IMAGE) {
1954                                 barriers |= GL_TEXTURE_UPDATE_BARRIER_BIT;
1955                             } else if (handleType == RenderHandleType::GPU_BUFFER) {
1956                                 barriers |= GL_BUFFER_UPDATE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT;
1957                             }
1958                         }
1959                         // GL_TRANSFORM_FEEDBACK_BARRIER_BIT is not used at the moment
1960                     }
1961                 }
1962             }
1963         }
1964         if (barriers) {
1965             glMemoryBarrier(barriers);
1966         }
1967         if (barriersByRegion) {
1968             // only for fragment-fragment
1969             glMemoryBarrierByRegion(barriersByRegion);
1970         }
1971     }
1972 }
1973 
1974 Gles::Bind& RenderBackendGLES::SetupBind(const DescriptorSetLayoutBinding& binding, vector<Gles::Bind>& resources)
1975 {
1976     PLUGIN_ASSERT(binding.binding < resources.size());
1977     auto& obj = resources[binding.binding];
1978     PLUGIN_ASSERT(obj.resources.size() == binding.descriptorCount);
1979     PLUGIN_ASSERT(obj.descriptorType == binding.descriptorType);
1980     return obj;
1981 }
1982 
1983 void RenderBackendGLES::BindSampler(const BindableSampler& res, Gles::Bind& obj, uint32_t index)
1984 {
1985     const auto* gpuSampler = gpuResourceMgr_.GetSampler<GpuSamplerGLES>(res.handle);
1986     if (gpuSampler) {
1987         const auto& plat = gpuSampler->GetPlatformData();
1988         obj.resources[index].sampler.samplerId = plat.sampler;
1989     } else {
1990         obj.resources[index].sampler.samplerId = 0;
1991     }
1992 }
1993 
1994 void RenderBackendGLES::BindImage(
1995     const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
1996 {
1997     const AccessFlags accessFlags = resState.accessFlags;
1998     auto* gpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(res.handle);
1999     auto& ref = obj.resources[index];
2000     ref.image.image = gpuImage;
2001     const bool read = IS_BIT(accessFlags, CORE_ACCESS_SHADER_READ_BIT);
2002     const bool write = IS_BIT(accessFlags, CORE_ACCESS_SHADER_WRITE_BIT);
2003     if (read && write) {
2004         ref.image.mode = GL_READ_WRITE;
2005     } else if (read) {
2006         ref.image.mode = GL_READ_ONLY;
2007     } else if (write) {
2008         ref.image.mode = GL_WRITE_ONLY;
2009     } else {
2010         // no read and no write?
2011         ref.image.mode = GL_READ_WRITE;
2012     }
2013     ref.image.mipLevel = res.mip;
2014 }
2015 
2016 void RenderBackendGLES::BindImageSampler(
2017     const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
2018 {
2019     BindImage(res, resState, obj, index);
2020     BindSampler(BindableSampler { res.samplerHandle }, obj, index);
2021 }
2022 
2023 void RenderBackendGLES::BindBuffer(const BindableBuffer& res, Gles::Bind& obj, uint32_t dynamicOffset, uint32_t index)
2024 {
2025     const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(res.handle);
2026     if (gpuBuffer) {
2027         const auto& plat = gpuBuffer->GetPlatformData();
2028         const uint32_t baseOffset = res.byteOffset;
2029         obj.resources[index].buffer.offset = baseOffset + plat.currentByteOffset + dynamicOffset;
2030         obj.resources[index].buffer.size = std::min(plat.bindMemoryByteSize - baseOffset, res.byteSize);
2031         obj.resources[index].buffer.bufferId = plat.buffer;
2032     } else {
2033         obj.resources[index].buffer.offset = 0;
2034         obj.resources[index].buffer.size = 0;
2035         obj.resources[index].buffer.bufferId = 0;
2036     }
2037 }
2038 
2039 void RenderBackendGLES::ProcessBindings(const struct RenderCommandBindDescriptorSets& renderCmd,
2040     const DescriptorSetLayoutBindingResources& data, uint32_t set)
2041 {
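    // Copies the descriptor set's buffer/image/sampler data into the per-set bind cache; nothing is
    // bound to GL here. On GLES, bindings that reference GL_TEXTURE_EXTERNAL_OES images are recorded
    // separately for special handling later.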
2042     BindState& bind = boundObjects_[set];
2043     vector<Gles::Bind>& resources = bind.resources;
2044 #if RENDER_HAS_GLES_BACKEND
2045     bind.oesBinds.clear();
2046 #endif
2047     const auto& dynamicOffsets = renderCmd.descriptorSetDynamicOffsets[set];
2048     const auto& buffers = data.buffers;
2049     const auto& images = data.images;
2050     const auto& samplers = data.samplers;
2051     uint32_t currDynamic = 0U;
2052     for (const auto& res : data.bindings) {
2053         auto& obj = SetupBind(res.binding, resources);
2054 #if RENDER_HAS_GLES_BACKEND
2055         bool hasOes = false;
2056 #endif
2057         auto GetArrayOffset = [](const auto& data, const auto& res) {
2058             const RenderHandleType type = GetRenderHandleType(res.binding.descriptorType);
2059             if (type == RenderHandleType::GPU_BUFFER) {
2060                 return data.buffers[res.resourceIndex].arrayOffset;
2061             } else if (type == RenderHandleType::GPU_IMAGE) {
2062                 return data.images[res.resourceIndex].arrayOffset;
2063             } else if (type == RenderHandleType::GPU_SAMPLER) {
2064                 return data.samplers[res.resourceIndex].arrayOffset;
2065             }
2066             return 0u;
2067         };
2068         const bool hasArrOffset = (res.binding.descriptorCount > 1);
2069         const uint32_t arrayOffset = hasArrOffset ? GetArrayOffset(data, res) : 0;
2070         for (uint8_t index = 0; index < res.binding.descriptorCount; index++) {
2071             const uint32_t resIdx = (index == 0) ? res.resourceIndex : (arrayOffset + index - 1);
2072             GpuImageGLES* image = nullptr;
2073             switch (res.binding.descriptorType) {
2074                 case CORE_DESCRIPTOR_TYPE_SAMPLER: {
2075                     const auto& bRes = samplers[resIdx];
2076                     BindSampler(bRes.resource, obj, index);
2077                     break;
2078                 }
2079                 case CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2080                 case CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2081                 case CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
2082                     const auto& bRes = images[resIdx];
2083                     BindImage(bRes.resource, bRes.state, obj, index);
2084                     image = obj.resources[index].image.image;
2085                     break;
2086                 }
2087                 case CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
2088                     const auto& bRes = images[resIdx];
2089                     BindImageSampler(bRes.resource, bRes.state, obj, index);
2090                     image = obj.resources[index].image.image;
2091                     break;
2092                 }
2093                 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2094                 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
2095                     const auto& bRes = buffers[resIdx];
2096                     uint32_t dynamicOffset = 0;
2097                     if (currDynamic < dynamicOffsets.dynamicOffsetCount) {
2098                         dynamicOffset = dynamicOffsets.dynamicOffsets[currDynamic];
2099                         currDynamic++;
2100                     }
2101                     BindBuffer(bRes.resource, obj, dynamicOffset, index);
2102                     break;
2103                 }
2104                 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2105                 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
2106                     const auto& bRes = buffers[resIdx];
2107                     BindBuffer(bRes.resource, obj, 0, index);
2108                     break;
2109                 }
2110                 case CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2111                 case CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2112                 case CORE_DESCRIPTOR_TYPE_MAX_ENUM:
2113                 default:
2114                     PLUGIN_ASSERT_MSG(false, "Unhandled descriptor type");
2115                     break;
2116             }
2117 #if RENDER_HAS_GLES_BACKEND
2118             if ((image) && (image->GetPlatformData().type == GL_TEXTURE_EXTERNAL_OES)) {
2119                 hasOes = true;
2120             }
2121 #endif
2122         }
2123 #if RENDER_HAS_GLES_BACKEND
2124         if (hasOes) {
2125             bind.oesBinds.push_back(OES_Bind { (uint8_t)set, (uint8_t)res.binding.binding });
2126         }
2127 #endif
2128     }
2129 }
2130 
2131 void RenderBackendGLES::RenderCommandBindDescriptorSets(const RenderCommandWithType& ref)
2132 {
2133     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_DESCRIPTOR_SETS);
2134     if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2135         return;
2136     }
2137     const auto& renderCmd = *static_cast<const struct RenderCommandBindDescriptorSets*>(ref.rc);
2138     PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2139 
2140     const auto& aNcdsm = *managers_.descriptorSetMgr;
2141     for (uint32_t idx = renderCmd.firstSet; idx < renderCmd.firstSet + renderCmd.setCount; ++idx) {
2142         PLUGIN_ASSERT_MSG(idx < Gles::ResourceLimits::MAX_SETS, "Invalid descriptorset index");
2143         const auto descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2144         PLUGIN_ASSERT(RenderHandleUtil::IsValid(descriptorSetHandle));
2145         const auto& data = aNcdsm.GetCpuDescriptorSetData(descriptorSetHandle);
2146         boundObjects_[idx].dirty = true; // mark the set as "changed"
2147         ProcessBindings(renderCmd, data, idx);
2148         // (note, nothing actually gets bound yet.. just the bind cache is updated)
2149     }
2150 }
2151 
2152 void RenderBackendGLES::SetPushConstant(uint32_t program, const Gles::PushConstantReflection& pc, const void* data)
2153 {
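    // GL(ES) has no push constants; they are applied as plain uniforms with glProgramUniform* using the
    // reflected location and type.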
2154     const GLint location = static_cast<GLint>(pc.location);
2155     // the consts list has been filtered and cleared of unused uniforms.
2156     PLUGIN_ASSERT(location != Gles::INVALID_LOCATION);
2157     GLint count = Math::max(static_cast<GLint>(pc.arraySize), 1);
2158     switch (pc.type) {
2159         case GL_UNSIGNED_INT: {
2160             glProgramUniform1uiv(program, location, count, static_cast<const GLuint*>(data));
2161             break;
2162         }
2163         case GL_FLOAT: {
2164             glProgramUniform1fv(program, location, count, static_cast<const GLfloat*>(data));
2165             break;
2166         }
2167         case GL_FLOAT_VEC2: {
2168             glProgramUniform2fv(program, location, count, static_cast<const GLfloat*>(data));
2169             break;
2170         }
2171         case GL_FLOAT_VEC4: {
2172             glProgramUniform4fv(program, location, count, static_cast<const GLfloat*>(data));
2173             break;
2174         }
2175         case GL_FLOAT_MAT4: {
2176             glProgramUniformMatrix4fv(program, location, count, false, static_cast<const GLfloat*>(data));
2177             break;
2178         }
2179         case GL_UNSIGNED_INT_VEC4: {
2180             glProgramUniform4uiv(program, location, count, static_cast<const GLuint*>(data));
2181             break;
2182         }
2183         default:
2184             PLUGIN_ASSERT_MSG(false, "Unhandled pushconstant variable type");
2185     }
2186 }
2187 
2188 void RenderBackendGLES::SetPushConstants(uint32_t program, const array_view<Gles::PushConstantReflection>& consts)
2189 {
2190     if (boundProgram_.setPushConstants) {
2191         boundProgram_.setPushConstants = false;
2192         const auto& renderCmd = boundProgram_.pushConstants;
2193         PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2194         PLUGIN_ASSERT_MSG(renderCmd.pushConstant.byteSize > 0, "PushConstant byteSize is zero!");
2195         PLUGIN_ASSERT_MSG(renderCmd.data, "PushConstant data is nullptr!");
2196         if ((renderCmd.data == nullptr) || (renderCmd.pushConstant.byteSize == 0))
2197             return;
2198         // ASSERT: expecting data is valid
2199         // NOTE: handle rest of the types
2200         for (const auto& pc : consts) {
2201             const size_t offs = pc.offset;
2202             if ((offs + pc.size) > renderCmd.pushConstant.byteSize) {
2203                 PLUGIN_LOG_E(
2204                     "pushConstant data invalid (data for %s is missing [offset:%zu size:%zu] byteSize of data:%u)",
2205                     pc.name.c_str(), pc.offset, pc.size, renderCmd.pushConstant.byteSize);
2206                 continue;
2207             }
2208             /*
2209             NOTE: handle the strides....
2210             consts[i].array_stride;
2211             consts[i].matrix_stride; */
2212             SetPushConstant(program, pc, &renderCmd.data[offs]);
2213         }
2214     }
2215 }
2216 
2217 void RenderBackendGLES::RenderCommandPushConstant(const RenderCommandWithType& ref)
2218 {
2219     PLUGIN_ASSERT(ref.type == RenderCommandType::PUSH_CONSTANT);
2220     if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2221         return;
2222     }
2223     const auto& renderCmd = *static_cast<const struct RenderCommandPushConstant*>(ref.rc);
2224     if (renderCmd.pushConstant.byteSize > 0) {
2225         PLUGIN_ASSERT(renderCmd.data);
2226         PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2227         boundProgram_.setPushConstants = true;
2228         boundProgram_.pushConstants = renderCmd;
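        // Only the command is recorded here; SetPushConstants() performs the actual upload from
        // BindResources(), once the final program (e.g. a possible OES variant) is known.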
2229     }
2230 }
2231 
2232 void RenderBackendGLES::RenderCommandClearColorImage(const RenderCommandWithType& ref)
2233 {
2234     PLUGIN_ASSERT(ref.type == RenderCommandType::CLEAR_COLOR_IMAGE);
2235 #if RENDER_HAS_GLES_BACKEND
2236 #if (RENDER_VALIDATION_ENABLED == 1)
2237     PLUGIN_LOG_ONCE_E("RenderBackendGLES::RenderCommandClearColorImage",
2238         "Render command clear color image not support with GLES. One should implement higher level path for "
2239         "clearing.");
2240 #endif
2241 #else
2242     const auto& renderCmd = *static_cast<const struct RenderCommandClearColorImage*>(ref.rc);
2243 
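    // glClearTexImage (core in GL 4.4 / ARB_clear_texture) clears without binding the texture;
    // GLES has no equivalent, hence the validation-only path in the GLES build above.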
2244     const GpuImageGLES* imagePtr = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.handle);
2245     if (imagePtr) {
2246         const GpuImagePlatformDataGL& platImage = imagePtr->GetPlatformData();
2247         // NOTE: mip levels and array layers should be handled separately
2248         for (const auto& subresRef : renderCmd.ranges) {
2249             glClearTexImage(platImage.image,     // texture
2250                 (int32_t)subresRef.baseMipLevel, // level
2251                 platImage.format,                // format
2252                 platImage.dataType,              // type
2253                 &renderCmd.color);               // data
2254         }
2255     }
2256 #endif
2257 }
2258 
2259 // dynamic states
2260 void RenderBackendGLES::RenderCommandDynamicStateViewport(const RenderCommandWithType& ref)
2261 {
2262     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_VIEWPORT);
2263     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateViewport*>(ref.rc);
2264     const ViewportDesc& vd = renderCmd.viewportDesc;
2265     SetViewport(renderArea_, vd);
2266 }
2267 
2268 void RenderBackendGLES::RenderCommandDynamicStateScissor(const RenderCommandWithType& ref)
2269 {
2270     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_SCISSOR);
2271     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateScissor*>(ref.rc);
2272     const ScissorDesc& sd = renderCmd.scissorDesc;
2273     SetScissor(renderArea_, sd);
2274 }
2275 
2276 void RenderBackendGLES::RenderCommandDynamicStateLineWidth(const RenderCommandWithType& ref)
2277 {
2278     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_LINE_WIDTH);
2279     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateLineWidth*>(ref.rc);
2280     if (renderCmd.lineWidth != cacheState_.rasterizationState.lineWidth) {
2281         cacheState_.rasterizationState.lineWidth = renderCmd.lineWidth;
2282         glLineWidth(renderCmd.lineWidth);
2283     }
2284 }
2285 
2286 void RenderBackendGLES::RenderCommandDynamicStateDepthBias(const RenderCommandWithType& ref)
2287 {
2288     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS);
2289     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBias not implemented");
2290 }
2291 
2292 void RenderBackendGLES::RenderCommandDynamicStateBlendConstants(const RenderCommandWithType& ref)
2293 {
2294     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS);
2295     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateBlendConstants not implemented");
2296 }
2297 
2298 void RenderBackendGLES::RenderCommandDynamicStateDepthBounds(const RenderCommandWithType& ref)
2299 {
2300     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS);
2301     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBounds not implemented");
2302 }
2303 
2304 void RenderBackendGLES::SetStencilState(const uint32_t frontFlags, const GraphicsState::StencilOpState& front,
2305     const uint32_t backFlags, const GraphicsState::StencilOpState& back)
2306 {
2307     auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2308     auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2309     const uint32_t FUNCMASK =
2310         (StencilSetFlags::SETCOMPAREOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETREFERENCE);
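    // glStencilFuncSeparate() sets func, reference and compare mask in a single call, so changing any
    // one of the three means all of them must be re-sent; FUNCMASK groups those flags together.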
2311     if (frontFlags & StencilSetFlags::SETWRITEMASK) {
2312         cFront.writeMask = front.writeMask;
2313         glStencilMaskSeparate(GL_FRONT, cFront.writeMask);
2314     }
2315     if (frontFlags & FUNCMASK) {
2316         SetStencilCompareOp(cFront, front);
2317         glStencilFuncSeparate(
2318             GL_FRONT, GetCompareOp(cFront.compareOp), static_cast<GLint>(cFront.reference), cFront.compareMask);
2319     }
2320     if (frontFlags & StencilSetFlags::SETOP) {
2321         SetStencilOp(cFront, front);
2322         glStencilOpSeparate(
2323             GL_FRONT, GetStencilOp(cFront.failOp), GetStencilOp(cFront.depthFailOp), GetStencilOp(cFront.passOp));
2324     }
2325     if (backFlags & StencilSetFlags::SETWRITEMASK) {
2326         cBack.writeMask = back.writeMask;
2327         glStencilMaskSeparate(GL_BACK, cBack.writeMask);
2328     }
2329     if (backFlags & FUNCMASK) {
2330         SetStencilCompareOp(cBack, back);
2331         glStencilFuncSeparate(
2332             GL_BACK, GetCompareOp(cBack.compareOp), static_cast<GLint>(cBack.reference), cBack.compareMask);
2333     }
2334     if (backFlags & StencilSetFlags::SETOP) {
2335         SetStencilOp(cBack, back);
2336         glStencilOpSeparate(
2337             GL_BACK, GetStencilOp(cBack.failOp), GetStencilOp(cBack.depthFailOp), GetStencilOp(cBack.passOp));
2338     }
2339 }
2340 
2341 void RenderBackendGLES::RenderCommandDynamicStateStencil(const RenderCommandWithType& ref)
2342 {
2343     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_STENCIL);
2344     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateStencil*>(ref.rc);
2345     auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2346     auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2347     uint32_t setFront = 0;
2348     uint32_t setBack = 0;
2349     if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_FRONT_BIT) {
2350         if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2351             if (renderCmd.mask != cFront.compareMask) {
2352                 cFront.compareMask = renderCmd.mask;
2353                 setFront |= StencilSetFlags::SETCOMPAREMASK;
2354             }
2355         } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2356             if (renderCmd.mask != cFront.writeMask) {
2357                 cFront.writeMask = renderCmd.mask;
2358                 setFront |= StencilSetFlags::SETWRITEMASK;
2359             }
2360         } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2361             if (renderCmd.mask != cFront.reference) {
2362                 cFront.reference = renderCmd.mask;
2363                 setFront |= StencilSetFlags::SETREFERENCE;
2364             }
2365         }
2366     }
2367     if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_BACK_BIT) {
2368         if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2369             if (renderCmd.mask != cBack.compareMask) {
2370                 cBack.compareMask = renderCmd.mask;
2371                 setBack |= StencilSetFlags::SETCOMPAREMASK;
2372             }
2373         } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2374             if (renderCmd.mask != cBack.writeMask) {
2375                 cBack.writeMask = renderCmd.mask;
2376                 setBack |= StencilSetFlags::SETWRITEMASK;
2377             }
2378         } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2379             if (renderCmd.mask != cBack.reference) {
2380                 cBack.reference = renderCmd.mask;
2381                 setBack |= StencilSetFlags::SETREFERENCE;
2382             }
2383         }
2384     }
2385     SetStencilState(setFront, cFront, setBack, cBack);
2386 }
2387 
2388 void RenderBackendGLES::RenderCommandFragmentShadingRate(const RenderCommandWithType& renderCmd)
2389 {
2390 #if (RENDER_VALIDATION_ENABLED == 1)
2391     PLUGIN_LOG_ONCE_I("gles_RenderCommandFragmentShadingRate",
2392         "RENDER_VALIDATION: Fragment shading rate not available with GL(ES) backend.");
2393 #endif
2394 }
2395 
2396 void RenderBackendGLES::RenderCommandExecuteBackendFramePosition(const RenderCommandWithType& renderCmd)
2397 {
2398     PLUGIN_ASSERT_MSG(false, "RenderCommandExecuteBackendFramePosition not implemented");
2399 }
2400 
2401 void RenderBackendGLES::RenderCommandWriteTimestamp(const RenderCommandWithType& renderCmd)
2402 {
2403     PLUGIN_ASSERT_MSG(false, "RenderCommandWriteTimestamp not implemented");
2404 }
2405 
2406 void RenderBackendGLES::BindVertexInputs(
2407     const VertexInputDeclarationData& decldata, const array_view<const int32_t>& vertexInputs)
2408 {
2409     // update bindings for the VAO.
2410     // process with attribute descriptions to only bind the needed vertex buffers
2411     // NOTE: there may be extra bindings in decldata.bindingDescriptions,
2412     // but we only bind the ones needed by the shader
2413     const uint32_t minBinding = Math::min(vertexAttribBinds_, decldata.attributeDescriptionCount);
2414     for (uint32_t i = 0; i < minBinding; ++i) {
2415         const auto& attributeRef = decldata.attributeDescriptions[i];
2416         const uint32_t location = attributeRef.location;
2417         const uint32_t binding = attributeRef.binding;
2418         // NOTE: we need to bind all the buffers to the correct bindings.
2419         // shader optimized check (vertexInputs, some locations are not in use)
2420         if ((location != ~0u) && (binding != ~0u) && (vertexInputs[location] != Gles::INVALID_LOCATION)) {
2421             const auto& slot = vertexAttribBindSlots_[binding];
2422             const auto& bindingRef = decldata.bindingDescriptions[binding];
2423             PLUGIN_ASSERT(bindingRef.binding == binding);
2424             // buffer bound to slot, and it's used by the shader.
2425             device_.BindVertexBuffer(binding, slot.id, slot.offset, static_cast<intptr_t>(bindingRef.stride));
2426             /*
2427             core/vulkan
2428             bindingRef.vertexInputRate =  CORE_VERTEX_INPUT_RATE_VERTEX (0)  attribute index advances per vertex
2429             bindingRef.vertexInputRate =  CORE_VERTEX_INPUT_RATE_INSTANCE (1)  attribute index advances per instance
2430 
2431             gl/gles
2432             If divisor is  0, the attributes using the buffer bound to the binding index advance once per vertex.
2433             If divisor is >0, the attributes advance once per divisor instances of the set(s) of vertices being
2434             rendered.
2435 
2436             So the inputRate can be passed directly as the vertex binding divisor (i.e. advance once per instance);
2437             the enum values happen to match, so a simple cast is enough.
2438             */
2439             static_assert(CORE_VERTEX_INPUT_RATE_VERTEX == 0 && CORE_VERTEX_INPUT_RATE_INSTANCE == 1);
2440             device_.VertexBindingDivisor(binding, static_cast<uint32_t>(bindingRef.vertexInputRate));
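            // For example, an instance-rate binding { binding = 1, stride = 32, inputRate = INSTANCE }
            // would result in divisor 1 for binding 1, i.e. the attribute advances once per instance.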
2441         }
2442     }
2443 }
2444 
2445 void RenderBackendGLES::BindResources()
2446 {
2447 #if RENDER_HAS_GLES_BACKEND
2448     // scan all sets here to see if any of the sets has oes.
2449     // we don't actually need to rebuild this info every time.
2450     // should "emulate" the gpu descriptor sets better. (and store this information along with the other bind cache
2451     // data there)
2452     oesBinds_.clear();
2453     for (const auto& state : boundObjects_) {
2454         const auto& oes = state.oesBinds;
2455         if (!oes.empty()) {
2456             oesBinds_.insert(oesBinds_.end(), oes.begin(), oes.end());
2457         }
2458     }
2459 #endif
2460     const array_view<Binder>* resourceList = nullptr;
2461     const array_view<Gles::PushConstantReflection>* pushConstants = nullptr;
2462     int32_t flipLocation = Gles::INVALID_LOCATION;
2463     uint32_t program = 0;
2464     // Push constants and "fliplocation" uniform (ie. uniform state) should be only updated if changed...
2465     if (currentFrameBuffer_) { // mCurrentFrameBuffer is only set if graphics pipeline is bound..
2466         PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
2467         PLUGIN_ASSERT(boundGraphicsPipeline_);
2468         if (!boundGraphicsPipeline_) {
2469             return;
2470         }
2471         array_view<const int32_t> vertexInputs;
2472         const auto& pipelineData =
2473             static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
2474         const GpuShaderProgramGLES* shader = pipelineData.graphicsShader;
2475 #if RENDER_HAS_GLES_BACKEND
2476         if (!oesBinds_.empty()) {
2477             // the oes vector contains the set/binding pairs to which an OES texture is bound;
2478             // ask for a compatible program from the boundGraphicsPipeline_
2479             shader = boundGraphicsPipeline_->GetOESProgram(oesBinds_);
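            // GL_TEXTURE_EXTERNAL_OES images must be sampled through samplerExternalOES, so a patched
            // program variant matching the current set/binding combination is used instead.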
2480         }
2481 #endif
2482         const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
2483         program = sd.program;
2484         vertexInputs = { sd.inputs, countof(sd.inputs) };
2485         FlushViewportScissors();
2486         if (!scissorEnabled_) {
2487             scissorEnabled_ = true;
2488             glEnable(GL_SCISSOR_TEST); // Always enabled
2489         }
2490 #if (RENDER_PERF_ENABLED == 1)
2491         if (device_.BoundProgram() != program) {
2492             ++perfCounters_.bindProgram;
2493         }
2494 #endif
2495         device_.UseProgram(program);
2496         device_.BindVertexArray(pipelineData.vao);
2497         BindVertexInputs(pipelineData.vertexInputDeclaration, vertexInputs);
2498         device_.BindElementBuffer(boundIndexBuffer_.id);
2499         resourceList = &sd.resourceList;
2500         flipLocation = sd.flipLocation;
2501         pushConstants = &sd.pushConstants;
2502     } else {
2503         PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
2504         PLUGIN_ASSERT(boundComputePipeline_);
2505         if (!boundComputePipeline_) {
2506             return;
2507         }
2508         const auto& pipelineData =
2509             static_cast<const PipelineStateObjectPlatformDataGL&>(boundComputePipeline_->GetPlatformData());
2510         if (pipelineData.computeShader) {
2511             const auto& sd =
2512                 static_cast<const GpuComputeProgramPlatformDataGL&>(pipelineData.computeShader->GetPlatformData());
2513             program = sd.program;
2514 #if (RENDER_PERF_ENABLED == 1)
2515             if (device_.BoundProgram() != program) {
2516                 ++perfCounters_.bindProgram;
2517             }
2518 #endif
2519             device_.UseProgram(program);
2520             resourceList = &sd.resourceList;
2521             flipLocation = sd.flipLocation;
2522             pushConstants = &sd.pushConstants;
2523         }
2524     }
2525 
2526     SetPushConstants(program, *pushConstants);
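    // The flip uniform presumably lets the vertex shader negate clip-space Y when rendering to the
    // default framebuffer, compensating for GL's bottom-left origin versus the offscreen FBO path.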
2527     if (flipLocation != Gles::INVALID_LOCATION) {
2528         const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
2529         glProgramUniform1fv(program, flipLocation, 1, &flip);
2530     }
2531 
2532     for (const auto& r : *resourceList) {
2533         PLUGIN_ASSERT(r.set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
2534         if (r.bind >= static_cast<uint32_t>(boundObjects_[r.set].resources.size())) {
2535             continue;
2536         }
2537         const auto& res = boundObjects_[r.set].resources[r.bind];
2538         PLUGIN_ASSERT(res.resources.size() == r.id.size());
2539         auto resType = res.descriptorType;
2540         if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
2541             resType = CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
2542         } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2543             resType = CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2544         }
2545 
2546         // a few helpers for updating perf counters and binding the sampler/texture/buffer
2547         auto bindSampler = [this](uint32_t textureUnit, uint32_t samplerId) {
2548 #if (RENDER_PERF_ENABLED == 1)
2549             if (device_.BoundSampler(textureUnit) != samplerId) {
2550                 ++perfCounters_.bindSampler;
2551             }
2552 #endif
2553             device_.BindSampler(textureUnit, samplerId);
2554         };
2555         auto bindTexture = [this](uint32_t textureUnit, const GpuImagePlatformDataGL& dplat) {
2556 #if (RENDER_PERF_ENABLED == 1)
2557             if (device_.BoundTexture(textureUnit, dplat.type) != dplat.image) {
2558                 ++perfCounters_.bindTexture;
2559             }
2560 #endif
2561             device_.BindTexture(textureUnit, dplat.type, dplat.image);
2562         };
2563         auto bindTextureImage = [this](uint32_t textureUnit, const Gles::Bind::ImageType& image,
2564                                     const GpuImagePlatformDataGL& dplat) {
2565             uint32_t level = (image.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? image.mipLevel : 0U;
2566             device_.BindImageTexture(textureUnit, dplat.image, level, false, 0, image.mode, dplat.internalFormat);
2567         };
2568         auto bindBuffer = [this](uint32_t target, uint32_t binding, const Gles::Bind::BufferType& buffer) {
2569 #if (RENDER_PERF_ENABLED == 1)
2570             if (device_.BoundBuffer(target) != buffer.bufferId) {
2571                 ++perfCounters_.bindBuffer;
2572             }
2573 #endif
2574             device_.BindBufferRange(target, binding, buffer.bufferId, buffer.offset, buffer.size);
2575         };
2576         auto setMipLevel = [](const uint32_t type, const uint32_t mipLevel) {
2577             // either force the defined mip level or use defaults.
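            // Base/max levels 0..1000 are the GL defaults, i.e. the full mip chain is made available again.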
2578             glTexParameteri(type, GL_TEXTURE_BASE_LEVEL,
2579                 static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 0U));
2580             glTexParameteri(type, GL_TEXTURE_MAX_LEVEL,
2581                 static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 1000U));
2582         };
2583 
2584 #if (RENDER_VALIDATION_ENABLED == 1)
2585         if (resType != r.type) {
2586             PLUGIN_LOG_ONCE_E(
2587                 "backend_desc_type_mismatch_gles", "RENDER_VALIDATION: shader / pipeline descriptor type mismatch");
2588         }
2589 #endif
2590 
2591         for (uint32_t index = 0; index < res.resources.size(); index++) {
2592             const auto& obj = res.resources[index];
2593             for (const auto& id : r.id[index]) {
2594                 const auto binding = index + id;
2595                 if (resType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
2596                     bindSampler(binding, obj.sampler.samplerId);
2597                 } else if ((resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
2598                            (resType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
2599                            (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
2600                     if (resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2601                         bindSampler(binding, obj.sampler.samplerId);
2602                     } else if (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
2603                         bindSampler(binding, 0U);
2604                     }
2605                     if (obj.image.image) {
2606                         auto& dplat = obj.image.image->GetPlatformData();
2607                         bindTexture(binding, dplat);
2608 
2609                         // NOTE: the last setting wins; different mip levels cannot be bound from a single
2610                         // resource.
2611                         // Check and update (if needed) the forced mip level.
2612                         if (dplat.mipLevel != obj.image.mipLevel) {
2613                             // NOTE: we are actually modifying the texture object bound above
2614                             const_cast<GpuImagePlatformDataGL&>(dplat).mipLevel = obj.image.mipLevel;
2615                             setMipLevel(dplat.type, dplat.mipLevel);
2616                         }
2617                     }
2618                 } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
2619                     if (obj.image.image) {
2620                         auto& dplat = obj.image.image->GetPlatformData();
2621                         bindTextureImage(binding, obj.image, dplat);
2622                     }
2623                 } else if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
2624                     bindBuffer(GL_UNIFORM_BUFFER, binding, obj.buffer);
2625                 } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
2626                     bindBuffer(GL_SHADER_STORAGE_BUFFER, binding, obj.buffer);
2627                 }
2628             }
2629         }
2630     }
2631     // mark all bound.
2632     for (auto& b : boundObjects_) {
2633         b.dirty = false;
2634     }
2635 }
2636 
2637 #if (RENDER_PERF_ENABLED == 1)
2638 void RenderBackendGLES::StartFrameTimers(const RenderCommandFrameData& renderCommandFrameData)
2639 {
2640     for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2641         const string_view& debugName = renderCommandContext.debugName;
2642         if (timers_.count(debugName) == 0) { // new timers
2643 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2644             PerfDataSet& perfDataSet = timers_[debugName];
2645             constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2646             perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryGLES(device_, desc));
2647             perfDataSet.counter = 0u;
2648 #else
2649             timers_.insert({ debugName, {} });
2650 #endif
2651         }
2652     }
2653 }
2654 
2655 void RenderBackendGLES::EndFrameTimers()
2656 {
2657     int64_t fullGpuTime = 0;
2658 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2659     // already in micros
2660     fullGpuTime = fullGpuCounter_;
2661     fullGpuCounter_ = 0;
2662 #endif
2663     if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2664             CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2665         globalPerfData) {
2666         CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
2667         perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2668         perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2669         perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2670         perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2671         perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2672         perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
2673     }
2674 }
2675 
2676 void RenderBackendGLES::CopyPerfTimeStamp(const string_view name, PerfDataSet& perfDataSet)
2677 {
2678 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2679     int64_t gpuMicroSeconds = 0;
2680     if (validGpuQueries_) {
2681         GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
2682         PLUGIN_ASSERT(gpuQuery);
2683 
2684         gpuQuery->NextQueryIndex();
2685 
2686         const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
2687         PLUGIN_ASSERT(platData.queryObject);
2688 
2689         GLint disjointOccurred = 0;
2690 #ifdef GL_GPU_DISJOINT_EXT
2691         // Reading GL_GPU_DISJOINT_EXT also clears the disjoint state
2692         glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
2693 #endif
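        // Results are read only after GetCommandBufferingCount() frames have passed, so the query has
        // (most likely) completed and reading the query object will not stall the pipeline.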
2694         if (!disjointOccurred && (++perfDataSet.counter) > device_.GetCommandBufferingCount()) {
2695             GLuint64 gpuNanoSeconds = 0U;
2696 #ifdef GL_GPU_DISJOINT_EXT
2697             glGetQueryObjectui64vEXT(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2698 #else
2699             glGetQueryObjectui64v(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2700 #endif
2701             static constexpr uint64_t NANOSECONDS_TO_MICROSECONDS = 1000;
2702             gpuMicroSeconds = static_cast<int64_t>(gpuNanoSeconds / NANOSECONDS_TO_MICROSECONDS);
2703             if (gpuMicroSeconds > UINT32_MAX) {
2704                 gpuMicroSeconds = 0;
2705             }
2706             fullGpuCounter_ += gpuMicroSeconds;
2707         } else if (disjointOccurred) {
2708             PLUGIN_LOG_V("GL_GPU_DISJOINT_EXT disjoint occurred.");
2709         }
2710     }
2711 #endif
2712     const int64_t cpuMicroSeconds = perfDataSet.cpuTimer.GetMicroseconds();
2713 
2714     if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2715             CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2716         globalPerfData) {
2717         CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
2718 
2719         perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
2720 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2721         perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
2722 #endif
2723         perfData->UpdateData(name, "Backend_Count_Triangle", perfCounters_.triangleCount);
2724         perfData->UpdateData(name, "Backend_Count_InstanceCount", perfCounters_.instanceCount);
2725         perfData->UpdateData(name, "Backend_Count_Draw", perfCounters_.drawCount);
2726         perfData->UpdateData(name, "Backend_Count_DrawIndirect", perfCounters_.drawIndirectCount);
2727         perfData->UpdateData(name, "Backend_Count_Dispatch", perfCounters_.dispatchCount);
2728         perfData->UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters_.dispatchIndirectCount);
2729         perfData->UpdateData(name, "Backend_Count_RenderPass", perfCounters_.renderPassCount);
2730         perfData->UpdateData(name, "Backend_Count_BindProgram", perfCounters_.bindProgram);
2731         perfData->UpdateData(name, "Backend_Count_BindSample", perfCounters_.bindSampler);
2732         perfData->UpdateData(name, "Backend_Count_BindTexture", perfCounters_.bindTexture);
2733         perfData->UpdateData(name, "Backend_Count_BindBuffer", perfCounters_.bindBuffer);
2734     }
2735 }
2736 #endif
2737 
2738 void RenderBackendGLES::PrimeDepthStencilState(const GraphicsState& graphicsState)
2739 {
2740     auto& cDepth = cacheState_.depthStencilState;
2741     cDepth = graphicsState.depthStencilState;
2742     // CORE_DYNAMIC_STATE_DEPTH_BOUNDS NOT SUPPORTED ON GLES. (and not implemented on GL either)
2743     SetState(GL_DEPTH_TEST, cDepth.enableDepthTest);
2744     SetState(GL_STENCIL_TEST, cDepth.enableStencilTest);
2745     glDepthFunc(GetCompareOp(cDepth.depthCompareOp));
2746     glDepthMask((cDepth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE)));
2747     const uint32_t updateAllFlags =
2748         (StencilSetFlags::SETOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETCOMPAREOP |
2749             StencilSetFlags::SETREFERENCE | StencilSetFlags::SETWRITEMASK);
2750     SetStencilState(updateAllFlags, cDepth.frontStencilOpState, updateAllFlags, cDepth.backStencilOpState);
2751 }
2752 
2753 void RenderBackendGLES::PrimeBlendState(const GraphicsState& graphicsState)
2754 {
2755     auto& cBlend = cacheState_.colorBlendState;
2756     cBlend = graphicsState.colorBlendState;
2757     glBlendColor(cBlend.colorBlendConstants[Gles::RED_INDEX], cBlend.colorBlendConstants[Gles::GREEN_INDEX],
2758         cBlend.colorBlendConstants[Gles::BLUE_INDEX], cBlend.colorBlendConstants[Gles::ALPHA_INDEX]);
2759     GLuint maxColorAttachments;
2760     glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS, (GLint*)&maxColorAttachments);
2761     maxColorAttachments = BASE_NS::Math::min(PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT, maxColorAttachments);
2762     for (GLuint i = 0; i < maxColorAttachments; i++) {
2763         const auto& cBlendState = cBlend.colorAttachments[i];
2764         glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2765             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2766             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2767             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2768         if (cBlendState.enableBlend) {
2769             glEnablei(GL_BLEND, i);
2770         } else {
2771             glDisablei(GL_BLEND, i);
2772         }
2773         glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
2774             GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
2775             GetBlendFactor(cBlendState.dstAlphaBlendFactor));
2776         glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
2777     }
2778     // logicops are unsupported on GLES
2779 }
2780 
2781 void RenderBackendGLES::PrimeCache(const GraphicsState& graphicsState) // Forces the graphics state..
2782 {
2783     if (cachePrimed_) {
2784         return;
2785     }
2786     cachePrimed_ = true;
2787     /// GRAPHICSSTATE     inputAssembly
2788     const auto& ia = graphicsState.inputAssembly;
2789     auto& cia = cacheState_.inputAssembly;
2790     cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
2791     SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
2792     topology_ = ia.primitiveTopology;
2793     /// GRAPHICSSTATE     rasterizationState
2794     const auto& rs = graphicsState.rasterizationState;
2795     auto& crs = cacheState_.rasterizationState;
2796     // saved, since non-fill polygon modes have to be worked around (lines may need shader help...)
2797     polygonMode_ = rs.polygonMode;
2798     // GL_DEPTH_CLAMP,rs.enableDepthClamp NOT SUPPORTED    CHECK GLES 3.2
2799     crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
2800     SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
2801     crs.enableDepthBias = rs.enableDepthBias;
2802     SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
2803     crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
2804     crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
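    // glPolygonOffset(factor, units): Vulkan's depthBiasSlopeFactor maps to 'factor' and
    // depthBiasConstantFactor to 'units' (the latter is scaled by an implementation-specific offset).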
2805     glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
2806     // depthBiasClamp NOT SUPPORTED! CHECK GLES 3.2
2807     // If cull mode Flags change...
2808     crs.cullModeFlags = rs.cullModeFlags;
2809     SetCullMode(crs);
2810     crs.frontFace = rs.frontFace;
2811     SetFrontFace(crs);
2812     crs.lineWidth = rs.lineWidth;
2813     glLineWidth(rs.lineWidth);
2814     PrimeDepthStencilState(graphicsState);
2815     PrimeBlendState(graphicsState);
2816 }
2817 
2818 void RenderBackendGLES::UpdateDepthState(const GraphicsState& graphicsState)
2819 {
2820     const auto& depth = graphicsState.depthStencilState;
2821     auto& cDepth = cacheState_.depthStencilState;
2822     if (depth.enableDepthTest != cDepth.enableDepthTest) {
2823         cDepth.enableDepthTest = depth.enableDepthTest;
2824         SetState(GL_DEPTH_TEST, depth.enableDepthTest);
2825     }
2826     if (depth.depthCompareOp != cDepth.depthCompareOp) {
2827         cDepth.depthCompareOp = depth.depthCompareOp;
2828         glDepthFunc(GetCompareOp(depth.depthCompareOp));
2829     }
2830     if (depth.enableDepthWrite != cDepth.enableDepthWrite) {
2831         cDepth.enableDepthWrite = depth.enableDepthWrite;
2832         glDepthMask((depth.enableDepthWrite == GL_TRUE));
2833     }
2834     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BOUNDS)) {
2835         // CORE_DYNAMIC_STATE_DEPTH_BOUNDS not supported on GLES.
2836     }
2837 }
2838 
2839 void RenderBackendGLES::UpdateStencilState(const GraphicsState& graphicsState)
2840 {
2841     const auto& depth = graphicsState.depthStencilState;
2842     auto& cDepth = cacheState_.depthStencilState;
2843     if (depth.enableStencilTest != cDepth.enableStencilTest) {
2844         cDepth.enableStencilTest = depth.enableStencilTest;
2845         SetState(GL_STENCIL_TEST, depth.enableStencilTest);
2846     }
2847     uint32_t setFront = 0;
2848     uint32_t setBack = 0;
2849     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_REFERENCE)) {
2850         if (cDepth.frontStencilOpState.reference != depth.frontStencilOpState.reference) {
2851             setFront |= StencilSetFlags::SETREFERENCE;
2852         }
2853         if (cDepth.backStencilOpState.reference != depth.backStencilOpState.reference) {
2854             setBack |= StencilSetFlags::SETREFERENCE;
2855         }
2856     }
2857     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
2858         if (cDepth.frontStencilOpState.compareMask != depth.frontStencilOpState.compareMask) {
2859             setFront |= StencilSetFlags::SETCOMPAREMASK;
2860         }
2861         if (cDepth.backStencilOpState.compareMask != depth.backStencilOpState.compareMask) {
2862             setBack |= StencilSetFlags::SETCOMPAREMASK;
2863         }
2864     }
2865     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
2866         if (cDepth.frontStencilOpState.writeMask != depth.frontStencilOpState.writeMask) {
2867             setFront |= StencilSetFlags::SETWRITEMASK;
2868         }
2869         if (cDepth.backStencilOpState.writeMask != depth.backStencilOpState.writeMask) {
2870             setBack |= StencilSetFlags::SETWRITEMASK;
2871         }
2872     }
2873     if (cDepth.frontStencilOpState.compareOp != depth.frontStencilOpState.compareOp) {
2874         setFront |= StencilSetFlags::SETCOMPAREOP;
2875     }
2876     if (cDepth.backStencilOpState.compareOp != depth.backStencilOpState.compareOp) {
2877         setBack |= StencilSetFlags::SETCOMPAREOP;
2878     }
2879     if (!CompareStencilOp(cDepth.frontStencilOpState, depth.frontStencilOpState)) {
2880         setFront |= StencilSetFlags::SETOP;
2881     }
2882     if (!CompareStencilOp(cDepth.backStencilOpState, depth.backStencilOpState)) {
2883         setBack |= StencilSetFlags::SETOP;
2884     }
2885     SetStencilState(setFront, depth.frontStencilOpState, setBack, depth.backStencilOpState);
2886 }
2887 
2888 void RenderBackendGLES::UpdateDepthStencilState(const GraphicsState& graphicsState)
2889 {
2890     UpdateDepthState(graphicsState);
2891     UpdateStencilState(graphicsState);
2892 }
2893 
2894 void RenderBackendGLES::UpdateBlendState(const GraphicsState& graphicsState)
2895 {
2896     const auto& blend = graphicsState.colorBlendState;
2897     auto& cBlend = cacheState_.colorBlendState;
2898     for (GLuint i = 0; i < blend.colorAttachmentCount; i++) {
2899         const auto& blendState = blend.colorAttachments[i];
2900         auto& cBlendState = cBlend.colorAttachments[i];
2901         if (blendState.colorWriteMask != cBlendState.colorWriteMask) {
2902             cBlendState.colorWriteMask = blendState.colorWriteMask;
2903             glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2904                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2905                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2906                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2907         }
2908 
2909         // Check if blend state has changed
2910         bool factorsChanged = false;
2911         bool opsChanged = false;
2912 
2913         if (blendState.enableBlend) {
2914             factorsChanged = !CompareBlendFactors(cBlendState, blendState);
2915             opsChanged = !CompareBlendOps(cBlendState, blendState);
2916         }
2917 
2918         if (blendState.enableBlend != cBlendState.enableBlend || factorsChanged || opsChanged) {
2919             cBlendState.enableBlend = blendState.enableBlend;
2920             if (blendState.enableBlend) {
2921                 glEnablei(GL_BLEND, i);
2922                 if (factorsChanged) {
2923                     SetBlendFactors(cBlendState, blendState);
2924                     glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
2925                         GetBlendFactor(cBlendState.dstColorBlendFactor),
2926                         GetBlendFactor(cBlendState.srcAlphaBlendFactor),
2927                         GetBlendFactor(cBlendState.dstAlphaBlendFactor));
2928                 }
2929                 if (opsChanged) {
2930                     SetBlendOps(cBlendState, blendState);
2931                     glBlendEquationSeparatei(
2932                         i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
2933                 }
2934             } else {
2935                 glDisablei(GL_BLEND, i);
2936             }
2937         }
2938     }
2939     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_BLEND_CONSTANTS)) {
2940         if (!Compare(cBlend.colorBlendConstants, blend.colorBlendConstants)) {
2941             Set(cBlend.colorBlendConstants, blend.colorBlendConstants);
2942             glBlendColor(blend.colorBlendConstants[Gles::RED_INDEX], blend.colorBlendConstants[Gles::GREEN_INDEX],
2943                 blend.colorBlendConstants[Gles::BLUE_INDEX], blend.colorBlendConstants[Gles::ALPHA_INDEX]);
2944         }
2945     }
2946     // logicOps in blend not supported on GLES
2947 }
2948 
2949 void RenderBackendGLES::UpdateRasterizationState(const GraphicsState& graphicsState)
2950 {
2951     const auto& rs = graphicsState.rasterizationState;
2952     auto& crs = cacheState_.rasterizationState;
2953     // saved, since non-fill polygon modes have to be worked around (lines may need shader help...)
2954     polygonMode_ = rs.polygonMode;
2955 #if RENDER_HAS_GL_BACKEND
2956     if (rs.polygonMode != crs.polygonMode) {
2957         crs.polygonMode = rs.polygonMode;
2958         SetPolygonMode(rs);
2959     }
2960 #endif
2961     if (rs.enableDepthClamp != crs.enableDepthClamp) {
2962         crs.enableDepthClamp = rs.enableDepthClamp;
2963         // NOT SUPPORTED    (needs an extension)
2964     }
2965     if (rs.enableRasterizerDiscard != crs.enableRasterizerDiscard) {
2966         crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
2967         SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
2968     }
2969     if (rs.enableDepthBias != crs.enableDepthBias) {
2970         crs.enableDepthBias = rs.enableDepthBias;
2971         SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
2972     }
2973     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BIAS)) {
2974         if ((rs.depthBiasConstantFactor != crs.depthBiasConstantFactor) ||
2975             (rs.depthBiasSlopeFactor != crs.depthBiasSlopeFactor)) {
2976             crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
2977             crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
2978             glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
2979         }
2980         // depthBiasClamp NOT SUPPORTED    (needs an extension)
2981     }
2982     // If cull mode Flags change...
2983     if (rs.cullModeFlags != crs.cullModeFlags) {
2984         crs.cullModeFlags = rs.cullModeFlags;
2985         SetCullMode(crs);
2986     }
2987     auto frontFace = rs.frontFace;
2988     if (!renderingToDefaultFbo_) {
2989         // Flip winding when not rendering to the default FBO.
2990         if (frontFace == FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE) {
2991             frontFace = FrontFace::CORE_FRONT_FACE_CLOCKWISE;
2992         } else if (frontFace == FrontFace::CORE_FRONT_FACE_CLOCKWISE) {
2993             frontFace = FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE;
2994         }
2995     }
2996     if (frontFace != crs.frontFace) {
2997         crs.frontFace = frontFace;
2998         SetFrontFace(crs);
2999     }
3000     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_LINE_WIDTH)) {
3001         if (rs.lineWidth != crs.lineWidth) {
3002             crs.lineWidth = rs.lineWidth;
3003             glLineWidth(rs.lineWidth);
3004         }
3005     }
3006 }
3007 
3008 void RenderBackendGLES::DoGraphicsState(const GraphicsState& graphicsState)
3009 {
3010     /// GRAPHICSSTATE     inputAssembly
3011     const auto& ia = graphicsState.inputAssembly;
3012     if (ia.enablePrimitiveRestart != cacheState_.inputAssembly.enablePrimitiveRestart) {
3013         auto& cia = cacheState_.inputAssembly;
3014         cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
3015         SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
3016     }
3017     topology_ = ia.primitiveTopology;
3018     UpdateRasterizationState(graphicsState);
3019     UpdateDepthStencilState(graphicsState);
3020     UpdateBlendState(graphicsState);
3021 }
3022 
3023 void RenderBackendGLES::SetViewport(const RenderPassDesc::RenderArea& ra, const ViewportDesc& vd)
3024 {
3025     // NOTE: viewportdesc is in floats?!?
3026     bool forceV = false;
3027     bool forceD = false;
3028     if (!viewportPrimed_) {
3029         viewportPrimed_ = true;
3030         forceV = true;
3031         forceD = true;
3032     }
3033     if ((vd.x != viewport_.x) || (vd.y != viewport_.y) || (vd.width != viewport_.width) ||
3034         (vd.height != viewport_.height)) {
3035         forceV = true;
3036     }
3037     if ((vd.minDepth != viewport_.minDepth) || (vd.maxDepth != viewport_.maxDepth)) {
3038         forceD = true;
3039     }
3040 
3041     if (forceV) {
3042         viewport_.x = vd.x;
3043         viewport_.y = vd.y;
3044         viewport_.width = vd.width;
3045         viewport_.height = vd.height;
3046         viewportUpdated_ = true;
3047     }
3048     if (forceD) {
3049         viewport_.minDepth = vd.minDepth;
3050         viewport_.maxDepth = vd.maxDepth;
3051         viewportDepthRangeUpdated_ = true;
3052     }
3053 }
3054 
3055 void RenderBackendGLES::SetScissor(const RenderPassDesc::RenderArea& ra, const ScissorDesc& sd)
3056 {
3057     // NOTE: scissordesc is in floats?!?
3058     bool force = false;
3059     if (!scissorPrimed_) {
3060         scissorPrimed_ = true;
3061         force = true;
3062     }
3063     if ((sd.offsetX != scissorBox_.offsetX) || (sd.offsetY != scissorBox_.offsetY) ||
3064         (sd.extentWidth != scissorBox_.extentWidth) || (sd.extentHeight != scissorBox_.extentHeight)) {
3065         force = true;
3066     }
3067     if (force) {
3068         scissorBox_ = sd;
3069         scissorBoxUpdated_ = true;
3070     }
3071 }
3072 
3073 void RenderBackendGLES::FlushViewportScissors()
3074 {
3075     if (!currentFrameBuffer_) {
3076         return;
3077     }
3078     bool force = false;
3079     if (scissorViewportSetDefaultFbo_ != renderingToDefaultFbo_) {
3080         force = true;
3081         scissorViewportSetDefaultFbo_ = renderingToDefaultFbo_;
3082     }
3083     if ((viewportUpdated_) || (force)) {
3084         viewportUpdated_ = false;
3085         // Handle top-left / bottom-left origin conversion
3086         PLUGIN_ASSERT(currentFrameBuffer_);
3087         GLint y = static_cast<GLint>(viewport_.y);
3088         const GLsizei h = static_cast<GLsizei>(viewport_.height);
3089         if (renderingToDefaultFbo_) {
3090             const GLsizei fh = static_cast<GLsizei>(currentFrameBuffer_->height);
3091             y = fh - (y + h);
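            // e.g. with a 600 px tall default framebuffer, a viewport at y = 0 with height 100 becomes
            // y = 600 - (0 + 100) = 500, converting a top-left origin to GL's bottom-left origin.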
3092         }
3093         glViewport(static_cast<GLint>(viewport_.x), y, static_cast<GLsizei>(viewport_.width), h);
3094     }
3095     if ((scissorBoxUpdated_) || (force)) {
3096         scissorBoxUpdated_ = false;
3097         // Handle top-left / bottom-left origin conversion
3098         GLint y = static_cast<GLint>(scissorBox_.offsetY);
3099         const GLsizei h = static_cast<GLsizei>(scissorBox_.extentHeight);
3100         if (renderingToDefaultFbo_) {
3101             const GLsizei fh = static_cast<GLsizei>(currentFrameBuffer_->height);
3102             y = fh - (y + h);
3103         }
3104         glScissor(static_cast<GLint>(scissorBox_.offsetX), y, static_cast<GLsizei>(scissorBox_.extentWidth), h);
3105     }
3106     if (viewportDepthRangeUpdated_) {
3107         viewportDepthRangeUpdated_ = false;
3108         glDepthRangef(viewport_.minDepth, viewport_.maxDepth);
3109     }
3110 }
3111 RENDER_END_NAMESPACE()
3112