1 /*
2  * Copyright (c) 2020-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef GRAPHIC_LITE_GRAPHIC_NEON_PIPELINE_H
17 #define GRAPHIC_LITE_GRAPHIC_NEON_PIPELINE_H
18 
19 #include "graphic_config.h"
20 #ifdef ARM_NEON_OPT
21 #include <arm_neon.h>
22 
23 #include "gfx_utils/color.h"
24 #include "graphic_neon_utils.h"
25 
26 namespace OHOS {
27 using LoadBuf = void (*)(uint8_t* buf, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a);
28 using LoadBufA = void (*)(uint8_t* buf, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a, uint8_t opa);
29 using NeonBlend = void (*)(uint8x8_t& r1, uint8x8_t& g1, uint8x8_t& b1, uint8x8_t& a1,
30                            uint8x8_t r2, uint8x8_t g2, uint8x8_t b2, uint8x8_t a2);
31 using StoreBuf = void (*)(uint8_t* buf, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a);
32 
33 struct {
34     ColorMode dm;
35     LoadBuf loadDstFunc;
36     NeonBlend blendFunc;
37     StoreBuf storeDstFunc;
38 }
39 g_dstFunc[] = {
40     {ARGB8888, LoadBuf_ARGB8888, NeonBlendRGBA, StoreBuf_ARGB8888},
41     {XRGB8888, LoadBuf_XRGB8888, NeonBlendXRGB, StoreBuf_XRGB8888},
42     {RGB888, LoadBuf_RGB888, NeonBlendRGB, StoreBuf_RGB888},
43     {RGB565, LoadBuf_RGB565, NeonBlendRGB, StoreBuf_RGB565}
44 };
45 
46 struct {
47     ColorMode sm;
48     LoadBufA loadSrcFunc;
49 }
50 g_srcFunc[] = {
51     {ARGB8888, LoadBufA_ARGB8888},
52     {XRGB8888, LoadBufA_XRGB8888},
53     {RGB888, LoadBufA_RGB888},
54     {RGB565, LoadBufA_RGB565}
55 };
56 
57 class NeonBlendPipeLine {
58 public:
NeonBlendPipeLine()59     NeonBlendPipeLine() {}
~NeonBlendPipeLine()60     ~NeonBlendPipeLine() {}
61 
62     void Construct(ColorMode dm, ColorMode sm, void* srcColor = nullptr, uint8_t opa = OPA_OPAQUE)
63     {
64         int16_t dstNum = sizeof(g_dstFunc) / sizeof(g_dstFunc[0]);
65         for (int16_t i = 0; i < dstNum; ++i) {
66             if (g_dstFunc[i].dm == dm) {
67                 loadDstFunc_ = g_dstFunc[i].loadDstFunc;
68                 blendFunc_ = g_dstFunc[i].blendFunc;
69                 storeDstFunc_ = g_dstFunc[i].storeDstFunc;
70                 break;
71             }
72         }
73         int16_t srcNum = sizeof(g_srcFunc) / sizeof(g_srcFunc[0]);
74         for (int16_t i = 0; i < srcNum; ++i) {
75             if (g_srcFunc[i].sm == sm) {
76                 loadSrcFunc_ = g_srcFunc[i].loadSrcFunc;
77                 break;
78             }
79         }
80         if (srcColor != nullptr) {
81             ConstructSrcColor(sm, srcColor, opa, r2_, g2_, b2_, a2_);
82         }
83     }
84 
Invoke(uint8_t * dst,uint8_t * src,uint8_t opa)85     void Invoke(uint8_t* dst, uint8_t* src, uint8_t opa)
86     {
87         loadDstFunc_(dst, r1_, g1_, b1_, a1_);
88         loadSrcFunc_(src, r2_, g2_, b2_, a2_, opa);
89         blendFunc_(r1_, g1_, b1_, a1_, r2_, g2_, b2_, a2_);
90         storeDstFunc_(dst, r1_, g1_, b1_, a1_);
91     }
92 
Invoke(uint8_t * dst)93     void Invoke(uint8_t* dst)
94     {
95         loadDstFunc_(dst, r1_, g1_, b1_, a1_);
96         blendFunc_(r1_, g1_, b1_, a1_, r2_, g2_, b2_, a2_);
97         storeDstFunc_(dst, r1_, g1_, b1_, a1_);
98     }
99 
Invoke(uint8_t * dst,uint8x8_t & r,uint8x8_t & g,uint8x8_t & b,uint8x8_t & a)100     void Invoke(uint8_t* dst, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a)
101     {
102         loadDstFunc_(dst, r1_, g1_, b1_, a1_);
103         blendFunc_(r1_, g1_, b1_, a1_, r, g, b, a);
104         storeDstFunc_(dst, r1_, g1_, b1_, a1_);
105     }
106 
NeonPreLerpARGB8888(uint8_t * buf,uint8_t r,uint8_t g,uint8_t b,uint8_t a,uint8_t * covers)107     void NeonPreLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a, uint8_t* covers)
108     {
109         uint8x8x4_t vBuf = vld4_u8(buf);
110         uint8x8_t r0 = vBuf.val[NEON_R];
111         uint8x8_t g0 = vBuf.val[NEON_G];
112         uint8x8_t b0 = vBuf.val[NEON_B];
113         uint8x8_t a0 = vBuf.val[NEON_A];
114 
115         uint8x8_t r1 = Multipling(vdup_n_u8(r), vld1_u8(covers));
116         uint8x8_t g1 = Multipling(vdup_n_u8(g), vld1_u8(covers));
117         uint8x8_t b1 = Multipling(vdup_n_u8(b), vld1_u8(covers));
118         uint8x8_t a1 = Multipling(vdup_n_u8(a), vld1_u8(covers));
119 
120         uint8x8_t rs = NeonLerp(r0, r1, a1);
121         uint8x8_t gs = NeonLerp(g0, g1, a1);
122         uint8x8_t bs = NeonLerp(b0, b1, a1);
123         uint8x8_t as = NeonPreLerp(a0, a1, a1);
124 
125         StoreBuf_ARGB8888(buf, rs, gs, bs, as);
126     }
NeonPrelerpARGB8888(uint8_t * buf,uint8_t red,uint8_t green,uint8_t blue,uint8_t alpha)127     void NeonPrelerpARGB8888(uint8_t* buf, uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha)
128     {
129         uint8x8x4_t vBuf = vld4_u8(buf);
130         uint8x8_t r0 = vBuf.val[NEON_R];
131         uint8x8_t g0 = vBuf.val[NEON_G];
132         uint8x8_t b0 = vBuf.val[NEON_B];
133         uint8x8_t a0 = vBuf.val[NEON_A];
134 
135         uint8x8_t r1 = vdup_n_u8(red);
136         uint8x8_t g1 = vdup_n_u8(green);
137         uint8x8_t b1 = vdup_n_u8(blue);
138         uint8x8_t a1 = vdup_n_u8(alpha);
139 
140         uint8x8_t rs = NeonPreLerp(r0, r1, a1);
141         uint8x8_t gs = NeonPreLerp(g0, g1, a1);
142         uint8x8_t bs = NeonPreLerp(b0, b1, a1);
143         uint8x8_t as = NeonPreLerp(a0, a1, a1);
144 
145         StoreBuf_ARGB8888(buf, rs, gs, bs, as);
146     }
147 
NeonPrelerpARGB8888(uint8_t * buf,uint8_t red,uint8_t green,uint8_t blue,uint8_t alpha,uint8_t cover)148     void NeonPrelerpARGB8888(uint8_t* buf, uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha, uint8_t cover)
149     {
150         uint8x8x4_t vBuf = vld4_u8(buf);
151         uint8x8_t r0 = vBuf.val[NEON_R];
152         uint8x8_t g0 = vBuf.val[NEON_G];
153         uint8x8_t b0 = vBuf.val[NEON_B];
154         uint8x8_t a0 = vBuf.val[NEON_A];
155 
156         uint8x8_t r1 = Multipling(vdup_n_u8(red), vdup_n_u8(cover));
157         uint8x8_t g1 = Multipling(vdup_n_u8(green), vdup_n_u8(cover));
158         uint8x8_t b1 = Multipling(vdup_n_u8(blue), vdup_n_u8(cover));
159         uint8x8_t a1 = Multipling(vdup_n_u8(alpha), vdup_n_u8(cover));
160 
161         uint8x8_t rs = NeonPreLerp(r0, r1, a1);
162         uint8x8_t gs = NeonPreLerp(g0, g1, a1);
163         uint8x8_t bs = NeonPreLerp(b0, b1, a1);
164         uint8x8_t as = NeonPreLerp(a0, a1, a1);
165 
166         StoreBuf_ARGB8888(buf, rs, gs, bs, as);
167     }
168 
NeonPrelerpARGB8888(uint8_t * dstBuffer,uint8_t * srcBuffer,uint8_t cover)169     void NeonPrelerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t cover)
170     {
171         uint8x8x4_t vDstBuf = vld4_u8(dstBuffer);
172         uint8x8_t r0 = vDstBuf.val[NEON_R];
173         uint8x8_t g0 = vDstBuf.val[NEON_G];
174         uint8x8_t b0 = vDstBuf.val[NEON_B];
175         uint8x8_t a0 = vDstBuf.val[NEON_A];
176         uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer);
177         uint8x8_t r1 = vSrcBuf.val[NEON_R];
178         uint8x8_t g1 = vSrcBuf.val[NEON_G];
179         uint8x8_t b1 = vSrcBuf.val[NEON_B];
180         uint8x8_t a1 = vSrcBuf.val[NEON_A];
181 
182         r1 = Multipling(r1, vdup_n_u8(cover));
183         g1 = Multipling(g1, vdup_n_u8(cover));
184         b1 = Multipling(b1, vdup_n_u8(cover));
185         a1 = Multipling(a1, vdup_n_u8(cover));
186 
187         uint8x8_t rs = NeonPreLerp(r0, r1, a1);
188         uint8x8_t gs = NeonPreLerp(g0, g1, a1);
189         uint8x8_t bs = NeonPreLerp(b0, b1, a1);
190         uint8x8_t as = NeonPreLerp(a0, a1, a1);
191 
192         StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as);
193     }
194 
NeonPrelerpARGB8888(uint8_t * dstBuffer,uint8_t * srcBuffer,uint8_t * covers)195     void NeonPrelerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t* covers)
196     {
197         uint8x8x4_t vDstBuf = vld4_u8(dstBuffer);
198         uint8x8_t r0 = vDstBuf.val[NEON_R];
199         uint8x8_t g0 = vDstBuf.val[NEON_G];
200         uint8x8_t b0 = vDstBuf.val[NEON_B];
201         uint8x8_t a0 = vDstBuf.val[NEON_A];
202 
203         uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer);
204 
205         uint8x8_t r1 = Multipling(vSrcBuf.val[NEON_R], vld1_u8(covers));
206         uint8x8_t g1 = Multipling(vSrcBuf.val[NEON_G], vld1_u8(covers));
207         uint8x8_t b1 = Multipling(vSrcBuf.val[NEON_B], vld1_u8(covers));
208         uint8x8_t a1 = Multipling(vSrcBuf.val[NEON_A], vld1_u8(covers));
209 
210         uint8x8_t rs = NeonPreLerp(r0, r1, a1);
211         uint8x8_t gs = NeonPreLerp(g0, g1, a1);
212         uint8x8_t bs = NeonPreLerp(b0, b1, a1);
213         uint8x8_t as = NeonPreLerp(a0, a1, a1);
214 
215         StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as);
216     }
NeonLerpARGB8888(uint8_t * buf,uint8_t r,uint8_t g,uint8_t b,uint8_t a,uint8_t * covers)217     void NeonLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a,
218                            uint8_t* covers)
219     {
220         uint8x8x4_t vBuf = vld4_u8(buf);
221         uint8x8_t r0 = vBuf.val[NEON_R];
222         uint8x8_t g0 = vBuf.val[NEON_G];
223         uint8x8_t b0 = vBuf.val[NEON_B];
224         uint8x8_t a0 = vBuf.val[NEON_A];
225 
226         uint8x8_t r1 = Multipling(vdup_n_u8(r), vld1_u8(covers));
227         uint8x8_t g1 = Multipling(vdup_n_u8(g), vld1_u8(covers));
228         uint8x8_t b1 = Multipling(vdup_n_u8(b), vld1_u8(covers));
229         uint8x8_t a1 = Multipling(vdup_n_u8(a), vld1_u8(covers));
230 
231         uint8x8_t rs = NeonLerp(r0, r1, a1);
232         uint8x8_t gs = NeonLerp(g0, g1, a1);
233         uint8x8_t bs = NeonLerp(b0, b1, a1);
234         uint8x8_t as = NeonPreLerp(a0, a1, a1);
235 
236         StoreBuf_ARGB8888(buf, rs, gs, bs, as);
237     }
NeonLerpARGB8888(uint8_t * buf,uint8_t r,uint8_t g,uint8_t b,uint8_t a)238     void NeonLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
239     {
240         uint8x8x4_t vBuf = vld4_u8(buf);
241         uint8x8_t r0 = vBuf.val[NEON_R];
242         uint8x8_t g0 = vBuf.val[NEON_G];
243         uint8x8_t b0 = vBuf.val[NEON_B];
244         uint8x8_t a0 = vBuf.val[NEON_A];
245 
246         uint8x8_t r1 = vdup_n_u8(r);
247         uint8x8_t g1 = vdup_n_u8(g);
248         uint8x8_t b1 = vdup_n_u8(b);
249         uint8x8_t a1 = vdup_n_u8(a);
250 
251         uint8x8_t rs = NeonLerp(r0, r1, a1);
252         uint8x8_t gs = NeonLerp(g0, g1, a1);
253         uint8x8_t bs = NeonLerp(b0, b1, a1);
254         uint8x8_t as = NeonPreLerp(a0, a1, a1);
255 
256         StoreBuf_ARGB8888(buf, rs, gs, bs, as);
257     }
258 
NeonLerpARGB8888(uint8_t * buf,uint8_t r,uint8_t g,uint8_t b,uint8_t a,uint8_t cover)259     void NeonLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a, uint8_t cover)
260     {
261         uint8x8x4_t vBuf = vld4_u8(buf);
262         uint8x8_t r0 = vBuf.val[NEON_R];
263         uint8x8_t g0 = vBuf.val[NEON_G];
264         uint8x8_t b0 = vBuf.val[NEON_B];
265         uint8x8_t a0 = vBuf.val[NEON_A];
266 
267         uint8x8_t r1 = Multipling(vdup_n_u8(r), vdup_n_u8(cover));
268         uint8x8_t g1 = Multipling(vdup_n_u8(g), vdup_n_u8(cover));
269         uint8x8_t b1 = Multipling(vdup_n_u8(b), vdup_n_u8(cover));
270         uint8x8_t a1 = Multipling(vdup_n_u8(a), vdup_n_u8(cover));
271 
272         uint8x8_t rs = NeonLerp(r0, r1, a1);
273         uint8x8_t gs = NeonLerp(g0, g1, a1);
274         uint8x8_t bs = NeonLerp(b0, b1, a1);
275         uint8x8_t as = NeonPreLerp(a0, a1, a1);
276 
277         StoreBuf_ARGB8888(buf, rs, gs, bs, as);
278     }
279 
NeonLerpARGB8888(uint8_t * dstBuffer,uint8_t * srcBuffer,uint8_t cover)280     void NeonLerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t cover)
281     {
282         uint8x8x4_t vDstBuf = vld4_u8(dstBuffer);
283         uint8x8_t r0 = vDstBuf.val[NEON_R];
284         uint8x8_t g0 = vDstBuf.val[NEON_G];
285         uint8x8_t b0 = vDstBuf.val[NEON_B];
286         uint8x8_t a0 = vDstBuf.val[NEON_A];
287         uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer);
288         uint8x8_t r1 = vSrcBuf.val[NEON_R];
289         uint8x8_t g1 = vSrcBuf.val[NEON_G];
290         uint8x8_t b1 = vSrcBuf.val[NEON_B];
291         uint8x8_t a1 = vSrcBuf.val[NEON_A];
292 
293         r1 = Multipling(r1, vdup_n_u8(cover));
294         g1 = Multipling(g1, vdup_n_u8(cover));
295         b1 = Multipling(b1, vdup_n_u8(cover));
296         a1 = Multipling(a1, vdup_n_u8(cover));
297 
298         uint8x8_t rs = NeonLerp(r0, r1, a1);
299         uint8x8_t gs = NeonLerp(g0, g1, a1);
300         uint8x8_t bs = NeonLerp(b0, b1, a1);
301         uint8x8_t as = NeonPreLerp(a0, a1, a1);
302 
303         StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as);
304     }
305 
NeonLerpARGB8888(uint8_t * dstBuffer,uint8_t * srcBuffer,uint8_t * covers)306     void NeonLerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t* covers)
307     {
308         uint8x8x4_t vDstBuf = vld4_u8(dstBuffer);
309         uint8x8_t r0 = vDstBuf.val[NEON_R];
310         uint8x8_t g0 = vDstBuf.val[NEON_G];
311         uint8x8_t b0 = vDstBuf.val[NEON_B];
312         uint8x8_t a0 = vDstBuf.val[NEON_A];
313 
314         uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer);
315 
316         uint8x8_t r1 = Multipling(vSrcBuf.val[NEON_R], vld1_u8(covers));
317         uint8x8_t g1 = Multipling(vSrcBuf.val[NEON_G], vld1_u8(covers));
318         uint8x8_t b1 = Multipling(vSrcBuf.val[NEON_B], vld1_u8(covers));
319         uint8x8_t a1 = Multipling(vSrcBuf.val[NEON_A], vld1_u8(covers));
320 
321         uint8x8_t rs = NeonLerp(r0, r1, a1);
322         uint8x8_t gs = NeonLerp(g0, g1, a1);
323         uint8x8_t bs = NeonLerp(b0, b1, a1);
324         uint8x8_t as = NeonPreLerp(a0, a1, a1);
325 
326         StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as);
327     }
328 private:
ConstructSrcColor(ColorMode sm,void * srcColor,uint8_t opa,uint8x8_t & r,uint8x8_t & g,uint8x8_t & b,uint8x8_t & a)329     void ConstructSrcColor(ColorMode sm, void* srcColor, uint8_t opa,
330                            uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a)
331     {
332         if (sm == ARGB8888) {
333             Color32* color = reinterpret_cast<Color32*>(srcColor);
334             r = vdup_n_u8(color->red);
335             g = vdup_n_u8(color->green);
336             b = vdup_n_u8(color->blue);
337             a = NeonMulDiv255(vdup_n_u8(opa), vdup_n_u8(color->alpha));
338         } else if (sm == XRGB8888) {
339             Color32* color = reinterpret_cast<Color32*>(srcColor);
340             r = vdup_n_u8(color->red);
341             g = vdup_n_u8(color->green);
342             b = vdup_n_u8(color->blue);
343             a = vdup_n_u8(opa);
344         } else if (sm == RGB888) {
345             Color24* color = reinterpret_cast<Color24*>(srcColor);
346             r = vdup_n_u8(color->red);
347             g = vdup_n_u8(color->green);
348             b = vdup_n_u8(color->blue);
349             a = vdup_n_u8(opa);
350         } else if (sm == RGB565) {
351             Color16* color = reinterpret_cast<Color16*>(srcColor);
352             r = vdup_n_u8(color->red);
353             g = vdup_n_u8(color->green);
354             b = vdup_n_u8(color->blue);
355             a = vdup_n_u8(opa);
356         }
357     }
358 
359     LoadBuf loadDstFunc_ = nullptr;
360     LoadBufA loadSrcFunc_ = nullptr;
361     NeonBlend blendFunc_ = nullptr;
362     StoreBuf storeDstFunc_ = nullptr;
363     uint8x8_t r1_;
364     uint8x8_t g1_;
365     uint8x8_t b1_;
366     uint8x8_t a1_;
367     uint8x8_t r2_;
368     uint8x8_t g2_;
369     uint8x8_t b2_;
370     uint8x8_t a2_;
371 };
372 } // namespace OHOS
373 #endif
374 #endif
375