1 /* 2 * Copyright (c) 2020-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef GRAPHIC_LITE_GRAPHIC_NEON_PIPELINE_H 17 #define GRAPHIC_LITE_GRAPHIC_NEON_PIPELINE_H 18 19 #include "graphic_config.h" 20 #ifdef ARM_NEON_OPT 21 #include <arm_neon.h> 22 23 #include "gfx_utils/color.h" 24 #include "graphic_neon_utils.h" 25 26 namespace OHOS { 27 using LoadBuf = void (*)(uint8_t* buf, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a); 28 using LoadBufA = void (*)(uint8_t* buf, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a, uint8_t opa); 29 using NeonBlend = void (*)(uint8x8_t& r1, uint8x8_t& g1, uint8x8_t& b1, uint8x8_t& a1, 30 uint8x8_t r2, uint8x8_t g2, uint8x8_t b2, uint8x8_t a2); 31 using StoreBuf = void (*)(uint8_t* buf, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a); 32 33 struct { 34 ColorMode dm; 35 LoadBuf loadDstFunc; 36 NeonBlend blendFunc; 37 StoreBuf storeDstFunc; 38 } 39 g_dstFunc[] = { 40 {ARGB8888, LoadBuf_ARGB8888, NeonBlendRGBA, StoreBuf_ARGB8888}, 41 {XRGB8888, LoadBuf_XRGB8888, NeonBlendXRGB, StoreBuf_XRGB8888}, 42 {RGB888, LoadBuf_RGB888, NeonBlendRGB, StoreBuf_RGB888}, 43 {RGB565, LoadBuf_RGB565, NeonBlendRGB, StoreBuf_RGB565} 44 }; 45 46 struct { 47 ColorMode sm; 48 LoadBufA loadSrcFunc; 49 } 50 g_srcFunc[] = { 51 {ARGB8888, LoadBufA_ARGB8888}, 52 {XRGB8888, LoadBufA_XRGB8888}, 53 {RGB888, LoadBufA_RGB888}, 54 {RGB565, LoadBufA_RGB565} 55 }; 56 57 class NeonBlendPipeLine { 58 public: NeonBlendPipeLine()59 NeonBlendPipeLine() {} ~NeonBlendPipeLine()60 ~NeonBlendPipeLine() {} 61 62 void Construct(ColorMode dm, ColorMode sm, void* srcColor = nullptr, uint8_t opa = OPA_OPAQUE) 63 { 64 int16_t dstNum = sizeof(g_dstFunc) / sizeof(g_dstFunc[0]); 65 for (int16_t i = 0; i < dstNum; ++i) { 66 if (g_dstFunc[i].dm == dm) { 67 loadDstFunc_ = g_dstFunc[i].loadDstFunc; 68 blendFunc_ = g_dstFunc[i].blendFunc; 69 storeDstFunc_ = g_dstFunc[i].storeDstFunc; 70 break; 71 } 72 } 73 int16_t srcNum = sizeof(g_srcFunc) / sizeof(g_srcFunc[0]); 74 for (int16_t i = 0; i < srcNum; ++i) { 75 if (g_srcFunc[i].sm == sm) { 76 loadSrcFunc_ = g_srcFunc[i].loadSrcFunc; 77 break; 78 } 79 } 80 if (srcColor != nullptr) { 81 ConstructSrcColor(sm, srcColor, opa, r2_, g2_, b2_, a2_); 82 } 83 } 84 Invoke(uint8_t * dst,uint8_t * src,uint8_t opa)85 void Invoke(uint8_t* dst, uint8_t* src, uint8_t opa) 86 { 87 loadDstFunc_(dst, r1_, g1_, b1_, a1_); 88 loadSrcFunc_(src, r2_, g2_, b2_, a2_, opa); 89 blendFunc_(r1_, g1_, b1_, a1_, r2_, g2_, b2_, a2_); 90 storeDstFunc_(dst, r1_, g1_, b1_, a1_); 91 } 92 Invoke(uint8_t * dst)93 void Invoke(uint8_t* dst) 94 { 95 loadDstFunc_(dst, r1_, g1_, b1_, a1_); 96 blendFunc_(r1_, g1_, b1_, a1_, r2_, g2_, b2_, a2_); 97 storeDstFunc_(dst, r1_, g1_, b1_, a1_); 98 } 99 Invoke(uint8_t * dst,uint8x8_t & r,uint8x8_t & g,uint8x8_t & b,uint8x8_t & a)100 void Invoke(uint8_t* dst, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a) 101 { 102 loadDstFunc_(dst, r1_, g1_, b1_, a1_); 103 blendFunc_(r1_, g1_, b1_, a1_, r, g, b, a); 104 storeDstFunc_(dst, r1_, g1_, b1_, a1_); 105 } 106 NeonPreLerpARGB8888(uint8_t * buf,uint8_t r,uint8_t g,uint8_t b,uint8_t a,uint8_t * covers)107 void NeonPreLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a, uint8_t* covers) 108 { 109 uint8x8x4_t vBuf = vld4_u8(buf); 110 uint8x8_t r0 = vBuf.val[NEON_R]; 111 uint8x8_t g0 = vBuf.val[NEON_G]; 112 uint8x8_t b0 = vBuf.val[NEON_B]; 113 uint8x8_t a0 = vBuf.val[NEON_A]; 114 115 uint8x8_t r1 = Multipling(vdup_n_u8(r), vld1_u8(covers)); 116 uint8x8_t g1 = Multipling(vdup_n_u8(g), vld1_u8(covers)); 117 uint8x8_t b1 = Multipling(vdup_n_u8(b), vld1_u8(covers)); 118 uint8x8_t a1 = Multipling(vdup_n_u8(a), vld1_u8(covers)); 119 120 uint8x8_t rs = NeonLerp(r0, r1, a1); 121 uint8x8_t gs = NeonLerp(g0, g1, a1); 122 uint8x8_t bs = NeonLerp(b0, b1, a1); 123 uint8x8_t as = NeonPreLerp(a0, a1, a1); 124 125 StoreBuf_ARGB8888(buf, rs, gs, bs, as); 126 } NeonPrelerpARGB8888(uint8_t * buf,uint8_t red,uint8_t green,uint8_t blue,uint8_t alpha)127 void NeonPrelerpARGB8888(uint8_t* buf, uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha) 128 { 129 uint8x8x4_t vBuf = vld4_u8(buf); 130 uint8x8_t r0 = vBuf.val[NEON_R]; 131 uint8x8_t g0 = vBuf.val[NEON_G]; 132 uint8x8_t b0 = vBuf.val[NEON_B]; 133 uint8x8_t a0 = vBuf.val[NEON_A]; 134 135 uint8x8_t r1 = vdup_n_u8(red); 136 uint8x8_t g1 = vdup_n_u8(green); 137 uint8x8_t b1 = vdup_n_u8(blue); 138 uint8x8_t a1 = vdup_n_u8(alpha); 139 140 uint8x8_t rs = NeonPreLerp(r0, r1, a1); 141 uint8x8_t gs = NeonPreLerp(g0, g1, a1); 142 uint8x8_t bs = NeonPreLerp(b0, b1, a1); 143 uint8x8_t as = NeonPreLerp(a0, a1, a1); 144 145 StoreBuf_ARGB8888(buf, rs, gs, bs, as); 146 } 147 NeonPrelerpARGB8888(uint8_t * buf,uint8_t red,uint8_t green,uint8_t blue,uint8_t alpha,uint8_t cover)148 void NeonPrelerpARGB8888(uint8_t* buf, uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha, uint8_t cover) 149 { 150 uint8x8x4_t vBuf = vld4_u8(buf); 151 uint8x8_t r0 = vBuf.val[NEON_R]; 152 uint8x8_t g0 = vBuf.val[NEON_G]; 153 uint8x8_t b0 = vBuf.val[NEON_B]; 154 uint8x8_t a0 = vBuf.val[NEON_A]; 155 156 uint8x8_t r1 = Multipling(vdup_n_u8(red), vdup_n_u8(cover)); 157 uint8x8_t g1 = Multipling(vdup_n_u8(green), vdup_n_u8(cover)); 158 uint8x8_t b1 = Multipling(vdup_n_u8(blue), vdup_n_u8(cover)); 159 uint8x8_t a1 = Multipling(vdup_n_u8(alpha), vdup_n_u8(cover)); 160 161 uint8x8_t rs = NeonPreLerp(r0, r1, a1); 162 uint8x8_t gs = NeonPreLerp(g0, g1, a1); 163 uint8x8_t bs = NeonPreLerp(b0, b1, a1); 164 uint8x8_t as = NeonPreLerp(a0, a1, a1); 165 166 StoreBuf_ARGB8888(buf, rs, gs, bs, as); 167 } 168 NeonPrelerpARGB8888(uint8_t * dstBuffer,uint8_t * srcBuffer,uint8_t cover)169 void NeonPrelerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t cover) 170 { 171 uint8x8x4_t vDstBuf = vld4_u8(dstBuffer); 172 uint8x8_t r0 = vDstBuf.val[NEON_R]; 173 uint8x8_t g0 = vDstBuf.val[NEON_G]; 174 uint8x8_t b0 = vDstBuf.val[NEON_B]; 175 uint8x8_t a0 = vDstBuf.val[NEON_A]; 176 uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer); 177 uint8x8_t r1 = vSrcBuf.val[NEON_R]; 178 uint8x8_t g1 = vSrcBuf.val[NEON_G]; 179 uint8x8_t b1 = vSrcBuf.val[NEON_B]; 180 uint8x8_t a1 = vSrcBuf.val[NEON_A]; 181 182 r1 = Multipling(r1, vdup_n_u8(cover)); 183 g1 = Multipling(g1, vdup_n_u8(cover)); 184 b1 = Multipling(b1, vdup_n_u8(cover)); 185 a1 = Multipling(a1, vdup_n_u8(cover)); 186 187 uint8x8_t rs = NeonPreLerp(r0, r1, a1); 188 uint8x8_t gs = NeonPreLerp(g0, g1, a1); 189 uint8x8_t bs = NeonPreLerp(b0, b1, a1); 190 uint8x8_t as = NeonPreLerp(a0, a1, a1); 191 192 StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as); 193 } 194 NeonPrelerpARGB8888(uint8_t * dstBuffer,uint8_t * srcBuffer,uint8_t * covers)195 void NeonPrelerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t* covers) 196 { 197 uint8x8x4_t vDstBuf = vld4_u8(dstBuffer); 198 uint8x8_t r0 = vDstBuf.val[NEON_R]; 199 uint8x8_t g0 = vDstBuf.val[NEON_G]; 200 uint8x8_t b0 = vDstBuf.val[NEON_B]; 201 uint8x8_t a0 = vDstBuf.val[NEON_A]; 202 203 uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer); 204 205 uint8x8_t r1 = Multipling(vSrcBuf.val[NEON_R], vld1_u8(covers)); 206 uint8x8_t g1 = Multipling(vSrcBuf.val[NEON_G], vld1_u8(covers)); 207 uint8x8_t b1 = Multipling(vSrcBuf.val[NEON_B], vld1_u8(covers)); 208 uint8x8_t a1 = Multipling(vSrcBuf.val[NEON_A], vld1_u8(covers)); 209 210 uint8x8_t rs = NeonPreLerp(r0, r1, a1); 211 uint8x8_t gs = NeonPreLerp(g0, g1, a1); 212 uint8x8_t bs = NeonPreLerp(b0, b1, a1); 213 uint8x8_t as = NeonPreLerp(a0, a1, a1); 214 215 StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as); 216 } NeonLerpARGB8888(uint8_t * buf,uint8_t r,uint8_t g,uint8_t b,uint8_t a,uint8_t * covers)217 void NeonLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a, 218 uint8_t* covers) 219 { 220 uint8x8x4_t vBuf = vld4_u8(buf); 221 uint8x8_t r0 = vBuf.val[NEON_R]; 222 uint8x8_t g0 = vBuf.val[NEON_G]; 223 uint8x8_t b0 = vBuf.val[NEON_B]; 224 uint8x8_t a0 = vBuf.val[NEON_A]; 225 226 uint8x8_t r1 = Multipling(vdup_n_u8(r), vld1_u8(covers)); 227 uint8x8_t g1 = Multipling(vdup_n_u8(g), vld1_u8(covers)); 228 uint8x8_t b1 = Multipling(vdup_n_u8(b), vld1_u8(covers)); 229 uint8x8_t a1 = Multipling(vdup_n_u8(a), vld1_u8(covers)); 230 231 uint8x8_t rs = NeonLerp(r0, r1, a1); 232 uint8x8_t gs = NeonLerp(g0, g1, a1); 233 uint8x8_t bs = NeonLerp(b0, b1, a1); 234 uint8x8_t as = NeonPreLerp(a0, a1, a1); 235 236 StoreBuf_ARGB8888(buf, rs, gs, bs, as); 237 } NeonLerpARGB8888(uint8_t * buf,uint8_t r,uint8_t g,uint8_t b,uint8_t a)238 void NeonLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a) 239 { 240 uint8x8x4_t vBuf = vld4_u8(buf); 241 uint8x8_t r0 = vBuf.val[NEON_R]; 242 uint8x8_t g0 = vBuf.val[NEON_G]; 243 uint8x8_t b0 = vBuf.val[NEON_B]; 244 uint8x8_t a0 = vBuf.val[NEON_A]; 245 246 uint8x8_t r1 = vdup_n_u8(r); 247 uint8x8_t g1 = vdup_n_u8(g); 248 uint8x8_t b1 = vdup_n_u8(b); 249 uint8x8_t a1 = vdup_n_u8(a); 250 251 uint8x8_t rs = NeonLerp(r0, r1, a1); 252 uint8x8_t gs = NeonLerp(g0, g1, a1); 253 uint8x8_t bs = NeonLerp(b0, b1, a1); 254 uint8x8_t as = NeonPreLerp(a0, a1, a1); 255 256 StoreBuf_ARGB8888(buf, rs, gs, bs, as); 257 } 258 NeonLerpARGB8888(uint8_t * buf,uint8_t r,uint8_t g,uint8_t b,uint8_t a,uint8_t cover)259 void NeonLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a, uint8_t cover) 260 { 261 uint8x8x4_t vBuf = vld4_u8(buf); 262 uint8x8_t r0 = vBuf.val[NEON_R]; 263 uint8x8_t g0 = vBuf.val[NEON_G]; 264 uint8x8_t b0 = vBuf.val[NEON_B]; 265 uint8x8_t a0 = vBuf.val[NEON_A]; 266 267 uint8x8_t r1 = Multipling(vdup_n_u8(r), vdup_n_u8(cover)); 268 uint8x8_t g1 = Multipling(vdup_n_u8(g), vdup_n_u8(cover)); 269 uint8x8_t b1 = Multipling(vdup_n_u8(b), vdup_n_u8(cover)); 270 uint8x8_t a1 = Multipling(vdup_n_u8(a), vdup_n_u8(cover)); 271 272 uint8x8_t rs = NeonLerp(r0, r1, a1); 273 uint8x8_t gs = NeonLerp(g0, g1, a1); 274 uint8x8_t bs = NeonLerp(b0, b1, a1); 275 uint8x8_t as = NeonPreLerp(a0, a1, a1); 276 277 StoreBuf_ARGB8888(buf, rs, gs, bs, as); 278 } 279 NeonLerpARGB8888(uint8_t * dstBuffer,uint8_t * srcBuffer,uint8_t cover)280 void NeonLerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t cover) 281 { 282 uint8x8x4_t vDstBuf = vld4_u8(dstBuffer); 283 uint8x8_t r0 = vDstBuf.val[NEON_R]; 284 uint8x8_t g0 = vDstBuf.val[NEON_G]; 285 uint8x8_t b0 = vDstBuf.val[NEON_B]; 286 uint8x8_t a0 = vDstBuf.val[NEON_A]; 287 uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer); 288 uint8x8_t r1 = vSrcBuf.val[NEON_R]; 289 uint8x8_t g1 = vSrcBuf.val[NEON_G]; 290 uint8x8_t b1 = vSrcBuf.val[NEON_B]; 291 uint8x8_t a1 = vSrcBuf.val[NEON_A]; 292 293 r1 = Multipling(r1, vdup_n_u8(cover)); 294 g1 = Multipling(g1, vdup_n_u8(cover)); 295 b1 = Multipling(b1, vdup_n_u8(cover)); 296 a1 = Multipling(a1, vdup_n_u8(cover)); 297 298 uint8x8_t rs = NeonLerp(r0, r1, a1); 299 uint8x8_t gs = NeonLerp(g0, g1, a1); 300 uint8x8_t bs = NeonLerp(b0, b1, a1); 301 uint8x8_t as = NeonPreLerp(a0, a1, a1); 302 303 StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as); 304 } 305 NeonLerpARGB8888(uint8_t * dstBuffer,uint8_t * srcBuffer,uint8_t * covers)306 void NeonLerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t* covers) 307 { 308 uint8x8x4_t vDstBuf = vld4_u8(dstBuffer); 309 uint8x8_t r0 = vDstBuf.val[NEON_R]; 310 uint8x8_t g0 = vDstBuf.val[NEON_G]; 311 uint8x8_t b0 = vDstBuf.val[NEON_B]; 312 uint8x8_t a0 = vDstBuf.val[NEON_A]; 313 314 uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer); 315 316 uint8x8_t r1 = Multipling(vSrcBuf.val[NEON_R], vld1_u8(covers)); 317 uint8x8_t g1 = Multipling(vSrcBuf.val[NEON_G], vld1_u8(covers)); 318 uint8x8_t b1 = Multipling(vSrcBuf.val[NEON_B], vld1_u8(covers)); 319 uint8x8_t a1 = Multipling(vSrcBuf.val[NEON_A], vld1_u8(covers)); 320 321 uint8x8_t rs = NeonLerp(r0, r1, a1); 322 uint8x8_t gs = NeonLerp(g0, g1, a1); 323 uint8x8_t bs = NeonLerp(b0, b1, a1); 324 uint8x8_t as = NeonPreLerp(a0, a1, a1); 325 326 StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as); 327 } 328 private: ConstructSrcColor(ColorMode sm,void * srcColor,uint8_t opa,uint8x8_t & r,uint8x8_t & g,uint8x8_t & b,uint8x8_t & a)329 void ConstructSrcColor(ColorMode sm, void* srcColor, uint8_t opa, 330 uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a) 331 { 332 if (sm == ARGB8888) { 333 Color32* color = reinterpret_cast<Color32*>(srcColor); 334 r = vdup_n_u8(color->red); 335 g = vdup_n_u8(color->green); 336 b = vdup_n_u8(color->blue); 337 a = NeonMulDiv255(vdup_n_u8(opa), vdup_n_u8(color->alpha)); 338 } else if (sm == XRGB8888) { 339 Color32* color = reinterpret_cast<Color32*>(srcColor); 340 r = vdup_n_u8(color->red); 341 g = vdup_n_u8(color->green); 342 b = vdup_n_u8(color->blue); 343 a = vdup_n_u8(opa); 344 } else if (sm == RGB888) { 345 Color24* color = reinterpret_cast<Color24*>(srcColor); 346 r = vdup_n_u8(color->red); 347 g = vdup_n_u8(color->green); 348 b = vdup_n_u8(color->blue); 349 a = vdup_n_u8(opa); 350 } else if (sm == RGB565) { 351 Color16* color = reinterpret_cast<Color16*>(srcColor); 352 r = vdup_n_u8(color->red); 353 g = vdup_n_u8(color->green); 354 b = vdup_n_u8(color->blue); 355 a = vdup_n_u8(opa); 356 } 357 } 358 359 LoadBuf loadDstFunc_ = nullptr; 360 LoadBufA loadSrcFunc_ = nullptr; 361 NeonBlend blendFunc_ = nullptr; 362 StoreBuf storeDstFunc_ = nullptr; 363 uint8x8_t r1_; 364 uint8x8_t g1_; 365 uint8x8_t b1_; 366 uint8x8_t a1_; 367 uint8x8_t r2_; 368 uint8x8_t g2_; 369 uint8x8_t b2_; 370 uint8x8_t a2_; 371 }; 372 } // namespace OHOS 373 #endif 374 #endif 375