1 /*
2  * Copyright (c) 2022-2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifdef ENABLE_OPENCL
17 #include <securec.h>
18 #endif // ENABLE_OPENCL
19 
20 #ifdef USE_ROSEN_DRAWING
21 #include "include/core/SkData.h"
22 #include "image/bitmap.h"
23 #include "utils/data.h"
24 #endif
25 
26 #include "base/log/ace_trace.h"
27 #include "base/thread/background_task_executor.h"
28 #include "core/image/image_file_cache.h"
29 #include "core/image/image_compressor.h"
30 
31 namespace OHOS::Ace {
/**
 * Load-time hook (GCC/Clang constructor attribute).
 *
 * Calls OHOS::InitOpenCL() only when the build enables OpenCL and targets musl;
 * in every other configuration the function body is empty.
 */
__attribute__((constructor)) void load(void)
{
#if defined(ENABLE_OPENCL) && defined(__MUSL__)
    OHOS::InitOpenCL();
#endif
}
40 
41 std::shared_ptr<ImageCompressor> ImageCompressor::instance_ = nullptr;
42 std::mutex ImageCompressor::instanceMutex_;
GetInstance()43 std::shared_ptr<ImageCompressor> ImageCompressor::GetInstance()
44 {
45     if (instance_ == nullptr) {
46         std::lock_guard<std::mutex> lock(instanceMutex_);
47         if (instance_ == nullptr) {
48             instance_.reset(new ImageCompressor());
49             instance_->Init();
50         }
51     }
52     return instance_;
53 }
54 
Init()55 void ImageCompressor::Init()
56 {
57 #ifdef ENABLE_OPENCL
58     switch_ = SystemProperties::IsAstcEnabled();
59     if (switch_) {
60         clOk_ = OHOS::InitOpenCL();
61         maxErr_ = SystemProperties::GetAstcMaxError();
62         psnr_ = SystemProperties::GetAstcPsnr();
63         InitPartition();
64         InitRecords();
65     }
66 #endif // ENABLE_OPENCL
67 }
68 
CanCompress()69 bool ImageCompressor::CanCompress()
70 {
71 #ifdef UPLOAD_GPU_DISABLED
72     return false;
73 #else
74     if (switch_ && clOk_) {
75         return true;
76     }
77     return false;
78 #endif
79 }
80 
81 #ifdef ENABLE_OPENCL
LoadShaderBin(cl_context context,cl_device_id device_id)82 cl_program ImageCompressor::LoadShaderBin(cl_context context, cl_device_id device_id)
83 {
84     ACE_FUNCTION_TRACE();
85     std::unique_ptr<FILE, decltype(&fclose)> file(fopen(shader_path_.c_str(), "rb"), fclose);
86     if (!file) {
87         LOGE("load cl shader failed");
88         return nullptr;
89     }
90 #ifndef USE_ROSEN_DRAWING
91     auto data = SkData::MakeFromFILE(file.get());
92 #else
93     auto data = SkData::MakeFromFILE(file.get());
94 #endif
95     if (!data) {
96         return nullptr;
97     }
98     cl_int err;
99     size_t len = data->size();
100     auto ptr = (const unsigned char*) data->data();
101     cl_program p = clCreateProgramWithBinary(context, 1, &device_id, &len, &ptr, NULL, &err);
102     if (err) {
103         return nullptr;
104     }
105     return p;
106 }
107 
CreateKernel()108 bool ImageCompressor::CreateKernel()
109 {
110     if (!context_ || !kernel_) {
111         cl_int err;
112         cl_platform_id platform_id;
113         cl_device_id device_id;
114         clGetPlatformIDs(1, &platform_id, NULL);
115         clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
116         context_ = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
117         queue_ = clCreateCommandQueueWithProperties(context_, device_id, 0, &err);
118 
119         cl_program program = LoadShaderBin(context_, device_id);
120         clBuildProgram(program, 1, &device_id, compileOption_.c_str(), NULL, NULL);
121         ACE_SCOPED_TRACE("clCreateKernel");
122         kernel_ = clCreateKernel(program, "astc", &err);
123         clReleaseProgram(program);
124     }
125     if (!context_ || !kernel_ || !queue_) {
126         ReleaseResource();
127         LOGE("build opencl program failed");
128         clOk_ = false;
129         return false;
130     }
131     refCount_++;
132     return true;
133 }
134 
CheckImageQuality(std::string key,uint32_t sumErr,uint32_t maxErr,int32_t width,int32_t height)135 bool ImageCompressor::CheckImageQuality(std::string key, uint32_t sumErr, uint32_t maxErr, int32_t width, int32_t height)
136 {
137     bool isOk = true;
138     float mse = (float)sumErr / (width * height);
139     float psnr = 10 * log10(255 * 255 / mse);
140     if (maxErr == 0 || psnr == 0 || maxErr > static_cast<uint32_t>(maxErr_)|| static_cast<int32_t>(psnr) < psnr_) {
141         isOk = false;
142         std::lock_guard<std::mutex> mLock(recordsMutex_);
143         failedRecords_.insert(key);
144     }
145     LOGI("compress quality %{public}s [%{public}u, %{public}.2f] size(%{public}d×%{public}d) %{public}s",
146         key.c_str(), maxErr, psnr, width, height, isOk ? "ok" : "no");
147     return isOk;
148 }
149 
ReleaseResource()150 void ImageCompressor::ReleaseResource()
151 {
152     ACE_FUNCTION_TRACE();
153     clReleaseKernel(kernel_);
154     kernel_ = NULL;
155     clReleaseCommandQueue(queue_);
156     queue_ = NULL;
157     clReleaseContext(context_);
158     context_ = NULL;
159 }
160 #endif // ENABLE_OPENCL
161 
162 #ifndef USE_ROSEN_DRAWING
GpuCompress(std::string key,SkPixmap & pixmap,int32_t width,int32_t height)163 sk_sp<SkData> ImageCompressor::GpuCompress(std::string key, SkPixmap& pixmap, int32_t width, int32_t height)
164 #else
165 std::shared_ptr<RSData> ImageCompressor::GpuCompress(std::string key, RSBitmap& bitmap, int32_t width, int32_t height)
166 #endif
167 {
168 #ifdef ENABLE_OPENCL
169     std::lock_guard<std::mutex> lock(instanceMutex_);
170     if (width <= 0 || height <= 0 || !clOk_ || IsFailedImage(key) || width > maxSize_ || height > maxSize_) {
171         return nullptr;
172     }
173     if (!CreateKernel()) {
174         return nullptr;
175     }
176     ACE_SCOPED_TRACE("GpuCompress %d×%d", width, height);
177 
178     cl_int err;
179 
180     // Number of work items in each local work group
181     int32_t blockX = ceil((width + DIM - 1) / DIM);
182     int32_t blockY = ceil((height + DIM - 1) / DIM);
183     int32_t numBlocks = blockX * blockY;
184     uint32_t uWidth = static_cast<uint32_t>(width);
185     uint32_t uHeight = static_cast<uint32_t>(height);
186     size_t local[] = { DIM, DIM };
187     size_t global[2];
188     global[0] = (uWidth % local[0] == 0 ? uWidth : (uWidth + local[0] - uWidth % local[0]));
189     global[1] = (uHeight % local[1] == 0 ? uHeight : (uHeight + local[1] - uHeight % local[1]));
190 
191     size_t astc_size = static_cast<size_t>(numBlocks) * DIM * DIM;
192 
193     cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 };
194     cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height };
195     cl_mem inputImage = clCreateImage(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
196 #ifndef USE_ROSEN_DRAWING
197         &image_format, &desc, const_cast<void*>(pixmap.addr()), &err);
198 #else
199         &image_format, &desc, bitmap.GetPixels(), &err);
200 #endif
201     cl_mem astcResult = clCreateBuffer(context_, CL_MEM_ALLOC_HOST_PTR, astc_size, NULL, &err);
202     cl_mem partInfos = clCreateBuffer(context_, CL_MEM_COPY_HOST_PTR,
203         sizeof(PartInfo) * parts_.size(), &parts_[0], &err);
204 
205     uint32_t* blockErrs = new uint32_t[numBlocks]{0};
206     cl_mem clErrs = clCreateBuffer(context_, CL_MEM_USE_HOST_PTR, sizeof(uint32_t) * numBlocks, blockErrs, &err);
207     err |= clSetKernelArg(kernel_, 0, sizeof(cl_mem), &inputImage);
208     err |= clSetKernelArg(kernel_, 1, sizeof(cl_mem), &astcResult);
209     err |= clSetKernelArg(kernel_, 2, sizeof(cl_mem), &partInfos);
210     err |= clSetKernelArg(kernel_, 3, sizeof(cl_mem), &clErrs);
211 
212     err = clEnqueueNDRangeKernel(queue_, kernel_, 2, NULL, global, local, 0, NULL, NULL);
213 
214     clFinish(queue_);
215 
216     uint32_t max_val = 0, sum_val = 0;
217     err = clEnqueueReadBuffer(queue_, clErrs, CL_TRUE, 0, sizeof(uint32_t) * numBlocks, blockErrs, 0, NULL, NULL);
218     for (int32_t i = 0; i < numBlocks; i++) {
219         sum_val += blockErrs[i];
220         max_val = fmax(max_val, blockErrs[i]);
221     }
222 
223     clReleaseMemObject(inputImage);
224     clReleaseMemObject(partInfos);
225     clReleaseMemObject(clErrs);
226     delete[] blockErrs;
227 
228     if (!CheckImageQuality(key, sum_val, max_val, width, height)) {
229         clReleaseMemObject(astcResult);
230         return nullptr;
231     }
232 
233 #ifndef USE_ROSEN_DRAWING
234     auto astc_data = SkData::MakeUninitialized(astc_size);
235     clEnqueueReadBuffer(queue_, astcResult, CL_TRUE, 0, astc_size, astc_data->writable_data(), 0, NULL, NULL);
236 #else
237     auto astc_data = std::make_shared<RSData>();
238     astc_data->BuildUninitialized(astc_size);
239     clEnqueueReadBuffer(queue_, astcResult, CL_TRUE, 0, astc_size, astc_data->WritableData(), 0, NULL, NULL);
240 #endif
241     clReleaseMemObject(astcResult);
242     return astc_data;
243 #else
244     return nullptr;
245 #endif // ENABLE_OPENCL
246 }
247 
248 
ScheduleReleaseTask()249 std::function<void()> ImageCompressor::ScheduleReleaseTask()
250 {
251 #ifdef ENABLE_OPENCL
252     std::function<void()> task = [this]() {
253         if (refCount_ > 0 && clOk_) {
254             refCount_--;
255             if (refCount_ <= 0) {
256                 this->ReleaseResource();
257 
258                 // save failed records
259                 std::ofstream saveFile(recordsPath_);
260                 if (!saveFile.is_open()) {
261                     return;
262                 }
263                 std::lock_guard<std::mutex> mLock(recordsMutex_);
264                 for (auto s : failedRecords_) {
265                     saveFile << s << "\n";
266                 }
267                 saveFile.close();
268             }
269         }
270 #else
271     std::function<void()> task = []() {
272 #endif // ENABLE_OPENCL
273     };
274 
275     return task;
276 }
277 
278 #ifndef USE_ROSEN_DRAWING
279 void ImageCompressor::WriteToFile(std::string srcKey, sk_sp<SkData> compressedData, Size imgSize)
280 #else
281 void ImageCompressor::WriteToFile(std::string srcKey, std::shared_ptr<RSData> compressedData, Size imgSize)
282 #endif
283 {
284     if (!compressedData || srcKey.empty()) {
285         return;
286     }
287 #ifdef ENABLE_OPENCL
288     BackgroundTaskExecutor::GetInstance().PostTask(
289         [srcKey, compressedData, imgSize]() {
290             AstcHeader header;
291             uint32_t xsize = static_cast<uint32_t>(imgSize.Width());
292             uint32_t ysize = static_cast<uint32_t>(imgSize.Height());
293             header.magic[0] = MAGIC_FILE_CONSTANT & 0xFF;
294             header.magic[1] = (MAGIC_FILE_CONSTANT >> 8) & 0xFF;
295             header.magic[2] = (MAGIC_FILE_CONSTANT >> 16) & 0xFF;
296             header.magic[3] = (MAGIC_FILE_CONSTANT >> 24) & 0xFF;
297             header.blockdimX = DIM;
298             header.blockdimY = DIM;
299             header.blockdimZ = 1;
300             header.xsize[0] = xsize & 0xFF;
301             header.xsize[1] = (xsize >> 8) & 0xFF;
302             header.xsize[2] = (xsize >> 16) & 0xFF;
303             header.ysize[0] = ysize & 0xFF;
304             header.ysize[1] = (ysize >> 8) & 0xFF;
305             header.ysize[2] = (ysize >> 16) & 0xFF;
306             header.zsize[0] = 1;
307             header.zsize[1] = 0;
308             header.zsize[2] = 0;
309 
310 #ifndef USE_ROSEN_DRAWING
311             int32_t fileSize = compressedData->size() + sizeof(header);
312             sk_sp<SkData> toWrite = SkData::MakeUninitialized(fileSize);
313             uint8_t* toWritePtr = (uint8_t*) toWrite->writable_data();
314 #else
315             int32_t fileSize = compressedData->GetSize() + sizeof(header);
316             auto toWrite = std::shared_ptr<RSData>();
317             toWrite->BuildUninitialized(fileSize);
318             uint8_t* toWritePtr = (uint8_t*) toWrite->WritableData();
319 #endif
320             if (memcpy_s(toWritePtr, fileSize, &header, sizeof(header)) != EOK) {
321                 LOGE("astc write file failed");
322                 return;
323             }
324 #ifndef USE_ROSEN_DRAWING
325             if (memcpy_s(toWritePtr + sizeof(header), compressedData->size(),
326                     compressedData->data(), compressedData->size()) != EOK) {
327 #else
328             if (memcpy_s(toWritePtr + sizeof(header), compressedData->GetSize(),
329                     compressedData->GetData(), compressedData->GetSize()) != EOK) {
330 #endif
331                 LOGE("astc write file failed");
332                 return;
333             }
334 
335             ImageFileCache::GetInstance().WriteCacheFile(srcKey, toWritePtr, fileSize, ".astc");
336         }, BgTaskPriority::LOW);
337 #endif
338 }
339 
340 #ifndef USE_ROSEN_DRAWING
341 sk_sp<SkData> ImageCompressor::StripFileHeader(sk_sp<SkData> fileData)
342 {
343     if (fileData) {
344         auto imageData = SkData::MakeSubset(fileData.get(), sizeof(AstcHeader), fileData->size() - sizeof(AstcHeader));
345         if (!imageData->isEmpty()) {
346             return imageData;
347         }
348     }
349     return nullptr;
350 }
351 #else
352 std::shared_ptr<RSData> ImageCompressor::StripFileHeader(std::shared_ptr<RSData> fileData)
353 {
354     if (fileData) {
355         if (fileData->GetSize() <= sizeof(AstcHeader)) {
356             return nullptr;
357         }
358         auto imageData = std::make_shared<RSData>();
359         if (imageData->BuildWithCopy(static_cast<const uint8_t*>(fileData->GetData()) + sizeof(AstcHeader),
360             fileData->GetSize() - sizeof(AstcHeader))) {
361             return imageData;
362         }
363     }
364     return nullptr;
365 }
366 #endif
367 
/**
 * @brief Bit-mixing hash used for procedural partition assignment.
 *
 * All arithmetic is unsigned 32-bit and wraps on overflow.
 *
 * @param seed The value to hash.
 *
 * @return The hashed value.
 */
static uint32_t Hash52(uint32_t seed)
{
    uint32_t mixed = seed;
    mixed = mixed ^ (mixed >> 15);

    // (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
    mixed = mixed * 0xEEDE0891;
    mixed = mixed ^ (mixed >> 5);
    mixed = mixed + (mixed << 16);
    mixed = mixed ^ (mixed >> 7);
    mixed = mixed ^ (mixed >> 3);
    mixed = mixed ^ (mixed << 6);
    mixed = mixed ^ (mixed >> 17);
    return mixed;
}
389 
390 /**
391  * @brief Select texel assignment for a single coordinate.
392  *
393  * @param seed              The seed - the partition index from the block.
394  * @param x                 The texel X coordinate in the block.
395  * @param y                 The texel Y coordinate in the block.
396  * @param z                 The texel Z coordinate in the block.
397  * @param partitionCount   The total partition count of this encoding.
398  * @param smallBlock       @c true if the blockhas fewer than 32 texels.
399  *
400  * @return The assigned partition index for this texel.
401  */
402 static uint8_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, int32_t partitionCount, bool smallBlock)
403 {
404     // For small blocks bias the coordinates to get better distribution
405     if (smallBlock) {
406         x *= 2;
407         y *= 2;
408         z *= 2;
409     }
410 
411     seed += (partitionCount - 1) * 1024;
412 
413     uint32_t num = Hash52(seed);
414 
415     uint8_t seed1 = num & 0xF;
416     uint8_t seed2 = (num >> 4) & 0xF;
417     uint8_t seed3 = (num >> 8) & 0xF;
418     uint8_t seed4 = (num >> 12) & 0xF;
419     uint8_t seed5 = (num >> 16) & 0xF;
420     uint8_t seed6 = (num >> 20) & 0xF;
421     uint8_t seed7 = (num >> 24) & 0xF;
422     uint8_t seed8 = (num >> 28) & 0xF;
423     uint8_t seed9 = (num >> 18) & 0xF;
424     uint8_t seed10 = (num >> 22) & 0xF;
425     uint8_t seed11 = (num >> 26) & 0xF;
426     uint8_t seed12 = ((num >> 30) | (num << 2)) & 0xF;
427 
428     // Squaring all the seeds in order to bias their distribution towards lower values.
429     seed1 *= seed1;
430     seed2 *= seed2;
431     seed3 *= seed3;
432     seed4 *= seed4;
433     seed5 *= seed5;
434     seed6 *= seed6;
435     seed7 *= seed7;
436     seed8 *= seed8;
437     seed9 *= seed9;
438     seed10 *= seed10;
439     seed11 *= seed11;
440     seed12 *= seed12;
441 
442     int32_t sh1, sh2;
443     if (seed & 1) {
444         sh1 = (seed & 2 ? 4 : 5);
445         sh2 = (partitionCount == 3 ? 6 : 5);
446     } else {
447         sh1 = (partitionCount == 3 ? 6 : 5);
448         sh2 = (seed & 2 ? 4 : 5);
449     }
450 
451     int32_t sh3 = (seed & 0x10) ? sh1 : sh2;
452 
453     seed1 >>= sh1;
454     seed2 >>= sh2;
455     seed3 >>= sh1;
456     seed4 >>= sh2;
457     seed5 >>= sh1;
458     seed6 >>= sh2;
459     seed7 >>= sh1;
460     seed8 >>= sh2;
461 
462     seed9 >>= sh3;
463     seed10 >>= sh3;
464     seed11 >>= sh3;
465     seed12 >>= sh3;
466 
467     uint32_t a = static_cast<uint32_t>(seed1 * x + seed2 * y + seed11 * z + (num >> 14));
468     uint32_t b = static_cast<uint32_t>(seed3 * x + seed4 * y + seed12 * z + (num >> 10));
469     uint32_t c = static_cast<uint32_t>(seed5 * x + seed6 * y + seed9 * z + (num >> 6));
470     uint32_t d = static_cast<uint32_t>(seed7 * x + seed8 * y + seed10 * z + (num >> 2));
471 
472     // Apply the saw
473     a &= 0x3F;
474     b &= 0x3F;
475     c &= 0x3F;
476     d &= 0x3F;
477 
478     // Remove some of the components if we are to output < 4 partitions_.
479     if (partitionCount <= 3) {
480         d = 0;
481     }
482 
483     if (partitionCount <= 2) {
484         c = 0;
485     }
486 
487     if (partitionCount <= 1) {
488         b = 0;
489     }
490 
491     uint8_t partition;
492     if (a >= b && a >= c && a >= d) {
493         partition = 0;
494     } else if (b >= c && b >= d) {
495         partition = 1;
496     } else if (c >= d) {
497         partition = 2;
498     } else {
499         partition = 3;
500     }
501 
502     return partition;
503 }
504 
505 bool ImageCompressor::InitPartitionInfo(PartInfo *partInfos, int32_t part_index, int32_t part_count)
506 {
507     int32_t texIdx = 0;
508     int32_t counts[4] = {0};
509     for (int32_t y = 0; y < DIM; y++) {
510         for (int32_t x = 0; x < DIM; x++) {
511             int32_t part = SelectPartition(part_index, x, y, 0, part_count, true);
512             partInfos->bitmaps[part] |= 1u << texIdx;
513             counts[part]++;
514             texIdx++;
515         }
516     }
517     int32_t realPartCount = 0;
518     if (counts[0] == 0) {
519         realPartCount = 0;
520     } else if (counts[1] == 0) {
521         realPartCount = 1;
522     } else if (counts[2] == 0) {
523         realPartCount = 2;
524     } else if (counts[3] == 0) {
525         realPartCount = 3;
526     } else {
527         realPartCount = 4;
528     }
529     if (realPartCount == part_count) {
530         return true;
531     }
532     return false;
533 }
534 
535 void ImageCompressor::InitPartition()
536 {
537     parts_.clear();
538     int32_t arrSize = sizeof(partitions_) / sizeof(partitions_[0]);
539     for (int32_t i = 0; i < arrSize; i++) {
540         PartInfo p = {};
541         if (InitPartitionInfo(&p, partitions_[i], 2)) {
542             p.partid = partitions_[i];
543             parts_.push_back(p);
544         }
545     }
546     compileOption_ = "-D PARTITION_SERACH_MAX=" + std::to_string(parts_.size());
547 }
548 
549 #ifdef ENABLE_OPENCL
550 bool ImageCompressor::IsFailedImage(std::string key)
551 {
552     std::lock_guard<std::mutex> mLock(recordsMutex_);
553     return failedRecords_.find(key) != failedRecords_.end();
554 }
555 #endif
556 
557 void ImageCompressor::InitRecords()
558 {
559     recordsPath_ = ImageFileCache::GetInstance().GetImageCacheFilePath("record") + ".txt";
560     std::ifstream openFile(recordsPath_);
561     if (!openFile.is_open()) {
562         return;
563     }
564     std::string line;
565     std::lock_guard<std::mutex> mLock(recordsMutex_);
566     while (!openFile.eof()) {
567         std::getline(openFile, line);
568         failedRecords_.insert(line);
569     }
570     openFile.close();
571 }
572 #ifdef FUZZTEST
573 void ImageCompressor::PartDoing()
574 {
575     InitPartition();
576     InitRecords();
577 }
578 #endif
579 } // namespace OHOS::Ace
580