1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "util_helper.h"
17 
18 #include "native_engine.h"
19 #include "securec.h"
20 
21 namespace Commonlibrary::Platform {
CreateConverter(const std::string & encStr_,UErrorCode & codeflag)22     UConverter* CreateConverter(const std::string& encStr_, UErrorCode& codeflag)
23     {
24         UConverter *conv = ucnv_open(encStr_.c_str(), &codeflag);
25         if (U_FAILURE(codeflag)) {
26             HILOG_ERROR("Unable to create a UConverter object: %s\n", u_errorName(codeflag));
27             return NULL;
28         }
29         ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &codeflag);
30         if (U_FAILURE(codeflag)) {
31             HILOG_ERROR("Unable to set the from Unicode callback function");
32             ucnv_close(conv);
33             return NULL;
34         }
35 
36         ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &codeflag);
37         if (U_FAILURE(codeflag)) {
38             HILOG_ERROR("Unable to set the to Unicode callback function");
39             ucnv_close(conv);
40             return NULL;
41         }
42         return conv;
43     }
44 
ConvertToString(UChar * uchar,size_t length)45     std::string ConvertToString(UChar * uchar, size_t length)
46     {
47         std::u16string tempStr16(uchar);
48         std::string tepStr = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> {}.to_bytes(tempStr16);
49         return tepStr;
50     }
51 
EncodeIntoChinese(napi_env env,napi_value src,std::string encoding,std::string & buffer)52     void EncodeIntoChinese(napi_env env, napi_value src, std::string encoding, std::string& buffer)
53     {
54         NativeEngine *engine = reinterpret_cast<NativeEngine*>(env);
55         engine->EncodeToChinese(src, buffer, encoding);
56     }
57 
UnicodeConversion(std::string encoding,char16_t * originalBuffer,size_t inputSize)58     std::string UnicodeConversion(std::string encoding, char16_t* originalBuffer, size_t inputSize)
59     {
60         std::string buffer = "";
61         UErrorCode codeflag = U_ZERO_ERROR;
62         UConverter* converter = ucnv_open(encoding.c_str(), &codeflag);
63         if (U_FAILURE(codeflag)) {
64             HILOG_ERROR("textencoder::ucnv_open failed !");
65             return "";
66         }
67 
68         size_t maxByteSize = static_cast<size_t>(ucnv_getMaxCharSize(converter));
69         const UChar *source = originalBuffer;
70         size_t limit = maxByteSize * inputSize;
71         size_t len = limit * sizeof(char);
72         char *targetArray = nullptr;
73         if (limit > 0) {
74             targetArray = new char[limit + 1];
75             if (memset_s(targetArray, len + sizeof(char), 0, len + sizeof(char)) != EOK) {
76                 HILOG_ERROR("textencoder::encode targetArray memset_s failed");
77                 ucnv_close(converter);
78                 FreedMemory(targetArray);
79                 return "";
80             }
81         } else {
82             HILOG_ERROR("textencoder::limit is error");
83             ucnv_close(converter);
84             return "";
85         }
86 
87         char *target = targetArray;
88         const char *targetLimit = targetArray + limit;
89         const UChar *sourceLimit = source + u_strlen(source);
90         if (sourceLimit == nullptr) {
91             HILOG_ERROR("textencoder::sourceLimit is nullptr");
92             return "";
93         }
94         ucnv_fromUnicode(converter, &target, targetLimit, &source, sourceLimit, nullptr, true, &codeflag);
95         if (U_FAILURE(codeflag)) {
96             HILOG_ERROR("textencoder::ucnv_fromUnicode conversion failed.");
97             ucnv_close(converter);
98             FreedMemory(targetArray);
99             return "";
100         }
101 
102         buffer = targetArray;
103         ucnv_close(converter);
104         FreedMemory(targetArray);
105         return buffer;
106     }
107 
EncodeToUtf8(TextEcodeInfo encodeInfo,char * writeResult,int32_t * written,size_t length,int32_t * nchars)108     void EncodeToUtf8(TextEcodeInfo encodeInfo, char* writeResult, int32_t* written, size_t length, int32_t* nchars)
109     {
110         if (encodeInfo.encoding == "utf-16be" || encodeInfo.encoding == "utf-16le") {
111             EncodeTo16BE(encodeInfo, writeResult, written, length, nchars);
112         } else {
113             OtherEncodeUtf8(encodeInfo, writeResult, written, length, nchars);
114         }
115     }
116 
EncodeConversion(napi_env env,napi_value src,napi_value * arrayBuffer,size_t & outLens,std::string encoding)117     void EncodeConversion(napi_env env, napi_value src, napi_value* arrayBuffer, size_t &outLens, std::string encoding)
118     {
119         if (encoding == "utf-16le") {
120             size_t  outLen = 0;
121             void *data = nullptr;
122             std::u16string u16Str = EncodeUtf16BE(env, src);
123             outLen = u16Str.length() * 2; // 2:multiple
124             outLens = outLen;
125             napi_create_arraybuffer(env, outLen, &data, arrayBuffer);
126             if (memcpy_s(data, outLen, reinterpret_cast<void*>(u16Str.data()), outLen) != EOK) {
127                 HILOG_FATAL("textencoder::copy buffer to arraybuffer error");
128                 return;
129             }
130         } else if (encoding == "utf-16be") {
131             size_t  outLen = 0;
132             void *data = nullptr;
133             std::u16string u16BEStr = EncodeUtf16BE(env, src);
134             std::u16string u16LEStr = Utf16BEToLE(u16BEStr);
135             outLen = u16LEStr.length() * 2; // 2:multiple
136             outLens = outLen;
137             napi_create_arraybuffer(env, outLen, &data, arrayBuffer);
138             if (memcpy_s(data, outLen, reinterpret_cast<void*>(u16LEStr.data()), outLen) != EOK) {
139                 HILOG_FATAL("textencoder::copy buffer to arraybuffer error");
140                 return;
141             }
142         } else {
143             OtherEncode(env, src, arrayBuffer, outLens, encoding);
144         }
145     }
146 
GetMaxByteSize(std::string encoding)147     int GetMaxByteSize(std::string encoding)
148     {
149         UErrorCode codeflag = U_ZERO_ERROR;
150         UConverter* converter = ucnv_open(encoding.c_str(), &codeflag);
151         if (U_FAILURE(codeflag)) {
152             HILOG_ERROR("textencoder::ucnv_open failed !");
153             return 0;
154         }
155 
156         int maxByteSize = static_cast<int>(ucnv_getMaxCharSize(converter));
157         ucnv_close(converter);
158         return maxByteSize;
159     }
160 
FreedMemory(char * data)161     void FreedMemory(char *data)
162     {
163         if (data != nullptr) {
164             delete[] data;
165             data = nullptr;
166         }
167     }
168 
IsOneByte(uint8_t u8Char)169     bool IsOneByte(uint8_t u8Char)
170     {
171         return (u8Char & 0x80) == 0;
172     }
173 
Utf8ToUtf16BE(const std::string & u8Str,bool * ok)174     std::u16string Utf8ToUtf16BE(const std::string &u8Str, bool *ok)
175     {
176         std::u16string u16Str = u"";
177         u16Str.reserve(u8Str.size());
178         std::string::size_type len = u8Str.length();
179         const unsigned char *data = reinterpret_cast<const unsigned char *>(u8Str.data());
180         bool isOk = true;
181         for (std::string::size_type i = 0; i < len; ++i) {
182             uint8_t c1 = data[i];
183             if (IsOneByte(c1)) {
184                 u16Str.push_back(static_cast<char16_t>(c1));
185                 continue;
186             }
187             switch (c1 & HIGER_4_BITS_MASK) {
188                 case FOUR_BYTES_STYLE: {
189                     uint8_t c2 = data[++i];
190                     uint8_t c3 = data[++i];
191                     uint8_t c4 = data[++i];
192                     uint32_t codePoint = ((c1 & LOWER_3_BITS_MASK) << (3 * UTF8_VALID_BITS)) | // 3:multiple
193                         ((c2 & LOWER_6_BITS_MASK) << (2 * UTF8_VALID_BITS)) | // 2:multiple
194                         ((c3 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS) | (c4 & LOWER_6_BITS_MASK);
195                     if (codePoint >= UTF16_SPECIAL_VALUE) {
196                         codePoint -= UTF16_SPECIAL_VALUE;
197                         u16Str.push_back(static_cast<char16_t>((codePoint >> 10) | HIGH_AGENT_MASK)); // 10:offset value
198                         u16Str.push_back(static_cast<char16_t>((codePoint & LOWER_10_BITS_MASK) | LOW_AGENT_MASK));
199                     } else {
200                         u16Str.push_back(static_cast<char16_t>(codePoint));
201                     }
202                     break;
203                 }
204                 case THREE_BYTES_STYLE: {
205                     uint8_t c2 = data[++i];
206                     uint8_t c3 = data[++i];
207                     uint32_t codePoint = ((c1 & LOWER_4_BITS_MASK) << (2 * UTF8_VALID_BITS)) | // 2:multiple
208                         ((c2 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS) | (c3 & LOWER_6_BITS_MASK);
209                     u16Str.push_back(static_cast<char16_t>(codePoint));
210                     break;
211                 }
212                 case TWO_BYTES_STYLE1:
213                 case TWO_BYTES_STYLE2: {
214                     uint8_t c2 = data[++i];
215                     uint32_t codePoint = ((c1 & LOWER_5_BITS_MASK) << UTF8_VALID_BITS) | (c2 & LOWER_6_BITS_MASK);
216                     u16Str.push_back(static_cast<char16_t>(codePoint));
217                     break;
218                 }
219                 default: {
220                     isOk = false;
221                     break;
222                 }
223             }
224         }
225         if (ok != nullptr) {
226             *ok = isOk;
227         }
228         return u16Str;
229     }
230 
Utf16BEToLE(const std::u16string & wstr)231     std::u16string Utf16BEToLE(const std::u16string &wstr)
232     {
233         std::u16string str16 = u"";
234         const char16_t *data = wstr.data();
235         for (unsigned int i = 0; i < wstr.length(); i++) {
236             char16_t wc = data[i];
237             char16_t high = (wc >> 8) & 0x00FF; // 8:offset value
238             char16_t low = wc & 0x00FF;
239             char16_t c16 = (low << 8) | high; // 8:offset value
240             str16.push_back(c16);
241         }
242         return str16;
243     }
244 
OtherEncode(napi_env env,napi_value src,napi_value * arrayBuffer,size_t & outLens,std::string encoding)245     void OtherEncode(napi_env env, napi_value src, napi_value* arrayBuffer, size_t &outLens, std::string encoding)
246     {
247         size_t  outLen = 0;
248         void *data = nullptr;
249         size_t inputSize = 0;
250         napi_get_value_string_utf16(env, src, nullptr, 0, &inputSize);
251         char16_t *originalBuffer = ApplyMemory(inputSize);
252         if (originalBuffer == nullptr) {
253             HILOG_ERROR("textencoder::originalBuffer is nullptr");
254             return;
255         }
256         napi_get_value_string_utf16(env, src, originalBuffer, inputSize + 1, &inputSize);
257         outLen = static_cast<size_t>(GetMaxByteSize(encoding)) * inputSize;
258         napi_create_arraybuffer(env, outLen, &data, arrayBuffer);
259         char *writeResult = static_cast<char*>(data);
260         if (writeResult == nullptr) {
261             HILOG_ERROR("textencoder::writeResult is nullptr");
262             return;
263         }
264         std::string buffer = "";
265         std::u16string originalStr(originalBuffer, inputSize);
266         size_t shifting = 0;
267         size_t resultShifting = 0;
268         size_t findIndex = originalStr.find('\0');
269         if (findIndex == std::string::npos) {
270             buffer = UnicodeConversion(encoding, originalBuffer, inputSize);
271             outLens = buffer.length();
272             if (memcpy_s(writeResult, outLens, reinterpret_cast<char*>(buffer.data()), outLens) != EOK) {
273                 HILOG_FATAL("textencoder::copy buffer to arraybuffer error");
274                 return;
275             }
276         } else {
277             while (findIndex != std::string::npos) {
278                 buffer = UnicodeConversion(encoding, originalBuffer + shifting, inputSize);
279                 if (memcpy_s(writeResult + resultShifting, buffer.length(),
280                              reinterpret_cast<char*>(buffer.data()), buffer.length()) != EOK) {
281                     HILOG_FATAL("textencoder::copy buffer to arraybuffer error");
282                     return;
283                 }
284                 resultShifting +=  buffer.length();
285                 *(writeResult + resultShifting) = '\0';
286                 resultShifting += 1;
287                 outLens += buffer.length() + 1;
288                 shifting += findIndex + 1;
289                 originalStr = originalStr.substr(findIndex + 1, inputSize);
290                 findIndex = originalStr.find('\0');
291             }
292             buffer = UnicodeConversion(encoding, originalBuffer + shifting, inputSize);
293             outLens += buffer.length();
294             if (memcpy_s(writeResult + resultShifting, buffer.length(),
295                          reinterpret_cast<char*>(buffer.data()), buffer.length()) != EOK) {
296                 HILOG_FATAL("textencoder::copy buffer to arraybuffer error");
297                 return;
298             }
299         }
300         FreedMemory(originalBuffer);
301     }
302 
EncodeUtf16BE(napi_env env,napi_value src)303     std::u16string EncodeUtf16BE(napi_env env, napi_value src)
304     {
305         std::string buffer = "";
306         size_t bufferSize = 0;
307 
308         if (napi_get_value_string_utf8(env, src, nullptr, 0, &bufferSize) != napi_ok) {
309             HILOG_ERROR("textencoder::can not get src size");
310             return u"";
311         }
312         buffer.resize(bufferSize);
313         if (napi_get_value_string_utf8(env, src, buffer.data(), bufferSize + 1, &bufferSize) != napi_ok) {
314             HILOG_ERROR("textencoder::can not get src value");
315             return u"";
316         }
317         std::u16string u16Str = Utf8ToUtf16BE(buffer);
318         return u16Str;
319     }
320 
IsValidLowSurrogate(char16_t high)321     bool IsValidLowSurrogate(char16_t high)
322     {
323         // 0xD800: minimum value of low proxy term. 0xDBFF: Maximum value of low proxy term.
324         return (high >= 0xD800 && high <= 0xDBFF);
325     }
326 
IsValidHighSurrogate(char16_t low)327     bool IsValidHighSurrogate(char16_t low)
328     {
329         // 0xDC00: minimum value of high proxy item. 0xDFFF: maximum value of high proxy item.
330         return (low >= 0xDC00 && low <= 0xDFFF);
331     }
332 
OtherEncodeUtf8Inner(char16_t * originalBuffer,InputBufferInfo inputInfo,size_t & index,OutBufferInfo & outInfo)333     uint32_t OtherEncodeUtf8Inner(char16_t *originalBuffer, InputBufferInfo inputInfo, size_t &index,
334         OutBufferInfo &outInfo)
335     {
336         if (IsValidLowSurrogate(originalBuffer[index]) && inputInfo.encoding == "utf-8") {
337             size_t tempIndex = index + 1;
338             if (tempIndex < inputInfo.inputSize && IsValidHighSurrogate(originalBuffer[index + 1])) {
339                 // 2: move the pointer forward to the position of two elements.
340                 std::u16string utf16String(&originalBuffer[index], &originalBuffer[index] + 2);
341                 std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;
342                 outInfo.rstBuffer = converter.to_bytes(utf16String);
343                 outInfo.rstBufferLength = outInfo.rstBuffer.length();
344                 if (outInfo.rstBufferLength > outInfo.writedSize) {
345                     return STATE_BREAK_ZERO;
346                 }
347                 index++;
348                 outInfo.cntSize += 2; // 2: two bytes
349                 outInfo.bufferResult += outInfo.rstBuffer;
350                 outInfo.writedSize -= outInfo.rstBufferLength;
351                 return STATE_CONTINUE_ONE;
352             }
353         }
354         return STATE_OTHER_TWO;
355     }
356 
OtherEncodeUtf8(TextEcodeInfo encodeInfo,char * writeResult,int32_t * written,size_t length,int32_t * nchar)357     void OtherEncodeUtf8(TextEcodeInfo encodeInfo, char* writeResult, int32_t* written, size_t length, int32_t* nchar)
358     {
359         size_t inputSize = 0;
360         napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, nullptr, 0, &inputSize);
361         char16_t *originalBuffer = ApplyMemory(inputSize);
362         if (originalBuffer == nullptr) {
363             HILOG_ERROR("textencoder::originalBuffer is nullptr");
364             return;
365         }
366         napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, originalBuffer, inputSize + 1, &inputSize);
367         std::vector<char16_t> targetBuffer(inputSize + 1, u'\0');
368         InputBufferInfo inputInfo(encodeInfo.encoding, inputSize);
369         OutBufferInfo outInfo(length, "", 0, 0, "");
370         for (size_t i = 0; i < inputSize; i++) {
371             targetBuffer[i] = originalBuffer[i];
372             uint32_t rstState = OtherEncodeUtf8Inner(originalBuffer, inputInfo, i, outInfo);
373             if (rstState == STATE_BREAK_ZERO) {
374                 break;
375             } else if (rstState == STATE_CONTINUE_ONE) {
376                 continue;
377             }
378             outInfo.rstBuffer = UnicodeConversion(encodeInfo.encoding, &targetBuffer[i], inputSize);
379             outInfo.rstBufferLength = outInfo.rstBuffer.length();
380             if (outInfo.rstBufferLength > outInfo.writedSize) {
381                 break;
382             }
383             outInfo.cntSize++;
384             outInfo.bufferResult += outInfo.rstBuffer;
385             outInfo.writedSize -= outInfo.rstBufferLength;
386         }
387         size_t writeLength = outInfo.bufferResult.length();
388         for (size_t j = 0; j < writeLength; j++) {
389             *writeResult = outInfo.bufferResult[j];
390             writeResult++;
391         }
392         *nchar = static_cast<int32_t>(outInfo.cntSize);
393         *written = static_cast<int32_t>(writeLength);
394         FreedMemory(originalBuffer);
395     }
396 
EncodeTo16BE(TextEcodeInfo encodeInfo,char * writeResult,int32_t * written,size_t length,int32_t * nchars)397     void EncodeTo16BE(TextEcodeInfo encodeInfo, char* writeResult, int32_t* written, size_t length, int32_t* nchars)
398     {
399         size_t inputSize = 0;
400         napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, nullptr, 0, &inputSize);
401         char16_t *originalBuffer = ApplyMemory(inputSize);
402         if (originalBuffer == nullptr) {
403             HILOG_ERROR("textencoder::originalBuffer is nullptr");
404             return;
405         }
406         napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, originalBuffer, inputSize + 1, &inputSize);
407 
408         size_t writableSize = length;
409         std::u16string bufferResult = u"";
410         size_t i = 0;
411         for (; i < inputSize; i++) {
412             std::string strBuff = "";
413             std::u16string buffer = u"";
414             strBuff = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> {}.to_bytes(originalBuffer[i]);
415             if (encodeInfo.encoding == "utf-16le") {
416                 buffer = Utf8ToUtf16BE(strBuff);
417             } else {
418                 std::u16string u16Str = Utf8ToUtf16BE(strBuff);
419                 buffer = Utf16BEToLE(u16Str);
420             }
421             size_t bufferLength = buffer.length() * 2; // 2:multiple
422             if (bufferLength > writableSize) {
423                 break;
424             }
425             bufferResult += buffer;
426             writableSize -= bufferLength;
427         }
428 
429         size_t writeLength = bufferResult.length() * 2; // 2:multiple
430         if (memcpy_s(writeResult, writeLength, reinterpret_cast<char*>(bufferResult.data()), writeLength) != EOK) {
431             HILOG_FATAL("textencoder::copy buffer to arraybuffer error");
432             return;
433         }
434         *nchars = static_cast<int32_t>(i);
435         *written = static_cast<int32_t>(writeLength);
436         FreedMemory(originalBuffer);
437     }
438 
ApplyMemory(const size_t & inputSize)439     char16_t *ApplyMemory(const size_t &inputSize)
440     {
441         char16_t *originalBuffer = nullptr;
442         if (inputSize > 0) {
443             originalBuffer = new char16_t[inputSize + 1];
444             if (memset_s(originalBuffer, inputSize + 1, u'\0', inputSize + 1) != EOK) {
445                 HILOG_ERROR("encode originalBuffer memset_s failed");
446                 FreedMemory(originalBuffer);
447                 return nullptr;
448             }
449         } else {
450             HILOG_ERROR("inputSize is error");
451             return nullptr;
452         }
453         return originalBuffer;
454     }
455 
FreedMemory(char16_t * & data)456     void FreedMemory(char16_t *&data)
457     {
458         if (data != nullptr) {
459             delete[] data;
460             data = nullptr;
461         }
462     }
463 } // namespace Commonlibrary::Platform