1 /* 2 * Copyright (c) 2023 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #include "util_helper.h" 17 18 #include "native_engine.h" 19 #include "securec.h" 20 21 namespace Commonlibrary::Platform { CreateConverter(const std::string & encStr_,UErrorCode & codeflag)22 UConverter* CreateConverter(const std::string& encStr_, UErrorCode& codeflag) 23 { 24 UConverter *conv = ucnv_open(encStr_.c_str(), &codeflag); 25 if (U_FAILURE(codeflag)) { 26 HILOG_ERROR("Unable to create a UConverter object: %s\n", u_errorName(codeflag)); 27 return NULL; 28 } 29 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &codeflag); 30 if (U_FAILURE(codeflag)) { 31 HILOG_ERROR("Unable to set the from Unicode callback function"); 32 ucnv_close(conv); 33 return NULL; 34 } 35 36 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &codeflag); 37 if (U_FAILURE(codeflag)) { 38 HILOG_ERROR("Unable to set the to Unicode callback function"); 39 ucnv_close(conv); 40 return NULL; 41 } 42 return conv; 43 } 44 ConvertToString(UChar * uchar,size_t length)45 std::string ConvertToString(UChar * uchar, size_t length) 46 { 47 std::u16string tempStr16(uchar); 48 std::string tepStr = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> {}.to_bytes(tempStr16); 49 return tepStr; 50 } 51 EncodeIntoChinese(napi_env env,napi_value src,std::string encoding,std::string & buffer)52 void EncodeIntoChinese(napi_env env, napi_value src, std::string encoding, std::string& buffer) 53 { 54 NativeEngine *engine = reinterpret_cast<NativeEngine*>(env); 55 engine->EncodeToChinese(src, buffer, encoding); 56 } 57 UnicodeConversion(std::string encoding,char16_t * originalBuffer,size_t inputSize)58 std::string UnicodeConversion(std::string encoding, char16_t* originalBuffer, size_t inputSize) 59 { 60 std::string buffer = ""; 61 UErrorCode codeflag = U_ZERO_ERROR; 62 UConverter* converter = ucnv_open(encoding.c_str(), &codeflag); 63 if (U_FAILURE(codeflag)) { 64 HILOG_ERROR("textencoder::ucnv_open failed !"); 65 return ""; 66 } 67 68 size_t maxByteSize = static_cast<size_t>(ucnv_getMaxCharSize(converter)); 69 const UChar *source = originalBuffer; 70 size_t limit = maxByteSize * inputSize; 71 size_t len = limit * sizeof(char); 72 char *targetArray = nullptr; 73 if (limit > 0) { 74 targetArray = new char[limit + 1]; 75 if (memset_s(targetArray, len + sizeof(char), 0, len + sizeof(char)) != EOK) { 76 HILOG_ERROR("textencoder::encode targetArray memset_s failed"); 77 ucnv_close(converter); 78 FreedMemory(targetArray); 79 return ""; 80 } 81 } else { 82 HILOG_ERROR("textencoder::limit is error"); 83 ucnv_close(converter); 84 return ""; 85 } 86 87 char *target = targetArray; 88 const char *targetLimit = targetArray + limit; 89 const UChar *sourceLimit = source + u_strlen(source); 90 if (sourceLimit == nullptr) { 91 HILOG_ERROR("textencoder::sourceLimit is nullptr"); 92 return ""; 93 } 94 ucnv_fromUnicode(converter, &target, targetLimit, &source, sourceLimit, nullptr, true, &codeflag); 95 if (U_FAILURE(codeflag)) { 96 HILOG_ERROR("textencoder::ucnv_fromUnicode conversion failed."); 97 ucnv_close(converter); 98 FreedMemory(targetArray); 99 return ""; 100 } 101 102 buffer = targetArray; 103 ucnv_close(converter); 104 FreedMemory(targetArray); 105 return buffer; 106 } 107 EncodeToUtf8(TextEcodeInfo encodeInfo,char * writeResult,int32_t * written,size_t length,int32_t * nchars)108 void EncodeToUtf8(TextEcodeInfo encodeInfo, char* writeResult, int32_t* written, size_t length, int32_t* nchars) 109 { 110 if (encodeInfo.encoding == "utf-16be" || encodeInfo.encoding == "utf-16le") { 111 EncodeTo16BE(encodeInfo, writeResult, written, length, nchars); 112 } else { 113 OtherEncodeUtf8(encodeInfo, writeResult, written, length, nchars); 114 } 115 } 116 EncodeConversion(napi_env env,napi_value src,napi_value * arrayBuffer,size_t & outLens,std::string encoding)117 void EncodeConversion(napi_env env, napi_value src, napi_value* arrayBuffer, size_t &outLens, std::string encoding) 118 { 119 if (encoding == "utf-16le") { 120 size_t outLen = 0; 121 void *data = nullptr; 122 std::u16string u16Str = EncodeUtf16BE(env, src); 123 outLen = u16Str.length() * 2; // 2:multiple 124 outLens = outLen; 125 napi_create_arraybuffer(env, outLen, &data, arrayBuffer); 126 if (memcpy_s(data, outLen, reinterpret_cast<void*>(u16Str.data()), outLen) != EOK) { 127 HILOG_FATAL("textencoder::copy buffer to arraybuffer error"); 128 return; 129 } 130 } else if (encoding == "utf-16be") { 131 size_t outLen = 0; 132 void *data = nullptr; 133 std::u16string u16BEStr = EncodeUtf16BE(env, src); 134 std::u16string u16LEStr = Utf16BEToLE(u16BEStr); 135 outLen = u16LEStr.length() * 2; // 2:multiple 136 outLens = outLen; 137 napi_create_arraybuffer(env, outLen, &data, arrayBuffer); 138 if (memcpy_s(data, outLen, reinterpret_cast<void*>(u16LEStr.data()), outLen) != EOK) { 139 HILOG_FATAL("textencoder::copy buffer to arraybuffer error"); 140 return; 141 } 142 } else { 143 OtherEncode(env, src, arrayBuffer, outLens, encoding); 144 } 145 } 146 GetMaxByteSize(std::string encoding)147 int GetMaxByteSize(std::string encoding) 148 { 149 UErrorCode codeflag = U_ZERO_ERROR; 150 UConverter* converter = ucnv_open(encoding.c_str(), &codeflag); 151 if (U_FAILURE(codeflag)) { 152 HILOG_ERROR("textencoder::ucnv_open failed !"); 153 return 0; 154 } 155 156 int maxByteSize = static_cast<int>(ucnv_getMaxCharSize(converter)); 157 ucnv_close(converter); 158 return maxByteSize; 159 } 160 FreedMemory(char * data)161 void FreedMemory(char *data) 162 { 163 if (data != nullptr) { 164 delete[] data; 165 data = nullptr; 166 } 167 } 168 IsOneByte(uint8_t u8Char)169 bool IsOneByte(uint8_t u8Char) 170 { 171 return (u8Char & 0x80) == 0; 172 } 173 Utf8ToUtf16BE(const std::string & u8Str,bool * ok)174 std::u16string Utf8ToUtf16BE(const std::string &u8Str, bool *ok) 175 { 176 std::u16string u16Str = u""; 177 u16Str.reserve(u8Str.size()); 178 std::string::size_type len = u8Str.length(); 179 const unsigned char *data = reinterpret_cast<const unsigned char *>(u8Str.data()); 180 bool isOk = true; 181 for (std::string::size_type i = 0; i < len; ++i) { 182 uint8_t c1 = data[i]; 183 if (IsOneByte(c1)) { 184 u16Str.push_back(static_cast<char16_t>(c1)); 185 continue; 186 } 187 switch (c1 & HIGER_4_BITS_MASK) { 188 case FOUR_BYTES_STYLE: { 189 uint8_t c2 = data[++i]; 190 uint8_t c3 = data[++i]; 191 uint8_t c4 = data[++i]; 192 uint32_t codePoint = ((c1 & LOWER_3_BITS_MASK) << (3 * UTF8_VALID_BITS)) | // 3:multiple 193 ((c2 & LOWER_6_BITS_MASK) << (2 * UTF8_VALID_BITS)) | // 2:multiple 194 ((c3 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS) | (c4 & LOWER_6_BITS_MASK); 195 if (codePoint >= UTF16_SPECIAL_VALUE) { 196 codePoint -= UTF16_SPECIAL_VALUE; 197 u16Str.push_back(static_cast<char16_t>((codePoint >> 10) | HIGH_AGENT_MASK)); // 10:offset value 198 u16Str.push_back(static_cast<char16_t>((codePoint & LOWER_10_BITS_MASK) | LOW_AGENT_MASK)); 199 } else { 200 u16Str.push_back(static_cast<char16_t>(codePoint)); 201 } 202 break; 203 } 204 case THREE_BYTES_STYLE: { 205 uint8_t c2 = data[++i]; 206 uint8_t c3 = data[++i]; 207 uint32_t codePoint = ((c1 & LOWER_4_BITS_MASK) << (2 * UTF8_VALID_BITS)) | // 2:multiple 208 ((c2 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS) | (c3 & LOWER_6_BITS_MASK); 209 u16Str.push_back(static_cast<char16_t>(codePoint)); 210 break; 211 } 212 case TWO_BYTES_STYLE1: 213 case TWO_BYTES_STYLE2: { 214 uint8_t c2 = data[++i]; 215 uint32_t codePoint = ((c1 & LOWER_5_BITS_MASK) << UTF8_VALID_BITS) | (c2 & LOWER_6_BITS_MASK); 216 u16Str.push_back(static_cast<char16_t>(codePoint)); 217 break; 218 } 219 default: { 220 isOk = false; 221 break; 222 } 223 } 224 } 225 if (ok != nullptr) { 226 *ok = isOk; 227 } 228 return u16Str; 229 } 230 Utf16BEToLE(const std::u16string & wstr)231 std::u16string Utf16BEToLE(const std::u16string &wstr) 232 { 233 std::u16string str16 = u""; 234 const char16_t *data = wstr.data(); 235 for (unsigned int i = 0; i < wstr.length(); i++) { 236 char16_t wc = data[i]; 237 char16_t high = (wc >> 8) & 0x00FF; // 8:offset value 238 char16_t low = wc & 0x00FF; 239 char16_t c16 = (low << 8) | high; // 8:offset value 240 str16.push_back(c16); 241 } 242 return str16; 243 } 244 OtherEncode(napi_env env,napi_value src,napi_value * arrayBuffer,size_t & outLens,std::string encoding)245 void OtherEncode(napi_env env, napi_value src, napi_value* arrayBuffer, size_t &outLens, std::string encoding) 246 { 247 size_t outLen = 0; 248 void *data = nullptr; 249 size_t inputSize = 0; 250 napi_get_value_string_utf16(env, src, nullptr, 0, &inputSize); 251 char16_t *originalBuffer = ApplyMemory(inputSize); 252 if (originalBuffer == nullptr) { 253 HILOG_ERROR("textencoder::originalBuffer is nullptr"); 254 return; 255 } 256 napi_get_value_string_utf16(env, src, originalBuffer, inputSize + 1, &inputSize); 257 outLen = static_cast<size_t>(GetMaxByteSize(encoding)) * inputSize; 258 napi_create_arraybuffer(env, outLen, &data, arrayBuffer); 259 char *writeResult = static_cast<char*>(data); 260 if (writeResult == nullptr) { 261 HILOG_ERROR("textencoder::writeResult is nullptr"); 262 return; 263 } 264 std::string buffer = ""; 265 std::u16string originalStr(originalBuffer, inputSize); 266 size_t shifting = 0; 267 size_t resultShifting = 0; 268 size_t findIndex = originalStr.find('\0'); 269 if (findIndex == std::string::npos) { 270 buffer = UnicodeConversion(encoding, originalBuffer, inputSize); 271 outLens = buffer.length(); 272 if (memcpy_s(writeResult, outLens, reinterpret_cast<char*>(buffer.data()), outLens) != EOK) { 273 HILOG_FATAL("textencoder::copy buffer to arraybuffer error"); 274 return; 275 } 276 } else { 277 while (findIndex != std::string::npos) { 278 buffer = UnicodeConversion(encoding, originalBuffer + shifting, inputSize); 279 if (memcpy_s(writeResult + resultShifting, buffer.length(), 280 reinterpret_cast<char*>(buffer.data()), buffer.length()) != EOK) { 281 HILOG_FATAL("textencoder::copy buffer to arraybuffer error"); 282 return; 283 } 284 resultShifting += buffer.length(); 285 *(writeResult + resultShifting) = '\0'; 286 resultShifting += 1; 287 outLens += buffer.length() + 1; 288 shifting += findIndex + 1; 289 originalStr = originalStr.substr(findIndex + 1, inputSize); 290 findIndex = originalStr.find('\0'); 291 } 292 buffer = UnicodeConversion(encoding, originalBuffer + shifting, inputSize); 293 outLens += buffer.length(); 294 if (memcpy_s(writeResult + resultShifting, buffer.length(), 295 reinterpret_cast<char*>(buffer.data()), buffer.length()) != EOK) { 296 HILOG_FATAL("textencoder::copy buffer to arraybuffer error"); 297 return; 298 } 299 } 300 FreedMemory(originalBuffer); 301 } 302 EncodeUtf16BE(napi_env env,napi_value src)303 std::u16string EncodeUtf16BE(napi_env env, napi_value src) 304 { 305 std::string buffer = ""; 306 size_t bufferSize = 0; 307 308 if (napi_get_value_string_utf8(env, src, nullptr, 0, &bufferSize) != napi_ok) { 309 HILOG_ERROR("textencoder::can not get src size"); 310 return u""; 311 } 312 buffer.resize(bufferSize); 313 if (napi_get_value_string_utf8(env, src, buffer.data(), bufferSize + 1, &bufferSize) != napi_ok) { 314 HILOG_ERROR("textencoder::can not get src value"); 315 return u""; 316 } 317 std::u16string u16Str = Utf8ToUtf16BE(buffer); 318 return u16Str; 319 } 320 IsValidLowSurrogate(char16_t high)321 bool IsValidLowSurrogate(char16_t high) 322 { 323 // 0xD800: minimum value of low proxy term. 0xDBFF: Maximum value of low proxy term. 324 return (high >= 0xD800 && high <= 0xDBFF); 325 } 326 IsValidHighSurrogate(char16_t low)327 bool IsValidHighSurrogate(char16_t low) 328 { 329 // 0xDC00: minimum value of high proxy item. 0xDFFF: maximum value of high proxy item. 330 return (low >= 0xDC00 && low <= 0xDFFF); 331 } 332 OtherEncodeUtf8Inner(char16_t * originalBuffer,InputBufferInfo inputInfo,size_t & index,OutBufferInfo & outInfo)333 uint32_t OtherEncodeUtf8Inner(char16_t *originalBuffer, InputBufferInfo inputInfo, size_t &index, 334 OutBufferInfo &outInfo) 335 { 336 if (IsValidLowSurrogate(originalBuffer[index]) && inputInfo.encoding == "utf-8") { 337 size_t tempIndex = index + 1; 338 if (tempIndex < inputInfo.inputSize && IsValidHighSurrogate(originalBuffer[index + 1])) { 339 // 2: move the pointer forward to the position of two elements. 340 std::u16string utf16String(&originalBuffer[index], &originalBuffer[index] + 2); 341 std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter; 342 outInfo.rstBuffer = converter.to_bytes(utf16String); 343 outInfo.rstBufferLength = outInfo.rstBuffer.length(); 344 if (outInfo.rstBufferLength > outInfo.writedSize) { 345 return STATE_BREAK_ZERO; 346 } 347 index++; 348 outInfo.cntSize += 2; // 2: two bytes 349 outInfo.bufferResult += outInfo.rstBuffer; 350 outInfo.writedSize -= outInfo.rstBufferLength; 351 return STATE_CONTINUE_ONE; 352 } 353 } 354 return STATE_OTHER_TWO; 355 } 356 OtherEncodeUtf8(TextEcodeInfo encodeInfo,char * writeResult,int32_t * written,size_t length,int32_t * nchar)357 void OtherEncodeUtf8(TextEcodeInfo encodeInfo, char* writeResult, int32_t* written, size_t length, int32_t* nchar) 358 { 359 size_t inputSize = 0; 360 napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, nullptr, 0, &inputSize); 361 char16_t *originalBuffer = ApplyMemory(inputSize); 362 if (originalBuffer == nullptr) { 363 HILOG_ERROR("textencoder::originalBuffer is nullptr"); 364 return; 365 } 366 napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, originalBuffer, inputSize + 1, &inputSize); 367 std::vector<char16_t> targetBuffer(inputSize + 1, u'\0'); 368 InputBufferInfo inputInfo(encodeInfo.encoding, inputSize); 369 OutBufferInfo outInfo(length, "", 0, 0, ""); 370 for (size_t i = 0; i < inputSize; i++) { 371 targetBuffer[i] = originalBuffer[i]; 372 uint32_t rstState = OtherEncodeUtf8Inner(originalBuffer, inputInfo, i, outInfo); 373 if (rstState == STATE_BREAK_ZERO) { 374 break; 375 } else if (rstState == STATE_CONTINUE_ONE) { 376 continue; 377 } 378 outInfo.rstBuffer = UnicodeConversion(encodeInfo.encoding, &targetBuffer[i], inputSize); 379 outInfo.rstBufferLength = outInfo.rstBuffer.length(); 380 if (outInfo.rstBufferLength > outInfo.writedSize) { 381 break; 382 } 383 outInfo.cntSize++; 384 outInfo.bufferResult += outInfo.rstBuffer; 385 outInfo.writedSize -= outInfo.rstBufferLength; 386 } 387 size_t writeLength = outInfo.bufferResult.length(); 388 for (size_t j = 0; j < writeLength; j++) { 389 *writeResult = outInfo.bufferResult[j]; 390 writeResult++; 391 } 392 *nchar = static_cast<int32_t>(outInfo.cntSize); 393 *written = static_cast<int32_t>(writeLength); 394 FreedMemory(originalBuffer); 395 } 396 EncodeTo16BE(TextEcodeInfo encodeInfo,char * writeResult,int32_t * written,size_t length,int32_t * nchars)397 void EncodeTo16BE(TextEcodeInfo encodeInfo, char* writeResult, int32_t* written, size_t length, int32_t* nchars) 398 { 399 size_t inputSize = 0; 400 napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, nullptr, 0, &inputSize); 401 char16_t *originalBuffer = ApplyMemory(inputSize); 402 if (originalBuffer == nullptr) { 403 HILOG_ERROR("textencoder::originalBuffer is nullptr"); 404 return; 405 } 406 napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, originalBuffer, inputSize + 1, &inputSize); 407 408 size_t writableSize = length; 409 std::u16string bufferResult = u""; 410 size_t i = 0; 411 for (; i < inputSize; i++) { 412 std::string strBuff = ""; 413 std::u16string buffer = u""; 414 strBuff = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> {}.to_bytes(originalBuffer[i]); 415 if (encodeInfo.encoding == "utf-16le") { 416 buffer = Utf8ToUtf16BE(strBuff); 417 } else { 418 std::u16string u16Str = Utf8ToUtf16BE(strBuff); 419 buffer = Utf16BEToLE(u16Str); 420 } 421 size_t bufferLength = buffer.length() * 2; // 2:multiple 422 if (bufferLength > writableSize) { 423 break; 424 } 425 bufferResult += buffer; 426 writableSize -= bufferLength; 427 } 428 429 size_t writeLength = bufferResult.length() * 2; // 2:multiple 430 if (memcpy_s(writeResult, writeLength, reinterpret_cast<char*>(bufferResult.data()), writeLength) != EOK) { 431 HILOG_FATAL("textencoder::copy buffer to arraybuffer error"); 432 return; 433 } 434 *nchars = static_cast<int32_t>(i); 435 *written = static_cast<int32_t>(writeLength); 436 FreedMemory(originalBuffer); 437 } 438 ApplyMemory(const size_t & inputSize)439 char16_t *ApplyMemory(const size_t &inputSize) 440 { 441 char16_t *originalBuffer = nullptr; 442 if (inputSize > 0) { 443 originalBuffer = new char16_t[inputSize + 1]; 444 if (memset_s(originalBuffer, inputSize + 1, u'\0', inputSize + 1) != EOK) { 445 HILOG_ERROR("encode originalBuffer memset_s failed"); 446 FreedMemory(originalBuffer); 447 return nullptr; 448 } 449 } else { 450 HILOG_ERROR("inputSize is error"); 451 return nullptr; 452 } 453 return originalBuffer; 454 } 455 FreedMemory(char16_t * & data)456 void FreedMemory(char16_t *&data) 457 { 458 if (data != nullptr) { 459 delete[] data; 460 data = nullptr; 461 } 462 } 463 } // namespace Commonlibrary::Platform