1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "core/text/text_emoji_processor.h"
17
18 #include <unicode/uchar.h>
19
20 #include "unicode/unistr.h"
21
22 namespace OHOS::Ace {
namespace {

// Code points that get dedicated handling in the emoji state machine.
constexpr int32_t LINE_FEED = 0x0A;
constexpr int32_t CARRIAGE_RETURN = 0x0D;
constexpr int32_t ZERO_WIDTH_JOINER = 0x200D;
constexpr int32_t COMBINING_ENCLOSING_KEYCAP = 0x20E3;
constexpr int32_t CANCEL_TAG = 0xE007F;

// Identifiers of the state machine states used while measuring how many
// code points belong to the character sequence at the front/end of a string.
// The numeric values only need to be distinct from each other.
constexpr int32_t STATE_BEGIN = 0;
constexpr int32_t STATE_SECOND = 1;
constexpr int32_t STATE_EM = 2;
constexpr int32_t STATE_VS_AND_KEYCAP = 3;
constexpr int32_t STATE_ZWJ = 4;
constexpr int32_t STATE_KEYCAP = 5;
constexpr int32_t STATE_EMOJI = 6;
constexpr int32_t STATE_VS_AND_EM = 7;
constexpr int32_t STATE_VS = 8;
constexpr int32_t STATE_VS_AND_ZWJ = 9;
constexpr int32_t STATE_LF = 10;
constexpr int32_t STATE_CR = 11;
constexpr int32_t STATE_IN_TAG_QUEUE = 12;
constexpr int32_t STATE_EVEN_RIS = 13;
constexpr int32_t STATE_ODD_RIS = 14;
// Terminal state: the scan loops stop as soon as it is reached.
constexpr int32_t STATE_FINISHED = 20;

} // namespace
48
Delete(int32_t startIndex,int32_t length,std::string & content,bool isBackward)49 int32_t TextEmojiProcessor::Delete(int32_t startIndex, int32_t length, std::string& content, bool isBackward)
50 {
51 std::u16string u16 = StringUtils::Str8ToStr16(content);
52 // startIndex from selectController_->GetCaretIndex() is an utf-16 index
53 // so we need an u16string to get the correct index
54 std::u16string remainString = u"";
55 std::u32string u32ContentToDelete;
56 if (isBackward) {
57 if (startIndex == static_cast<int32_t>(u16.length())) {
58 u32ContentToDelete = StringUtils::ToU32string(content);
59 } else {
60 remainString = u16.substr(startIndex, u16.length() - startIndex);
61 std::u16string temp = u16.substr(0, startIndex);
62 u32ContentToDelete = StringUtils::ToU32string(StringUtils::Str16ToStr8(temp));
63 }
64 if (u32ContentToDelete.length() == 0) {
65 return 0;
66 }
67 for (int32_t i = 0; i < length; i++) {
68 if (!BackwardDelete(u32ContentToDelete)) {
69 break;
70 }
71 }
72 content = StringUtils::U32StringToString(u32ContentToDelete) + StringUtils::Str16ToStr8(remainString);
73 } else {
74 if (startIndex == 0) {
75 u32ContentToDelete = StringUtils::ToU32string(content);
76 } else {
77 remainString = u16.substr(0, startIndex);
78 std::u16string temp = u16.substr(startIndex, u16.length() - startIndex);
79 u32ContentToDelete = StringUtils::ToU32string(StringUtils::Str16ToStr8(temp));
80 }
81 if (u32ContentToDelete.length() == 0) {
82 return 0;
83 }
84 for (int32_t i = 0; i < length; i++) {
85 if (!ForwardDelete(u32ContentToDelete)) {
86 break;
87 }
88 }
89 content = StringUtils::Str16ToStr8(remainString) + StringUtils::U32StringToString(u32ContentToDelete);
90 }
91 int32_t deletedLength = static_cast<int32_t>(u16.length() - StringUtils::Str8ToStr16(content).length());
92 //we need length to update the cursor
93 return deletedLength;
94 }
95
IsIndexInEmoji(int32_t index,const std::string & content,int32_t & startIndex,int32_t & endIndex)96 bool TextEmojiProcessor::IsIndexInEmoji(int32_t index,
97 const std::string& content, int32_t& startIndex, int32_t& endIndex)
98 {
99 int32_t emojiStartIndex;
100 int32_t emojiEndIndex;
101 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
102 if (relation == EmojiRelation::IN_EMOJI) {
103 startIndex = emojiStartIndex;
104 endIndex = emojiEndIndex;
105 return true;
106 }
107 startIndex = index;
108 endIndex = index;
109 return false;
110 }
111
GetCharacterNum(const std::string & content)112 int32_t TextEmojiProcessor::GetCharacterNum(const std::string& content)
113 {
114 CHECK_NULL_RETURN(!content.empty(), 0);
115 int32_t charNum = 0;
116 std::u16string u16Content = StringUtils::Str8ToStr16(content);
117 int32_t pos = 0;
118 while (pos < static_cast<int32_t>(u16Content.length())) {
119 std::u32string u32Content;
120 int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, pos, u16Content);
121 if (forwardLenU16 > 1) {
122 // emoji exsit
123 pos += forwardLenU16;
124 } else {
125 // char after pos is not emoji, move one pos forward
126 pos++;
127 }
128 charNum++;
129 }
130 TAG_LOGI(AceLogTag::ACE_RICH_TEXT, "ByteNumToCharNum contentLength=%{public}zu pos=%{public}d charNum=%{public}d",
131 content.length(), pos, charNum);
132 return charNum;
133 }
134
GetIndexRelationToEmoji(int32_t index,const std::string & content,int32_t & startIndex,int32_t & endIndex)135 EmojiRelation TextEmojiProcessor::GetIndexRelationToEmoji(int32_t index,
136 const std::string& content, int32_t& startIndex, int32_t& endIndex)
137 {
138 endIndex = index;
139 startIndex = index;
140 std::u16string u16Content = StringUtils::Str8ToStr16(content);
141 if (index < 0 || index > static_cast<int32_t>(u16Content.length())) {
142 return EmojiRelation::NO_EMOJI;
143 }
144 std::u32string u32Content;
145 int32_t backwardLen = GetEmojiLengthBackward(u32Content, index, u16Content);
146
147 int32_t emojiBackwardLengthU16 = 0;
148 if (backwardLen > 0) {
149 int32_t u32Length = static_cast<int32_t>(u32Content.length());
150 std::u16string tempstr = U32ToU16string(u32Content.substr(u32Length - backwardLen));
151 emojiBackwardLengthU16 = static_cast<int32_t>(tempstr.length());
152 index -= emojiBackwardLengthU16;
153 emojiBackwardLengthU16 = endIndex - index; // calculate length of the part of emoji
154 }
155
156 // get the whole emoji from the new start
157 int32_t emojiForwardLengthU16 = GetEmojiLengthU16Forward(u32Content, index, u16Content);
158 TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "emojiBackwardLengthU16=%{public}d emojiForwardLengthU16=%{public}d",
159 emojiBackwardLengthU16, emojiForwardLengthU16);
160 if (emojiBackwardLengthU16 > 0 && emojiForwardLengthU16 > emojiBackwardLengthU16) {
161 // forward length is larget than backward one, which means the startIndex is inside one emoji
162 endIndex = index + emojiForwardLengthU16;
163 startIndex = index;
164 return EmojiRelation::IN_EMOJI;
165 } else if (emojiBackwardLengthU16 == 0 && emojiForwardLengthU16 > 1) {
166 return EmojiRelation::BEFORE_EMOJI;
167 } else if (emojiBackwardLengthU16 > 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
168 // emoji exists before index
169 int32_t newStartIndex = index + emojiForwardLengthU16;
170 int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
171 if (forwardLenU16 > 1) {
172 // forwardLenU16 > 1 means a real emoji is found
173 return EmojiRelation::MIDDLE_EMOJI;
174 } else {
175 return EmojiRelation::AFTER_EMOJI;
176 }
177 } else if (emojiBackwardLengthU16 == 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
178 // no emoji before index
179 int32_t newStartIndex = index + emojiForwardLengthU16;
180 int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
181 if (forwardLenU16 > 1) {
182 // forwardLenU16 > 1 means a real emoji is found
183 return EmojiRelation::BEFORE_EMOJI;
184 }
185 }
186 return EmojiRelation::NO_EMOJI;
187 }
188
IsIndexBeforeOrInEmoji(int32_t index,const std::string & content)189 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::string& content)
190 {
191 int32_t emojiStartIndex;
192 int32_t emojiEndIndex;
193 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
194 return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
195 || relation == EmojiRelation::MIDDLE_EMOJI;
196 }
197
IsIndexAfterOrInEmoji(int32_t index,const std::string & content)198 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::string& content)
199 {
200 int32_t emojiStartIndex;
201 int32_t emojiEndIndex;
202 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
203 return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
204 || relation == EmojiRelation::MIDDLE_EMOJI;
205 }
206
IsIndexBeforeOrInEmoji(int32_t index,const std::string & content,int32_t & startIndex,int32_t & endIndex)207 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::string& content,
208 int32_t& startIndex, int32_t& endIndex)
209 {
210 int32_t emojiStartIndex;
211 int32_t emojiEndIndex;
212 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
213 if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
214 || relation == EmojiRelation::MIDDLE_EMOJI) {
215 startIndex = emojiStartIndex;
216 endIndex = emojiEndIndex;
217 return true;
218 }
219 startIndex = index;
220 endIndex = index;
221 return false;
222 }
223
IsIndexAfterOrInEmoji(int32_t index,const std::string & content,int32_t & startIndex,int32_t & endIndex)224 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::string& content,
225 int32_t& startIndex, int32_t& endIndex)
226 {
227 int32_t emojiStartIndex;
228 int32_t emojiEndIndex;
229 EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
230 if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
231 || relation == EmojiRelation::MIDDLE_EMOJI) {
232 startIndex = emojiStartIndex;
233 endIndex = emojiEndIndex;
234 return true;
235 }
236 startIndex = index;
237 endIndex = index;
238 return false;
239 }
240
SubWstring(int32_t index,int32_t length,const std::wstring & content,bool includeHalf)241 std::wstring TextEmojiProcessor::SubWstring(
242 int32_t index, int32_t length, const std::wstring& content, bool includeHalf)
243 {
244 TextEmojiSubStringRange range = CalSubWstringRange(index, length, content, includeHalf);
245 int32_t rangeLength = range.endIndex - range.startIndex;
246 if (rangeLength == 0) {
247 return L"";
248 }
249 return content.substr(range.startIndex, rangeLength);
250 }
251
CalSubWstringRange(int32_t index,int32_t length,const std::wstring & content,bool includeHalf)252 TextEmojiSubStringRange TextEmojiProcessor::CalSubWstringRange(
253 int32_t index, int32_t length, const std::wstring& content, bool includeHalf)
254 {
255 int32_t startIndex = index;
256 int32_t endIndex = index + length;
257 int32_t emojiStartIndex = index; // [emojiStartIndex, emojiEndIndex)
258 int32_t emojiEndIndex = index;
259 // need to be converted to string for processing
260 // IsIndexBeforeOrInEmoji and IsIndexAfterOrInEmoji is working for string
261 std::string curStr = StringUtils::ToString(content);
262 // exclude right overflow emoji
263 if (!includeHalf && IsIndexInEmoji(endIndex - 1, curStr, emojiStartIndex, emojiEndIndex) &&
264 emojiEndIndex > index + length) {
265 emojiEndIndex = emojiStartIndex;
266 length = emojiEndIndex - index;
267 length = std::max(length, 0);
268 endIndex = index + length;
269 }
270 // process left emoji
271 if (IsIndexBeforeOrInEmoji(startIndex, curStr, emojiStartIndex, emojiEndIndex)) {
272 if (startIndex != emojiStartIndex && !includeHalf) {
273 startIndex = emojiEndIndex; // exclude current emoji
274 }
275 if (startIndex != emojiStartIndex && includeHalf) {
276 startIndex = emojiStartIndex; // include current emoji
277 }
278 }
279 // process right emoji
280 if (IsIndexAfterOrInEmoji(endIndex, curStr, emojiStartIndex, emojiEndIndex)) {
281 if (endIndex != emojiEndIndex && !includeHalf) {
282 endIndex = emojiStartIndex; // exclude current emoji
283 }
284 if (endIndex != emojiEndIndex && includeHalf) {
285 endIndex = emojiEndIndex; // include current emoji
286 }
287 }
288 TextEmojiSubStringRange result = { startIndex, endIndex };
289 return result;
290 }
291
ConvertU8stringUnpairedSurrogates(const std::string & value)292 std::string TextEmojiProcessor::ConvertU8stringUnpairedSurrogates(const std::string& value)
293 {
294 // Unpaired surrogates are replaced with U+FFFD
295 icu::UnicodeString ustring = icu::UnicodeString::fromUTF8(value);
296 std::string result;
297 ustring.toUTF8String(result);
298 return result;
299 }
300
U32ToU16string(const std::u32string & u32str)301 std::u16string TextEmojiProcessor::U32ToU16string(const std::u32string& u32str)
302 {
303 std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> u8ToU16converter;
304 std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> u32ToU8converter;
305 return u8ToU16converter.from_bytes(u32ToU8converter.to_bytes(u32str));
306 }
307
GetEmojiLengthBackward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)308 int32_t TextEmojiProcessor::GetEmojiLengthBackward(std::u32string& u32Content,
309 int32_t& startIndex, const std::u16string& u16Content)
310 {
311 if (startIndex <= 0 || startIndex > static_cast<int32_t>(u16Content.length())) {
312 return 0;
313 }
314 do {
315 // U32 string may be failed to tranfer for spliting. Try to enlarge string scope to get transferred u32 string.
316 std::u16string temp = u16Content.substr(0, startIndex);
317 u32Content = StringUtils::ToU32string(StringUtils::Str16ToStr8(temp));
318 if (static_cast<int32_t>(u32Content.length()) == 0) {
319 ++startIndex;
320 }
321 } while (static_cast<int32_t>(u32Content.length()) == 0 &&
322 startIndex <= static_cast<int32_t>(u16Content.length()));
323 if (u32Content.length() == 0) {
324 TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "GetEmojiLengthBackward u32Content is 0");
325 return 0;
326 }
327 return GetEmojiLengthAtEnd(u32Content, false);
328 }
329
GetEmojiLengthU16Forward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)330 int32_t TextEmojiProcessor::GetEmojiLengthU16Forward(std::u32string& u32Content,
331 int32_t& startIndex, const std::u16string& u16Content)
332 {
333 int32_t forwardLen = GetEmojiLengthForward(u32Content, startIndex, u16Content);
334 return U32ToU16string(u32Content.substr(0, forwardLen)).length();
335 }
336
GetEmojiLengthForward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)337 int32_t TextEmojiProcessor::GetEmojiLengthForward(std::u32string& u32Content,
338 int32_t& startIndex, const std::u16string& u16Content)
339 {
340 if (startIndex >= static_cast<int32_t>(u16Content.length())) {
341 return 0;
342 }
343 do {
344 // U32 string may be failed to tranfer for spliting. Try to enlarge string scope to get transferred u32 string.
345 std::u16string temp = u16Content.substr(startIndex, u16Content.length() - startIndex);
346 u32Content = StringUtils::ToU32string(StringUtils::Str16ToStr8(temp));
347 if (static_cast<int32_t>(u32Content.length()) == 0) {
348 --startIndex;
349 }
350 } while (static_cast<int32_t>(u32Content.length()) == 0 && startIndex >= 0);
351 if (static_cast<int32_t>(u32Content.length()) == 0) {
352 TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "GetEmojiLengthForward u32Content is 0");
353 return 0;
354 }
355 return GetEmojiLengthAtFront(u32Content, false);
356 }
357
IsEmojiModifierBase(uint32_t codePoint)358 bool TextEmojiProcessor::IsEmojiModifierBase(uint32_t codePoint)
359 {
360 // according to the https://unicode.org/Public/emoji/4.0/emoji-data.txt
361 // emoji 4.0 removed 0x1F91D(HANDSHAKE) and 0x1F93C(WRESTLERS) from the emoji modifier base
362 // to handle with the compatibility, we need to add them back
363 if (codePoint == 0x1F91D || codePoint == 0x1F93C) {
364 return true;
365 }
366 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER_BASE);
367 }
368
IsVariationSelector(uint32_t codePoint)369 bool TextEmojiProcessor::IsVariationSelector(uint32_t codePoint)
370 {
371 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_VARIATION_SELECTOR);
372 }
373
IsRegionalIndicatorSymbol(uint32_t codePoint)374 bool TextEmojiProcessor::IsRegionalIndicatorSymbol(uint32_t codePoint)
375 {
376 return u_hasBinaryProperty(codePoint, UCHAR_REGIONAL_INDICATOR);
377 }
378
IsEmoji(uint32_t codePoint)379 bool TextEmojiProcessor::IsEmoji(uint32_t codePoint)
380 {
381 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI);
382 }
383
IsEmojiModifier(uint32_t codePoint)384 bool TextEmojiProcessor::IsEmojiModifier(uint32_t codePoint)
385 {
386 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER);
387 }
388
IsTagSpec(uint32_t codePoint)389 bool TextEmojiProcessor::IsTagSpec(uint32_t codePoint)
390 {
391 // according to the https://www.unicode.org/charts/PDF/U0000.pdf
392 // 0xE0020 - 0xE007E are the visible tag specs.
393 // 0xE007F is CANCEL_TAG, not in here.
394 return 0xE0020 <= codePoint && codePoint <= 0xE007E;
395 }
396
IsKeycapBase(uint32_t codePoint)397 bool TextEmojiProcessor::IsKeycapBase(uint32_t codePoint)
398 {
399 return ('0' <= codePoint && codePoint <= '9') || codePoint == '#' || codePoint == '*';
400 }
401
OnBeginState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)402 void TextEmojiProcessor::OnBeginState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
403 {
404 deleteCount = 1;
405 if (codePoint == LINE_FEED) {
406 state = STATE_LF;
407 } else if (IsVariationSelector(codePoint)) { // only backward
408 state = STATE_VS;
409 } else if (codePoint == CARRIAGE_RETURN) { // only forward
410 state = STATE_CR;
411 } else if (IsRegionalIndicatorSymbol(codePoint)) {
412 state = isBackward ? STATE_ODD_RIS : STATE_EVEN_RIS;
413 } else if (IsEmojiModifier(codePoint)) {
414 state = STATE_EM;
415 } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
416 state = STATE_KEYCAP;
417 } else if (IsEmoji(codePoint)) {
418 state = STATE_EMOJI;
419 } else if (codePoint == CANCEL_TAG) {
420 state = STATE_IN_TAG_QUEUE;
421 } else {
422 state = isBackward ? STATE_FINISHED : STATE_SECOND;
423 }
424 }
425
OnRISState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)426 void TextEmojiProcessor::OnRISState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
427 {
428 if (isBackward) {
429 switch (state) {
430 case STATE_ODD_RIS:
431 if (IsRegionalIndicatorSymbol(codePoint)) {
432 ++deleteCount;
433 state = STATE_EVEN_RIS;
434 } else {
435 state = STATE_FINISHED;
436 }
437 break;
438 case STATE_EVEN_RIS:
439 if (IsRegionalIndicatorSymbol(codePoint)) {
440 state = STATE_FINISHED;
441 }
442 break;
443 }
444 } else {
445 switch (state) {
446 case STATE_ODD_RIS:
447 state = STATE_FINISHED;
448 break;
449 case STATE_EVEN_RIS:
450 if (IsRegionalIndicatorSymbol(codePoint)) {
451 ++deleteCount;
452 state = STATE_ODD_RIS;
453 } else {
454 state = STATE_FINISHED;
455 }
456 break;
457 }
458 }
459 }
460
OnCRLFState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)461 void TextEmojiProcessor::OnCRLFState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
462 {
463 if (isBackward) {
464 if (codePoint == CARRIAGE_RETURN) {
465 ++deleteCount;
466 }
467 state = STATE_FINISHED;
468 } else {
469 switch (state) {
470 case STATE_CR:
471 if (codePoint == LINE_FEED) {
472 ++deleteCount;
473 }
474 state = STATE_FINISHED;
475 break;
476 case STATE_LF:
477 state = STATE_FINISHED;
478 break;
479 }
480 }
481 }
482
OnZWJState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)483 void TextEmojiProcessor::OnZWJState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
484 bool isBackward)
485 {
486 if (isBackward) {
487 switch (state) {
488 case STATE_ZWJ:
489 if (IsEmoji(codePoint)) {
490 ++deleteCount; // delete zwj
491 ++deleteCount; // delete emoji
492 state = IsEmojiModifier(codePoint) ? STATE_EM : STATE_EMOJI;
493 } else if (IsVariationSelector(codePoint)) {
494 lastVSCount = 1;
495 state = STATE_VS_AND_ZWJ;
496 } else {
497 state = STATE_FINISHED;
498 }
499 break;
500 case STATE_VS_AND_ZWJ:
501 if (IsEmoji(codePoint)) {
502 ++deleteCount; // delete zwj
503 ++deleteCount; // delete emoji
504 deleteCount += lastVSCount;
505 lastVSCount = 0;
506 state = STATE_EMOJI;
507 } else {
508 state = STATE_FINISHED;
509 }
510 break;
511 }
512 } else {
513 if (IsEmoji(codePoint)) {
514 ++deleteCount;
515 state = STATE_EMOJI;
516 } else {
517 state = STATE_FINISHED;
518 }
519 }
520 }
521
OnVSState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)522 void TextEmojiProcessor::OnVSState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
523 {
524 if (isBackward) {
525 if (IsEmoji(codePoint)) {
526 ++deleteCount;
527 state = STATE_EMOJI;
528 return;
529 }
530 if (!IsVariationSelector(codePoint) &&
531 u_getCombiningClass(codePoint) == 0) {
532 ++deleteCount;
533 }
534 state = STATE_FINISHED;
535 } else {
536 if (codePoint == ZERO_WIDTH_JOINER) {
537 ++deleteCount;
538 state = STATE_ZWJ;
539 return;
540 } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
541 ++deleteCount;
542 state = STATE_KEYCAP;
543 return;
544 }
545 state = STATE_FINISHED;
546 }
547 }
548
OnKeyCapState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)549 void TextEmojiProcessor::OnKeyCapState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
550 bool isBackward)
551 {
552 if (isBackward) {
553 switch (state) {
554 case STATE_KEYCAP:
555 if (IsVariationSelector(codePoint)) {
556 lastVSCount = 1;
557 state = STATE_VS_AND_KEYCAP;
558 return;
559 }
560 if (IsEmojiModifierBase(codePoint)) {
561 ++deleteCount;
562 state = STATE_FINISHED;
563 }
564 break;
565 case STATE_VS_AND_KEYCAP:
566 if (IsKeycapBase(codePoint)) {
567 deleteCount += lastVSCount + 1;
568 }
569 state = STATE_FINISHED;
570 break;
571 }
572 } else {
573 state = STATE_FINISHED;
574 }
575 }
576
OnEMState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)577 void TextEmojiProcessor::OnEMState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
578 bool isBackward)
579 {
580 if (isBackward) {
581 switch (state) {
582 case STATE_EM:
583 if (IsVariationSelector(codePoint)) {
584 lastVSCount = 1;
585 state = STATE_VS_AND_EM;
586 return;
587 } else if (IsEmojiModifierBase(codePoint)) {
588 ++deleteCount;
589 }
590 state = STATE_FINISHED;
591 break;
592 case STATE_VS_AND_EM:
593 if (IsEmojiModifierBase(codePoint)) {
594 deleteCount += lastVSCount + 1;
595 }
596 state = STATE_FINISHED;
597 break;
598 }
599 } else {
600 if (IsEmoji(codePoint)) {
601 ++deleteCount;
602 state = STATE_EMOJI;
603 return;
604 } else if (IsVariationSelector(codePoint)) {
605 ++deleteCount;
606 state = STATE_VS;
607 return;
608 } else if (codePoint == ZERO_WIDTH_JOINER) {
609 ++deleteCount;
610 state = STATE_ZWJ;
611 return;
612 } else if (IsEmojiModifierBase(codePoint)) {
613 ++deleteCount;
614 }
615 state = STATE_FINISHED;
616 }
617 }
618
OnEmojiState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)619 void TextEmojiProcessor::OnEmojiState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
620 {
621 if (isBackward) {
622 if (codePoint == ZERO_WIDTH_JOINER) {
623 state = STATE_ZWJ;
624 } else {
625 state = STATE_FINISHED;
626 }
627 } else {
628 if (codePoint == ZERO_WIDTH_JOINER) {
629 ++deleteCount;
630 state = STATE_ZWJ;
631 } else if (IsVariationSelector(codePoint)) {
632 ++deleteCount;
633 state = STATE_VS;
634 } else if (IsEmojiModifier(codePoint)) {
635 ++deleteCount;
636 state = STATE_EM;
637 } else if (IsTagSpec(codePoint)) {
638 ++deleteCount;
639 state = STATE_IN_TAG_QUEUE;
640 } else {
641 state = STATE_FINISHED;
642 }
643 }
644 }
645
OnForwardSecondState(uint32_t codePoint,int32_t & state,int32_t & deleteCount)646 void TextEmojiProcessor::OnForwardSecondState(uint32_t codePoint, int32_t& state, int32_t& deleteCount)
647 {
648 if (IsVariationSelector(codePoint)) {
649 ++deleteCount;
650 state = STATE_VS;
651 } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
652 ++deleteCount;
653 state = STATE_KEYCAP;
654 } else {
655 state = STATE_FINISHED;
656 }
657 }
658
OnTagQueueState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)659 void TextEmojiProcessor::OnTagQueueState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
660 {
661 if (isBackward) {
662 if (!IsTagSpec(codePoint)) {
663 state = STATE_FINISHED;
664 }
665 ++deleteCount;
666 } else {
667 if (IsTagSpec(codePoint)) {
668 ++deleteCount;
669 } else if (IsEmoji(codePoint)) {
670 state = STATE_FINISHED;
671 } else if (codePoint == CANCEL_TAG) {
672 ++deleteCount;
673 state = STATE_FINISHED;
674 } else {
675 ++deleteCount;
676 state = STATE_FINISHED;
677 }
678 }
679 }
680
GetEmojiLengthAtEnd(const std::u32string & u32Content,bool isCountNonEmoji)681 int32_t TextEmojiProcessor::GetEmojiLengthAtEnd(const std::u32string& u32Content, bool isCountNonEmoji)
682 {
683 int32_t deleteCount = 0;
684 int32_t lastVSCount = 0;
685 int32_t state = STATE_BEGIN;
686 int32_t tempOffset = static_cast<int32_t>(u32Content.length()) - 1;
687 do {
688 uint32_t codePoint = u32Content[tempOffset];
689 tempOffset--;
690 switch (state) {
691 case STATE_BEGIN:
692 OnBeginState(codePoint, state, deleteCount, true);
693 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
694 // avoid non-emoji
695 return 0;
696 }
697 break;
698 case STATE_LF:
699 OnCRLFState(codePoint, state, deleteCount, true);
700 break;
701 case STATE_ODD_RIS:
702 case STATE_EVEN_RIS:
703 OnRISState(codePoint, state, deleteCount, true);
704 break;
705 case STATE_KEYCAP:
706 case STATE_VS_AND_KEYCAP:
707 OnKeyCapState(codePoint, state, deleteCount, lastVSCount, true);
708 break;
709 case STATE_EM:
710 case STATE_VS_AND_EM:
711 OnEMState(codePoint, state, deleteCount, lastVSCount, true);
712 break;
713 case STATE_VS:
714 OnVSState(codePoint, state, deleteCount, true);
715 break;
716 case STATE_EMOJI:
717 OnEmojiState(codePoint, state, deleteCount, true);
718 break;
719 case STATE_ZWJ:
720 case STATE_VS_AND_ZWJ:
721 OnZWJState(codePoint, state, deleteCount, lastVSCount, true);
722 break;
723 case STATE_IN_TAG_QUEUE:
724 OnTagQueueState(codePoint, state, deleteCount, true);
725 break;
726 default:
727 break;
728 }
729 } while (tempOffset >= 0 && state != STATE_FINISHED);
730 return deleteCount;
731 }
732
BackwardDelete(std::u32string & u32Content)733 bool TextEmojiProcessor::BackwardDelete(std::u32string& u32Content)
734 {
735 int32_t deleteCount = GetEmojiLengthAtEnd(u32Content, true);
736 return HandleDeleteAction(u32Content, deleteCount, true);
737 }
738
GetEmojiLengthAtFront(const std::u32string & u32Content,bool isCountNonEmoji)739 int32_t TextEmojiProcessor::GetEmojiLengthAtFront(const std::u32string& u32Content, bool isCountNonEmoji)
740 {
741 int32_t deleteCount = 0;
742 int32_t state = STATE_BEGIN;
743 int32_t tempOffset = 0;
744 int32_t u32ContentLength = static_cast<int32_t>(u32Content.length());
745 do {
746 int32_t codePoint = static_cast<int32_t>(u32Content[tempOffset]);
747 tempOffset++;
748 switch (state) {
749 case STATE_BEGIN:
750 OnBeginState(codePoint, state, deleteCount, false);
751 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
752 return 0;
753 }
754 break;
755 case STATE_SECOND:
756 OnForwardSecondState(codePoint, state, deleteCount);
757 break;
758 case STATE_CR:
759 case STATE_LF:
760 OnCRLFState(codePoint, state, deleteCount, false);
761 break;
762 case STATE_ODD_RIS:
763 case STATE_EVEN_RIS:
764 OnRISState(codePoint, state, deleteCount, false);
765 break;
766 case STATE_KEYCAP:
767 OnKeyCapState(codePoint, state, deleteCount, deleteCount, false);
768 // in ForwardDelete, we dont need to care about lastVSCount.
769 // "Borrowing" deleteCount to lastVSCount, to avoiding the use of std::optional.
770 // same as above
771 break;
772 case STATE_EM:
773 OnEMState(codePoint, state, deleteCount, deleteCount, false);
774 break;
775 case STATE_VS:
776 OnVSState(codePoint, state, deleteCount, false);
777 break;
778 case STATE_EMOJI:
779 OnEmojiState(codePoint, state, deleteCount, false);
780 break;
781 case STATE_ZWJ:
782 OnZWJState(codePoint, state, deleteCount, deleteCount, false);
783 break;
784 case STATE_IN_TAG_QUEUE:
785 OnTagQueueState(codePoint, state, deleteCount, false);
786 break;
787 default:
788 break;
789 }
790 } while (tempOffset < u32ContentLength && state != STATE_FINISHED);
791 return deleteCount;
792 }
793
ForwardDelete(std::u32string & u32Content)794 bool TextEmojiProcessor::ForwardDelete(std::u32string& u32Content)
795 {
796 int32_t deleteCount = GetEmojiLengthAtFront(u32Content, true);
797 return HandleDeleteAction(u32Content, deleteCount, false);
798 }
799
HandleDeleteAction(std::u32string & u32Content,int32_t deleteCount,bool isBackward)800 bool TextEmojiProcessor::HandleDeleteAction(std::u32string& u32Content, int32_t deleteCount, bool isBackward)
801 {
802 int32_t contentLength = static_cast<int32_t>(u32Content.length());
803 deleteCount = std::min(deleteCount, contentLength);
804 if (isBackward) {
805 if (deleteCount > 0) {
806 int32_t start = contentLength - deleteCount;
807 u32Content.erase(start, deleteCount);
808 return true;
809 }
810 } else {
811 if (deleteCount > 0) {
812 u32Content.erase(0, deleteCount);
813 return true;
814 }
815 }
816 return false;
817 }
818
819 } // namespace OHOS::Ace
820