1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "core/text/text_emoji_processor.h"
17 
18 #include <unicode/uchar.h>
19 
20 #include "unicode/unistr.h"
21 
22 namespace OHOS::Ace {
namespace {

// Code points that the emoji scanner compares against directly.
constexpr int32_t LINE_FEED { 0x0A };
constexpr int32_t CARRIAGE_RETURN { 0x0D };
constexpr int32_t COMBINING_ENCLOSING_KEYCAP { 0x20E3 };
constexpr int32_t ZERO_WIDTH_JOINER { 0x200D };
constexpr int32_t CANCEL_TAG { 0xE007F };

// States of the scanner used by GetEmojiLengthAtEnd / GetEmojiLengthAtFront.
// STATE_BEGIN is the initial state; the scan stops once STATE_FINISHED is reached.
constexpr int32_t STATE_BEGIN { 0 };
constexpr int32_t STATE_SECOND { 1 };
constexpr int32_t STATE_EM { 2 };
constexpr int32_t STATE_VS_AND_KEYCAP { 3 };
constexpr int32_t STATE_ZWJ { 4 };
constexpr int32_t STATE_KEYCAP { 5 };
constexpr int32_t STATE_EMOJI { 6 };
constexpr int32_t STATE_VS_AND_EM { 7 };
constexpr int32_t STATE_VS { 8 };
constexpr int32_t STATE_VS_AND_ZWJ { 9 };
constexpr int32_t STATE_LF { 10 };
constexpr int32_t STATE_CR { 11 };
constexpr int32_t STATE_IN_TAG_QUEUE { 12 };
constexpr int32_t STATE_EVEN_RIS { 13 };
constexpr int32_t STATE_ODD_RIS { 14 };
constexpr int32_t STATE_FINISHED { 20 };

} // namespace
48 
Delete(int32_t startIndex,int32_t length,std::string & content,bool isBackward)49 int32_t TextEmojiProcessor::Delete(int32_t startIndex, int32_t length, std::string& content, bool isBackward)
50 {
51     std::u16string u16 = StringUtils::Str8ToStr16(content);
52     // startIndex from selectController_->GetCaretIndex() is an utf-16 index
53     // so we need an u16string to get the correct index
54     std::u16string remainString = u"";
55     std::u32string u32ContentToDelete;
56     if (isBackward) {
57         if (startIndex == static_cast<int32_t>(u16.length())) {
58             u32ContentToDelete = StringUtils::ToU32string(content);
59         } else {
60             remainString = u16.substr(startIndex, u16.length() - startIndex);
61             std::u16string temp = u16.substr(0, startIndex);
62             u32ContentToDelete = StringUtils::ToU32string(StringUtils::Str16ToStr8(temp));
63         }
64         if (u32ContentToDelete.length() == 0) {
65             return 0;
66         }
67         for (int32_t i = 0; i < length; i++) {
68             if (!BackwardDelete(u32ContentToDelete)) {
69                 break;
70             }
71         }
72         content = StringUtils::U32StringToString(u32ContentToDelete) + StringUtils::Str16ToStr8(remainString);
73     } else {
74         if (startIndex == 0) {
75             u32ContentToDelete = StringUtils::ToU32string(content);
76         } else {
77             remainString = u16.substr(0, startIndex);
78             std::u16string temp = u16.substr(startIndex, u16.length() - startIndex);
79             u32ContentToDelete = StringUtils::ToU32string(StringUtils::Str16ToStr8(temp));
80         }
81         if (u32ContentToDelete.length() == 0) {
82             return 0;
83         }
84         for (int32_t i = 0; i < length; i++) {
85             if (!ForwardDelete(u32ContentToDelete)) {
86                 break;
87             }
88         }
89         content = StringUtils::Str16ToStr8(remainString) + StringUtils::U32StringToString(u32ContentToDelete);
90     }
91     int32_t deletedLength = static_cast<int32_t>(u16.length() - StringUtils::Str8ToStr16(content).length());
92     //we need length to update the cursor
93     return deletedLength;
94 }
95 
IsIndexInEmoji(int32_t index,const std::string & content,int32_t & startIndex,int32_t & endIndex)96 bool TextEmojiProcessor::IsIndexInEmoji(int32_t index,
97     const std::string& content, int32_t& startIndex, int32_t& endIndex)
98 {
99     int32_t emojiStartIndex;
100     int32_t emojiEndIndex;
101     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
102     if (relation == EmojiRelation::IN_EMOJI) {
103         startIndex = emojiStartIndex;
104         endIndex = emojiEndIndex;
105         return true;
106     }
107     startIndex = index;
108     endIndex = index;
109     return false;
110 }
111 
GetCharacterNum(const std::string & content)112 int32_t TextEmojiProcessor::GetCharacterNum(const std::string& content)
113 {
114     CHECK_NULL_RETURN(!content.empty(), 0);
115     int32_t charNum = 0;
116     std::u16string u16Content = StringUtils::Str8ToStr16(content);
117     int32_t pos = 0;
118     while (pos < static_cast<int32_t>(u16Content.length())) {
119         std::u32string u32Content;
120         int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, pos, u16Content);
121         if (forwardLenU16 > 1) {
122             // emoji exsit
123             pos += forwardLenU16;
124         } else {
125             // char after pos is not emoji, move one pos forward
126             pos++;
127         }
128         charNum++;
129     }
130     TAG_LOGI(AceLogTag::ACE_RICH_TEXT, "ByteNumToCharNum contentLength=%{public}zu pos=%{public}d charNum=%{public}d",
131         content.length(), pos, charNum);
132     return charNum;
133 }
134 
GetIndexRelationToEmoji(int32_t index,const std::string & content,int32_t & startIndex,int32_t & endIndex)135 EmojiRelation TextEmojiProcessor::GetIndexRelationToEmoji(int32_t index,
136     const std::string& content, int32_t& startIndex, int32_t& endIndex)
137 {
138     endIndex = index;
139     startIndex = index;
140     std::u16string u16Content = StringUtils::Str8ToStr16(content);
141     if (index < 0 || index > static_cast<int32_t>(u16Content.length())) {
142         return EmojiRelation::NO_EMOJI;
143     }
144     std::u32string u32Content;
145     int32_t backwardLen = GetEmojiLengthBackward(u32Content, index, u16Content);
146 
147     int32_t emojiBackwardLengthU16 = 0;
148     if (backwardLen > 0) {
149         int32_t u32Length = static_cast<int32_t>(u32Content.length());
150         std::u16string tempstr = U32ToU16string(u32Content.substr(u32Length - backwardLen));
151         emojiBackwardLengthU16 = static_cast<int32_t>(tempstr.length());
152         index -= emojiBackwardLengthU16;
153         emojiBackwardLengthU16 = endIndex - index; // calculate length of the part of emoji
154     }
155 
156     // get the whole emoji from the new start
157     int32_t emojiForwardLengthU16 = GetEmojiLengthU16Forward(u32Content, index, u16Content);
158     TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "emojiBackwardLengthU16=%{public}d emojiForwardLengthU16=%{public}d",
159         emojiBackwardLengthU16, emojiForwardLengthU16);
160     if (emojiBackwardLengthU16 > 0 && emojiForwardLengthU16 > emojiBackwardLengthU16) {
161         // forward length is larget than backward one, which means the startIndex is inside one emoji
162         endIndex = index + emojiForwardLengthU16;
163         startIndex = index;
164         return EmojiRelation::IN_EMOJI;
165     } else if (emojiBackwardLengthU16 == 0 && emojiForwardLengthU16 > 1) {
166         return EmojiRelation::BEFORE_EMOJI;
167     } else if (emojiBackwardLengthU16 > 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
168         // emoji exists before index
169         int32_t newStartIndex = index + emojiForwardLengthU16;
170         int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
171         if (forwardLenU16 > 1) {
172             // forwardLenU16 > 1 means a real emoji is found
173             return EmojiRelation::MIDDLE_EMOJI;
174         } else {
175             return EmojiRelation::AFTER_EMOJI;
176         }
177     } else if (emojiBackwardLengthU16 == 1 && emojiBackwardLengthU16 == emojiForwardLengthU16) {
178         // no emoji before index
179         int32_t newStartIndex = index + emojiForwardLengthU16;
180         int32_t forwardLenU16 = GetEmojiLengthU16Forward(u32Content, newStartIndex, u16Content);
181         if (forwardLenU16 > 1) {
182             // forwardLenU16 > 1 means a real emoji is found
183             return EmojiRelation::BEFORE_EMOJI;
184         }
185     }
186     return EmojiRelation::NO_EMOJI;
187 }
188 
IsIndexBeforeOrInEmoji(int32_t index,const std::string & content)189 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::string& content)
190 {
191     int32_t emojiStartIndex;
192     int32_t emojiEndIndex;
193     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
194     return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
195         || relation == EmojiRelation::MIDDLE_EMOJI;
196 }
197 
IsIndexAfterOrInEmoji(int32_t index,const std::string & content)198 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::string& content)
199 {
200     int32_t emojiStartIndex;
201     int32_t emojiEndIndex;
202     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
203     return relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
204         || relation == EmojiRelation::MIDDLE_EMOJI;
205 }
206 
IsIndexBeforeOrInEmoji(int32_t index,const std::string & content,int32_t & startIndex,int32_t & endIndex)207 bool TextEmojiProcessor::IsIndexBeforeOrInEmoji(int32_t index, const std::string& content,
208     int32_t& startIndex, int32_t& endIndex)
209 {
210     int32_t emojiStartIndex;
211     int32_t emojiEndIndex;
212     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
213     if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::BEFORE_EMOJI
214         || relation == EmojiRelation::MIDDLE_EMOJI) {
215         startIndex = emojiStartIndex;
216         endIndex = emojiEndIndex;
217         return true;
218     }
219     startIndex = index;
220     endIndex = index;
221     return false;
222 }
223 
IsIndexAfterOrInEmoji(int32_t index,const std::string & content,int32_t & startIndex,int32_t & endIndex)224 bool TextEmojiProcessor::IsIndexAfterOrInEmoji(int32_t index, const std::string& content,
225     int32_t& startIndex, int32_t& endIndex)
226 {
227     int32_t emojiStartIndex;
228     int32_t emojiEndIndex;
229     EmojiRelation relation = GetIndexRelationToEmoji(index, content, emojiStartIndex, emojiEndIndex);
230     if (relation == EmojiRelation::IN_EMOJI || relation == EmojiRelation::AFTER_EMOJI
231         || relation == EmojiRelation::MIDDLE_EMOJI) {
232         startIndex = emojiStartIndex;
233         endIndex = emojiEndIndex;
234         return true;
235     }
236     startIndex = index;
237     endIndex = index;
238     return false;
239 }
240 
SubWstring(int32_t index,int32_t length,const std::wstring & content,bool includeHalf)241 std::wstring TextEmojiProcessor::SubWstring(
242     int32_t index, int32_t length, const std::wstring& content, bool includeHalf)
243 {
244     TextEmojiSubStringRange range = CalSubWstringRange(index, length, content, includeHalf);
245     int32_t rangeLength = range.endIndex - range.startIndex;
246     if (rangeLength == 0) {
247         return L"";
248     }
249     return content.substr(range.startIndex, rangeLength);
250 }
251 
CalSubWstringRange(int32_t index,int32_t length,const std::wstring & content,bool includeHalf)252 TextEmojiSubStringRange TextEmojiProcessor::CalSubWstringRange(
253     int32_t index, int32_t length, const std::wstring& content, bool includeHalf)
254 {
255     int32_t startIndex = index;
256     int32_t endIndex = index + length;
257     int32_t emojiStartIndex = index;   // [emojiStartIndex, emojiEndIndex)
258     int32_t emojiEndIndex = index;
259     // need to be converted to string for processing
260     // IsIndexBeforeOrInEmoji and IsIndexAfterOrInEmoji is working for string
261     std::string curStr = StringUtils::ToString(content);
262     // exclude right overflow emoji
263     if (!includeHalf && IsIndexInEmoji(endIndex - 1, curStr, emojiStartIndex, emojiEndIndex) &&
264         emojiEndIndex > index + length) {
265         emojiEndIndex = emojiStartIndex;
266         length = emojiEndIndex - index;
267         length = std::max(length, 0);
268         endIndex = index + length;
269     }
270     // process left emoji
271     if (IsIndexBeforeOrInEmoji(startIndex, curStr, emojiStartIndex, emojiEndIndex)) {
272         if (startIndex != emojiStartIndex && !includeHalf) {
273             startIndex = emojiEndIndex; // exclude current emoji
274         }
275         if (startIndex != emojiStartIndex && includeHalf) {
276             startIndex = emojiStartIndex; // include current emoji
277         }
278     }
279     // process right emoji
280     if (IsIndexAfterOrInEmoji(endIndex, curStr, emojiStartIndex, emojiEndIndex)) {
281         if (endIndex != emojiEndIndex && !includeHalf) {
282             endIndex = emojiStartIndex; // exclude current emoji
283         }
284         if (endIndex != emojiEndIndex && includeHalf) {
285             endIndex = emojiEndIndex; // include current emoji
286         }
287     }
288     TextEmojiSubStringRange result = { startIndex, endIndex };
289     return result;
290 }
291 
ConvertU8stringUnpairedSurrogates(const std::string & value)292 std::string TextEmojiProcessor::ConvertU8stringUnpairedSurrogates(const std::string& value)
293 {
294     // Unpaired surrogates are replaced with U+FFFD
295     icu::UnicodeString ustring = icu::UnicodeString::fromUTF8(value);
296     std::string result;
297     ustring.toUTF8String(result);
298     return result;
299 }
300 
U32ToU16string(const std::u32string & u32str)301 std::u16string TextEmojiProcessor::U32ToU16string(const std::u32string& u32str)
302 {
303     std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> u8ToU16converter;
304     std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> u32ToU8converter;
305     return u8ToU16converter.from_bytes(u32ToU8converter.to_bytes(u32str));
306 }
307 
GetEmojiLengthBackward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)308 int32_t TextEmojiProcessor::GetEmojiLengthBackward(std::u32string& u32Content,
309     int32_t& startIndex, const std::u16string& u16Content)
310 {
311     if (startIndex <= 0 || startIndex > static_cast<int32_t>(u16Content.length())) {
312         return 0;
313     }
314     do {
315         // U32 string may be failed to tranfer for spliting. Try to enlarge string scope to get transferred u32 string.
316         std::u16string temp = u16Content.substr(0, startIndex);
317         u32Content = StringUtils::ToU32string(StringUtils::Str16ToStr8(temp));
318         if (static_cast<int32_t>(u32Content.length()) == 0) {
319             ++startIndex;
320         }
321     } while (static_cast<int32_t>(u32Content.length()) == 0 &&
322             startIndex <= static_cast<int32_t>(u16Content.length()));
323     if (u32Content.length() == 0) {
324         TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "GetEmojiLengthBackward u32Content is 0");
325         return 0;
326     }
327     return GetEmojiLengthAtEnd(u32Content, false);
328 }
329 
GetEmojiLengthU16Forward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)330 int32_t TextEmojiProcessor::GetEmojiLengthU16Forward(std::u32string& u32Content,
331     int32_t& startIndex, const std::u16string& u16Content)
332 {
333     int32_t forwardLen = GetEmojiLengthForward(u32Content, startIndex, u16Content);
334     return U32ToU16string(u32Content.substr(0, forwardLen)).length();
335 }
336 
GetEmojiLengthForward(std::u32string & u32Content,int32_t & startIndex,const std::u16string & u16Content)337 int32_t TextEmojiProcessor::GetEmojiLengthForward(std::u32string& u32Content,
338     int32_t& startIndex, const std::u16string& u16Content)
339 {
340     if (startIndex >= static_cast<int32_t>(u16Content.length())) {
341         return 0;
342     }
343     do {
344         // U32 string may be failed to tranfer for spliting. Try to enlarge string scope to get transferred u32 string.
345         std::u16string temp = u16Content.substr(startIndex, u16Content.length() - startIndex);
346         u32Content = StringUtils::ToU32string(StringUtils::Str16ToStr8(temp));
347         if (static_cast<int32_t>(u32Content.length()) == 0) {
348             --startIndex;
349         }
350     } while (static_cast<int32_t>(u32Content.length()) == 0 && startIndex >= 0);
351     if (static_cast<int32_t>(u32Content.length()) == 0) {
352         TAG_LOGD(AceLogTag::ACE_RICH_TEXT, "GetEmojiLengthForward u32Content is 0");
353         return 0;
354     }
355     return GetEmojiLengthAtFront(u32Content, false);
356 }
357 
IsEmojiModifierBase(uint32_t codePoint)358 bool TextEmojiProcessor::IsEmojiModifierBase(uint32_t codePoint)
359 {
360     // according to the https://unicode.org/Public/emoji/4.0/emoji-data.txt
361     // emoji 4.0 removed 0x1F91D(HANDSHAKE) and 0x1F93C(WRESTLERS) from the emoji modifier base
362     // to handle with the compatibility, we need to add them back
363     if (codePoint == 0x1F91D || codePoint == 0x1F93C) {
364         return true;
365     }
366     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER_BASE);
367 }
368 
IsVariationSelector(uint32_t codePoint)369 bool TextEmojiProcessor::IsVariationSelector(uint32_t codePoint)
370 {
371     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_VARIATION_SELECTOR);
372 }
373 
IsRegionalIndicatorSymbol(uint32_t codePoint)374 bool TextEmojiProcessor::IsRegionalIndicatorSymbol(uint32_t codePoint)
375 {
376     return u_hasBinaryProperty(codePoint, UCHAR_REGIONAL_INDICATOR);
377 }
378 
IsEmoji(uint32_t codePoint)379 bool TextEmojiProcessor::IsEmoji(uint32_t codePoint)
380 {
381     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI);
382 }
383 
IsEmojiModifier(uint32_t codePoint)384 bool TextEmojiProcessor::IsEmojiModifier(uint32_t codePoint)
385 {
386     return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_EMOJI_MODIFIER);
387 }
388 
IsTagSpec(uint32_t codePoint)389 bool TextEmojiProcessor::IsTagSpec(uint32_t codePoint)
390 {
391     // according to the https://www.unicode.org/charts/PDF/U0000.pdf
392     // 0xE0020 - 0xE007E are the visible tag specs.
393     // 0xE007F is CANCEL_TAG, not in here.
394     return 0xE0020 <= codePoint && codePoint <= 0xE007E;
395 }
396 
IsKeycapBase(uint32_t codePoint)397 bool TextEmojiProcessor::IsKeycapBase(uint32_t codePoint)
398 {
399     return ('0' <= codePoint && codePoint <= '9') || codePoint == '#' || codePoint == '*';
400 }
401 
OnBeginState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)402 void TextEmojiProcessor::OnBeginState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
403 {
404     deleteCount = 1;
405     if (codePoint == LINE_FEED) {
406         state = STATE_LF;
407     } else if (IsVariationSelector(codePoint)) { // only backward
408         state = STATE_VS;
409     } else if (codePoint == CARRIAGE_RETURN) { // only forward
410         state = STATE_CR;
411     } else if (IsRegionalIndicatorSymbol(codePoint)) {
412         state = isBackward ? STATE_ODD_RIS : STATE_EVEN_RIS;
413     } else if (IsEmojiModifier(codePoint)) {
414         state = STATE_EM;
415     } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
416         state = STATE_KEYCAP;
417     } else if (IsEmoji(codePoint)) {
418         state = STATE_EMOJI;
419     } else if (codePoint == CANCEL_TAG) {
420         state = STATE_IN_TAG_QUEUE;
421     } else {
422         state = isBackward ? STATE_FINISHED : STATE_SECOND;
423     }
424 }
425 
OnRISState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)426 void TextEmojiProcessor::OnRISState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
427 {
428     if (isBackward) {
429         switch (state) {
430             case STATE_ODD_RIS:
431                 if (IsRegionalIndicatorSymbol(codePoint)) {
432                     ++deleteCount;
433                     state = STATE_EVEN_RIS;
434                 } else {
435                     state = STATE_FINISHED;
436                 }
437                 break;
438             case STATE_EVEN_RIS:
439                 if (IsRegionalIndicatorSymbol(codePoint)) {
440                     state = STATE_FINISHED;
441                 }
442                 break;
443         }
444     } else {
445         switch (state) {
446             case STATE_ODD_RIS:
447                 state = STATE_FINISHED;
448                 break;
449             case STATE_EVEN_RIS:
450                 if (IsRegionalIndicatorSymbol(codePoint)) {
451                     ++deleteCount;
452                     state = STATE_ODD_RIS;
453                 } else {
454                     state = STATE_FINISHED;
455                 }
456                 break;
457         }
458     }
459 }
460 
OnCRLFState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)461 void TextEmojiProcessor::OnCRLFState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
462 {
463     if (isBackward) {
464         if (codePoint == CARRIAGE_RETURN) {
465             ++deleteCount;
466         }
467         state = STATE_FINISHED;
468     } else {
469         switch (state) {
470             case STATE_CR:
471                 if (codePoint == LINE_FEED) {
472                     ++deleteCount;
473                 }
474                 state = STATE_FINISHED;
475                 break;
476             case STATE_LF:
477                 state = STATE_FINISHED;
478                 break;
479         }
480     }
481 }
482 
OnZWJState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)483 void TextEmojiProcessor::OnZWJState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
484     bool isBackward)
485 {
486     if (isBackward) {
487         switch (state) {
488             case STATE_ZWJ:
489                 if (IsEmoji(codePoint)) {
490                     ++deleteCount; // delete zwj
491                     ++deleteCount; // delete emoji
492                     state = IsEmojiModifier(codePoint) ? STATE_EM : STATE_EMOJI;
493                 } else if (IsVariationSelector(codePoint)) {
494                     lastVSCount = 1;
495                     state = STATE_VS_AND_ZWJ;
496                 } else {
497                     state = STATE_FINISHED;
498                 }
499                 break;
500             case STATE_VS_AND_ZWJ:
501                 if (IsEmoji(codePoint)) {
502                     ++deleteCount; // delete zwj
503                     ++deleteCount; // delete emoji
504                     deleteCount += lastVSCount;
505                     lastVSCount = 0;
506                     state = STATE_EMOJI;
507                 } else {
508                     state = STATE_FINISHED;
509                 }
510                 break;
511         }
512     } else {
513         if (IsEmoji(codePoint)) {
514             ++deleteCount;
515             state = STATE_EMOJI;
516         } else {
517             state = STATE_FINISHED;
518         }
519     }
520 }
521 
OnVSState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)522 void TextEmojiProcessor::OnVSState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
523 {
524     if (isBackward) {
525         if (IsEmoji(codePoint)) {
526             ++deleteCount;
527             state = STATE_EMOJI;
528             return;
529         }
530         if (!IsVariationSelector(codePoint) &&
531             u_getCombiningClass(codePoint) == 0) {
532             ++deleteCount;
533         }
534         state = STATE_FINISHED;
535     } else {
536         if (codePoint == ZERO_WIDTH_JOINER) {
537             ++deleteCount;
538             state = STATE_ZWJ;
539             return;
540         } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
541             ++deleteCount;
542             state = STATE_KEYCAP;
543             return;
544         }
545         state = STATE_FINISHED;
546     }
547 }
548 
OnKeyCapState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)549 void TextEmojiProcessor::OnKeyCapState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
550     bool isBackward)
551 {
552     if (isBackward) {
553         switch (state) {
554             case STATE_KEYCAP:
555                 if (IsVariationSelector(codePoint)) {
556                     lastVSCount = 1;
557                     state = STATE_VS_AND_KEYCAP;
558                     return;
559                 }
560                 if (IsEmojiModifierBase(codePoint)) {
561                     ++deleteCount;
562                     state = STATE_FINISHED;
563                 }
564                 break;
565             case STATE_VS_AND_KEYCAP:
566                 if (IsKeycapBase(codePoint)) {
567                     deleteCount += lastVSCount + 1;
568                 }
569                 state = STATE_FINISHED;
570                 break;
571         }
572     } else {
573         state = STATE_FINISHED;
574     }
575 }
576 
OnEMState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,int32_t & lastVSCount,bool isBackward)577 void TextEmojiProcessor::OnEMState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, int32_t& lastVSCount,
578     bool isBackward)
579 {
580     if (isBackward) {
581         switch (state) {
582             case STATE_EM:
583                 if (IsVariationSelector(codePoint)) {
584                     lastVSCount = 1;
585                     state = STATE_VS_AND_EM;
586                     return;
587                 } else if (IsEmojiModifierBase(codePoint)) {
588                     ++deleteCount;
589                 }
590                 state = STATE_FINISHED;
591                 break;
592             case STATE_VS_AND_EM:
593                 if (IsEmojiModifierBase(codePoint)) {
594                     deleteCount += lastVSCount + 1;
595                 }
596                 state = STATE_FINISHED;
597                 break;
598         }
599     } else {
600         if (IsEmoji(codePoint)) {
601             ++deleteCount;
602             state = STATE_EMOJI;
603             return;
604         } else if (IsVariationSelector(codePoint)) {
605             ++deleteCount;
606             state = STATE_VS;
607             return;
608         } else if (codePoint == ZERO_WIDTH_JOINER) {
609             ++deleteCount;
610             state = STATE_ZWJ;
611             return;
612         } else if (IsEmojiModifierBase(codePoint)) {
613             ++deleteCount;
614         }
615         state = STATE_FINISHED;
616     }
617 }
618 
OnEmojiState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)619 void TextEmojiProcessor::OnEmojiState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
620 {
621     if (isBackward) {
622         if (codePoint == ZERO_WIDTH_JOINER) {
623             state = STATE_ZWJ;
624         } else {
625             state = STATE_FINISHED;
626         }
627     } else {
628         if (codePoint == ZERO_WIDTH_JOINER) {
629             ++deleteCount;
630             state = STATE_ZWJ;
631         } else if (IsVariationSelector(codePoint)) {
632             ++deleteCount;
633             state = STATE_VS;
634         } else if (IsEmojiModifier(codePoint)) {
635             ++deleteCount;
636             state = STATE_EM;
637         } else if (IsTagSpec(codePoint)) {
638             ++deleteCount;
639             state = STATE_IN_TAG_QUEUE;
640         } else {
641             state = STATE_FINISHED;
642         }
643     }
644 }
645 
OnForwardSecondState(uint32_t codePoint,int32_t & state,int32_t & deleteCount)646 void TextEmojiProcessor::OnForwardSecondState(uint32_t codePoint, int32_t& state, int32_t& deleteCount)
647 {
648     if (IsVariationSelector(codePoint)) {
649         ++deleteCount;
650         state = STATE_VS;
651     } else if (codePoint == COMBINING_ENCLOSING_KEYCAP) {
652         ++deleteCount;
653         state = STATE_KEYCAP;
654     } else {
655         state = STATE_FINISHED;
656     }
657 }
658 
OnTagQueueState(uint32_t codePoint,int32_t & state,int32_t & deleteCount,bool isBackward)659 void TextEmojiProcessor::OnTagQueueState(uint32_t codePoint, int32_t& state, int32_t& deleteCount, bool isBackward)
660 {
661     if (isBackward) {
662         if (!IsTagSpec(codePoint)) {
663             state = STATE_FINISHED;
664         }
665         ++deleteCount;
666     } else {
667         if (IsTagSpec(codePoint)) {
668             ++deleteCount;
669         } else if (IsEmoji(codePoint)) {
670             state = STATE_FINISHED;
671         } else if (codePoint == CANCEL_TAG) {
672             ++deleteCount;
673             state = STATE_FINISHED;
674         } else {
675             ++deleteCount;
676             state = STATE_FINISHED;
677         }
678     }
679 }
680 
GetEmojiLengthAtEnd(const std::u32string & u32Content,bool isCountNonEmoji)681 int32_t TextEmojiProcessor::GetEmojiLengthAtEnd(const std::u32string& u32Content, bool isCountNonEmoji)
682 {
683     int32_t deleteCount = 0;
684     int32_t lastVSCount = 0;
685     int32_t state = STATE_BEGIN;
686     int32_t tempOffset = static_cast<int32_t>(u32Content.length()) - 1;
687     do {
688         uint32_t codePoint = u32Content[tempOffset];
689         tempOffset--;
690         switch (state) {
691             case STATE_BEGIN:
692                 OnBeginState(codePoint, state, deleteCount, true);
693                 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
694                     // avoid non-emoji
695                     return 0;
696                 }
697                 break;
698             case STATE_LF:
699                 OnCRLFState(codePoint, state, deleteCount, true);
700                 break;
701             case STATE_ODD_RIS:
702             case STATE_EVEN_RIS:
703                 OnRISState(codePoint, state, deleteCount, true);
704                 break;
705             case STATE_KEYCAP:
706             case STATE_VS_AND_KEYCAP:
707                 OnKeyCapState(codePoint, state, deleteCount, lastVSCount, true);
708                 break;
709             case STATE_EM:
710             case STATE_VS_AND_EM:
711                 OnEMState(codePoint, state, deleteCount, lastVSCount, true);
712                 break;
713             case STATE_VS:
714                 OnVSState(codePoint, state, deleteCount, true);
715                 break;
716             case STATE_EMOJI:
717                 OnEmojiState(codePoint, state, deleteCount, true);
718                 break;
719             case STATE_ZWJ:
720             case STATE_VS_AND_ZWJ:
721                 OnZWJState(codePoint, state, deleteCount, lastVSCount, true);
722                 break;
723             case STATE_IN_TAG_QUEUE:
724                 OnTagQueueState(codePoint, state, deleteCount, true);
725                 break;
726             default:
727                 break;
728         }
729     } while (tempOffset >= 0 && state != STATE_FINISHED);
730     return deleteCount;
731 }
732 
BackwardDelete(std::u32string & u32Content)733 bool TextEmojiProcessor::BackwardDelete(std::u32string& u32Content)
734 {
735     int32_t deleteCount = GetEmojiLengthAtEnd(u32Content, true);
736     return HandleDeleteAction(u32Content, deleteCount, true);
737 }
738 
GetEmojiLengthAtFront(const std::u32string & u32Content,bool isCountNonEmoji)739 int32_t TextEmojiProcessor::GetEmojiLengthAtFront(const std::u32string& u32Content, bool isCountNonEmoji)
740 {
741     int32_t deleteCount = 0;
742     int32_t state = STATE_BEGIN;
743     int32_t tempOffset = 0;
744     int32_t u32ContentLength = static_cast<int32_t>(u32Content.length());
745     do {
746         int32_t codePoint = static_cast<int32_t>(u32Content[tempOffset]);
747         tempOffset++;
748         switch (state) {
749             case STATE_BEGIN:
750                 OnBeginState(codePoint, state, deleteCount, false);
751                 if (!isCountNonEmoji && (state == STATE_FINISHED || state == STATE_SECOND)) {
752                     return 0;
753                 }
754                 break;
755             case STATE_SECOND:
756                 OnForwardSecondState(codePoint, state, deleteCount);
757                 break;
758             case STATE_CR:
759             case STATE_LF:
760                 OnCRLFState(codePoint, state, deleteCount, false);
761                 break;
762             case STATE_ODD_RIS:
763             case STATE_EVEN_RIS:
764                 OnRISState(codePoint, state, deleteCount, false);
765                 break;
766             case STATE_KEYCAP:
767                 OnKeyCapState(codePoint, state, deleteCount, deleteCount, false);
768                 // in ForwardDelete, we dont need to care about lastVSCount.
769                 // "Borrowing" deleteCount to lastVSCount, to avoiding the use of std::optional.
770                 // same as above
771                 break;
772             case STATE_EM:
773                 OnEMState(codePoint, state, deleteCount, deleteCount, false);
774                 break;
775             case STATE_VS:
776                 OnVSState(codePoint, state, deleteCount, false);
777                 break;
778             case STATE_EMOJI:
779                 OnEmojiState(codePoint, state, deleteCount, false);
780                 break;
781             case STATE_ZWJ:
782                 OnZWJState(codePoint, state, deleteCount, deleteCount, false);
783                 break;
784             case STATE_IN_TAG_QUEUE:
785                 OnTagQueueState(codePoint, state, deleteCount, false);
786                 break;
787             default:
788                 break;
789         }
790     } while (tempOffset < u32ContentLength && state != STATE_FINISHED);
791     return deleteCount;
792 }
793 
ForwardDelete(std::u32string & u32Content)794 bool TextEmojiProcessor::ForwardDelete(std::u32string& u32Content)
795 {
796     int32_t deleteCount = GetEmojiLengthAtFront(u32Content, true);
797     return HandleDeleteAction(u32Content, deleteCount, false);
798 }
799 
HandleDeleteAction(std::u32string & u32Content,int32_t deleteCount,bool isBackward)800 bool TextEmojiProcessor::HandleDeleteAction(std::u32string& u32Content, int32_t deleteCount, bool isBackward)
801 {
802     int32_t contentLength = static_cast<int32_t>(u32Content.length());
803     deleteCount = std::min(deleteCount, contentLength);
804     if (isBackward) {
805         if (deleteCount > 0) {
806             int32_t start = contentLength - deleteCount;
807             u32Content.erase(start, deleteCount);
808             return true;
809         }
810     } else {
811         if (deleteCount > 0) {
812             u32Content.erase(0, deleteCount);
813             return true;
814         }
815     }
816     return false;
817 }
818 
819 } // namespace OHOS::Ace
820