1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "character.h"
16
17 #include <sys/types.h>
18 #include <set>
19 #include <string>
20
21 #include "cctype"
22 #include "map"
23 #include "string"
24 #include "unicode/umachine.h"
25 #include "unicode/unistr.h"
26 #include "unicode/urename.h"
27
28 namespace OHOS {
29 namespace Global {
30 namespace I18n {
31 static std::set<UCharDirection> RTLDirectionSet = {
32 U_RIGHT_TO_LEFT,
33 U_RIGHT_TO_LEFT_ARABIC,
34 U_RIGHT_TO_LEFT_EMBEDDING,
35 U_RIGHT_TO_LEFT_OVERRIDE,
36 U_RIGHT_TO_LEFT_ISOLATE
37 };
38
IsDigit(const std::string & character)39 bool IsDigit(const std::string &character)
40 {
41 icu::UnicodeString unicodeString(character.c_str());
42 UChar32 char32 = unicodeString.char32At(0);
43 return u_isdigit(char32);
44 }
45
IsSpaceChar(const std::string & character)46 bool IsSpaceChar(const std::string &character)
47 {
48 icu::UnicodeString unicodeString(character.c_str());
49 UChar32 char32 = unicodeString.char32At(0);
50 return u_isJavaSpaceChar(char32);
51 }
52
IsWhiteSpace(const std::string & character)53 bool IsWhiteSpace(const std::string &character)
54 {
55 icu::UnicodeString unicodeString(character.c_str());
56 UChar32 char32 = unicodeString.char32At(0);
57 return u_isWhitespace(char32);
58 }
59
IsRTLCharacter(const std::string & character)60 bool IsRTLCharacter(const std::string &character)
61 {
62 icu::UnicodeString unicodeString(character.c_str());
63 UChar32 char32 = unicodeString.char32At(0);
64 UCharDirection direction = u_charDirection(char32);
65 if (RTLDirectionSet.find(direction) != RTLDirectionSet.end()) {
66 return true;
67 }
68 return false;
69 }
70
IsIdeoGraphic(const std::string & character)71 bool IsIdeoGraphic(const std::string &character)
72 {
73 icu::UnicodeString unicodeString(character.c_str());
74 UChar32 char32 = unicodeString.char32At(0);
75 return u_hasBinaryProperty(char32, UCHAR_IDEOGRAPHIC);
76 }
77
IsLetter(const std::string & character)78 bool IsLetter(const std::string &character)
79 {
80 icu::UnicodeString unicodeString(character.c_str());
81 UChar32 char32 = unicodeString.char32At(0);
82 return isalpha(char32);
83 }
84
IsLowerCase(const std::string & character)85 bool IsLowerCase(const std::string &character)
86 {
87 icu::UnicodeString unicodeString(character.c_str());
88 UChar32 char32 = unicodeString.char32At(0);
89 return u_islower(char32);
90 }
91
IsUpperCase(const std::string & character)92 bool IsUpperCase(const std::string &character)
93 {
94 icu::UnicodeString unicodeString(character.c_str());
95 UChar32 char32 = unicodeString.char32At(0);
96 return u_isupper(char32);
97 }
98
99 std::map<UCharCategory, std::string> categoryMap = {
100 { U_UNASSIGNED, "U_UNASSIGNED" },
101 { U_GENERAL_OTHER_TYPES, "U_GENERAL_OTHER_TYPES" },
102 { U_UPPERCASE_LETTER, "U_UPPERCASE_LETTER" },
103 { U_LOWERCASE_LETTER, "U_LOWERCASE_LETTER" },
104 { U_TITLECASE_LETTER, "U_TITLECASE_LETTER" },
105 { U_MODIFIER_LETTER, "U_MODIFIER_LETTER" },
106 { U_OTHER_LETTER, "U_OTHER_LETTER" },
107 { U_NON_SPACING_MARK, "U_NON_SPACING_MARK" },
108 { U_ENCLOSING_MARK, "U_ENCLOSING_MARK" },
109 { U_COMBINING_SPACING_MARK, "U_COMBINING_SPACING_MARK" },
110 { U_DECIMAL_DIGIT_NUMBER, "U_DECIMAL_DIGIT_NUMBER" },
111 { U_LETTER_NUMBER, "U_LETTER_NUMBER" },
112 { U_OTHER_NUMBER, "U_OTHER_NUMBER" },
113 { U_SPACE_SEPARATOR, "U_SPACE_SEPARATOR" },
114 { U_LINE_SEPARATOR, "U_LINE_SEPARATOR" },
115 { U_PARAGRAPH_SEPARATOR, "U_PARAGRAPH_SEPARATOR" },
116 { U_CONTROL_CHAR, "U_CONTROL_CHAR" },
117 { U_FORMAT_CHAR, "U_FORMAT_CHAR" },
118 { U_PRIVATE_USE_CHAR, "U_PRIVATE_USE_CHAR" },
119 { U_SURROGATE, "U_SURROGATE" },
120 { U_DASH_PUNCTUATION, "U_DASH_PUNCTUATION" },
121 { U_START_PUNCTUATION, "U_START_PUNCTUATION" },
122 { U_END_PUNCTUATION, "U_END_PUNCTUATION" },
123 { U_CONNECTOR_PUNCTUATION, "U_CONNECTOR_PUNCTUATION" },
124 { U_OTHER_PUNCTUATION, "U_OTHER_PUNCTUATION" },
125 { U_MATH_SYMBOL, "U_MATH_SYMBOL" },
126 { U_CURRENCY_SYMBOL, "U_CURRENCY_SYMBOL" },
127 { U_MODIFIER_SYMBOL, "U_MODIFIER_SYMBOL" },
128 { U_OTHER_SYMBOL, "U_OTHER_SYMBOL" },
129 { U_INITIAL_PUNCTUATION, "U_INITIAL_PUNCTUATION" },
130 { U_FINAL_PUNCTUATION, "U_FINAL_PUNCTUATION" },
131 { U_CHAR_CATEGORY_COUNT, "U_CHAR_CATEGORY_COUNT" },
132 };
133
GetType(const std::string & character)134 std::string GetType(const std::string &character)
135 {
136 icu::UnicodeString unicodeString(character.c_str());
137 UChar32 char32 = unicodeString.char32At(0);
138 int8_t category = u_charType(char32);
139 return categoryMap[UCharCategory(category)];
140 }
141 } // namespace I18n
142 } // namespace Global
143 } // namespace OHOS