1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
#include "positive_rule.h"

#include <memory>

#include "i18n_hilog.h"
#include "phonenumbers/phonenumberutil.h"
#include "phonenumbers/phonenumber.h"
#include "phonenumbers/shortnumberinfo.h"
20
21 namespace OHOS {
22 namespace Global {
23 namespace I18n {
24 using i18n::phonenumbers::PhoneNumber;
25 using i18n::phonenumbers::PhoneNumberUtil;
26 using i18n::phonenumbers::ShortNumberInfo;
PositiveRule(icu::UnicodeString & regex,std::string & handleType,std::string & insensitive)27 PositiveRule::PositiveRule(icu::UnicodeString& regex, std::string& handleType, std::string& insensitive)
28 {
29 this->regex = regex;
30 this->status = U_ZERO_ERROR;
31 this->handleType = handleType;
32 this->insensitive = insensitive;
33 if (regex.length() == 0) {
34 return;
35 }
36 if (U_FAILURE(this->status)) {
37 HILOG_ERROR_I18N("member pattern construct failed.");
38 }
39 }
40
GetPattern()41 icu::RegexPattern* PositiveRule::GetPattern()
42 {
43 // Sets whether regular expression matching is case sensitive
44 if (insensitive == "True") {
45 return icu::RegexPattern::compile(this->regex, URegexpFlag::UREGEX_CASE_INSENSITIVE, this->status);
46 } else {
47 return icu::RegexPattern::compile(this->regex, 0, this->status);
48 }
49 }
50
51 // check whether the bracket at the start position are redundant
IsNumberWithOneBracket(icu::UnicodeString & message)52 bool PositiveRule::IsNumberWithOneBracket(icu::UnicodeString& message)
53 {
54 if (message != "") {
55 int numLeft = 0;
56 int numRight = 0;
57 int len = message.length();
58 for (int i = 0; i < len; i++) {
59 if (message[i] == '(' || message[i] == '[') {
60 numLeft++;
61 }
62 if (message[i] == ')' || message[i] == ']') {
63 numRight++;
64 }
65 }
66 if (numLeft > numRight && (message[0] == '(' || message[0] == '[')) {
67 return true;
68 }
69 }
70 return false;
71 }
72
DealStringWithOneBracket(icu::UnicodeString & message)73 icu::UnicodeString PositiveRule::DealStringWithOneBracket(icu::UnicodeString& message)
74 {
75 if (IsNumberWithOneBracket(message)) {
76 return message.tempSubString(1);
77 }
78 return message;
79 }
80
Handle(PhoneNumberMatch * match,icu::UnicodeString & message)81 std::vector<MatchedNumberInfo> PositiveRule::Handle(PhoneNumberMatch* match, icu::UnicodeString& message)
82 {
83 icu::UnicodeString rawString = match->raw_string().c_str();
84 icu::UnicodeString str = DealStringWithOneBracket(rawString);
85
86 icu::RegexPattern* pattern = this->GetPattern();
87 UErrorCode status = U_ZERO_ERROR;
88 icu::RegexMatcher* mat1 = pattern->matcher(str, status);
89 if (mat1 != nullptr && mat1->find(status)) {
90 std::vector<MatchedNumberInfo> infoList = this->HandleInner(match, message);
91 delete mat1;
92 delete pattern;
93 return infoList;
94 }
95 delete mat1;
96 icu::RegexMatcher* mat2 = pattern->matcher(message, status);
97 if (mat2 != nullptr && mat2->find(status)) {
98 std::vector<MatchedNumberInfo> infoList = this->HandleInner(match, message);
99 delete mat2;
100 delete pattern;
101 return infoList;
102 }
103 delete mat2;
104 delete pattern;
105 return {};
106 }
107
HandleInner(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)108 std::vector<MatchedNumberInfo> PositiveRule::HandleInner(PhoneNumberMatch *possibleNumber, icu::UnicodeString& message)
109 {
110 if (handleType == "Operator") {
111 return HandleOperator(possibleNumber, message);
112 } else if (handleType == "Blank") {
113 return HandleBlank(possibleNumber, message);
114 } else if (handleType == "Slant") {
115 return HandleSlant(possibleNumber, message);
116 } else if (handleType == "StartWithMobile") {
117 return HandleStartWithMobile(possibleNumber, message);
118 } else if (handleType == "EndWithMobile") {
119 return HandleEndWithMobile(possibleNumber, message);
120 }
121 return HandleDefault(possibleNumber, message);
122 }
123
HandleDefault(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)124 std::vector<MatchedNumberInfo> PositiveRule::HandleDefault(PhoneNumberMatch* possibleNumber,
125 icu::UnicodeString& message)
126 {
127 MatchedNumberInfo matcher;
128 matcher.SetBegin(0);
129 matcher.SetEnd(1);
130 icu::UnicodeString content = "";
131 matcher.SetContent(content);
132 std::vector<MatchedNumberInfo> matchedNumberInfoList;
133 matchedNumberInfoList.push_back(matcher);
134 return matchedNumberInfoList;
135 }
136
HandleOperator(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)137 std::vector<MatchedNumberInfo> PositiveRule::HandleOperator(PhoneNumberMatch* possibleNumber,
138 icu::UnicodeString& message)
139 {
140 MatchedNumberInfo matcher;
141 if (possibleNumber->raw_string()[0] == '(' || possibleNumber->raw_string()[0] == '[') {
142 matcher.SetBegin(possibleNumber->start() + 1);
143 } else {
144 matcher.SetBegin(possibleNumber->start());
145 }
146 matcher.SetEnd(possibleNumber->end());
147 matcher.SetContent(message);
148 std::vector<MatchedNumberInfo> matchedNumberInfoList;
149 matchedNumberInfoList.push_back(matcher);
150 return matchedNumberInfoList;
151 }
152
HandleBlank(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)153 std::vector<MatchedNumberInfo> PositiveRule::HandleBlank(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
154 {
155 // exclude phone number 5201314
156 icu::UnicodeString speString = "5201314";
157 MatchedNumberInfo matchedNumberInfo;
158 icu::UnicodeString number = possibleNumber->raw_string().c_str();
159 icu::RegexPattern* pattern = GetPattern();
160 UErrorCode status;
161 icu::RegexMatcher* matcher = pattern->matcher(number, status);
162 UErrorCode negativeStatus = U_ZERO_ERROR;
163 // exclude phone number 2333333
164 icu::UnicodeString negativeRegex = "(?<![-\\d])(23{6,7})(?![-\\d])";
165 icu::RegexMatcher negativePattern(negativeRegex, 0, negativeStatus);
166 negativePattern.reset(number);
167 std::vector<MatchedNumberInfo> matchedNumberInfoList;
168 if (matcher != nullptr && matcher->find()) {
169 if (negativePattern.find() || number == speString) {
170 return matchedNumberInfoList;
171 }
172 if (possibleNumber->raw_string()[0] != '(' && possibleNumber->raw_string()[0] != '[') {
173 matchedNumberInfo.SetBegin(matcher->start(status) + possibleNumber->start());
174 } else {
175 matchedNumberInfo.SetBegin(possibleNumber->start());
176 }
177 matchedNumberInfo.SetEnd(matcher->end(status) + possibleNumber->start());
178 matchedNumberInfo.SetContent(number);
179 matchedNumberInfoList.push_back(matchedNumberInfo);
180 }
181 delete matcher;
182 delete pattern;
183 return matchedNumberInfoList;
184 }
185
HandleSlant(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)186 std::vector<MatchedNumberInfo> PositiveRule::HandleSlant(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
187 {
188 MatchedNumberInfo matchedNumberInfo;
189 MatchedNumberInfo numberInfo;
190 icu::UnicodeString number = possibleNumber->raw_string().c_str();
191 icu::RegexPattern* pattern = GetPattern();
192 UErrorCode status;
193 icu::RegexMatcher* matcher = pattern->matcher(number, status);
194 std::vector<MatchedNumberInfo> matchedNumberInfoList;
195 if (matcher != nullptr && matcher->find()) {
196 int start = matcher->start(status);
197 std::vector<MatchedNumberInfo> tempList = GetNumbersWithSlant(number);
198 // 2 is the size of tempList.
199 if (tempList.size() == 2 && start == 1) {
200 start = 0;
201 }
202 if (tempList.size() > 0) {
203 matchedNumberInfo.SetBegin(tempList[0].GetBegin() + start + possibleNumber->start());
204 matchedNumberInfo.SetEnd(tempList[0].GetEnd() + possibleNumber->start());
205 icu::UnicodeString contentFirst = tempList[0].GetContent();
206 matchedNumberInfo.SetContent(contentFirst);
207 matchedNumberInfoList.push_back(matchedNumberInfo);
208 // 2 is the size of tempList.
209 if (tempList.size() == 2) {
210 numberInfo.SetBegin(tempList[1].GetBegin() + start + possibleNumber->start());
211 numberInfo.SetEnd(tempList[1].GetEnd() + possibleNumber->start());
212 icu::UnicodeString contentSecond = tempList[1].GetContent();
213 numberInfo.SetContent(contentSecond);
214 matchedNumberInfoList.push_back(numberInfo);
215 }
216 }
217 }
218 delete matcher;
219 delete pattern;
220 return matchedNumberInfoList;
221 }
222
HandleStartWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)223 std::vector<MatchedNumberInfo> PositiveRule::HandleStartWithMobile(PhoneNumberMatch* possibleNumber,
224 icu::UnicodeString& message)
225 {
226 return HandlePossibleNumberWithPattern(possibleNumber, message, false);
227 }
228
HandleEndWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)229 std::vector<MatchedNumberInfo> PositiveRule::HandleEndWithMobile(PhoneNumberMatch* possibleNumber,
230 icu::UnicodeString& message)
231 {
232 return HandlePossibleNumberWithPattern(possibleNumber, message, true);
233 }
234
235 // identify short number separated by '/'
GetNumbersWithSlant(icu::UnicodeString & testStr)236 std::vector<MatchedNumberInfo> PositiveRule::GetNumbersWithSlant(icu::UnicodeString& testStr)
237 {
238 std::vector<MatchedNumberInfo> shortList;
239 PhoneNumberUtil* pnu = PhoneNumberUtil::GetInstance();
240 ShortNumberInfo* shortInfo = new (std::nothrow) ShortNumberInfo();
241 if (shortInfo == nullptr) {
242 HILOG_ERROR_I18N("ShortNumberInfo construct failed.");
243 return shortList;
244 }
245 std::string numberFisrt = "";
246 std::string numberEnd = "";
247 int slantIndex = 0;
248 for (int i = 0; i < testStr.length(); i++) {
249 if (testStr[i] == '/' || testStr[i] == '|') {
250 slantIndex = i;
251 testStr.tempSubString(0, i).toUTF8String(numberFisrt);
252 testStr.tempSubString(i + 1).toUTF8String(numberEnd);
253 }
254 }
255 PhoneNumber phoneNumberFirst;
256 PhoneNumber phoneNumberEnd;
257 pnu->Parse(numberFisrt, "CN", &phoneNumberFirst);
258 pnu->Parse(numberEnd, "CN", &phoneNumberEnd);
259 if (shortInfo->IsValidShortNumber(phoneNumberFirst)) {
260 MatchedNumberInfo matchedNumberInfoFirst;
261 matchedNumberInfoFirst.SetBegin(0);
262 matchedNumberInfoFirst.SetEnd(slantIndex);
263 icu::UnicodeString contentFirst = numberFisrt.c_str();
264 matchedNumberInfoFirst.SetContent(contentFirst);
265 shortList.push_back(matchedNumberInfoFirst);
266 }
267 if (shortInfo->IsValidShortNumber(phoneNumberEnd)) {
268 MatchedNumberInfo matchedNumberInfoEnd;
269 matchedNumberInfoEnd.SetBegin(slantIndex + 1);
270 matchedNumberInfoEnd.SetEnd(testStr.length());
271 icu::UnicodeString contentEnd = numberEnd.c_str();
272 matchedNumberInfoEnd.SetContent(contentEnd);
273 shortList.push_back(matchedNumberInfoEnd);
274 }
275 delete shortInfo;
276 return shortList;
277 }
278
HandlePossibleNumberWithPattern(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message,bool isStartsWithNumber)279 std::vector<MatchedNumberInfo> PositiveRule::HandlePossibleNumberWithPattern(PhoneNumberMatch* possibleNumber,
280 icu::UnicodeString& message, bool isStartsWithNumber)
281 {
282 UErrorCode status = U_ZERO_ERROR;
283 std::vector<MatchedNumberInfo> matchedList;
284 icu::UnicodeString possible = possibleNumber->raw_string().c_str();
285 icu::RegexPattern* pattern = GetPattern();
286 icu::RegexMatcher* mat = pattern->matcher(message, status);
287 while (mat != nullptr && mat->find(status)) {
288 int start = mat->start(status);
289 int end = mat->end(status);
290 icu::UnicodeString matched = message.tempSubString(start, end - start);
291 bool isMatch = isStartsWithNumber ? matched.startsWith(possible) : matched.endsWith(possible);
292 if (isMatch) {
293 MatchedNumberInfo info;
294 info.SetBegin(isStartsWithNumber ? start : end - possible.length());
295 info.SetEnd(isStartsWithNumber ? (start + possible.length()) : end);
296 info.SetContent(possible);
297 matchedList.push_back(info);
298 }
299 }
300 delete mat;
301 delete pattern;
302 return matchedList;
303 }
304 } // namespace I18n
305 } // namespace Global
306 } // namespace OHOS