1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <climits>
17 #include <set>
18 #include "i18n_hilog.h"
19 #include "regex_rule.h"
20 #include "phone_number_matched.h"
21 #include "utils.h"
22
23 namespace OHOS {
24 namespace Global {
25 namespace I18n {
26 using i18n::phonenumbers::PhoneNumber;
27
28 const int PhoneNumberMatched::CONTAIN = 9;
29 const int PhoneNumberMatched::CONTAIN_OR_INTERSECT = 8;
30 const UChar32 PhoneNumberMatched::REPLACE_CHAR = 'A';
31
PhoneNumberMatched(std::string & country)32 PhoneNumberMatched::PhoneNumberMatched(std::string& country)
33 {
34 phoneNumberRule = new PhoneNumberRule(country);
35 phoneNumberUtil = PhoneNumberUtil::GetInstance();
36 shortNumberInfo = new ShortNumberInfo();
37 this->country = country;
38 if (phoneNumberRule != nullptr) {
39 phoneNumberRule->Init();
40 }
41 }
42
~PhoneNumberMatched()43 PhoneNumberMatched::~PhoneNumberMatched()
44 {
45 delete phoneNumberRule;
46 delete shortNumberInfo;
47 }
48
GetMatchedPhoneNumber(icu::UnicodeString & message)49 std::vector<int> PhoneNumberMatched::GetMatchedPhoneNumber(icu::UnicodeString& message)
50 {
51 icu::UnicodeString messageStr = message;
52 if (!phoneNumberRule->isFixed) {
53 return DealWithoutFixed(messageStr, country);
54 }
55 icu::UnicodeString filteredString = HandleNegativeRule(messageStr);
56 std::vector<MatchedNumberInfo> matchedNumberInfoList = GetPossibleNumberInfos(country,
57 messageStr, filteredString);
58 std::vector<MatchedNumberInfo> shortList = FindShortNumbers(country, filteredString);
59 if (shortList.size() != 0) {
60 matchedNumberInfoList.insert(matchedNumberInfoList.end(), shortList.begin(), shortList.end());
61 }
62 matchedNumberInfoList = DeleteRepeatedInfo(matchedNumberInfoList);
63 for (auto& matchedNumberInfo : matchedNumberInfoList) {
64 DealNumberWithOneBracket(matchedNumberInfo);
65 }
66 return DealResult(matchedNumberInfoList);
67 }
68
DealWithoutFixed(icu::UnicodeString & message,std::string & country)69 std::vector<int> PhoneNumberMatched::DealWithoutFixed(icu::UnicodeString& message, std::string& country)
70 {
71 std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, message);
72 std::vector<MatchedNumberInfo> result;
73 for (auto& match : matchList) {
74 if (match == nullptr) continue;
75 MatchedNumberInfo info;
76 icu::UnicodeString content = match->raw_string().c_str();
77 PhoneNumber phoneNumber = match->number();
78 if (phoneNumberUtil->IsValidNumber(phoneNumber)) {
79 info.SetBegin(match->start());
80 info.SetEnd(match->end());
81 info.SetContent(content);
82 result.push_back(info);
83 }
84 delete match;
85 }
86 std::vector<MatchedNumberInfo> shortResult = FindShortNumbers(country, message);
87 // Merge result
88 if (shortResult.size() != 0) {
89 result.insert(result.end(), shortResult.begin(), shortResult.end());
90 }
91 result = DeleteRepeatedInfo(result);
92 for (auto& res: result) {
93 DealNumberWithOneBracket(res);
94 }
95 return DealResult(result);
96 }
97
98 // Filtering text using negative rules
HandleNegativeRule(icu::UnicodeString & src)99 icu::UnicodeString PhoneNumberMatched::HandleNegativeRule(icu::UnicodeString& src)
100 {
101 std::vector<NegativeRule*> rules = phoneNumberRule->GetNegativeRules();
102 icu::UnicodeString ret = src;
103 for (NegativeRule* rule : rules) {
104 if (rule == nullptr) {
105 continue;
106 }
107 ret = rule->Handle(ret);
108 }
109 return ret;
110 }
111
112 // Replace the characters in the specified area with REPLACE_CHAR
ReplaceSpecifiedPos(icu::UnicodeString & chs,int start,int end)113 void PhoneNumberMatched::ReplaceSpecifiedPos(icu::UnicodeString& chs, int start, int end)
114 {
115 if (start < end) {
116 int len = chs.length();
117 for (int i = 0; i < len; i++) {
118 if (i >= start && i < end) {
119 chs.replace(i, 1, PhoneNumberMatched::REPLACE_CHAR);
120 }
121 }
122 }
123 }
124
125 // Short numbers identification
FindShortNumbers(std::string & country,icu::UnicodeString & message)126 std::vector<MatchedNumberInfo> PhoneNumberMatched::FindShortNumbers(std::string& country,
127 icu::UnicodeString& message)
128 {
129 std::vector<MatchedNumberInfo> matchedNumberInfoList;
130 UErrorCode status = U_ZERO_ERROR;
131 size_t pos = phoneNumberRule->GetFindRules().size();
132 if (pos == 0) {
133 HILOG_ERROR_I18N("PhoneNumberRule.findRules is empty.");
134 return matchedNumberInfoList;
135 }
136 // 1 indicates the last position.
137 FindRule* shortRegexRule = phoneNumberRule->GetFindRules()[pos - 1];
138 icu::RegexPattern* shortPattern = shortRegexRule->GetPattern();
139 if (shortPattern == nullptr) {
140 HILOG_ERROR_I18N("shortPattern getPattern failed.");
141 return matchedNumberInfoList;
142 }
143 icu::RegexMatcher* shortMatch = shortPattern->matcher(message, status);
144 if (shortMatch == nullptr) {
145 delete shortPattern;
146 HILOG_ERROR_I18N("shortPattern matcher failed.");
147 return matchedNumberInfoList;
148 }
149 while (shortMatch->find(status)) {
150 icu::UnicodeString numberToParse = shortMatch->group(status);
151 std::string stringParse;
152 numberToParse.toUTF8String(stringParse);
153 PhoneNumber phoneNumber;
154 PhoneNumberUtil::ErrorType errorType =
155 phoneNumberUtil->ParseAndKeepRawInput(stringParse, country, &phoneNumber);
156 if (errorType != PhoneNumberUtil::NO_PARSING_ERROR) {
157 HILOG_ERROR_I18N("PhoneNumberRule: failed to call the ParseAndKeepRawInput.");
158 continue;
159 }
160 // Add the valid short number to the result
161 if (shortNumberInfo->IsPossibleShortNumberForRegion(phoneNumber, country)) {
162 MatchedNumberInfo matcher;
163 matcher.SetBegin(shortMatch->start(status));
164 matcher.SetEnd(shortMatch->end(status));
165 icu::UnicodeString stringShort = shortMatch->group(status);
166 matcher.SetContent(stringShort);
167 matchedNumberInfoList.push_back(matcher);
168 }
169 }
170 delete shortMatch;
171 delete shortPattern;
172 return matchedNumberInfoList;
173 }
174
175 // Add the phone number that may be correct, and return true if successful
AddPhoneNumber(std::string & number,int start,std::vector<PhoneNumberMatch * > & matchList,std::string & country)176 bool PhoneNumberMatched::AddPhoneNumber(std::string& number, int start, std::vector<PhoneNumberMatch*>& matchList,
177 std::string& country)
178 {
179 PhoneNumber phoneNumber;
180 int lenNumber = 5;
181 icu::UnicodeString uNumber = number.c_str();
182 if (RegexRule::CountDigits(uNumber) < lenNumber) {
183 return false;
184 }
185 PhoneNumberUtil::ErrorType parseStatus = phoneNumberUtil->Parse(number, country, &phoneNumber);
186 if (parseStatus != PhoneNumberUtil::NO_PARSING_ERROR) {
187 return false;
188 }
189 UChar32 space = ' ';
190 UChar32 slash = '/';
191 // Add to matchList if phone number is not delimited, or valid
192 if ((uNumber.indexOf(space) == -1 && uNumber.indexOf(slash) == -1) ||
193 phoneNumberUtil->IsValidNumber(phoneNumber)) {
194 PhoneNumberMatch* match = new PhoneNumberMatch(start, number, phoneNumber);
195 matchList.push_back(match);
196 return true;
197 }
198 return false;
199 }
200
201 // Add the valid phone number
FindNumbers(std::string & country,icu::UnicodeString & filteredString)202 std::vector<PhoneNumberMatch*> PhoneNumberMatched::FindNumbers(std::string& country,
203 icu::UnicodeString& filteredString)
204 {
205 std::vector<PhoneNumberMatch*> matchList;
206 UErrorCode status = U_ZERO_ERROR;
207 size_t pos = phoneNumberRule->GetFindRules().size();
208 // 2 indicates the penultimate position.
209 FindRule* numberRegexRule = phoneNumberRule->GetFindRules()[pos - 2];
210 icu::RegexPattern* numberPattern = numberRegexRule->GetPattern();
211 if (numberPattern == nullptr) {
212 HILOG_ERROR_I18N("numberRegexRule getPattern failed.");
213 return matchList;
214 }
215 icu::RegexMatcher* numberMatcher = numberPattern->matcher(filteredString, status);
216 if (numberMatcher == nullptr) {
217 HILOG_ERROR_I18N("numberPattern matcher failed.");
218 delete numberPattern;
219 return matchList;
220 }
221 while (numberMatcher->find(status)) {
222 int32_t start = numberMatcher->start(status);
223 int32_t end = numberMatcher->end(status);
224 icu::UnicodeString uNumber = filteredString.tempSubString(start, end - start);
225 std::string number;
226 uNumber.toUTF8String(number);
227 // if the entire phone number is invalid, identify each segment that is separated
228 if (!AddPhoneNumber(number, start, matchList, country)) {
229 int searchStart = 0;
230 UChar32 space = ' ';
231 UChar32 slash = '/';
232 if (uNumber.indexOf(space, searchStart) == -1 && uNumber.indexOf(slash, searchStart) == -1) {
233 continue;
234 }
235 while (uNumber.indexOf(space, searchStart) != -1 || uNumber.indexOf(slash, searchStart) != -1) {
236 int phoneStart = searchStart;
237 int indexSpace = uNumber.indexOf(space, searchStart);
238 int indexSlash = uNumber.indexOf(slash, searchStart);
239 int phoneEnd =
240 (indexSpace == -1 || (indexSlash != -1 && indexSlash < indexSpace)) ? indexSlash : indexSpace;
241 searchStart = phoneEnd + 1;
242 std::string tempNumber = number.substr(phoneStart, phoneEnd - phoneStart);
243 AddPhoneNumber(tempNumber, phoneStart + start, matchList, country);
244 }
245 // identify the last segment
246 std::string lastStr = number.substr(searchStart);
247 AddPhoneNumber(lastStr, searchStart + start, matchList, country);
248 }
249 }
250 delete numberMatcher;
251 delete numberPattern;
252 return matchList;
253 }
254
255 // Handing the situation of shortnumber/shortnumber.
HandleWithShortAndShort(std::vector<MatchedNumberInfo> & result,std::string & country,MatchedNumberInfo & info,std::pair<int,int> & pos,icu::UnicodeString & filteredString)256 bool PhoneNumberMatched::HandleWithShortAndShort(std::vector<MatchedNumberInfo>& result, std::string& country,
257 MatchedNumberInfo& info, std::pair<int, int>& pos, icu::UnicodeString& filteredString)
258 {
259 bool flag = false;
260 // 3 is the length of findRules.
261 size_t length = 3;
262 if (phoneNumberRule->GetFindRules().size() == length) {
263 UErrorCode status = U_ZERO_ERROR;
264 FindRule* regexRule = phoneNumberRule->GetFindRules()[0];
265 icu::RegexPattern* pattern = regexRule->GetPattern();
266 if (pattern == nullptr) {
267 HILOG_ERROR_I18N("regexRule getPattern failed.");
268 return flag;
269 }
270 icu::UnicodeString str = "";
271 if (info.GetContent()[0] == '(' || info.GetContent()[0] == '[') {
272 str = info.GetContent().tempSubString(1);
273 } else {
274 str = info.GetContent();
275 }
276 icu::RegexMatcher* matcher = pattern->matcher(str, status);
277 if (matcher == nullptr) {
278 delete pattern;
279 HILOG_ERROR_I18N("pattern matcher failed.");
280 return flag;
281 }
282 if (!matcher->find(status)) {
283 result.push_back(info);
284 ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
285 flag = true;
286 }
287 delete matcher;
288 delete pattern;
289 } else {
290 result.push_back(info);
291 ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
292 }
293 return flag;
294 }
295
296 // Get possible phone number
GetPossibleNumberInfos(std::string & country,icu::UnicodeString & src,icu::UnicodeString & filteredString)297 std::vector<MatchedNumberInfo> PhoneNumberMatched::GetPossibleNumberInfos(std::string& country,
298 icu::UnicodeString& src, icu::UnicodeString& filteredString)
299 {
300 std::vector<MatchedNumberInfo> result;
301 std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, filteredString);
302 for (auto match : matchList) {
303 if (!HandleBorderRule(match, filteredString)) {
304 continue;
305 }
306 PhoneNumberMatch* delMatch = HandleCodesRule(match, src);
307 if (delMatch == nullptr) {
308 continue;
309 }
310 icu::UnicodeString content = delMatch->raw_string().c_str();
311 int contentStart = delMatch->start();
312 int contentEnd = delMatch->end();
313 std::pair<int, int> pos{contentStart, contentEnd};
314 if (phoneNumberUtil->IsValidNumber(delMatch->number())) {
315 MatchedNumberInfo info;
316 info.SetBegin(contentStart);
317 info.SetEnd(contentEnd);
318 info.SetContent(content);
319 bool flag = HandleWithShortAndShort(result, country, info, pos, filteredString);
320 if (flag) {
321 continue;
322 }
323 }
324 std::vector<MatchedNumberInfo> posList = HandlePositiveRule(delMatch, filteredString);
325 if (posList.size() != 0) {
326 for (auto& matchInfo : posList) {
327 ReplaceSpecifiedPos(filteredString, matchInfo.GetBegin(), matchInfo.GetEnd());
328 }
329 result.insert(result.end(), posList.begin(), posList.end());
330 }
331 delete match;
332 }
333 return result;
334 }
335
336 // Remove duplicate results
DeleteRepeatedInfo(std::vector<MatchedNumberInfo> & list)337 std::vector<MatchedNumberInfo> PhoneNumberMatched::DeleteRepeatedInfo(std::vector<MatchedNumberInfo>& list)
338 {
339 std::set<MatchedNumberInfo> set;
340 std::vector<MatchedNumberInfo> ret;
341 for (auto info : list) {
342 if (set.find(info) == set.end()) {
343 ret.push_back(info);
344 }
345 set.insert(info);
346 }
347 return ret;
348 }
349
350 // Process the case where the phone number starts with one bracket
DealNumberWithOneBracket(MatchedNumberInfo & info)351 void PhoneNumberMatched::DealNumberWithOneBracket(MatchedNumberInfo& info)
352 {
353 icu::UnicodeString message = info.GetContent();
354 if (IsNumberWithOneBracket(message)) {
355 info.SetBegin(info.GetBegin() + 1);
356 icu::UnicodeString content = info.GetContent().tempSubString(1);
357 info.SetContent(content);
358 }
359 }
360
361 // check whether the bracket at the start position are redundant
IsNumberWithOneBracket(icu::UnicodeString & message)362 bool PhoneNumberMatched::IsNumberWithOneBracket(icu::UnicodeString& message)
363 {
364 if (message != "") {
365 int numLeft = 0;
366 int numRight = 0;
367 int len = message.length();
368 for (int i = 0; i < len; i++) {
369 if (message[i] == '(' || message[i] == '[') {
370 numLeft++;
371 }
372 if (message[i] == ')' || message[i] == ']') {
373 numRight++;
374 }
375 }
376 if (numLeft > numRight && (message[0] == '(' || message[0] == '[')) {
377 return true;
378 }
379 }
380 return false;
381 }
382
DealResult(std::vector<MatchedNumberInfo> & matchedNumberInfoList)383 std::vector<int> PhoneNumberMatched::DealResult(std::vector<MatchedNumberInfo>& matchedNumberInfoList)
384 {
385 std::vector<int> result;
386 size_t length = matchedNumberInfoList.size();
387 if (length == 0) {
388 result.push_back(0);
389 } else {
390 size_t posNumber = 2;
391 size_t posStart = 1;
392 size_t posEnd = 2;
393 result.resize(posNumber * length + 1);
394 result[0] = static_cast<int>(length);
395 for (size_t i = 0; i < length; i++) {
396 result[posNumber * i + posStart] = matchedNumberInfoList[i].GetBegin();
397 result[posNumber * i + posEnd] = matchedNumberInfoList[i].GetEnd();
398 }
399 }
400 return result;
401 }
402
403 // Filter result based on Border rule
HandleBorderRule(PhoneNumberMatch * match,icu::UnicodeString & message)404 bool PhoneNumberMatched::HandleBorderRule(PhoneNumberMatch* match, icu::UnicodeString& message)
405 {
406 if (match == nullptr) {
407 return false;
408 }
409 std::vector<BorderRule*> rules = phoneNumberRule->GetBorderRules();
410 if (rules.size() == 0) {
411 return true;
412 }
413 for (BorderRule* rule : rules) {
414 if (!rule->Handle(match, message)) {
415 return false;
416 }
417 }
418 return true;
419 }
420
421 // Filter result based on Codes rule
HandleCodesRule(PhoneNumberMatch * phoneNumberMatch,icu::UnicodeString & message)422 PhoneNumberMatch* PhoneNumberMatched::HandleCodesRule(PhoneNumberMatch* phoneNumberMatch, icu::UnicodeString& message)
423 {
424 PhoneNumberMatch* match = phoneNumberMatch;
425 std::vector<CodeRule*> rules = phoneNumberRule->GetCodesRules();
426 if (rules.size() == 0) {
427 return nullptr;
428 }
429 for (CodeRule* rule : rules) {
430 match = rule->Handle(match, message);
431 }
432 return match;
433 }
434
435 // Add phone numbers that meet the positive rule to the result
HandlePositiveRule(PhoneNumberMatch * match,icu::UnicodeString & message)436 std::vector<MatchedNumberInfo> PhoneNumberMatched::HandlePositiveRule(PhoneNumberMatch* match,
437 icu::UnicodeString& message)
438 {
439 std::vector<MatchedNumberInfo> infoList;
440 std::vector<PositiveRule*> rules = phoneNumberRule->GetPositiveRules();
441 for (PositiveRule* rule : rules) {
442 infoList = rule->Handle(match, message);
443 if (infoList.size() != 0) {
444 break;
445 }
446 }
447 return infoList;
448 }
449
DealStringWithOneBracket(icu::UnicodeString & message)450 icu::UnicodeString PhoneNumberMatched::DealStringWithOneBracket(icu::UnicodeString& message)
451 {
452 if (IsNumberWithOneBracket(message)) {
453 return message.tempSubString(1);
454 }
455 return message;
456 }
457 } // namespace I18n
458 } // namespace Global
459 } // namespace OHOS