1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <climits>
17 #include <set>
18 #include "i18n_hilog.h"
19 #include "regex_rule.h"
20 #include "phone_number_matched.h"
21 #include "utils.h"
22 
23 namespace OHOS {
24 namespace Global {
25 namespace I18n {
26 using i18n::phonenumbers::PhoneNumber;
27 
28 const int PhoneNumberMatched::CONTAIN = 9;
29 const int PhoneNumberMatched::CONTAIN_OR_INTERSECT = 8;
30 const UChar32 PhoneNumberMatched::REPLACE_CHAR = 'A';
31 
PhoneNumberMatched(std::string & country)32 PhoneNumberMatched::PhoneNumberMatched(std::string& country)
33 {
34     phoneNumberRule = new PhoneNumberRule(country);
35     phoneNumberUtil = PhoneNumberUtil::GetInstance();
36     shortNumberInfo = new ShortNumberInfo();
37     this->country = country;
38     if (phoneNumberRule != nullptr) {
39         phoneNumberRule->Init();
40     }
41 }
42 
~PhoneNumberMatched()43 PhoneNumberMatched::~PhoneNumberMatched()
44 {
45     delete phoneNumberRule;
46     delete shortNumberInfo;
47 }
48 
GetMatchedPhoneNumber(icu::UnicodeString & message)49 std::vector<int> PhoneNumberMatched::GetMatchedPhoneNumber(icu::UnicodeString& message)
50 {
51     icu::UnicodeString messageStr = message;
52     if (!phoneNumberRule->isFixed) {
53         return DealWithoutFixed(messageStr, country);
54     }
55     icu::UnicodeString filteredString = HandleNegativeRule(messageStr);
56     std::vector<MatchedNumberInfo> matchedNumberInfoList = GetPossibleNumberInfos(country,
57         messageStr, filteredString);
58     std::vector<MatchedNumberInfo> shortList = FindShortNumbers(country, filteredString);
59     if (shortList.size() != 0) {
60         matchedNumberInfoList.insert(matchedNumberInfoList.end(), shortList.begin(), shortList.end());
61     }
62     matchedNumberInfoList = DeleteRepeatedInfo(matchedNumberInfoList);
63     for (auto& matchedNumberInfo : matchedNumberInfoList) {
64         DealNumberWithOneBracket(matchedNumberInfo);
65     }
66     return DealResult(matchedNumberInfoList);
67 }
68 
DealWithoutFixed(icu::UnicodeString & message,std::string & country)69 std::vector<int> PhoneNumberMatched::DealWithoutFixed(icu::UnicodeString& message, std::string& country)
70 {
71     std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, message);
72     std::vector<MatchedNumberInfo> result;
73     for (auto& match : matchList) {
74         if (match == nullptr) continue;
75         MatchedNumberInfo info;
76         icu::UnicodeString content = match->raw_string().c_str();
77         PhoneNumber phoneNumber = match->number();
78         if (phoneNumberUtil->IsValidNumber(phoneNumber)) {
79             info.SetBegin(match->start());
80             info.SetEnd(match->end());
81             info.SetContent(content);
82             result.push_back(info);
83         }
84         delete match;
85     }
86     std::vector<MatchedNumberInfo> shortResult = FindShortNumbers(country, message);
87     // Merge result
88     if (shortResult.size() != 0) {
89         result.insert(result.end(), shortResult.begin(), shortResult.end());
90     }
91     result = DeleteRepeatedInfo(result);
92     for (auto& res: result) {
93         DealNumberWithOneBracket(res);
94     }
95     return DealResult(result);
96 }
97 
98 // Filtering text using negative rules
HandleNegativeRule(icu::UnicodeString & src)99 icu::UnicodeString PhoneNumberMatched::HandleNegativeRule(icu::UnicodeString& src)
100 {
101     std::vector<NegativeRule*> rules = phoneNumberRule->GetNegativeRules();
102     icu::UnicodeString ret = src;
103     for (NegativeRule* rule : rules) {
104         if (rule == nullptr) {
105             continue;
106         }
107         ret = rule->Handle(ret);
108     }
109     return ret;
110 }
111 
112 // Replace the characters in the specified area with REPLACE_CHAR
ReplaceSpecifiedPos(icu::UnicodeString & chs,int start,int end)113 void PhoneNumberMatched::ReplaceSpecifiedPos(icu::UnicodeString& chs, int start, int end)
114 {
115     if (start < end) {
116         int len = chs.length();
117         for (int i = 0; i < len; i++) {
118             if (i >= start && i < end) {
119                 chs.replace(i, 1, PhoneNumberMatched::REPLACE_CHAR);
120             }
121         }
122     }
123 }
124 
125 // Short numbers identification
FindShortNumbers(std::string & country,icu::UnicodeString & message)126 std::vector<MatchedNumberInfo> PhoneNumberMatched::FindShortNumbers(std::string& country,
127     icu::UnicodeString& message)
128 {
129     std::vector<MatchedNumberInfo> matchedNumberInfoList;
130     UErrorCode status = U_ZERO_ERROR;
131     size_t pos = phoneNumberRule->GetFindRules().size();
132     if (pos == 0) {
133         HILOG_ERROR_I18N("PhoneNumberRule.findRules is empty.");
134         return matchedNumberInfoList;
135     }
136     // 1 indicates the last position.
137     FindRule* shortRegexRule = phoneNumberRule->GetFindRules()[pos - 1];
138     icu::RegexPattern* shortPattern = shortRegexRule->GetPattern();
139     if (shortPattern == nullptr) {
140         HILOG_ERROR_I18N("shortPattern getPattern failed.");
141         return matchedNumberInfoList;
142     }
143     icu::RegexMatcher* shortMatch = shortPattern->matcher(message, status);
144     if (shortMatch == nullptr) {
145         delete shortPattern;
146         HILOG_ERROR_I18N("shortPattern matcher failed.");
147         return matchedNumberInfoList;
148     }
149     while (shortMatch->find(status)) {
150         icu::UnicodeString numberToParse = shortMatch->group(status);
151         std::string stringParse;
152         numberToParse.toUTF8String(stringParse);
153         PhoneNumber phoneNumber;
154         PhoneNumberUtil::ErrorType errorType =
155             phoneNumberUtil->ParseAndKeepRawInput(stringParse, country, &phoneNumber);
156         if (errorType != PhoneNumberUtil::NO_PARSING_ERROR) {
157             HILOG_ERROR_I18N("PhoneNumberRule: failed to call the ParseAndKeepRawInput.");
158             continue;
159         }
160         // Add the valid short number to the result
161         if (shortNumberInfo->IsPossibleShortNumberForRegion(phoneNumber, country)) {
162             MatchedNumberInfo matcher;
163             matcher.SetBegin(shortMatch->start(status));
164             matcher.SetEnd(shortMatch->end(status));
165             icu::UnicodeString stringShort = shortMatch->group(status);
166             matcher.SetContent(stringShort);
167             matchedNumberInfoList.push_back(matcher);
168         }
169     }
170     delete shortMatch;
171     delete shortPattern;
172     return matchedNumberInfoList;
173 }
174 
175 // Add the phone number that may be correct, and return true if successful
AddPhoneNumber(std::string & number,int start,std::vector<PhoneNumberMatch * > & matchList,std::string & country)176 bool PhoneNumberMatched::AddPhoneNumber(std::string& number, int start, std::vector<PhoneNumberMatch*>& matchList,
177     std::string& country)
178 {
179     PhoneNumber phoneNumber;
180     int lenNumber = 5;
181     icu::UnicodeString uNumber = number.c_str();
182     if (RegexRule::CountDigits(uNumber) < lenNumber) {
183         return false;
184     }
185     PhoneNumberUtil::ErrorType parseStatus = phoneNumberUtil->Parse(number, country, &phoneNumber);
186     if (parseStatus != PhoneNumberUtil::NO_PARSING_ERROR) {
187         return false;
188     }
189     UChar32 space = ' ';
190     UChar32 slash = '/';
191     // Add to matchList if phone number is not delimited, or valid
192     if ((uNumber.indexOf(space) == -1 && uNumber.indexOf(slash) == -1) ||
193         phoneNumberUtil->IsValidNumber(phoneNumber)) {
194         PhoneNumberMatch* match = new PhoneNumberMatch(start, number, phoneNumber);
195         matchList.push_back(match);
196         return true;
197     }
198     return false;
199 }
200 
201 // Add the valid phone number
FindNumbers(std::string & country,icu::UnicodeString & filteredString)202 std::vector<PhoneNumberMatch*> PhoneNumberMatched::FindNumbers(std::string& country,
203     icu::UnicodeString& filteredString)
204 {
205     std::vector<PhoneNumberMatch*> matchList;
206     UErrorCode status = U_ZERO_ERROR;
207     size_t pos = phoneNumberRule->GetFindRules().size();
208     // 2 indicates the penultimate position.
209     FindRule* numberRegexRule = phoneNumberRule->GetFindRules()[pos - 2];
210     icu::RegexPattern* numberPattern = numberRegexRule->GetPattern();
211     if (numberPattern == nullptr) {
212         HILOG_ERROR_I18N("numberRegexRule getPattern failed.");
213         return matchList;
214     }
215     icu::RegexMatcher* numberMatcher = numberPattern->matcher(filteredString, status);
216     if (numberMatcher == nullptr) {
217         HILOG_ERROR_I18N("numberPattern matcher failed.");
218         delete numberPattern;
219         return matchList;
220     }
221     while (numberMatcher->find(status)) {
222         int32_t start = numberMatcher->start(status);
223         int32_t end = numberMatcher->end(status);
224         icu::UnicodeString uNumber = filteredString.tempSubString(start, end - start);
225         std::string number;
226         uNumber.toUTF8String(number);
227         // if the entire phone number is invalid, identify each segment that is separated
228         if (!AddPhoneNumber(number, start, matchList, country)) {
229             int searchStart = 0;
230             UChar32 space = ' ';
231             UChar32 slash = '/';
232             if (uNumber.indexOf(space, searchStart) == -1 && uNumber.indexOf(slash, searchStart) == -1) {
233                 continue;
234             }
235             while (uNumber.indexOf(space, searchStart) != -1 || uNumber.indexOf(slash, searchStart) != -1) {
236                 int phoneStart = searchStart;
237                 int indexSpace = uNumber.indexOf(space, searchStart);
238                 int indexSlash = uNumber.indexOf(slash, searchStart);
239                 int phoneEnd =
240                     (indexSpace == -1 || (indexSlash != -1 && indexSlash < indexSpace)) ? indexSlash : indexSpace;
241                 searchStart = phoneEnd + 1;
242                 std::string tempNumber = number.substr(phoneStart, phoneEnd - phoneStart);
243                 AddPhoneNumber(tempNumber, phoneStart + start, matchList, country);
244             }
245             // identify the last segment
246             std::string lastStr = number.substr(searchStart);
247             AddPhoneNumber(lastStr, searchStart + start, matchList, country);
248         }
249     }
250     delete numberMatcher;
251     delete numberPattern;
252     return matchList;
253 }
254 
255 // Handing the situation of shortnumber/shortnumber.
HandleWithShortAndShort(std::vector<MatchedNumberInfo> & result,std::string & country,MatchedNumberInfo & info,std::pair<int,int> & pos,icu::UnicodeString & filteredString)256 bool PhoneNumberMatched::HandleWithShortAndShort(std::vector<MatchedNumberInfo>& result, std::string& country,
257     MatchedNumberInfo& info, std::pair<int, int>& pos, icu::UnicodeString& filteredString)
258 {
259     bool flag = false;
260     // 3 is the length of findRules.
261     size_t length = 3;
262     if (phoneNumberRule->GetFindRules().size() == length) {
263         UErrorCode status = U_ZERO_ERROR;
264         FindRule* regexRule = phoneNumberRule->GetFindRules()[0];
265         icu::RegexPattern* pattern = regexRule->GetPattern();
266         if (pattern == nullptr) {
267             HILOG_ERROR_I18N("regexRule getPattern failed.");
268             return flag;
269         }
270         icu::UnicodeString str = "";
271         if (info.GetContent()[0] == '(' || info.GetContent()[0] == '[') {
272             str = info.GetContent().tempSubString(1);
273         } else {
274             str = info.GetContent();
275         }
276         icu::RegexMatcher* matcher = pattern->matcher(str, status);
277         if (matcher == nullptr) {
278             delete pattern;
279             HILOG_ERROR_I18N("pattern matcher failed.");
280             return flag;
281         }
282         if (!matcher->find(status)) {
283             result.push_back(info);
284             ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
285             flag = true;
286         }
287         delete matcher;
288         delete pattern;
289     } else {
290         result.push_back(info);
291         ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
292     }
293     return flag;
294 }
295 
296 // Get possible phone number
GetPossibleNumberInfos(std::string & country,icu::UnicodeString & src,icu::UnicodeString & filteredString)297 std::vector<MatchedNumberInfo> PhoneNumberMatched::GetPossibleNumberInfos(std::string& country,
298     icu::UnicodeString& src, icu::UnicodeString& filteredString)
299 {
300     std::vector<MatchedNumberInfo> result;
301     std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, filteredString);
302     for (auto match : matchList) {
303         if (!HandleBorderRule(match, filteredString)) {
304             continue;
305         }
306         PhoneNumberMatch* delMatch = HandleCodesRule(match, src);
307         if (delMatch == nullptr) {
308             continue;
309         }
310         icu::UnicodeString content = delMatch->raw_string().c_str();
311         int contentStart = delMatch->start();
312         int contentEnd = delMatch->end();
313         std::pair<int, int> pos{contentStart, contentEnd};
314         if (phoneNumberUtil->IsValidNumber(delMatch->number())) {
315             MatchedNumberInfo info;
316             info.SetBegin(contentStart);
317             info.SetEnd(contentEnd);
318             info.SetContent(content);
319             bool flag = HandleWithShortAndShort(result, country, info, pos, filteredString);
320             if (flag) {
321                 continue;
322             }
323         }
324         std::vector<MatchedNumberInfo> posList = HandlePositiveRule(delMatch, filteredString);
325         if (posList.size() != 0) {
326             for (auto& matchInfo : posList) {
327                 ReplaceSpecifiedPos(filteredString, matchInfo.GetBegin(), matchInfo.GetEnd());
328             }
329             result.insert(result.end(), posList.begin(), posList.end());
330         }
331         delete match;
332     }
333     return result;
334 }
335 
336 // Remove duplicate results
DeleteRepeatedInfo(std::vector<MatchedNumberInfo> & list)337 std::vector<MatchedNumberInfo> PhoneNumberMatched::DeleteRepeatedInfo(std::vector<MatchedNumberInfo>& list)
338 {
339     std::set<MatchedNumberInfo> set;
340     std::vector<MatchedNumberInfo> ret;
341     for (auto info : list) {
342         if (set.find(info) == set.end()) {
343             ret.push_back(info);
344         }
345         set.insert(info);
346     }
347     return ret;
348 }
349 
350 // Process the case where the phone number starts with one bracket
DealNumberWithOneBracket(MatchedNumberInfo & info)351 void PhoneNumberMatched::DealNumberWithOneBracket(MatchedNumberInfo& info)
352 {
353     icu::UnicodeString message = info.GetContent();
354     if (IsNumberWithOneBracket(message)) {
355         info.SetBegin(info.GetBegin() + 1);
356         icu::UnicodeString content = info.GetContent().tempSubString(1);
357         info.SetContent(content);
358     }
359 }
360 
361 // check whether the bracket at the start position are redundant
IsNumberWithOneBracket(icu::UnicodeString & message)362 bool PhoneNumberMatched::IsNumberWithOneBracket(icu::UnicodeString& message)
363 {
364     if (message != "") {
365         int numLeft = 0;
366         int numRight = 0;
367         int len = message.length();
368         for (int i = 0; i < len; i++) {
369             if (message[i] == '(' || message[i] == '[') {
370                 numLeft++;
371             }
372             if (message[i] == ')' || message[i] == ']') {
373                 numRight++;
374             }
375         }
376         if (numLeft > numRight && (message[0] == '(' || message[0] == '[')) {
377             return true;
378         }
379     }
380     return false;
381 }
382 
DealResult(std::vector<MatchedNumberInfo> & matchedNumberInfoList)383 std::vector<int> PhoneNumberMatched::DealResult(std::vector<MatchedNumberInfo>& matchedNumberInfoList)
384 {
385     std::vector<int> result;
386     size_t length = matchedNumberInfoList.size();
387     if (length == 0) {
388         result.push_back(0);
389     } else {
390         size_t posNumber = 2;
391         size_t posStart = 1;
392         size_t posEnd = 2;
393         result.resize(posNumber * length + 1);
394         result[0] = static_cast<int>(length);
395         for (size_t i = 0; i < length; i++) {
396             result[posNumber * i + posStart] = matchedNumberInfoList[i].GetBegin();
397             result[posNumber * i + posEnd] = matchedNumberInfoList[i].GetEnd();
398         }
399     }
400     return result;
401 }
402 
403 // Filter result based on Border rule
HandleBorderRule(PhoneNumberMatch * match,icu::UnicodeString & message)404 bool PhoneNumberMatched::HandleBorderRule(PhoneNumberMatch* match, icu::UnicodeString& message)
405 {
406     if (match == nullptr) {
407         return false;
408     }
409     std::vector<BorderRule*> rules = phoneNumberRule->GetBorderRules();
410     if (rules.size() == 0) {
411         return true;
412     }
413     for (BorderRule* rule : rules) {
414         if (!rule->Handle(match, message)) {
415             return false;
416         }
417     }
418     return true;
419 }
420 
421 // Filter result based on Codes rule
HandleCodesRule(PhoneNumberMatch * phoneNumberMatch,icu::UnicodeString & message)422 PhoneNumberMatch* PhoneNumberMatched::HandleCodesRule(PhoneNumberMatch* phoneNumberMatch, icu::UnicodeString& message)
423 {
424     PhoneNumberMatch* match = phoneNumberMatch;
425     std::vector<CodeRule*> rules = phoneNumberRule->GetCodesRules();
426     if (rules.size() == 0) {
427         return nullptr;
428     }
429     for (CodeRule* rule : rules) {
430         match = rule->Handle(match, message);
431     }
432     return match;
433 }
434 
435 // Add phone numbers that meet the positive rule to the result
HandlePositiveRule(PhoneNumberMatch * match,icu::UnicodeString & message)436 std::vector<MatchedNumberInfo> PhoneNumberMatched::HandlePositiveRule(PhoneNumberMatch* match,
437     icu::UnicodeString& message)
438 {
439     std::vector<MatchedNumberInfo> infoList;
440     std::vector<PositiveRule*> rules = phoneNumberRule->GetPositiveRules();
441     for (PositiveRule* rule : rules) {
442         infoList = rule->Handle(match, message);
443         if (infoList.size() != 0) {
444             break;
445         }
446     }
447     return infoList;
448 }
449 
DealStringWithOneBracket(icu::UnicodeString & message)450 icu::UnicodeString PhoneNumberMatched::DealStringWithOneBracket(icu::UnicodeString& message)
451 {
452     if (IsNumberWithOneBracket(message)) {
453         return message.tempSubString(1);
454     }
455     return message;
456 }
457 } // namespace I18n
458 } // namespace Global
459 } // namespace OHOS