1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
#include "pasteboard_pattern.h"

#include <climits>
#include <memory>
#include <unordered_map>
#include <libxml/HTMLparser.h>
#include <libxml/tree.h>
21 
22 namespace OHOS::MiscServices {
23 std::map<uint32_t, std::string> PatternDetection::patterns_{
24     { static_cast<uint32_t>(Pattern::URL), std::string("[a-zA-Z0-9+.-]+://[-a-zA-Z0-9+&@#/%?"
25                                                 "=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_]")},
26     { static_cast<uint32_t>(Pattern::Number), std::string("[-+]?[0-9]*\\.?[0-9]+")},
27     { static_cast<uint32_t>(Pattern::EmailAddress), std::string("(([a-zA-Z0-9_\\-\\.]+)@"
28                                                 "((?:\\[([0-9]{1,3}\\.){3}[0-9]{1,3}\\])|"
29                                                 "([a-zA-Z0-9\\-]+(?:\\.[a-zA-Z0-9\\-]+)*))"
30                                                 "([a-zA-Z]{2,}|[0-9]{1,3}))")},
31 };
32 
Detect(const std::set<Pattern> & patternsToCheck,const PasteData & pasteData,bool hasHTML,bool hasPlain)33 const std::set<Pattern> PatternDetection::Detect(const std::set<Pattern> &patternsToCheck,
34     const PasteData &pasteData, bool hasHTML, bool hasPlain)
35 {
36     std::set<Pattern> existedPatterns;
37     for (auto& record : pasteData.AllRecords()) {
38         if (patternsToCheck == existedPatterns) {
39             break;
40         }
41         if (hasPlain && record->GetPlainText() != nullptr) {
42             std::string recordText = *(record->GetPlainText());
43             DetectPlainText(existedPatterns, patternsToCheck, recordText);
44         }
45         if (hasHTML && record->GetHtmlText() != nullptr) {
46             std::string recordText = ExtractHtmlContent(*(record->GetHtmlText()));
47             DetectPlainText(existedPatterns, patternsToCheck, recordText);
48         }
49     }
50     return existedPatterns;
51 }
52 
IsValid(const std::set<Pattern> & patterns)53 bool PatternDetection::IsValid(const std::set<Pattern> &patterns)
54 {
55     for (Pattern pattern : patterns) {
56         if (pattern >= Pattern::PatternCount) {
57             return false;
58         }
59     }
60     return true;
61 }
62 
DetectPlainText(std::set<Pattern> & patternsOut,const std::set<Pattern> & patternsIn,const std::string & plainText)63 void PatternDetection::DetectPlainText(std::set<Pattern> &patternsOut,
64     const std::set<Pattern> &patternsIn, const std::string &plainText)
65 {
66     for (Pattern pattern : patternsIn) {
67         if (patternsOut.find(pattern) != patternsOut.end()) {
68             continue;
69         }
70         uint32_t patternUint32 = static_cast<uint32_t>(pattern);
71         auto it = patterns_.find(patternUint32);
72         if (it == patterns_.end()) {
73             PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "pasteboard pattern, unexpected Pattern value!");
74             continue;
75         }
76         std::regex curRegex(it->second);
77         if (std::regex_search(plainText, curRegex)) {
78             patternsOut.insert(pattern);
79         }
80     }
81 }
82 
ExtractHtmlContent(const std::string & html_str)83 std::string PatternDetection::ExtractHtmlContent(const std::string &html_str)
84 {
85     xmlDocPtr doc = htmlReadMemory(html_str.c_str(), html_str.size(), nullptr, nullptr, 0);
86     if (doc == nullptr) {
87         PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! doc nullptr.");
88         return "";
89     }
90     xmlNode *rootNode = xmlDocGetRootElement(doc);
91     if (rootNode == nullptr) {
92         PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! rootNode nullptr.");
93         xmlFreeDoc(doc);
94         return "";
95     }
96     xmlChar *xmlStr = xmlNodeGetContent(rootNode);
97     if (xmlStr == nullptr) {
98         PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! xmlStr nullptr.");
99         xmlFreeDoc(doc);
100         return "";
101     }
102     std::string result(reinterpret_cast<const char*>(xmlStr));
103     xmlFree(xmlStr);
104     xmlFreeDoc(doc);
105     return result;
106 }
107 } // namespace OHOS::MiscServices