1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
#include "pasteboard_pattern.h"

#include <climits>
#include <memory>
#include <unordered_map>

#include <libxml/HTMLparser.h>
#include <libxml/tree.h>
21
22 namespace OHOS::MiscServices {
23 std::map<uint32_t, std::string> PatternDetection::patterns_{
24 { static_cast<uint32_t>(Pattern::URL), std::string("[a-zA-Z0-9+.-]+://[-a-zA-Z0-9+&@#/%?"
25 "=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_]")},
26 { static_cast<uint32_t>(Pattern::Number), std::string("[-+]?[0-9]*\\.?[0-9]+")},
27 { static_cast<uint32_t>(Pattern::EmailAddress), std::string("(([a-zA-Z0-9_\\-\\.]+)@"
28 "((?:\\[([0-9]{1,3}\\.){3}[0-9]{1,3}\\])|"
29 "([a-zA-Z0-9\\-]+(?:\\.[a-zA-Z0-9\\-]+)*))"
30 "([a-zA-Z]{2,}|[0-9]{1,3}))")},
31 };
32
Detect(const std::set<Pattern> & patternsToCheck,const PasteData & pasteData,bool hasHTML,bool hasPlain)33 const std::set<Pattern> PatternDetection::Detect(const std::set<Pattern> &patternsToCheck,
34 const PasteData &pasteData, bool hasHTML, bool hasPlain)
35 {
36 std::set<Pattern> existedPatterns;
37 for (auto& record : pasteData.AllRecords()) {
38 if (patternsToCheck == existedPatterns) {
39 break;
40 }
41 if (hasPlain && record->GetPlainText() != nullptr) {
42 std::string recordText = *(record->GetPlainText());
43 DetectPlainText(existedPatterns, patternsToCheck, recordText);
44 }
45 if (hasHTML && record->GetHtmlText() != nullptr) {
46 std::string recordText = ExtractHtmlContent(*(record->GetHtmlText()));
47 DetectPlainText(existedPatterns, patternsToCheck, recordText);
48 }
49 }
50 return existedPatterns;
51 }
52
IsValid(const std::set<Pattern> & patterns)53 bool PatternDetection::IsValid(const std::set<Pattern> &patterns)
54 {
55 for (Pattern pattern : patterns) {
56 if (pattern >= Pattern::PatternCount) {
57 return false;
58 }
59 }
60 return true;
61 }
62
DetectPlainText(std::set<Pattern> & patternsOut,const std::set<Pattern> & patternsIn,const std::string & plainText)63 void PatternDetection::DetectPlainText(std::set<Pattern> &patternsOut,
64 const std::set<Pattern> &patternsIn, const std::string &plainText)
65 {
66 for (Pattern pattern : patternsIn) {
67 if (patternsOut.find(pattern) != patternsOut.end()) {
68 continue;
69 }
70 uint32_t patternUint32 = static_cast<uint32_t>(pattern);
71 auto it = patterns_.find(patternUint32);
72 if (it == patterns_.end()) {
73 PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "pasteboard pattern, unexpected Pattern value!");
74 continue;
75 }
76 std::regex curRegex(it->second);
77 if (std::regex_search(plainText, curRegex)) {
78 patternsOut.insert(pattern);
79 }
80 }
81 }
82
ExtractHtmlContent(const std::string & html_str)83 std::string PatternDetection::ExtractHtmlContent(const std::string &html_str)
84 {
85 xmlDocPtr doc = htmlReadMemory(html_str.c_str(), html_str.size(), nullptr, nullptr, 0);
86 if (doc == nullptr) {
87 PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! doc nullptr.");
88 return "";
89 }
90 xmlNode *rootNode = xmlDocGetRootElement(doc);
91 if (rootNode == nullptr) {
92 PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! rootNode nullptr.");
93 xmlFreeDoc(doc);
94 return "";
95 }
96 xmlChar *xmlStr = xmlNodeGetContent(rootNode);
97 if (xmlStr == nullptr) {
98 PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! xmlStr nullptr.");
99 xmlFreeDoc(doc);
100 return "";
101 }
102 std::string result(reinterpret_cast<const char*>(xmlStr));
103 xmlFree(xmlStr);
104 xmlFreeDoc(doc);
105 return result;
106 }
107 } // namespace OHOS::MiscServices