1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "js_uri.h"
17 #include "tools/log.h"
18 namespace OHOS::Uri {
    // Character-class lookup tables, indexed by character code. They are filled in by
    // Uri::PreliminaryWork() and consulted (mostly through Uri::CheckCharacter) while parsing.
    std::bitset<MAX_BIT_SIZE> g_ruleAlpha;    // ASCII letters; checked against the first scheme character
    std::bitset<MAX_BIT_SIZE> g_ruleScheme;   // characters accepted inside a scheme
    std::bitset<MAX_BIT_SIZE> g_ruleUrlc;     // characters accepted inside a query or a fragment
    std::bitset<MAX_BIT_SIZE> g_rulePath;     // characters accepted inside a path
    std::bitset<MAX_BIT_SIZE> g_ruleUserInfo; // characters accepted inside the user-info part
    std::bitset<MAX_BIT_SIZE> g_ruleDigit;    // decimal digits; used to recognise a numeric port
    std::bitset<MAX_BIT_SIZE> g_rulePort;     // characters tolerated after the last ':' of the authority
PreliminaryWork() const26     void Uri::PreliminaryWork() const
27     {
28         std::string digitAggregate = "0123456789";
29         for (size_t i = 0; i < digitAggregate.size(); ++i) {
30             g_ruleDigit.set(digitAggregate[i]);
31         }
32 
33         std::string alphasAggregate = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
34         for (size_t i = 0; i < alphasAggregate.size(); ++i) {
35             g_ruleAlpha.set(alphasAggregate[i]);
36         }
37 
38         std::string schemeAggregate = digitAggregate + alphasAggregate + "+-.| _-~!$&=,;'(){}*";
39         for (size_t i = 0; i < schemeAggregate.size(); ++i) {
40             g_ruleScheme.set(schemeAggregate[i]);
41         }
42 
43         std::string uricAggregate = schemeAggregate + ";/?:@&=$,[]_!~*'()%\"";
44         for (size_t i = 0; i < uricAggregate.size(); ++i) {
45             g_ruleUrlc.set(uricAggregate[i]);
46         }
47 
48         std::string pathAggregate = schemeAggregate + ";/:@&=$,_!~*'()%";
49         for (size_t i = 0; i < pathAggregate.size(); ++i) {
50             g_rulePath.set(pathAggregate[i]);
51         }
52 
53         std::string userInfoAggregate = schemeAggregate + ";:&=$,_!~*'()%";
54         for (size_t i = 0; i < userInfoAggregate.size(); ++i) {
55             g_ruleUserInfo.set(userInfoAggregate[i]);
56         }
57 
58         std::string portAggregate = digitAggregate + alphasAggregate + ".:@-;&=+$,-_!~*'()";
59         for (size_t i = 0; i < portAggregate.size(); ++i) {
60             g_rulePort.set(portAggregate[i]);
61         }
62     }
63 
Uri(const std::string input)64     Uri::Uri(const std::string input)
65     {
66         PreliminaryWork();
67         errStr_ = "";
68         if (input.empty()) {
69             errStr_ = "uri is empty";
70             return;
71         }
72         inputUri_ = input;
73         AnalysisUri();
74     }
75 
AssignSchemeSpecificPart()76     void Uri::AssignSchemeSpecificPart()
77     {
78         uriData_.SchemeSpecificPart.reserve(data_.length() + uriData_.query.length() + 1);
79         uriData_.SchemeSpecificPart.append(data_);
80         if (!uriData_.query.empty()) {
81             uriData_.SchemeSpecificPart.append("?");
82             uriData_.SchemeSpecificPart.append(uriData_.query);
83         }
84     }
85 
AnalysisUri()86     void Uri::AnalysisUri()
87     {
88         data_ = inputUri_;
89         size_t pos = data_.find('#'); // Fragment
90         if (pos != std::string::npos) {
91             AnalysisFragment(pos);
92             if (!errStr_.empty()) {
93                 return;
94             }
95         }
96         pos = data_.find('?'); // Query
97         if (pos != std::string::npos) {
98             AnalysisQuery(pos);
99             if (!errStr_.empty()) {
100                 return;
101             }
102         }
103         pos = data_.find(':'); // Scheme
104         if (pos != std::string::npos) {
105             AnalysisScheme(pos);
106             if (!errStr_.empty()) {
107                 return;
108             }
109         } else {
110             SpecialPath();
111             if (!errStr_.empty()) {
112                 return;
113             }
114             AssignSchemeSpecificPart();
115             return;
116         }
117         pos = data_.find("//"); // userInfo path host port ipv4 or ipv6
118         if (pos != std::string::npos && pos == 0) {
119             AssignSchemeSpecificPart();
120             data_ = data_.substr(2); // 2:Intercept the string from the second subscript
121             AnalysisHostAndPath();
122             if (!errStr_.empty()) {
123                 return;
124             }
125         } else if (data_[0] == '/') {
126             uriData_.path = data_;
127             AssignSchemeSpecificPart();
128             data_ = "";
129         } else {
130             AssignSchemeSpecificPart();
131             uriData_.query = "";
132             data_ = "";
133         }
134     }
135 
CheckCharacter(std::string data,std::bitset<MAX_BIT_SIZE> rule,bool flag) const136     bool Uri::CheckCharacter(std::string data, std::bitset<MAX_BIT_SIZE> rule, bool flag) const
137     {
138         size_t dataLen = data.size();
139         for (size_t i = 0; i < dataLen; ++i) {
140             if (static_cast<int>(data[i]) >= 0 && static_cast<int>(data[i]) < 128) { // 128:ASCII Max Number
141                 bool isLegal = rule.test(data[i]);
142                 if (!isLegal) {
143                     return false;
144                 }
145             } else if (!flag) {
146                 return false;
147             }
148         }
149         return true;
150     }
151 
SpecialPath()152     void Uri::SpecialPath()
153     {
154         if (!CheckCharacter(data_, g_rulePath, true)) {
155             errStr_ = "SpecialPath does not conform to the rule";
156             return;
157         }
158         uriData_.path = data_;
159         data_ = "";
160     }
161 
AnalysisFragment(size_t pos)162     void Uri::AnalysisFragment(size_t pos)
163     {
164         if (pos == 0) {
165             errStr_ = "#It can't be the first";
166             return;
167         }
168         std::string fragment = data_.substr(pos + 1);
169         if (!CheckCharacter(fragment, g_ruleUrlc, true)) {
170             errStr_ = "Fragment does not conform to the rule";
171             return;
172         }
173         uriData_.fragment = fragment;
174         data_ = data_.substr(0, pos);
175     }
176 
AnalysisQuery(size_t pos)177     void Uri::AnalysisQuery(size_t pos)
178     {
179         std::string query = data_.substr(pos + 1);
180         if (!CheckCharacter(query, g_ruleUrlc, true)) {
181             errStr_ = "Query does not conform to the rule";
182             return;
183         }
184         uriData_.query = query;
185         data_ = data_.substr(0, pos);
186     }
187 
AnalysisScheme(size_t pos)188     void Uri::AnalysisScheme(size_t pos)
189     {
190         size_t slashPos = data_.find('/');
191         if (slashPos != std::string::npos && slashPos < pos) {
192             SpecialPath();
193             uriData_.SchemeSpecificPart.reserve(uriData_.path.length() + uriData_.query.length() + 1);
194             uriData_.SchemeSpecificPart.append(uriData_.path);
195             uriData_.SchemeSpecificPart.append("?");
196             uriData_.SchemeSpecificPart.append(uriData_.query);
197             data_ = "";
198         } else {
199             if ((static_cast<int>(data_[0]) >= 0 && static_cast<int>(data_[0]) < MAX_BIT_SIZE) &&
200                 !g_ruleAlpha.test(data_[0])) {
201                 errStr_ = "Scheme the first character must be a letter";
202                 return;
203             }
204             std::string scheme = data_.substr(0, pos);
205             if (!CheckCharacter(scheme, g_ruleScheme, false)) {
206                 errStr_ = "scheme does not conform to the rule";
207                 return;
208             }
209             uriData_.scheme = scheme;
210             data_ = data_.substr(pos + 1);
211         }
212     }
213 
AnalysisHost(bool isLawfulProt)214     void Uri::AnalysisHost(bool isLawfulProt)
215     {
216         // find ipv4 or ipv6 or host
217         if (data_[0] == '[') {
218             if (data_[data_.size() - 1] == ']') {
219                 // IPV6
220                 if (!isLawfulProt) {
221                     errStr_ = "Prot does not conform to the rule";
222                     return;
223                 }
224                 AnalysisIPV6();
225             } else {
226                 errStr_ = "IPv6 is missing a closing bracket";
227                 return;
228             }
229         } else {
230             if (data_.find('[') != std::string::npos || data_.find(']') != std::string::npos) {
231                 errStr_ = "host does not conform to the rule";
232                 return;
233             }
234             // ipv4
235             if (!isLawfulProt || !AnalysisIPV4()) {
236                 uriData_.port = -1;
237                 uriData_.host = "";
238                 uriData_.userInfo = "";
239             }
240         }
241     }
242 
AnalysisHostAndPath()243     void Uri::AnalysisHostAndPath()
244     {
245         if (data_.empty()) {
246             return;
247         }
248         // find path
249         size_t pos = data_.find('/');
250         if (pos != std::string::npos) {
251             AnalysisPath(pos);
252             if (!errStr_.empty()) {
253                 return;
254             }
255         }
256         uriData_.authority = data_;
257         // find UserInfo
258         pos = data_.find('@');
259         if (pos != std::string::npos) {
260             AnalysisUserInfo(pos);
261             if (!errStr_.empty()) {
262                 return;
263             }
264         }
265         bool isLawfulProt = true;
266         // find port
267         pos = data_.rfind(':');
268         if (pos != std::string::npos) {
269             size_t pos1 = data_.rfind(']');
270             if (pos1 == std::string::npos || pos > pos1) {
271                 isLawfulProt = AnalysisPort(pos);
272             }
273             if (!errStr_.empty()) {
274             return;
275             }
276         }
277         AnalysisHost(isLawfulProt);
278     }
279 
AnalysisPath(size_t pos)280     void Uri::AnalysisPath(size_t pos)
281     {
282         std::string path = data_.substr(pos);
283         if (!CheckCharacter(path, g_rulePath, true)) {
284             errStr_ = "path does not conform to the rule";
285             return;
286         }
287         uriData_.path = path;
288         data_ = data_.substr(0, pos);
289     }
290 
AnalysisUserInfo(size_t pos)291     void Uri::AnalysisUserInfo(size_t pos)
292     {
293         std::string userInfo = data_.substr(0, pos);
294         if (!CheckCharacter(userInfo, g_ruleUserInfo, true)) {
295             errStr_ = "userInfo does not conform to the rule";
296             return;
297         }
298         uriData_.userInfo = userInfo;
299         data_ = data_.substr(pos + 1);
300     }
301 
AnalysisPort(size_t pos)302     bool Uri::AnalysisPort(size_t pos)
303     {
304         std::string port = data_.substr(pos + 1);
305         if (!CheckCharacter(port, g_rulePort, true)) {
306             errStr_ = "port does not conform to the rule";
307             return false;
308         } else if (CheckCharacter(port, g_ruleDigit, false)) {
309             if (port.size() == 0 || port.size() > 10) { // 10:The maximum number of bits for int value
310                 return false;
311             }
312             double tempPort = std::strtod(port.c_str(), nullptr);
313             if (tempPort < 0 || tempPort > INT32_MAX) {
314                 return false;
315             }
316             uriData_.port = static_cast<int>(tempPort);
317             data_ = data_.substr(0, pos);
318             return true;
319         } else {
320             data_ = data_.substr(0, pos);
321             return false;
322         }
323         return false;
324     }
325 
AnalysisIPV4()326     bool Uri::AnalysisIPV4()
327     {
328         std::regex ipv4("((25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)");
329         std::regex hostname("(([a-zA-Z0-9]([a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s]*[a-zA-Z0-9])?\\.)+"
330                            "([a-zA-Z]([a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s]*"
331                            "[a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s])?))|"
332                            "([a-zA-Z0-9]([a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s]*[a-zA-Z0-9])?)");
333         bool isIpv4 = std::regex_match(data_, ipv4);
334         bool isHosName = std::regex_match(data_, hostname);
335         if (!isIpv4 && !isHosName) {
336             return false;
337         } else {
338             uriData_.host = data_;
339             data_ = "";
340             return true;
341         }
342     }
343 
AnalysisIPV6()344     void Uri::AnalysisIPV6()
345     {
346         std::string str = data_.substr(1, data_.size() - 2); // 2:Intercept the string from the second subscript
347         std::regex ipv6("(::|(:((:[0-9A-Fa-f]{1,4}){1,7}))|(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|"
348                         "(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|:))|(([0-9A-Fa-f]{1,4}:){2}"
349                         "(((:[0-9A-Fa-f]{1,4}){1,5})|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})"
350                         "|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|:))|(([0-9A-Fa-f]{1,4}:){5}"
351                         "(((:[0-9A-Fa-f]{1,4}){1,2})|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|:))|"
352                         "(((:(:[0-9A-Fa-f]{1,4}){0,5}:)|(([0-9A-Fa-f]{1,4}:){1}(:[0-9A-Fa-f]{1,4}){0,4}:)"
353                         "|(([0-9A-Fa-f]{1,4}:){2}(:[0-9A-Fa-f]{1,4}){0,3}:)|(([0-9A-Fa-f]{1,4}:){3}"
354                         "(:[0-9A-Fa-f]{1,4}){0,2}:)|(([0-9A-Fa-f]{1,4}:){4}(:[0-9A-Fa-f]{1,4})?:)|"
355                         "(([0-9A-Fa-f]{1,4}:){5}:)|(([0-9A-Fa-f]{1,4}:){6}))((25[0-5]|2[0-4]\\d|1\\d{2}|"
356                         "[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)))(%[a-zA-Z0-9._]+)?");
357         if (!std::regex_match(str, ipv6)) {
358             errStr_ = "ipv6 does not conform to the rule";
359             return;
360         }
361         uriData_.host = data_;
362         data_ = "";
363     }
364 
Equals(const Uri other) const365     bool Uri::Equals(const Uri other) const
366     {
367         if (uriData_.port != other.uriData_.port) {
368             return false;
369         }
370         if (uriData_.scheme != other.uriData_.scheme) {
371             return false;
372         }
373         if (uriData_.userInfo != other.uriData_.userInfo) {
374             return false;
375         }
376         if (uriData_.host != other.uriData_.host) {
377             return false;
378         }
379         if (uriData_.query != other.uriData_.query) {
380             return false;
381         }
382         if (uriData_.fragment != other.uriData_.fragment) {
383             return false;
384         }
385         if (uriData_.path != other.uriData_.path) {
386             return false;
387         }
388         if (uriData_.authority != other.uriData_.authority) {
389             return false;
390         }
391         if (uriData_.SchemeSpecificPart != other.uriData_.SchemeSpecificPart) {
392             return false;
393         }
394         return true;
395     }
396 
    // Returns the URI text exactly as it was passed to the constructor (no normalization).
    std::string Uri::ToString() const
    {
        return inputUri_;
    }
401 
    // A URI is considered absolute when a scheme was parsed.
    bool Uri::IsAbsolute() const
    {
        return !uriData_.scheme.empty();
    }
406 
    // A URI is considered relative when no scheme was parsed (the opposite of IsAbsolute()).
    bool Uri::IsRelative() const
    {
        return uriData_.scheme.empty();
    }
411 
    // A URI is opaque exactly when it is not hierarchical; see IsHierarchical().
    bool Uri::IsOpaque() const
    {
        return !IsHierarchical();
    }
416 
IsHierarchical() const417     bool Uri::IsHierarchical() const
418     {
419         size_t index = inputUri_.find(':');
420         if (index == std::string::npos) {
421             return true;
422         }
423         if (inputUri_.length() == index + 1) {
424             return false;
425         }
426         return inputUri_[index + 1] == '/';
427     }
428 
    // Returns a URI string rebuilt from the parsed components with "key=value" added to
    // the query (see BuildUriString). key and value are concatenated as given; no
    // escaping is performed here. This object is not modified.
    std::string Uri::AddQueryValue(const std::string key, const std::string value) const
    {
        return BuildUriString("query", key + "=" + value);
    }
433 
    // Returns a URI string rebuilt from the parsed components with pathSegment appended
    // to the path (see BuildUriString). This object is not modified.
    std::string Uri::AddSegment(const std::string pathSegment) const
    {
        return BuildUriString("segment", pathSegment);
    }
438 
BuildUriString(const std::string str,const std::string param) const439     std::string Uri::BuildUriString(const std::string str, const std::string param) const
440     {
441         std::string result = "";
442         if (!uriData_.scheme.empty()) {
443             result += uriData_.scheme + ":";
444         }
445         if (!uriData_.authority.empty()) {
446             result += "//" + uriData_.authority;
447         }
448         if (!uriData_.path.empty()) {
449             result += uriData_.path ;
450         }
451         if (str == "segment") {
452             if (result.back() == '/') {
453                 result += param;
454             } else {
455                 result += "/" + param;
456             }
457         }
458         if (str != "clearquery") {
459             if (uriData_.query.empty()) {
460                 if (str == "query") {
461                     result +=  "?" + param;
462                 }
463             } else {
464                 result +=  "?" + uriData_.query;
465                 if (str == "query") {
466                     result +=  "&" + param;
467                 }
468             }
469         }
470         if (!uriData_.fragment.empty()) {
471             result +=  "#" + uriData_.fragment;
472         }
473         return result;
474     }
475 
GetSegment() const476     std::vector<std::string> Uri::GetSegment() const
477     {
478         std::vector<std::string> segments;
479         if (uriData_.path.empty()) {
480             return segments;
481         }
482         size_t previous = 0;
483         size_t current = 0;
484         for (current = uriData_.path.find('/', previous); current != std::string::npos;
485             current = uriData_.path.find('/', previous)) {
486             if (previous < current) {
487                 std::string segment = uriData_.path.substr(previous, current - previous);
488                 segments.push_back(segment);
489             }
490             previous = current + 1;
491         }
492         if (previous < uriData_.path.length()) {
493             segments.push_back(uriData_.path.substr(previous));
494         }
495         return segments;
496     }
497 
    // Returns the error message recorded while parsing; an empty string means that no
    // error was recorded.
    std::string Uri::IsFailed() const
    {
        return errStr_;
    }
502 
Normalize() const503     std::string Uri::Normalize() const
504     {
505         std::vector<std::string> temp;
506         size_t pathLen = uriData_.path.size();
507         if (pathLen == 0) {
508             return this->inputUri_;
509         }
510         size_t pos = 0;
511         size_t left = 0;
512         while ((pos = uriData_.path.find('/', left)) != std::string::npos) {
513             temp.push_back(uriData_.path.substr(left, pos - left));
514             left = pos + 1;
515         }
516         if (left != pathLen) {
517             temp.push_back(uriData_.path.substr(left));
518         }
519         size_t tempLen = temp.size();
520         std::vector<std::string> normalizeTemp;
521         for (size_t i = 0; i < tempLen; ++i) {
522             if (!temp[i].empty() && !(temp[i] == ".") && !(temp[i] == "..")) {
523                 normalizeTemp.push_back(temp[i]);
524             }
525             if (temp[i] == "..") {
526                 if (!normalizeTemp.empty() && normalizeTemp.back() != "..") {
527                     normalizeTemp.pop_back();
528                 } else {
529                     normalizeTemp.push_back(temp[i]);
530                 }
531             }
532         }
533         std::string normalizePath = "";
534         tempLen = normalizeTemp.size();
535         if (tempLen == 0) {
536             normalizePath = "/";
537         } else {
538             for (size_t i = 0; i < tempLen; ++i) {
539                 normalizePath += "/" + normalizeTemp[i];
540             }
541         }
542         return Split(normalizePath);
543     }
544 
545 
Split(const std::string & path) const546     std::string Uri::Split(const std::string &path) const
547     {
548         std::string normalizeUri = "";
549         if (!uriData_.scheme.empty()) {
550             normalizeUri += uriData_.scheme + ":";
551         }
552         if (uriData_.path.empty()) {
553             normalizeUri += uriData_.SchemeSpecificPart;
554         } else {
555             if (!uriData_.host.empty()) {
556                 normalizeUri += "//";
557                 if (!uriData_.userInfo.empty()) {
558                     normalizeUri += uriData_.userInfo + "@";
559                 }
560                 normalizeUri += uriData_.host;
561                 if (uriData_.port != -1) {
562                     normalizeUri += ":" + std::to_string(uriData_.port);
563                 }
564             } else if (!uriData_.authority.empty()) {
565                 normalizeUri += "//" + uriData_.authority;
566             }
567             normalizeUri += path;
568         }
569         if (!uriData_.query.empty()) {
570             normalizeUri += "?" + uriData_.query;
571         }
572         if (!uriData_.fragment.empty()) {
573             normalizeUri += "#" + uriData_.fragment;
574         }
575         return normalizeUri;
576     }
577 
    // Returns the parsed scheme (the text before the first ':'), or an empty string when
    // none was parsed.
    std::string Uri::GetScheme() const
    {
        return uriData_.scheme;
    }
582 
    // Returns the parsed authority (the text between "//" and the path), or an empty
    // string when none was parsed.
    std::string Uri::GetAuthority() const
    {
        return uriData_.authority;
    }
587 
    // Returns the scheme-specific part assembled during parsing.
    std::string Uri::GetSsp() const
    {
        return uriData_.SchemeSpecificPart;
    }
592 
    // Returns the parsed user-info (the authority text before '@'), or an empty string
    // when none was parsed.
    std::string Uri::GetUserinfo() const
    {
        return uriData_.userInfo;
    }
597 
    // Returns the parsed host, or an empty string when none was parsed. An IPv6 host is
    // stored with its surrounding brackets.
    std::string Uri::GetHost() const
    {
        return uriData_.host;
    }
602 
    // Returns the stored port number formatted as a decimal string. The parser uses -1
    // to mean "no port" (see AnalysisHost and Split), so "-1" can be returned here.
    std::string Uri::GetPort() const
    {
        return std::to_string(uriData_.port);
    }
607 
    // Returns the parsed path, or an empty string when none was parsed.
    std::string Uri::GetPath() const
    {
        return uriData_.path;
    }
612 
    // Returns the parsed query (without the leading '?'), or an empty string when none
    // is stored.
    std::string Uri::GetQuery() const
    {
        return uriData_.query;
    }
617 
    // Returns the parsed fragment (without the leading '#'), or an empty string when
    // none was parsed.
    std::string Uri::GetFragment() const
    {
        return uriData_.fragment;
    }
622 
    // Returns a URI string rebuilt from the parsed components with the query left out
    // (see BuildUriString). This object is not modified.
    std::string Uri::ClearQuery() const
    {
        return BuildUriString("clearquery", "");
    }
627 } // namespace OHOS::Uri
628