1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <regex>
17 #include <vector>
18 #include "hilog/log.h"
19 #include "string_ex.h"
20 #include "uri.h"
21 
22 using std::string;
23 using std::regex;
24 using OHOS::HiviewDFX::HiLog;
25 
26 namespace OHOS {
namespace {
    // Placeholder stored in a string component until it has been parsed.
    const string NOT_CACHED("NOT VALID");
    // Shared empty result returned when a component is absent.
    const string EMPTY;
    // "No position" marker; the cached separator indexes also start out with this value.
    constexpr size_t NOT_FOUND = string::npos;
    // Initial value of the cached port, meaning it has not been parsed yet.
    constexpr int NOT_CALCULATED = -2;
    // Port value reported when the URI carries no usable port.
    constexpr int PORT_NONE = -1;
    // Characters that delimit the URI components.
    constexpr char SCHEME_SEPARATOR = ':';
    constexpr char SCHEME_FRAGMENT = '#';
    constexpr char LEFT_SEPARATOR = '/';
    constexpr char RIGHT_SEPARATOR = '\\';
    constexpr char QUERY_FLAG = '?';
    constexpr char USER_HOST_SEPARATOR = '@';
    constexpr char PORT_SEPARATOR = ':';
    // Offsets (1, 2 and 3 characters) used when stepping past ":" and "//".
    constexpr size_t POS_INC = 1;
    constexpr size_t POS_INC_MORE = 2;
    constexpr size_t POS_INC_AGAIN = 3;
} // namespace
44 
Uri(const string & uriString)45 Uri::Uri(const string& uriString)
46 {
47     cachedSsi_ = NOT_FOUND;
48     cachedFsi_ = NOT_FOUND;
49     port_ = NOT_CALCULATED;
50 
51     if (uriString.empty()) {
52         return;
53     }
54 
55     uriString_ = uriString;
56     scheme_ = NOT_CACHED;
57     ssp_ = NOT_CACHED;
58     authority_ = NOT_CACHED;
59     host_ = NOT_CACHED;
60     userInfo_ = NOT_CACHED;
61     query_ = NOT_CACHED;
62     path_ = NOT_CACHED;
63     fragment_ = NOT_CACHED;
64 
65     if (!CheckScheme()) {
66         uriString_ = EMPTY;
67         HILOG_IMPL(LOG_CORE, LOG_ERROR, 0xD001305,  "URI", "Scheme wrong");
68     }
69 }
70 
~Uri()71 Uri::~Uri()
72 {}
73 
CheckScheme()74 bool Uri::CheckScheme()
75 {
76     scheme_ = ParseScheme();
77     if (scheme_.empty()) {
78         return true;
79     }
80     try {
81         regex schemeRegex("[a-zA-Z][a-zA-Z|\\d|\\+|\\-|.]*$");
82         if (!regex_match(scheme_, schemeRegex)) {
83             return false;
84         }
85     } catch (std::regex_error &message) {
86         HILOG_IMPL(LOG_CORE, LOG_ERROR, 0xD001305,  "URI", "regex fail,message:%{public}s", message.what());
87         return false;
88     }
89     return true;
90 }
91 
GetScheme()92 string Uri::GetScheme()
93 {
94     if (uriString_.empty()) {
95         return EMPTY;
96     }
97 
98     if (scheme_ == NOT_CACHED) {
99         scheme_ = ParseScheme();
100     }
101     return scheme_;
102 }
103 
ParseScheme()104 string Uri::ParseScheme()
105 {
106     size_t ssi = FindSchemeSeparator();
107     return (ssi == NOT_FOUND) ? EMPTY : uriString_.substr(0, ssi);
108 }
109 
GetSchemeSpecificPart()110 string Uri::GetSchemeSpecificPart()
111 {
112     if (uriString_.empty()) {
113         return EMPTY;
114     }
115 
116     return (ssp_ == NOT_CACHED) ? (ssp_ = ParseSsp()) : ssp_;
117 }
118 
ParseSsp()119 string Uri::ParseSsp()
120 {
121     size_t ssi = FindSchemeSeparator();
122     size_t fsi = FindFragmentSeparator();
123 
124     size_t start = (ssi == NOT_FOUND) ? 0 : (ssi + 1);
125     size_t end = (fsi == NOT_FOUND) ? uriString_.size() : fsi;
126 
127     // Return everything between ssi and fsi.
128     string ssp = EMPTY;
129     if (end > start) {
130         ssp = uriString_.substr(start, end - start);
131     }
132 
133     return ssp;
134 }
135 
GetAuthority()136 string Uri::GetAuthority()
137 {
138     if (uriString_.empty()) {
139         return EMPTY;
140     }
141 
142     if (authority_ == NOT_CACHED) {
143         authority_ = ParseAuthority();
144     }
145     return authority_;
146 }
147 
ParseAuthority()148 string Uri::ParseAuthority()
149 {
150     size_t ssi = FindSchemeSeparator();
151     if (ssi == NOT_FOUND) {
152         return EMPTY;
153     }
154 
155     size_t length = uriString_.length();
156     // If "//" follows the scheme separator, we have an authority.
157     if ((length > (ssi + POS_INC_MORE)) && (uriString_.at(ssi + POS_INC) == LEFT_SEPARATOR) &&
158         (uriString_.at(ssi + POS_INC_MORE) == LEFT_SEPARATOR)) {
159         // Look for the start of the path, query, or fragment, or the end of the string.
160         size_t start = ssi + POS_INC_AGAIN;
161         size_t end = start;
162 
163         while (end < length) {
164             char ch = uriString_.at(end);
165             if ((ch == LEFT_SEPARATOR) || (ch == RIGHT_SEPARATOR) || (ch == QUERY_FLAG) ||
166                 (ch == SCHEME_FRAGMENT)) {
167                 break;
168             }
169 
170             end++;
171         }
172 
173         return uriString_.substr(start, end - start);
174     } else {
175         return EMPTY;
176     }
177 }
178 
GetUserInfo()179 string Uri::GetUserInfo()
180 {
181     if (uriString_.empty()) {
182         return EMPTY;
183     }
184 
185     if (userInfo_ == NOT_CACHED) {
186         userInfo_ = ParseUserInfo();
187     }
188     return userInfo_;
189 }
190 
ParseUserInfo()191 string Uri::ParseUserInfo()
192 {
193     string authority = GetAuthority();
194     if (authority.empty()) {
195         return EMPTY;
196     }
197 
198     size_t end = authority.find_last_of(USER_HOST_SEPARATOR);
199     return (end == NOT_FOUND) ? EMPTY : authority.substr(0, end);
200 }
201 
GetHost()202 string Uri::GetHost()
203 {
204     if (uriString_.empty()) {
205         return EMPTY;
206     }
207 
208     if (host_ == NOT_CACHED) {
209         host_ = ParseHost();
210     }
211     return host_;
212 }
213 
ParseHost()214 string Uri::ParseHost()
215 {
216     string authority = GetAuthority();
217     if (authority.empty()) {
218         return EMPTY;
219     }
220 
221     // Parse out user info and then port.
222     size_t userInfoSeparator = authority.find_last_of(USER_HOST_SEPARATOR);
223     size_t start = (userInfoSeparator == NOT_FOUND) ? 0 : (userInfoSeparator + 1);
224     size_t portSeparator = authority.find_first_of(PORT_SEPARATOR, start);
225     size_t end = (portSeparator == NOT_FOUND) ? authority.size() : portSeparator;
226 
227     string host = EMPTY;
228     if (start < end) {
229         host = authority.substr(start, end - start);
230     }
231 
232     return host;
233 }
234 
GetPort()235 int Uri::GetPort()
236 {
237     if (uriString_.empty()) {
238         return PORT_NONE;
239     }
240 
241     if (port_ == NOT_CALCULATED) {
242         port_ = ParsePort();
243     }
244     return port_;
245 }
246 
ParsePort()247 int Uri::ParsePort()
248 {
249     string authority = GetAuthority();
250     if (authority.empty()) {
251         return PORT_NONE;
252     }
253 
254     // Make sure we look for the port separtor *after* the user info separator.
255     size_t userInfoSeparator = authority.find_last_of(USER_HOST_SEPARATOR);
256     size_t start = (userInfoSeparator == NOT_FOUND) ? 0 : (userInfoSeparator + 1);
257     size_t portSeparator = authority.find_first_of(PORT_SEPARATOR, start);
258     if (portSeparator == NOT_FOUND) {
259         return PORT_NONE;
260     }
261 
262     start = portSeparator + 1;
263     string portString = authority.substr(start);
264 
265     int value = PORT_NONE;
266     return StrToInt(portString, value) ? value : PORT_NONE;
267 }
268 
GetQuery()269 string Uri::GetQuery()
270 {
271     if (uriString_.empty()) {
272         return EMPTY;
273     }
274 
275     if (query_ == NOT_CACHED) {
276         query_ = ParseQuery();
277     }
278     return query_;
279 }
280 
ParseQuery()281 string Uri::ParseQuery()
282 {
283     size_t ssi = FindSchemeSeparator();
284     if (ssi == NOT_FOUND) {
285         ssi = 0;
286     }
287     size_t qsi = uriString_.find_first_of(QUERY_FLAG, ssi);
288     if (qsi == NOT_FOUND) {
289         return EMPTY;
290     }
291 
292     size_t start = qsi + 1;
293     size_t fsi = FindFragmentSeparator();
294     if (fsi == NOT_FOUND) {
295         return uriString_.substr(start);
296     }
297 
298     if (fsi < qsi) {
299         // Invalid.
300         return EMPTY;
301     }
302 
303     return uriString_.substr(start, fsi - start);
304 }
305 
GetPath()306 string Uri::GetPath()
307 {
308     if (uriString_.empty()) {
309         return EMPTY;
310     }
311 
312     if (path_ == NOT_CACHED) {
313         path_ = ParsePath();
314     }
315     return path_;
316 }
317 
GetPathSegments(std::vector<std::string> & segments)318 void Uri::GetPathSegments(std::vector<std::string>& segments)
319 {
320     if (uriString_.empty()) {
321         return;
322     }
323     if (path_ == NOT_CACHED) {
324         path_ = ParsePath();
325     }
326 
327     size_t previous = 0;
328     size_t current;
329     while ((current = path_.find(LEFT_SEPARATOR, previous)) != std::string::npos) {
330         if (previous < current) {
331             segments.emplace_back(path_.substr(previous, current - previous));
332         }
333         previous = current + POS_INC;
334     }
335     // Add in the final path segment.
336     if (previous < path_.length()) {
337         segments.emplace_back(path_.substr(previous));
338     }
339 }
340 
ParsePath()341 string Uri::ParsePath()
342 {
343     size_t ssi = FindSchemeSeparator();
344     // If the URI is absolute.
345     if (ssi != NOT_FOUND) {
346         // Is there anything after the ':'?
347         if ((ssi + 1) == uriString_.length()) {
348             // Opaque URI.
349             return EMPTY;
350         }
351 
352         // A '/' after the ':' means this is hierarchical.
353         if (uriString_.at(ssi + 1) != LEFT_SEPARATOR) {
354             // Opaque URI.
355             return EMPTY;
356         }
357     } else {
358         // All relative URIs are hierarchical.
359     }
360 
361     return ParsePath(ssi);
362 }
363 
ParsePath(size_t ssi)364 string Uri::ParsePath(size_t ssi)
365 {
366     size_t length = uriString_.length();
367 
368     // Find start of path.
369     size_t pathStart = (ssi == NOT_FOUND) ? 0 : (ssi + POS_INC);
370     if ((length > (pathStart + POS_INC)) && (uriString_.at(pathStart) == LEFT_SEPARATOR) &&
371         (uriString_.at(pathStart + POS_INC) == LEFT_SEPARATOR)) {
372         // Skip over authority to path.
373         pathStart += POS_INC_MORE;
374 
375         while (pathStart < length) {
376             char ch = uriString_.at(pathStart);
377             if ((ch == QUERY_FLAG) || (ch == SCHEME_FRAGMENT)) {
378                 return EMPTY;
379             }
380 
381             if ((ch == LEFT_SEPARATOR) || (ch == RIGHT_SEPARATOR)) {
382                 break;
383             }
384 
385             pathStart++;
386         }
387     }
388 
389     // Find end of path.
390     size_t pathEnd = pathStart;
391     while (pathEnd < length) {
392         char ch = uriString_.at(pathEnd);
393         if ((ch == QUERY_FLAG) || (ch == SCHEME_FRAGMENT)) {
394             break;
395         }
396 
397         pathEnd++;
398     }
399 
400     return uriString_.substr(pathStart, pathEnd - pathStart);
401 }
402 
GetFragment()403 string Uri::GetFragment()
404 {
405     if (uriString_.empty()) {
406         return EMPTY;
407     }
408 
409     if (fragment_ == NOT_CACHED) {
410         fragment_ = ParseFragment();
411     }
412     return fragment_;
413 }
414 
ParseFragment()415 string Uri::ParseFragment()
416 {
417     size_t fsi = FindFragmentSeparator();
418     return (fsi == NOT_FOUND) ? EMPTY : uriString_.substr(fsi + 1);
419 }
420 
FindSchemeSeparator()421 size_t Uri::FindSchemeSeparator()
422 {
423     if (cachedSsi_ == NOT_FOUND) {
424         cachedSsi_ = uriString_.find_first_of(SCHEME_SEPARATOR);
425     }
426     return cachedSsi_;
427 }
428 
FindFragmentSeparator()429 size_t Uri::FindFragmentSeparator()
430 {
431     if (cachedFsi_ == NOT_FOUND) {
432         cachedFsi_ = uriString_.find_first_of(SCHEME_FRAGMENT, FindSchemeSeparator());
433     }
434     return cachedFsi_;
435 }
436 
IsHierarchical()437 bool Uri::IsHierarchical()
438 {
439     if (uriString_.empty()) {
440         return false;
441     }
442 
443     size_t ssi = FindSchemeSeparator();
444     if (ssi == NOT_FOUND) {
445         // All relative URIs are hierarchical.
446         return true;
447     }
448 
449     if (uriString_.length() == (ssi + 1)) {
450         // No ssp.
451         return false;
452     }
453 
454     // If the ssp starts with a '/', this is hierarchical.
455     return (uriString_.at(ssi + 1) == LEFT_SEPARATOR);
456 }
457 
IsOpaque()458 bool Uri::IsOpaque()
459 {
460     if (uriString_.empty()) {
461         return false;
462     }
463 
464     return !IsHierarchical();
465 }
466 
IsAbsolute()467 bool Uri::IsAbsolute()
468 {
469     if (uriString_.empty()) {
470         return false;
471     }
472 
473     return !IsRelative();
474 }
475 
IsRelative()476 bool Uri::IsRelative()
477 {
478     if (uriString_.empty()) {
479         return false;
480     }
481 
482     // Note: We return true if the index is 0
483     return FindSchemeSeparator() == NOT_FOUND;
484 }
485 
Equals(const Uri & other) const486 bool Uri::Equals(const Uri& other) const
487 {
488     return ToString() == other.ToString();
489 }
490 
CompareTo(const Uri & other) const491 int Uri::CompareTo(const Uri& other) const
492 {
493     return ToString().compare(other.ToString());
494 }
495 
ToString() const496 string Uri::ToString() const
497 {
498     return uriString_;
499 }
500 
operator ==(const Uri & other) const501 bool Uri::operator==(const Uri& other) const
502 {
503     return ToString() == other.ToString();
504 }
505 
Marshalling(Parcel & parcel) const506 bool Uri::Marshalling(Parcel& parcel) const
507 {
508     if (IsAsciiString(uriString_)) {
509         return parcel.WriteString16(Str8ToStr16(uriString_));
510     }
511 
512     HILOG_IMPL(LOG_CORE, LOG_ERROR, 0xD001305,  "URI", "uriString_ is not ascii string");
513     return false;
514 }
515 
Unmarshalling(Parcel & parcel)516 Uri* Uri::Unmarshalling(Parcel& parcel)
517 {
518     return new Uri(Str16ToStr8(parcel.ReadString16()));
519 }
520 } // namespace OHOS
521