1  /*
2   * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3   * Licensed under the Apache License, Version 2.0 (the "License");
4   * you may not use this file except in compliance with the License.
5   * You may obtain a copy of the License at
6   *
7   *     http://www.apache.org/licenses/LICENSE-2.0
8   *
9   * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  
16  #include <regex>
17  #include <vector>
18  #include "hilog/log.h"
19  #include "string_ex.h"
20  #include "uri.h"
21  
22  using std::string;
23  using std::regex;
24  using OHOS::HiviewDFX::HiLog;
25  
26  namespace OHOS {
namespace {
    // Marker stored in a string component that has not been parsed yet.
    const std::string NOT_CACHED = "NOT VALID";
    // Shared empty result.
    const std::string EMPTY = "";
    // "No such position" marker, identical to std::string::npos.
    constexpr size_t NOT_FOUND = std::string::npos;
    // Marker stored in the port field before it has been parsed.
    constexpr int NOT_CALCULATED = -2;
    // Port value reported when no usable port is available.
    constexpr int PORT_NONE = -1;

    // Delimiter characters recognised by the parser.
    constexpr char SCHEME_SEPARATOR = ':';
    constexpr char SCHEME_FRAGMENT = '#';
    constexpr char LEFT_SEPARATOR = '/';
    constexpr char RIGHT_SEPARATOR = '\\';
    constexpr char QUERY_FLAG = '?';
    constexpr char USER_HOST_SEPARATOR = '@';
    constexpr char PORT_SEPARATOR = ':';

    // Offsets of one, two and three characters past a position.
    constexpr size_t POS_INC = 1;
    constexpr size_t POS_INC_MORE = 2;
    constexpr size_t POS_INC_AGAIN = 3;
} // namespace
44  
Uri(const string & uriString)45  Uri::Uri(const string& uriString)
46  {
47      cachedSsi_ = NOT_FOUND;
48      cachedFsi_ = NOT_FOUND;
49      port_ = NOT_CALCULATED;
50  
51      if (uriString.empty()) {
52          return;
53      }
54  
55      uriString_ = uriString;
56      scheme_ = NOT_CACHED;
57      ssp_ = NOT_CACHED;
58      authority_ = NOT_CACHED;
59      host_ = NOT_CACHED;
60      userInfo_ = NOT_CACHED;
61      query_ = NOT_CACHED;
62      path_ = NOT_CACHED;
63      fragment_ = NOT_CACHED;
64  
65      if (!CheckScheme()) {
66          uriString_ = EMPTY;
67          HILOG_IMPL(LOG_CORE, LOG_ERROR, 0xD001305,  "URI", "Scheme wrong");
68      }
69  }
70  
~Uri()71  Uri::~Uri()
72  {}
73  
CheckScheme()74  bool Uri::CheckScheme()
75  {
76      scheme_ = ParseScheme();
77      if (scheme_.empty()) {
78          return true;
79      }
80      try {
81          regex schemeRegex("[a-zA-Z][a-zA-Z|\\d|\\+|\\-|.]*$");
82          if (!regex_match(scheme_, schemeRegex)) {
83              return false;
84          }
85      } catch (std::regex_error &message) {
86          HILOG_IMPL(LOG_CORE, LOG_ERROR, 0xD001305,  "URI", "regex fail,message:%{public}s", message.what());
87          return false;
88      }
89      return true;
90  }
91  
GetScheme()92  string Uri::GetScheme()
93  {
94      if (uriString_.empty()) {
95          return EMPTY;
96      }
97  
98      if (scheme_ == NOT_CACHED) {
99          scheme_ = ParseScheme();
100      }
101      return scheme_;
102  }
103  
ParseScheme()104  string Uri::ParseScheme()
105  {
106      size_t ssi = FindSchemeSeparator();
107      return (ssi == NOT_FOUND) ? EMPTY : uriString_.substr(0, ssi);
108  }
109  
GetSchemeSpecificPart()110  string Uri::GetSchemeSpecificPart()
111  {
112      if (uriString_.empty()) {
113          return EMPTY;
114      }
115  
116      return (ssp_ == NOT_CACHED) ? (ssp_ = ParseSsp()) : ssp_;
117  }
118  
ParseSsp()119  string Uri::ParseSsp()
120  {
121      size_t ssi = FindSchemeSeparator();
122      size_t fsi = FindFragmentSeparator();
123  
124      size_t start = (ssi == NOT_FOUND) ? 0 : (ssi + 1);
125      size_t end = (fsi == NOT_FOUND) ? uriString_.size() : fsi;
126  
127      // Return everything between ssi and fsi.
128      string ssp = EMPTY;
129      if (end > start) {
130          ssp = uriString_.substr(start, end - start);
131      }
132  
133      return ssp;
134  }
135  
GetAuthority()136  string Uri::GetAuthority()
137  {
138      if (uriString_.empty()) {
139          return EMPTY;
140      }
141  
142      if (authority_ == NOT_CACHED) {
143          authority_ = ParseAuthority();
144      }
145      return authority_;
146  }
147  
ParseAuthority()148  string Uri::ParseAuthority()
149  {
150      size_t ssi = FindSchemeSeparator();
151      if (ssi == NOT_FOUND) {
152          return EMPTY;
153      }
154  
155      size_t length = uriString_.length();
156      // If "//" follows the scheme separator, we have an authority.
157      if ((length > (ssi + POS_INC_MORE)) && (uriString_.at(ssi + POS_INC) == LEFT_SEPARATOR) &&
158          (uriString_.at(ssi + POS_INC_MORE) == LEFT_SEPARATOR)) {
159          // Look for the start of the path, query, or fragment, or the end of the string.
160          size_t start = ssi + POS_INC_AGAIN;
161          size_t end = start;
162  
163          while (end < length) {
164              char ch = uriString_.at(end);
165              if ((ch == LEFT_SEPARATOR) || (ch == RIGHT_SEPARATOR) || (ch == QUERY_FLAG) ||
166                  (ch == SCHEME_FRAGMENT)) {
167                  break;
168              }
169  
170              end++;
171          }
172  
173          return uriString_.substr(start, end - start);
174      } else {
175          return EMPTY;
176      }
177  }
178  
GetUserInfo()179  string Uri::GetUserInfo()
180  {
181      if (uriString_.empty()) {
182          return EMPTY;
183      }
184  
185      if (userInfo_ == NOT_CACHED) {
186          userInfo_ = ParseUserInfo();
187      }
188      return userInfo_;
189  }
190  
ParseUserInfo()191  string Uri::ParseUserInfo()
192  {
193      string authority = GetAuthority();
194      if (authority.empty()) {
195          return EMPTY;
196      }
197  
198      size_t end = authority.find_last_of(USER_HOST_SEPARATOR);
199      return (end == NOT_FOUND) ? EMPTY : authority.substr(0, end);
200  }
201  
GetHost()202  string Uri::GetHost()
203  {
204      if (uriString_.empty()) {
205          return EMPTY;
206      }
207  
208      if (host_ == NOT_CACHED) {
209          host_ = ParseHost();
210      }
211      return host_;
212  }
213  
ParseHost()214  string Uri::ParseHost()
215  {
216      string authority = GetAuthority();
217      if (authority.empty()) {
218          return EMPTY;
219      }
220  
221      // Parse out user info and then port.
222      size_t userInfoSeparator = authority.find_last_of(USER_HOST_SEPARATOR);
223      size_t start = (userInfoSeparator == NOT_FOUND) ? 0 : (userInfoSeparator + 1);
224      size_t portSeparator = authority.find_first_of(PORT_SEPARATOR, start);
225      size_t end = (portSeparator == NOT_FOUND) ? authority.size() : portSeparator;
226  
227      string host = EMPTY;
228      if (start < end) {
229          host = authority.substr(start, end - start);
230      }
231  
232      return host;
233  }
234  
GetPort()235  int Uri::GetPort()
236  {
237      if (uriString_.empty()) {
238          return PORT_NONE;
239      }
240  
241      if (port_ == NOT_CALCULATED) {
242          port_ = ParsePort();
243      }
244      return port_;
245  }
246  
ParsePort()247  int Uri::ParsePort()
248  {
249      string authority = GetAuthority();
250      if (authority.empty()) {
251          return PORT_NONE;
252      }
253  
254      // Make sure we look for the port separtor *after* the user info separator.
255      size_t userInfoSeparator = authority.find_last_of(USER_HOST_SEPARATOR);
256      size_t start = (userInfoSeparator == NOT_FOUND) ? 0 : (userInfoSeparator + 1);
257      size_t portSeparator = authority.find_first_of(PORT_SEPARATOR, start);
258      if (portSeparator == NOT_FOUND) {
259          return PORT_NONE;
260      }
261  
262      start = portSeparator + 1;
263      string portString = authority.substr(start);
264  
265      int value = PORT_NONE;
266      return StrToInt(portString, value) ? value : PORT_NONE;
267  }
268  
GetQuery()269  string Uri::GetQuery()
270  {
271      if (uriString_.empty()) {
272          return EMPTY;
273      }
274  
275      if (query_ == NOT_CACHED) {
276          query_ = ParseQuery();
277      }
278      return query_;
279  }
280  
ParseQuery()281  string Uri::ParseQuery()
282  {
283      size_t ssi = FindSchemeSeparator();
284      if (ssi == NOT_FOUND) {
285          ssi = 0;
286      }
287      size_t qsi = uriString_.find_first_of(QUERY_FLAG, ssi);
288      if (qsi == NOT_FOUND) {
289          return EMPTY;
290      }
291  
292      size_t start = qsi + 1;
293      size_t fsi = FindFragmentSeparator();
294      if (fsi == NOT_FOUND) {
295          return uriString_.substr(start);
296      }
297  
298      if (fsi < qsi) {
299          // Invalid.
300          return EMPTY;
301      }
302  
303      return uriString_.substr(start, fsi - start);
304  }
305  
GetPath()306  string Uri::GetPath()
307  {
308      if (uriString_.empty()) {
309          return EMPTY;
310      }
311  
312      if (path_ == NOT_CACHED) {
313          path_ = ParsePath();
314      }
315      return path_;
316  }
317  
GetPathSegments(std::vector<std::string> & segments)318  void Uri::GetPathSegments(std::vector<std::string>& segments)
319  {
320      if (uriString_.empty()) {
321          return;
322      }
323      if (path_ == NOT_CACHED) {
324          path_ = ParsePath();
325      }
326  
327      size_t previous = 0;
328      size_t current;
329      while ((current = path_.find(LEFT_SEPARATOR, previous)) != std::string::npos) {
330          if (previous < current) {
331              segments.emplace_back(path_.substr(previous, current - previous));
332          }
333          previous = current + POS_INC;
334      }
335      // Add in the final path segment.
336      if (previous < path_.length()) {
337          segments.emplace_back(path_.substr(previous));
338      }
339  }
340  
ParsePath()341  string Uri::ParsePath()
342  {
343      size_t ssi = FindSchemeSeparator();
344      // If the URI is absolute.
345      if (ssi != NOT_FOUND) {
346          // Is there anything after the ':'?
347          if ((ssi + 1) == uriString_.length()) {
348              // Opaque URI.
349              return EMPTY;
350          }
351  
352          // A '/' after the ':' means this is hierarchical.
353          if (uriString_.at(ssi + 1) != LEFT_SEPARATOR) {
354              // Opaque URI.
355              return EMPTY;
356          }
357      } else {
358          // All relative URIs are hierarchical.
359      }
360  
361      return ParsePath(ssi);
362  }
363  
ParsePath(size_t ssi)364  string Uri::ParsePath(size_t ssi)
365  {
366      size_t length = uriString_.length();
367  
368      // Find start of path.
369      size_t pathStart = (ssi == NOT_FOUND) ? 0 : (ssi + POS_INC);
370      if ((length > (pathStart + POS_INC)) && (uriString_.at(pathStart) == LEFT_SEPARATOR) &&
371          (uriString_.at(pathStart + POS_INC) == LEFT_SEPARATOR)) {
372          // Skip over authority to path.
373          pathStart += POS_INC_MORE;
374  
375          while (pathStart < length) {
376              char ch = uriString_.at(pathStart);
377              if ((ch == QUERY_FLAG) || (ch == SCHEME_FRAGMENT)) {
378                  return EMPTY;
379              }
380  
381              if ((ch == LEFT_SEPARATOR) || (ch == RIGHT_SEPARATOR)) {
382                  break;
383              }
384  
385              pathStart++;
386          }
387      }
388  
389      // Find end of path.
390      size_t pathEnd = pathStart;
391      while (pathEnd < length) {
392          char ch = uriString_.at(pathEnd);
393          if ((ch == QUERY_FLAG) || (ch == SCHEME_FRAGMENT)) {
394              break;
395          }
396  
397          pathEnd++;
398      }
399  
400      return uriString_.substr(pathStart, pathEnd - pathStart);
401  }
402  
GetFragment()403  string Uri::GetFragment()
404  {
405      if (uriString_.empty()) {
406          return EMPTY;
407      }
408  
409      if (fragment_ == NOT_CACHED) {
410          fragment_ = ParseFragment();
411      }
412      return fragment_;
413  }
414  
ParseFragment()415  string Uri::ParseFragment()
416  {
417      size_t fsi = FindFragmentSeparator();
418      return (fsi == NOT_FOUND) ? EMPTY : uriString_.substr(fsi + 1);
419  }
420  
FindSchemeSeparator()421  size_t Uri::FindSchemeSeparator()
422  {
423      if (cachedSsi_ == NOT_FOUND) {
424          cachedSsi_ = uriString_.find_first_of(SCHEME_SEPARATOR);
425      }
426      return cachedSsi_;
427  }
428  
FindFragmentSeparator()429  size_t Uri::FindFragmentSeparator()
430  {
431      if (cachedFsi_ == NOT_FOUND) {
432          cachedFsi_ = uriString_.find_first_of(SCHEME_FRAGMENT, FindSchemeSeparator());
433      }
434      return cachedFsi_;
435  }
436  
IsHierarchical()437  bool Uri::IsHierarchical()
438  {
439      if (uriString_.empty()) {
440          return false;
441      }
442  
443      size_t ssi = FindSchemeSeparator();
444      if (ssi == NOT_FOUND) {
445          // All relative URIs are hierarchical.
446          return true;
447      }
448  
449      if (uriString_.length() == (ssi + 1)) {
450          // No ssp.
451          return false;
452      }
453  
454      // If the ssp starts with a '/', this is hierarchical.
455      return (uriString_.at(ssi + 1) == LEFT_SEPARATOR);
456  }
457  
IsOpaque()458  bool Uri::IsOpaque()
459  {
460      if (uriString_.empty()) {
461          return false;
462      }
463  
464      return !IsHierarchical();
465  }
466  
IsAbsolute()467  bool Uri::IsAbsolute()
468  {
469      if (uriString_.empty()) {
470          return false;
471      }
472  
473      return !IsRelative();
474  }
475  
IsRelative()476  bool Uri::IsRelative()
477  {
478      if (uriString_.empty()) {
479          return false;
480      }
481  
482      // Note: We return true if the index is 0
483      return FindSchemeSeparator() == NOT_FOUND;
484  }
485  
Equals(const Uri & other) const486  bool Uri::Equals(const Uri& other) const
487  {
488      return ToString() == other.ToString();
489  }
490  
CompareTo(const Uri & other) const491  int Uri::CompareTo(const Uri& other) const
492  {
493      return ToString().compare(other.ToString());
494  }
495  
ToString() const496  string Uri::ToString() const
497  {
498      return uriString_;
499  }
500  
operator ==(const Uri & other) const501  bool Uri::operator==(const Uri& other) const
502  {
503      return ToString() == other.ToString();
504  }
505  
Marshalling(Parcel & parcel) const506  bool Uri::Marshalling(Parcel& parcel) const
507  {
508      if (IsAsciiString(uriString_)) {
509          return parcel.WriteString16(Str8ToStr16(uriString_));
510      }
511  
512      HILOG_IMPL(LOG_CORE, LOG_ERROR, 0xD001305,  "URI", "uriString_ is not ascii string");
513      return false;
514  }
515  
Unmarshalling(Parcel & parcel)516  Uri* Uri::Unmarshalling(Parcel& parcel)
517  {
518      return new Uri(Str16ToStr8(parcel.ReadString16()));
519  }
520  } // namespace OHOS
521