1 /* 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #include <regex> 17 #include <vector> 18 #include "hilog/log.h" 19 #include "string_ex.h" 20 #include "uri.h" 21 22 using std::string; 23 using std::regex; 24 using OHOS::HiviewDFX::HiLog; 25 26 namespace OHOS { 27 namespace { 28 const string NOT_CACHED = "NOT VALID"; 29 const string EMPTY = ""; 30 const size_t NOT_FOUND = string::npos; 31 const int NOT_CALCULATED = -2; 32 const int PORT_NONE = -1; 33 const char SCHEME_SEPARATOR = ':'; 34 const char SCHEME_FRAGMENT = '#'; 35 const char LEFT_SEPARATOR = '/'; 36 const char RIGHT_SEPARATOR = '\\'; 37 const char QUERY_FLAG = '?'; 38 const char USER_HOST_SEPARATOR = '@'; 39 const char PORT_SEPARATOR = ':'; 40 const size_t POS_INC = 1; 41 const size_t POS_INC_MORE = 2; 42 const size_t POS_INC_AGAIN = 3; 43 }; // namespace 44 Uri(const string & uriString)45 Uri::Uri(const string& uriString) 46 { 47 cachedSsi_ = NOT_FOUND; 48 cachedFsi_ = NOT_FOUND; 49 port_ = NOT_CALCULATED; 50 51 if (uriString.empty()) { 52 return; 53 } 54 55 uriString_ = uriString; 56 scheme_ = NOT_CACHED; 57 ssp_ = NOT_CACHED; 58 authority_ = NOT_CACHED; 59 host_ = NOT_CACHED; 60 userInfo_ = NOT_CACHED; 61 query_ = NOT_CACHED; 62 path_ = NOT_CACHED; 63 fragment_ = NOT_CACHED; 64 65 if (!CheckScheme()) { 66 uriString_ = EMPTY; 67 HILOG_IMPL(LOG_CORE, LOG_ERROR, 0xD001305, "URI", "Scheme wrong"); 68 } 69 } 70 ~Uri()71 Uri::~Uri() 72 {} 73 CheckScheme()74 bool Uri::CheckScheme() 75 { 76 scheme_ = ParseScheme(); 77 if (scheme_.empty()) { 78 return true; 79 } 80 try { 81 regex schemeRegex("[a-zA-Z][a-zA-Z|\\d|\\+|\\-|.]*$"); 82 if (!regex_match(scheme_, schemeRegex)) { 83 return false; 84 } 85 } catch (std::regex_error &message) { 86 HILOG_IMPL(LOG_CORE, LOG_ERROR, 0xD001305, "URI", "regex fail,message:%{public}s", message.what()); 87 return false; 88 } 89 return true; 90 } 91 GetScheme()92 string Uri::GetScheme() 93 { 94 if (uriString_.empty()) { 95 return EMPTY; 96 } 97 98 if (scheme_ == NOT_CACHED) { 99 scheme_ = ParseScheme(); 100 } 101 return scheme_; 102 } 103 ParseScheme()104 string Uri::ParseScheme() 105 { 106 size_t ssi = FindSchemeSeparator(); 107 return (ssi == NOT_FOUND) ? EMPTY : uriString_.substr(0, ssi); 108 } 109 GetSchemeSpecificPart()110 string Uri::GetSchemeSpecificPart() 111 { 112 if (uriString_.empty()) { 113 return EMPTY; 114 } 115 116 return (ssp_ == NOT_CACHED) ? (ssp_ = ParseSsp()) : ssp_; 117 } 118 ParseSsp()119 string Uri::ParseSsp() 120 { 121 size_t ssi = FindSchemeSeparator(); 122 size_t fsi = FindFragmentSeparator(); 123 124 size_t start = (ssi == NOT_FOUND) ? 0 : (ssi + 1); 125 size_t end = (fsi == NOT_FOUND) ? uriString_.size() : fsi; 126 127 // Return everything between ssi and fsi. 128 string ssp = EMPTY; 129 if (end > start) { 130 ssp = uriString_.substr(start, end - start); 131 } 132 133 return ssp; 134 } 135 GetAuthority()136 string Uri::GetAuthority() 137 { 138 if (uriString_.empty()) { 139 return EMPTY; 140 } 141 142 if (authority_ == NOT_CACHED) { 143 authority_ = ParseAuthority(); 144 } 145 return authority_; 146 } 147 ParseAuthority()148 string Uri::ParseAuthority() 149 { 150 size_t ssi = FindSchemeSeparator(); 151 if (ssi == NOT_FOUND) { 152 return EMPTY; 153 } 154 155 size_t length = uriString_.length(); 156 // If "//" follows the scheme separator, we have an authority. 157 if ((length > (ssi + POS_INC_MORE)) && (uriString_.at(ssi + POS_INC) == LEFT_SEPARATOR) && 158 (uriString_.at(ssi + POS_INC_MORE) == LEFT_SEPARATOR)) { 159 // Look for the start of the path, query, or fragment, or the end of the string. 160 size_t start = ssi + POS_INC_AGAIN; 161 size_t end = start; 162 163 while (end < length) { 164 char ch = uriString_.at(end); 165 if ((ch == LEFT_SEPARATOR) || (ch == RIGHT_SEPARATOR) || (ch == QUERY_FLAG) || 166 (ch == SCHEME_FRAGMENT)) { 167 break; 168 } 169 170 end++; 171 } 172 173 return uriString_.substr(start, end - start); 174 } else { 175 return EMPTY; 176 } 177 } 178 GetUserInfo()179 string Uri::GetUserInfo() 180 { 181 if (uriString_.empty()) { 182 return EMPTY; 183 } 184 185 if (userInfo_ == NOT_CACHED) { 186 userInfo_ = ParseUserInfo(); 187 } 188 return userInfo_; 189 } 190 ParseUserInfo()191 string Uri::ParseUserInfo() 192 { 193 string authority = GetAuthority(); 194 if (authority.empty()) { 195 return EMPTY; 196 } 197 198 size_t end = authority.find_last_of(USER_HOST_SEPARATOR); 199 return (end == NOT_FOUND) ? EMPTY : authority.substr(0, end); 200 } 201 GetHost()202 string Uri::GetHost() 203 { 204 if (uriString_.empty()) { 205 return EMPTY; 206 } 207 208 if (host_ == NOT_CACHED) { 209 host_ = ParseHost(); 210 } 211 return host_; 212 } 213 ParseHost()214 string Uri::ParseHost() 215 { 216 string authority = GetAuthority(); 217 if (authority.empty()) { 218 return EMPTY; 219 } 220 221 // Parse out user info and then port. 222 size_t userInfoSeparator = authority.find_last_of(USER_HOST_SEPARATOR); 223 size_t start = (userInfoSeparator == NOT_FOUND) ? 0 : (userInfoSeparator + 1); 224 size_t portSeparator = authority.find_first_of(PORT_SEPARATOR, start); 225 size_t end = (portSeparator == NOT_FOUND) ? authority.size() : portSeparator; 226 227 string host = EMPTY; 228 if (start < end) { 229 host = authority.substr(start, end - start); 230 } 231 232 return host; 233 } 234 GetPort()235 int Uri::GetPort() 236 { 237 if (uriString_.empty()) { 238 return PORT_NONE; 239 } 240 241 if (port_ == NOT_CALCULATED) { 242 port_ = ParsePort(); 243 } 244 return port_; 245 } 246 ParsePort()247 int Uri::ParsePort() 248 { 249 string authority = GetAuthority(); 250 if (authority.empty()) { 251 return PORT_NONE; 252 } 253 254 // Make sure we look for the port separtor *after* the user info separator. 255 size_t userInfoSeparator = authority.find_last_of(USER_HOST_SEPARATOR); 256 size_t start = (userInfoSeparator == NOT_FOUND) ? 0 : (userInfoSeparator + 1); 257 size_t portSeparator = authority.find_first_of(PORT_SEPARATOR, start); 258 if (portSeparator == NOT_FOUND) { 259 return PORT_NONE; 260 } 261 262 start = portSeparator + 1; 263 string portString = authority.substr(start); 264 265 int value = PORT_NONE; 266 return StrToInt(portString, value) ? value : PORT_NONE; 267 } 268 GetQuery()269 string Uri::GetQuery() 270 { 271 if (uriString_.empty()) { 272 return EMPTY; 273 } 274 275 if (query_ == NOT_CACHED) { 276 query_ = ParseQuery(); 277 } 278 return query_; 279 } 280 ParseQuery()281 string Uri::ParseQuery() 282 { 283 size_t ssi = FindSchemeSeparator(); 284 if (ssi == NOT_FOUND) { 285 ssi = 0; 286 } 287 size_t qsi = uriString_.find_first_of(QUERY_FLAG, ssi); 288 if (qsi == NOT_FOUND) { 289 return EMPTY; 290 } 291 292 size_t start = qsi + 1; 293 size_t fsi = FindFragmentSeparator(); 294 if (fsi == NOT_FOUND) { 295 return uriString_.substr(start); 296 } 297 298 if (fsi < qsi) { 299 // Invalid. 300 return EMPTY; 301 } 302 303 return uriString_.substr(start, fsi - start); 304 } 305 GetPath()306 string Uri::GetPath() 307 { 308 if (uriString_.empty()) { 309 return EMPTY; 310 } 311 312 if (path_ == NOT_CACHED) { 313 path_ = ParsePath(); 314 } 315 return path_; 316 } 317 GetPathSegments(std::vector<std::string> & segments)318 void Uri::GetPathSegments(std::vector<std::string>& segments) 319 { 320 if (uriString_.empty()) { 321 return; 322 } 323 if (path_ == NOT_CACHED) { 324 path_ = ParsePath(); 325 } 326 327 size_t previous = 0; 328 size_t current; 329 while ((current = path_.find(LEFT_SEPARATOR, previous)) != std::string::npos) { 330 if (previous < current) { 331 segments.emplace_back(path_.substr(previous, current - previous)); 332 } 333 previous = current + POS_INC; 334 } 335 // Add in the final path segment. 336 if (previous < path_.length()) { 337 segments.emplace_back(path_.substr(previous)); 338 } 339 } 340 ParsePath()341 string Uri::ParsePath() 342 { 343 size_t ssi = FindSchemeSeparator(); 344 // If the URI is absolute. 345 if (ssi != NOT_FOUND) { 346 // Is there anything after the ':'? 347 if ((ssi + 1) == uriString_.length()) { 348 // Opaque URI. 349 return EMPTY; 350 } 351 352 // A '/' after the ':' means this is hierarchical. 353 if (uriString_.at(ssi + 1) != LEFT_SEPARATOR) { 354 // Opaque URI. 355 return EMPTY; 356 } 357 } else { 358 // All relative URIs are hierarchical. 359 } 360 361 return ParsePath(ssi); 362 } 363 ParsePath(size_t ssi)364 string Uri::ParsePath(size_t ssi) 365 { 366 size_t length = uriString_.length(); 367 368 // Find start of path. 369 size_t pathStart = (ssi == NOT_FOUND) ? 0 : (ssi + POS_INC); 370 if ((length > (pathStart + POS_INC)) && (uriString_.at(pathStart) == LEFT_SEPARATOR) && 371 (uriString_.at(pathStart + POS_INC) == LEFT_SEPARATOR)) { 372 // Skip over authority to path. 373 pathStart += POS_INC_MORE; 374 375 while (pathStart < length) { 376 char ch = uriString_.at(pathStart); 377 if ((ch == QUERY_FLAG) || (ch == SCHEME_FRAGMENT)) { 378 return EMPTY; 379 } 380 381 if ((ch == LEFT_SEPARATOR) || (ch == RIGHT_SEPARATOR)) { 382 break; 383 } 384 385 pathStart++; 386 } 387 } 388 389 // Find end of path. 390 size_t pathEnd = pathStart; 391 while (pathEnd < length) { 392 char ch = uriString_.at(pathEnd); 393 if ((ch == QUERY_FLAG) || (ch == SCHEME_FRAGMENT)) { 394 break; 395 } 396 397 pathEnd++; 398 } 399 400 return uriString_.substr(pathStart, pathEnd - pathStart); 401 } 402 GetFragment()403 string Uri::GetFragment() 404 { 405 if (uriString_.empty()) { 406 return EMPTY; 407 } 408 409 if (fragment_ == NOT_CACHED) { 410 fragment_ = ParseFragment(); 411 } 412 return fragment_; 413 } 414 ParseFragment()415 string Uri::ParseFragment() 416 { 417 size_t fsi = FindFragmentSeparator(); 418 return (fsi == NOT_FOUND) ? EMPTY : uriString_.substr(fsi + 1); 419 } 420 FindSchemeSeparator()421 size_t Uri::FindSchemeSeparator() 422 { 423 if (cachedSsi_ == NOT_FOUND) { 424 cachedSsi_ = uriString_.find_first_of(SCHEME_SEPARATOR); 425 } 426 return cachedSsi_; 427 } 428 FindFragmentSeparator()429 size_t Uri::FindFragmentSeparator() 430 { 431 if (cachedFsi_ == NOT_FOUND) { 432 cachedFsi_ = uriString_.find_first_of(SCHEME_FRAGMENT, FindSchemeSeparator()); 433 } 434 return cachedFsi_; 435 } 436 IsHierarchical()437 bool Uri::IsHierarchical() 438 { 439 if (uriString_.empty()) { 440 return false; 441 } 442 443 size_t ssi = FindSchemeSeparator(); 444 if (ssi == NOT_FOUND) { 445 // All relative URIs are hierarchical. 446 return true; 447 } 448 449 if (uriString_.length() == (ssi + 1)) { 450 // No ssp. 451 return false; 452 } 453 454 // If the ssp starts with a '/', this is hierarchical. 455 return (uriString_.at(ssi + 1) == LEFT_SEPARATOR); 456 } 457 IsOpaque()458 bool Uri::IsOpaque() 459 { 460 if (uriString_.empty()) { 461 return false; 462 } 463 464 return !IsHierarchical(); 465 } 466 IsAbsolute()467 bool Uri::IsAbsolute() 468 { 469 if (uriString_.empty()) { 470 return false; 471 } 472 473 return !IsRelative(); 474 } 475 IsRelative()476 bool Uri::IsRelative() 477 { 478 if (uriString_.empty()) { 479 return false; 480 } 481 482 // Note: We return true if the index is 0 483 return FindSchemeSeparator() == NOT_FOUND; 484 } 485 Equals(const Uri & other) const486 bool Uri::Equals(const Uri& other) const 487 { 488 return ToString() == other.ToString(); 489 } 490 CompareTo(const Uri & other) const491 int Uri::CompareTo(const Uri& other) const 492 { 493 return ToString().compare(other.ToString()); 494 } 495 ToString() const496 string Uri::ToString() const 497 { 498 return uriString_; 499 } 500 operator ==(const Uri & other) const501 bool Uri::operator==(const Uri& other) const 502 { 503 return ToString() == other.ToString(); 504 } 505 Marshalling(Parcel & parcel) const506 bool Uri::Marshalling(Parcel& parcel) const 507 { 508 if (IsAsciiString(uriString_)) { 509 return parcel.WriteString16(Str8ToStr16(uriString_)); 510 } 511 512 HILOG_IMPL(LOG_CORE, LOG_ERROR, 0xD001305, "URI", "uriString_ is not ascii string"); 513 return false; 514 } 515 Unmarshalling(Parcel & parcel)516 Uri* Uri::Unmarshalling(Parcel& parcel) 517 { 518 return new Uri(Str16ToStr8(parcel.ReadString16())); 519 } 520 } // namespace OHOS 521