1 /* 2 * Copyright (c) 2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #include "js_uri.h" 17 #include "tools/log.h" 18 namespace OHOS::Uri { 19 std::bitset<MAX_BIT_SIZE> g_ruleAlpha; 20 std::bitset<MAX_BIT_SIZE> g_ruleScheme; 21 std::bitset<MAX_BIT_SIZE> g_ruleUrlc; 22 std::bitset<MAX_BIT_SIZE> g_rulePath; 23 std::bitset<MAX_BIT_SIZE> g_ruleUserInfo; 24 std::bitset<MAX_BIT_SIZE> g_ruleDigit; 25 std::bitset<MAX_BIT_SIZE> g_rulePort; PreliminaryWork() const26 void Uri::PreliminaryWork() const 27 { 28 std::string digitAggregate = "0123456789"; 29 for (size_t i = 0; i < digitAggregate.size(); ++i) { 30 g_ruleDigit.set(digitAggregate[i]); 31 } 32 33 std::string alphasAggregate = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; 34 for (size_t i = 0; i < alphasAggregate.size(); ++i) { 35 g_ruleAlpha.set(alphasAggregate[i]); 36 } 37 38 std::string schemeAggregate = digitAggregate + alphasAggregate + "+-.| _-~!$&=,;'(){}*"; 39 for (size_t i = 0; i < schemeAggregate.size(); ++i) { 40 g_ruleScheme.set(schemeAggregate[i]); 41 } 42 43 std::string uricAggregate = schemeAggregate + ";/?:@&=$,[]_!~*'()%\""; 44 for (size_t i = 0; i < uricAggregate.size(); ++i) { 45 g_ruleUrlc.set(uricAggregate[i]); 46 } 47 48 std::string pathAggregate = schemeAggregate + ";/:@&=$,_!~*'()%"; 49 for (size_t i = 0; i < pathAggregate.size(); ++i) { 50 g_rulePath.set(pathAggregate[i]); 51 } 52 53 std::string userInfoAggregate = schemeAggregate + ";:&=$,_!~*'()%"; 54 for (size_t i = 0; i < userInfoAggregate.size(); ++i) { 55 g_ruleUserInfo.set(userInfoAggregate[i]); 56 } 57 58 std::string portAggregate = digitAggregate + alphasAggregate + ".:@-;&=+$,-_!~*'()"; 59 for (size_t i = 0; i < portAggregate.size(); ++i) { 60 g_rulePort.set(portAggregate[i]); 61 } 62 } 63 Uri(const std::string input)64 Uri::Uri(const std::string input) 65 { 66 PreliminaryWork(); 67 errStr_ = ""; 68 if (input.empty()) { 69 errStr_ = "uri is empty"; 70 return; 71 } 72 inputUri_ = input; 73 AnalysisUri(); 74 } 75 AssignSchemeSpecificPart()76 void Uri::AssignSchemeSpecificPart() 77 { 78 uriData_.SchemeSpecificPart.reserve(data_.length() + uriData_.query.length() + 1); 79 uriData_.SchemeSpecificPart.append(data_); 80 if (!uriData_.query.empty()) { 81 uriData_.SchemeSpecificPart.append("?"); 82 uriData_.SchemeSpecificPart.append(uriData_.query); 83 } 84 } 85 AnalysisUri()86 void Uri::AnalysisUri() 87 { 88 data_ = inputUri_; 89 size_t pos = data_.find('#'); // Fragment 90 if (pos != std::string::npos) { 91 AnalysisFragment(pos); 92 if (!errStr_.empty()) { 93 return; 94 } 95 } 96 pos = data_.find('?'); // Query 97 if (pos != std::string::npos) { 98 AnalysisQuery(pos); 99 if (!errStr_.empty()) { 100 return; 101 } 102 } 103 pos = data_.find(':'); // Scheme 104 if (pos != std::string::npos) { 105 AnalysisScheme(pos); 106 if (!errStr_.empty()) { 107 return; 108 } 109 } else { 110 SpecialPath(); 111 if (!errStr_.empty()) { 112 return; 113 } 114 AssignSchemeSpecificPart(); 115 return; 116 } 117 pos = data_.find("//"); // userInfo path host port ipv4 or ipv6 118 if (pos != std::string::npos && pos == 0) { 119 AssignSchemeSpecificPart(); 120 data_ = data_.substr(2); // 2:Intercept the string from the second subscript 121 AnalysisHostAndPath(); 122 if (!errStr_.empty()) { 123 return; 124 } 125 } else if (data_[0] == '/') { 126 uriData_.path = data_; 127 AssignSchemeSpecificPart(); 128 data_ = ""; 129 } else { 130 AssignSchemeSpecificPart(); 131 uriData_.query = ""; 132 data_ = ""; 133 } 134 } 135 CheckCharacter(std::string data,std::bitset<MAX_BIT_SIZE> rule,bool flag) const136 bool Uri::CheckCharacter(std::string data, std::bitset<MAX_BIT_SIZE> rule, bool flag) const 137 { 138 size_t dataLen = data.size(); 139 for (size_t i = 0; i < dataLen; ++i) { 140 if (static_cast<int>(data[i]) >= 0 && static_cast<int>(data[i]) < 128) { // 128:ASCII Max Number 141 bool isLegal = rule.test(data[i]); 142 if (!isLegal) { 143 return false; 144 } 145 } else if (!flag) { 146 return false; 147 } 148 } 149 return true; 150 } 151 SpecialPath()152 void Uri::SpecialPath() 153 { 154 if (!CheckCharacter(data_, g_rulePath, true)) { 155 errStr_ = "SpecialPath does not conform to the rule"; 156 return; 157 } 158 uriData_.path = data_; 159 data_ = ""; 160 } 161 AnalysisFragment(size_t pos)162 void Uri::AnalysisFragment(size_t pos) 163 { 164 if (pos == 0) { 165 errStr_ = "#It can't be the first"; 166 return; 167 } 168 std::string fragment = data_.substr(pos + 1); 169 if (!CheckCharacter(fragment, g_ruleUrlc, true)) { 170 errStr_ = "Fragment does not conform to the rule"; 171 return; 172 } 173 uriData_.fragment = fragment; 174 data_ = data_.substr(0, pos); 175 } 176 AnalysisQuery(size_t pos)177 void Uri::AnalysisQuery(size_t pos) 178 { 179 std::string query = data_.substr(pos + 1); 180 if (!CheckCharacter(query, g_ruleUrlc, true)) { 181 errStr_ = "Query does not conform to the rule"; 182 return; 183 } 184 uriData_.query = query; 185 data_ = data_.substr(0, pos); 186 } 187 AnalysisScheme(size_t pos)188 void Uri::AnalysisScheme(size_t pos) 189 { 190 size_t slashPos = data_.find('/'); 191 if (slashPos != std::string::npos && slashPos < pos) { 192 SpecialPath(); 193 uriData_.SchemeSpecificPart.reserve(uriData_.path.length() + uriData_.query.length() + 1); 194 uriData_.SchemeSpecificPart.append(uriData_.path); 195 uriData_.SchemeSpecificPart.append("?"); 196 uriData_.SchemeSpecificPart.append(uriData_.query); 197 data_ = ""; 198 } else { 199 if ((static_cast<int>(data_[0]) >= 0 && static_cast<int>(data_[0]) < MAX_BIT_SIZE) && 200 !g_ruleAlpha.test(data_[0])) { 201 errStr_ = "Scheme the first character must be a letter"; 202 return; 203 } 204 std::string scheme = data_.substr(0, pos); 205 if (!CheckCharacter(scheme, g_ruleScheme, false)) { 206 errStr_ = "scheme does not conform to the rule"; 207 return; 208 } 209 uriData_.scheme = scheme; 210 data_ = data_.substr(pos + 1); 211 } 212 } 213 AnalysisHost(bool isLawfulProt)214 void Uri::AnalysisHost(bool isLawfulProt) 215 { 216 // find ipv4 or ipv6 or host 217 if (data_[0] == '[') { 218 if (data_[data_.size() - 1] == ']') { 219 // IPV6 220 if (!isLawfulProt) { 221 errStr_ = "Prot does not conform to the rule"; 222 return; 223 } 224 AnalysisIPV6(); 225 } else { 226 errStr_ = "IPv6 is missing a closing bracket"; 227 return; 228 } 229 } else { 230 if (data_.find('[') != std::string::npos || data_.find(']') != std::string::npos) { 231 errStr_ = "host does not conform to the rule"; 232 return; 233 } 234 // ipv4 235 if (!isLawfulProt || !AnalysisIPV4()) { 236 uriData_.port = -1; 237 uriData_.host = ""; 238 uriData_.userInfo = ""; 239 } 240 } 241 } 242 AnalysisHostAndPath()243 void Uri::AnalysisHostAndPath() 244 { 245 if (data_.empty()) { 246 return; 247 } 248 // find path 249 size_t pos = data_.find('/'); 250 if (pos != std::string::npos) { 251 AnalysisPath(pos); 252 if (!errStr_.empty()) { 253 return; 254 } 255 } 256 uriData_.authority = data_; 257 // find UserInfo 258 pos = data_.find('@'); 259 if (pos != std::string::npos) { 260 AnalysisUserInfo(pos); 261 if (!errStr_.empty()) { 262 return; 263 } 264 } 265 bool isLawfulProt = true; 266 // find port 267 pos = data_.rfind(':'); 268 if (pos != std::string::npos) { 269 size_t pos1 = data_.rfind(']'); 270 if (pos1 == std::string::npos || pos > pos1) { 271 isLawfulProt = AnalysisPort(pos); 272 } 273 if (!errStr_.empty()) { 274 return; 275 } 276 } 277 AnalysisHost(isLawfulProt); 278 } 279 AnalysisPath(size_t pos)280 void Uri::AnalysisPath(size_t pos) 281 { 282 std::string path = data_.substr(pos); 283 if (!CheckCharacter(path, g_rulePath, true)) { 284 errStr_ = "path does not conform to the rule"; 285 return; 286 } 287 uriData_.path = path; 288 data_ = data_.substr(0, pos); 289 } 290 AnalysisUserInfo(size_t pos)291 void Uri::AnalysisUserInfo(size_t pos) 292 { 293 std::string userInfo = data_.substr(0, pos); 294 if (!CheckCharacter(userInfo, g_ruleUserInfo, true)) { 295 errStr_ = "userInfo does not conform to the rule"; 296 return; 297 } 298 uriData_.userInfo = userInfo; 299 data_ = data_.substr(pos + 1); 300 } 301 AnalysisPort(size_t pos)302 bool Uri::AnalysisPort(size_t pos) 303 { 304 std::string port = data_.substr(pos + 1); 305 if (!CheckCharacter(port, g_rulePort, true)) { 306 errStr_ = "port does not conform to the rule"; 307 return false; 308 } else if (CheckCharacter(port, g_ruleDigit, false)) { 309 if (port.size() == 0 || port.size() > 10) { // 10:The maximum number of bits for int value 310 return false; 311 } 312 double tempPort = std::strtod(port.c_str(), nullptr); 313 if (tempPort < 0 || tempPort > INT32_MAX) { 314 return false; 315 } 316 uriData_.port = static_cast<int>(tempPort); 317 data_ = data_.substr(0, pos); 318 return true; 319 } else { 320 data_ = data_.substr(0, pos); 321 return false; 322 } 323 return false; 324 } 325 AnalysisIPV4()326 bool Uri::AnalysisIPV4() 327 { 328 std::regex ipv4("((25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)"); 329 std::regex hostname("(([a-zA-Z0-9]([a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s]*[a-zA-Z0-9])?\\.)+" 330 "([a-zA-Z]([a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s]*" 331 "[a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s])?))|" 332 "([a-zA-Z0-9]([a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s]*[a-zA-Z0-9])?)"); 333 bool isIpv4 = std::regex_match(data_, ipv4); 334 bool isHosName = std::regex_match(data_, hostname); 335 if (!isIpv4 && !isHosName) { 336 return false; 337 } else { 338 uriData_.host = data_; 339 data_ = ""; 340 return true; 341 } 342 } 343 AnalysisIPV6()344 void Uri::AnalysisIPV6() 345 { 346 std::string str = data_.substr(1, data_.size() - 2); // 2:Intercept the string from the second subscript 347 std::regex ipv6("(::|(:((:[0-9A-Fa-f]{1,4}){1,7}))|(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|" 348 "(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|:))|(([0-9A-Fa-f]{1,4}:){2}" 349 "(((:[0-9A-Fa-f]{1,4}){1,5})|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})" 350 "|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|:))|(([0-9A-Fa-f]{1,4}:){5}" 351 "(((:[0-9A-Fa-f]{1,4}){1,2})|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|:))|" 352 "(((:(:[0-9A-Fa-f]{1,4}){0,5}:)|(([0-9A-Fa-f]{1,4}:){1}(:[0-9A-Fa-f]{1,4}){0,4}:)" 353 "|(([0-9A-Fa-f]{1,4}:){2}(:[0-9A-Fa-f]{1,4}){0,3}:)|(([0-9A-Fa-f]{1,4}:){3}" 354 "(:[0-9A-Fa-f]{1,4}){0,2}:)|(([0-9A-Fa-f]{1,4}:){4}(:[0-9A-Fa-f]{1,4})?:)|" 355 "(([0-9A-Fa-f]{1,4}:){5}:)|(([0-9A-Fa-f]{1,4}:){6}))((25[0-5]|2[0-4]\\d|1\\d{2}|" 356 "[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)))(%[a-zA-Z0-9._]+)?"); 357 if (!std::regex_match(str, ipv6)) { 358 errStr_ = "ipv6 does not conform to the rule"; 359 return; 360 } 361 uriData_.host = data_; 362 data_ = ""; 363 } 364 Equals(const Uri other) const365 bool Uri::Equals(const Uri other) const 366 { 367 if (uriData_.port != other.uriData_.port) { 368 return false; 369 } 370 if (uriData_.scheme != other.uriData_.scheme) { 371 return false; 372 } 373 if (uriData_.userInfo != other.uriData_.userInfo) { 374 return false; 375 } 376 if (uriData_.host != other.uriData_.host) { 377 return false; 378 } 379 if (uriData_.query != other.uriData_.query) { 380 return false; 381 } 382 if (uriData_.fragment != other.uriData_.fragment) { 383 return false; 384 } 385 if (uriData_.path != other.uriData_.path) { 386 return false; 387 } 388 if (uriData_.authority != other.uriData_.authority) { 389 return false; 390 } 391 if (uriData_.SchemeSpecificPart != other.uriData_.SchemeSpecificPart) { 392 return false; 393 } 394 return true; 395 } 396 ToString() const397 std::string Uri::ToString() const 398 { 399 return inputUri_; 400 } 401 IsAbsolute() const402 bool Uri::IsAbsolute() const 403 { 404 return !uriData_.scheme.empty(); 405 } 406 IsRelative() const407 bool Uri::IsRelative() const 408 { 409 return uriData_.scheme.empty(); 410 } 411 IsOpaque() const412 bool Uri::IsOpaque() const 413 { 414 return !IsHierarchical(); 415 } 416 IsHierarchical() const417 bool Uri::IsHierarchical() const 418 { 419 size_t index = inputUri_.find(':'); 420 if (index == std::string::npos) { 421 return true; 422 } 423 if (inputUri_.length() == index + 1) { 424 return false; 425 } 426 return inputUri_[index + 1] == '/'; 427 } 428 AddQueryValue(const std::string key,const std::string value) const429 std::string Uri::AddQueryValue(const std::string key, const std::string value) const 430 { 431 return BuildUriString("query", key + "=" + value); 432 } 433 AddSegment(const std::string pathSegment) const434 std::string Uri::AddSegment(const std::string pathSegment) const 435 { 436 return BuildUriString("segment", pathSegment); 437 } 438 BuildUriString(const std::string str,const std::string param) const439 std::string Uri::BuildUriString(const std::string str, const std::string param) const 440 { 441 std::string result = ""; 442 if (!uriData_.scheme.empty()) { 443 result += uriData_.scheme + ":"; 444 } 445 if (!uriData_.authority.empty()) { 446 result += "//" + uriData_.authority; 447 } 448 if (!uriData_.path.empty()) { 449 result += uriData_.path ; 450 } 451 if (str == "segment") { 452 if (result.back() == '/') { 453 result += param; 454 } else { 455 result += "/" + param; 456 } 457 } 458 if (str != "clearquery") { 459 if (uriData_.query.empty()) { 460 if (str == "query") { 461 result += "?" + param; 462 } 463 } else { 464 result += "?" + uriData_.query; 465 if (str == "query") { 466 result += "&" + param; 467 } 468 } 469 } 470 if (!uriData_.fragment.empty()) { 471 result += "#" + uriData_.fragment; 472 } 473 return result; 474 } 475 GetSegment() const476 std::vector<std::string> Uri::GetSegment() const 477 { 478 std::vector<std::string> segments; 479 if (uriData_.path.empty()) { 480 return segments; 481 } 482 size_t previous = 0; 483 size_t current = 0; 484 for (current = uriData_.path.find('/', previous); current != std::string::npos; 485 current = uriData_.path.find('/', previous)) { 486 if (previous < current) { 487 std::string segment = uriData_.path.substr(previous, current - previous); 488 segments.push_back(segment); 489 } 490 previous = current + 1; 491 } 492 if (previous < uriData_.path.length()) { 493 segments.push_back(uriData_.path.substr(previous)); 494 } 495 return segments; 496 } 497 IsFailed() const498 std::string Uri::IsFailed() const 499 { 500 return errStr_; 501 } 502 Normalize() const503 std::string Uri::Normalize() const 504 { 505 std::vector<std::string> temp; 506 size_t pathLen = uriData_.path.size(); 507 if (pathLen == 0) { 508 return this->inputUri_; 509 } 510 size_t pos = 0; 511 size_t left = 0; 512 while ((pos = uriData_.path.find('/', left)) != std::string::npos) { 513 temp.push_back(uriData_.path.substr(left, pos - left)); 514 left = pos + 1; 515 } 516 if (left != pathLen) { 517 temp.push_back(uriData_.path.substr(left)); 518 } 519 size_t tempLen = temp.size(); 520 std::vector<std::string> normalizeTemp; 521 for (size_t i = 0; i < tempLen; ++i) { 522 if (!temp[i].empty() && !(temp[i] == ".") && !(temp[i] == "..")) { 523 normalizeTemp.push_back(temp[i]); 524 } 525 if (temp[i] == "..") { 526 if (!normalizeTemp.empty() && normalizeTemp.back() != "..") { 527 normalizeTemp.pop_back(); 528 } else { 529 normalizeTemp.push_back(temp[i]); 530 } 531 } 532 } 533 std::string normalizePath = ""; 534 tempLen = normalizeTemp.size(); 535 if (tempLen == 0) { 536 normalizePath = "/"; 537 } else { 538 for (size_t i = 0; i < tempLen; ++i) { 539 normalizePath += "/" + normalizeTemp[i]; 540 } 541 } 542 return Split(normalizePath); 543 } 544 545 Split(const std::string & path) const546 std::string Uri::Split(const std::string &path) const 547 { 548 std::string normalizeUri = ""; 549 if (!uriData_.scheme.empty()) { 550 normalizeUri += uriData_.scheme + ":"; 551 } 552 if (uriData_.path.empty()) { 553 normalizeUri += uriData_.SchemeSpecificPart; 554 } else { 555 if (!uriData_.host.empty()) { 556 normalizeUri += "//"; 557 if (!uriData_.userInfo.empty()) { 558 normalizeUri += uriData_.userInfo + "@"; 559 } 560 normalizeUri += uriData_.host; 561 if (uriData_.port != -1) { 562 normalizeUri += ":" + std::to_string(uriData_.port); 563 } 564 } else if (!uriData_.authority.empty()) { 565 normalizeUri += "//" + uriData_.authority; 566 } 567 normalizeUri += path; 568 } 569 if (!uriData_.query.empty()) { 570 normalizeUri += "?" + uriData_.query; 571 } 572 if (!uriData_.fragment.empty()) { 573 normalizeUri += "#" + uriData_.fragment; 574 } 575 return normalizeUri; 576 } 577 GetScheme() const578 std::string Uri::GetScheme() const 579 { 580 return uriData_.scheme; 581 } 582 GetAuthority() const583 std::string Uri::GetAuthority() const 584 { 585 return uriData_.authority; 586 } 587 GetSsp() const588 std::string Uri::GetSsp() const 589 { 590 return uriData_.SchemeSpecificPart; 591 } 592 GetUserinfo() const593 std::string Uri::GetUserinfo() const 594 { 595 return uriData_.userInfo; 596 } 597 GetHost() const598 std::string Uri::GetHost() const 599 { 600 return uriData_.host; 601 } 602 GetPort() const603 std::string Uri::GetPort() const 604 { 605 return std::to_string(uriData_.port); 606 } 607 GetPath() const608 std::string Uri::GetPath() const 609 { 610 return uriData_.path; 611 } 612 GetQuery() const613 std::string Uri::GetQuery() const 614 { 615 return uriData_.query; 616 } 617 GetFragment() const618 std::string Uri::GetFragment() const 619 { 620 return uriData_.fragment; 621 } 622 ClearQuery() const623 std::string Uri::ClearQuery() const 624 { 625 return BuildUriString("clearquery", ""); 626 } 627 } // namespace OHOS::Uri 628