1 // Copyright (c) 2023 Huawei Device Co., Ltd.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 //! HTTP url [`PercentEncoder`].
15 //!
16 //! URI references are used to target requests, indicate redirects, and define
17 //! relationships.
18 //!
19 //! [`PercentEncoder`]: https://url.spec.whatwg.org/#fragment-percent-encode
20
21 use std::str;
22 use std::str::{Bytes, Chars};
23
24 use crate::error::HttpError;
25 use crate::request::uri::{InvalidUri, Uri};
26
/// A decoded character together with the `&str` slice holding its UTF-8 bytes.
type Utf8Char<'a> = (char, &'a str);

/// Extra bytes percent-encoded inside the userinfo component.
const USERINFO: &[u8; 19] = b" \"#<>?`{}/:;=@[]^|\\";
/// Extra bytes percent-encoded inside the fragment component.
const FRAGMENT: &[u8; 5] = b" \"<>`";
/// Extra bytes percent-encoded inside the path component.
const PATH: &[u8; 9] = b" \"#<>?`{}";
/// Extra bytes percent-encoded inside the query component.
const QUERY: &[u8; 6] = b" \"#<>'";
33
34 /// HTTP url percent encoding implementation.
35 ///
36 /// # Examples
37 ///
38 /// ```
39 /// use ylong_http::request::uri::PercentEncoder;
40 ///
41 /// let url = "https://www.example.com/data/测试文件.txt";
42 /// let encoded = PercentEncoder::parse(url).unwrap();
43 /// assert_eq!(
44 /// encoded,
45 /// "https://www.example.com/data/%E6%B5%8B%E8%AF%95%E6%96%87%E4%BB%B6.txt"
46 /// );
47 /// ```
48 pub struct PercentEncoder {
49 normalized: Normalized,
50 }
51
52 impl PercentEncoder {
53 /// Percent-coding entry.
parse(origin: &str) -> Result<String, HttpError>54 pub fn parse(origin: &str) -> Result<String, HttpError> {
55 let mut encoder = Self {
56 normalized: Normalized::from_size(origin.len()),
57 };
58 let bytes = UrlChars {
59 remaining: origin.chars(),
60 };
61 let remaining = encoder.parse_scheme(bytes)?;
62 let remaining = encoder.parse_double_slash(remaining)?;
63 let remaining = encoder.parse_userinfo(remaining)?;
64 let remaining = encoder.parse_authority(remaining)?;
65 let remaining = encoder.parse_path(remaining)?;
66 encoder.parse_query_and_fragment(remaining)?;
67 Ok(encoder.normalized.url())
68 }
69
parse_scheme<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError>70 fn parse_scheme<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError> {
71 while let Some(char) = origin.next() {
72 match char {
73 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.' => {
74 self.normalized.push(char.to_ascii_lowercase())
75 }
76 ':' => {
77 self.normalized.push(char);
78 return Ok(origin);
79 }
80 _ => return Err(InvalidUri::InvalidScheme.into()),
81 }
82 }
83 Err(InvalidUri::InvalidScheme.into())
84 }
85
parse_double_slash<'a>(&mut self, origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError>86 fn parse_double_slash<'a>(&mut self, origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError> {
87 let mut chars = origin.clone();
88 let mut count = 0;
89 loop {
90 let mut tmp_chars = chars.clone();
91 if matches!(tmp_chars.next(), Some(c) if matches!(c, '/')) {
92 count += 1;
93 chars = tmp_chars;
94 self.normalized.push('/');
95 } else {
96 break;
97 }
98 }
99 if count == 2 {
100 Ok(chars)
101 } else {
102 Err(InvalidUri::InvalidScheme.into())
103 }
104 }
105
parse_userinfo<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError>106 fn parse_userinfo<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError> {
107 let mut chars = origin.clone();
108
109 let mut size = 0;
110 let mut after_at = None;
111 while let Some(ch) = chars.next() {
112 match ch {
113 '@' => {
114 after_at = Some((size, chars.clone()));
115 }
116 '/' | '?' | '#' => break,
117 _ => {}
118 }
119 size += 1;
120 }
121
122 let (mut info_len, remaining) = match after_at {
123 None => {
124 return Ok(origin);
125 }
126 Some((0, remaining)) => {
127 if matches!(remaining.clone().next(), Some(c) if matches!(c, '/' | '?' | '#')) {
128 return Err(InvalidUri::UriMissHost.into());
129 }
130 return Ok(remaining);
131 }
132 Some(at) => at,
133 };
134
135 let mut has_username = false;
136 while info_len > 0 {
137 info_len -= 1;
138 if let Some(ch) = origin.next_u8() {
139 if ch.0 == ':' && !has_username {
140 has_username = true;
141 self.normalized.push(':')
142 } else {
143 self.normalized.percent_encoding_push(ch, USERINFO);
144 }
145 }
146 }
147 self.normalized.push('@');
148
149 Ok(remaining)
150 }
parse_authority<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError>151 fn parse_authority<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError> {
152 loop {
153 let chars = origin.clone();
154 let c = if let Some(ch) = origin.next() {
155 ch
156 } else {
157 break;
158 };
159 match c {
160 '/' | '?' | '#' => {
161 origin = chars;
162 break;
163 }
164 _ => {
165 self.normalized.push(c);
166 }
167 }
168 }
169 Ok(origin)
170 }
171
parse_path<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError>172 fn parse_path<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError> {
173 loop {
174 let chars = origin.clone();
175
176 let (ch, u8_str) = if let Some((ch, u8_str)) = origin.next_u8() {
177 (ch, u8_str)
178 } else {
179 break;
180 };
181 match ch {
182 '/' => {
183 self.normalized.push(ch);
184 }
185 '#' | '?' => {
186 origin = chars;
187 break;
188 }
189 _ => {
190 self.normalized.percent_encoding_push((ch, u8_str), PATH);
191 }
192 }
193 }
194
195 Ok(origin)
196 }
197
parse_query_and_fragment(&mut self, mut origin: UrlChars) -> Result<(), HttpError>198 fn parse_query_and_fragment(&mut self, mut origin: UrlChars) -> Result<(), HttpError> {
199 let mut remaining = origin.clone();
200 match origin.first_valid() {
201 None => {}
202 Some('?') => {
203 self.normalized.push('?');
204 let chars = self.parse_query(origin)?;
205 remaining = chars;
206 }
207 Some('#') => {
208 self.normalized.push('#');
209 remaining = origin;
210 }
211 _ => {
212 return Err(InvalidUri::InvalidFormat.into());
213 }
214 }
215 self.parse_fragment(remaining)
216 }
217
parse_query<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError>218 fn parse_query<'a>(&mut self, mut origin: UrlChars<'a>) -> Result<UrlChars<'a>, HttpError> {
219 while let Some((ch, u8_str)) = origin.next_u8() {
220 match ch {
221 '#' => {
222 self.normalized.push('#');
223 break;
224 }
225 _ => self.normalized.percent_encoding_push((ch, u8_str), QUERY),
226 }
227 }
228
229 Ok(origin)
230 }
231
parse_fragment(&mut self, mut origin: UrlChars) -> Result<(), HttpError>232 fn parse_fragment(&mut self, mut origin: UrlChars) -> Result<(), HttpError> {
233 while let Some(utf8) = origin.next_u8() {
234 self.normalized.percent_encoding_push(utf8, FRAGMENT);
235 }
236 Ok(())
237 }
238 }
239
240 pub(crate) struct Normalized {
241 url: String,
242 }
243
244 impl Normalized {
from_size(size: usize) -> Self245 pub(crate) fn from_size(size: usize) -> Self {
246 Self {
247 url: String::with_capacity(size),
248 }
249 }
push(&mut self, ch: char)250 pub(crate) fn push(&mut self, ch: char) {
251 if !matches!(ch, '\t' | '\r' | '\n') {
252 self.url.push(ch);
253 }
254 }
255
percent_encoding_push(&mut self, u8_ch: Utf8Char, char_set: &[u8])256 pub(crate) fn percent_encoding_push(&mut self, u8_ch: Utf8Char, char_set: &[u8]) {
257 let (ch, u8_str) = u8_ch;
258 if !matches!(ch, '\t' | '\r' | '\n') {
259 self.percent_encoding_char(u8_str, char_set);
260 }
261 }
262
percent_encoding_char(&mut self, u8_str: &str, char_set: &[u8])263 pub(crate) fn percent_encoding_char(&mut self, u8_str: &str, char_set: &[u8]) {
264 let mut start = 0;
265 for (index, &byte) in u8_str.as_bytes().iter().enumerate() {
266 if should_percent_encoding(byte, char_set) {
267 if start < index {
268 let unencoded =
269 unsafe { str::from_utf8_unchecked(&u8_str.as_bytes()[start..index]) };
270 self.url.push_str(unencoded);
271 }
272 let encoded = percent_hex(byte);
273 self.url.push('%');
274 self.url.push_str(encoded);
275
276 start = index + 1;
277 }
278 }
279
280 let ch_len = u8_str.len();
281 if start < ch_len {
282 let unencoded = unsafe { str::from_utf8_unchecked(&u8_str.as_bytes()[start..ch_len]) };
283 self.url.push_str(unencoded);
284 }
285 }
286
url(self) -> String287 pub(crate) fn url(self) -> String {
288 self.url
289 }
290 }
291
292 #[derive(Clone)]
293 struct UrlChars<'a> {
294 remaining: Chars<'a>,
295 }
296
297 impl<'a> UrlChars<'a> {
next_u8(&mut self) -> Option<Utf8Char>298 pub(crate) fn next_u8(&mut self) -> Option<Utf8Char> {
299 let url_str = self.remaining.as_str();
300 self.remaining.next().map(|c| (c, &url_str[..c.len_utf8()]))
301 }
302
first_valid(&mut self) -> Option<char>303 pub(crate) fn first_valid(&mut self) -> Option<char> {
304 self.remaining
305 .by_ref()
306 .find(|&c| !matches!(c, '\t' | '\r' | '\n'))
307 }
308 }
309
310 impl<'a> Iterator for UrlChars<'a> {
311 type Item = char;
312
next(&mut self) -> Option<Self::Item>313 fn next(&mut self) -> Option<Self::Item> {
314 self.remaining.next()
315 }
316 }
317
/// Reports whether `byte` has to be percent-encoded.
///
/// A byte is encoded when it is not ASCII, when it is an ASCII control
/// character (`0x00..=0x1F` or `0x7F`), or when it is listed in `bytes`, the
/// encode set of the component being written.
pub(crate) fn should_percent_encoding(byte: u8, bytes: &[u8]) -> bool {
    // The decision is about `byte` itself; the contents of the encode set must
    // not force unrelated bytes to be encoded.
    !byte.is_ascii() || byte.is_ascii_control() || bytes.contains(&byte)
}
321
/// Returns the two uppercase hexadecimal digits that represent `byte`.
///
/// The digits come from a 512-character table holding the pairs for `0x00`
/// through `0xFF` back to back, so the result needs no allocation.
pub(crate) fn percent_hex(byte: u8) -> &'static str {
    // Kept as `&str` so the pair can be sliced out without `unsafe`; every
    // character is ASCII, hence every even offset is a char boundary.
    static HEX_ASCII: &str = "\
        000102030405060708090A0B0C0D0E0F\
        101112131415161718191A1B1C1D1E1F\
        202122232425262728292A2B2C2D2E2F\
        303132333435363738393A3B3C3D3E3F\
        404142434445464748494A4B4C4D4E4F\
        505152535455565758595A5B5C5D5E5F\
        606162636465666768696A6B6C6D6E6F\
        707172737475767778797A7B7C7D7E7F\
        808182838485868788898A8B8C8D8E8F\
        909192939495969798999A9B9C9D9E9F\
        A0A1A2A3A4A5A6A7A8A9AAABACADAEAF\
        B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF\
        C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF\
        D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF\
        E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF\
        F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
    let index = usize::from(byte) * 2;
    &HEX_ASCII[index..index + 2]
}
344
#[cfg(test)]
mod ut_uri_percent_encoder {
    use crate::request::uri::percent_encoding::PercentEncoder;
    use crate::request::uri::{InvalidUri, Uri};

    /// Asserts that `PercentEncoder::parse(url)` fails with `reason`.
    fn assert_rejected_with(url: &str, reason: InvalidUri) {
        let failure = PercentEncoder::parse(url).err();
        assert_eq!(failure, Some(reason.into()));
    }

    /// Asserts that `PercentEncoder::parse(url)` succeeds with `expected`.
    fn assert_encodes_to(url: &str, expected: &str) {
        let encoded = PercentEncoder::parse(url).unwrap();
        assert_eq!(encoded, expected);
    }

    /// UT test cases for `PercentEncoder::parse`.
    ///
    /// # Brief
    /// 1. Calls `PercentEncoder::parse` with urls that contain Chinese
    ///    characters in the userinfo, path, query and fragment.
    /// 2. Checks that each result equals the expected percent-encoded url.
    #[test]
    fn url_percent_encode() {
        assert_encodes_to(
            "https://测试名:测试密码@www.example.com/data/new-测试文件.txt?from=project-名称#fragment-百分比-encode",
            "https://%E6%B5%8B%E8%AF%95%E5%90%8D:%E6%B5%8B%E8%AF%95%E5%AF%86%E7%A0%81@www.example.com/data/new-%E6%B5%8B%E8%AF%95%E6%96%87%E4%BB%B6.txt?from=project-%E5%90%8D%E7%A7%B0#fragment-%E7%99%BE%E5%88%86%E6%AF%94-encode",
        );

        assert_encodes_to(
            "https://@www.example.com/data/new-测试文件.txt?from=project-名称#fragment-百分比-encode",
            "https://www.example.com/data/new-%E6%B5%8B%E8%AF%95%E6%96%87%E4%BB%B6.txt?from=project-%E5%90%8D%E7%A7%B0#fragment-%E7%99%BE%E5%88%86%E6%AF%94-encode",
        );

        assert_encodes_to(
            "https://www.example.com/data/new-测试文件.txt#fragment-百分比-encode",
            "https://www.example.com/data/new-%E6%B5%8B%E8%AF%95%E6%96%87%E4%BB%B6.txt#fragment-%E7%99%BE%E5%88%86%E6%AF%94-encode",
        );
    }

    /// UT test cases for `PercentEncoder::parse`.
    ///
    /// # Brief
    /// 1. Calls `PercentEncoder::parse` with malformed urls.
    /// 2. Checks that each call fails with the expected `InvalidUri` reason.
    #[test]
    fn url_percent_encode_failure() {
        assert_rejected_with(
            "htt ps://测试名:测试密码@www.example.com/data/new-测试文件.txt?from=project-名称#fragment-百分比-encode",
            InvalidUri::InvalidScheme,
        );
        assert_rejected_with("htt ps://", InvalidUri::InvalidScheme);
        assert_rejected_with("https", InvalidUri::InvalidScheme);
        assert_rejected_with("https:///www.example.com", InvalidUri::InvalidScheme);
        assert_rejected_with("https://@/data", InvalidUri::UriMissHost);
    }
}
409