1 // Copyright (c) 2023 Huawei Device Co., Ltd.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 //     http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 use crate::reader::Cacheable;
15 use crate::{
16     consts::*, deserializer::Deserializer, Array, Error, JsonValue, Number, Object, ParseError,
17 };
18 use core::convert::TryFrom;
19 #[cfg(feature = "c_adapter")]
20 pub use std::ffi::CString;
21 
// Produces the `Err` describing the character the reader is currently stuck on.
//
// The reader position is sampled before `read_error_char` consumes the
// offending bytes, so the reported line/column are the ones at which the
// character starts. When `read_error_char` yields no character the failure is
// reported as invalid UTF-8 bytes on that line; an error coming from
// `read_error_char` itself is passed through unchanged.
macro_rules! unexpected_character {
    ($deserializer: expr) => {{
        let found_at = $deserializer.reader.position();
        match read_error_char($deserializer) {
            Err(err) => Err(err),
            Ok(None) => Err(ParseError::InvalidUtf8Bytes(found_at.line()).into()),
            Ok(Some(found)) => Err(ParseError::UnexpectedCharacter(
                found_at.line(),
                found_at.column(),
                found,
            )
            .into()),
        }
    }};
}
34 
// Produces the `Err` reported when the input ends while more JSON text is
// still required. Only the current line of the reader is recorded.
macro_rules! unexpected_eoj {
    ($deserializer: expr) => {{
        let line = $deserializer.reader.position().line();
        Err(ParseError::UnexpectedEndOfJson(line).into())
    }};
}
40 
// Discards bytes for as long as the peeked byte is a member of
// `WHITE_SPACE_SET`, then evaluates to the first peeked value that is not
// (`None` when the input is exhausted). The stopping byte is left unconsumed.
// A reader failure is propagated with `?` from the enclosing function.
macro_rules! eat_whitespace_until_not {
    ($deserializer: expr) => {{
        loop {
            let peeked = $deserializer.reader.peek().map_err(Error::new_reader)?;
            if !matches!(peeked, Some(byte) if WHITE_SPACE_SET.contains(&byte)) {
                break peeked;
            }
            $deserializer.reader.discard();
        }
    }};
}
51 
// Discards bytes for as long as the peeked byte lies in `ZERO..=NINE`, then
// evaluates to the first peeked value that does not (`None` when the input is
// exhausted). The stopping byte is left unconsumed. A reader failure is
// propagated with `?` from the enclosing function.
macro_rules! eat_digits_until_not {
    ($deserializer: expr) => {{
        loop {
            let peeked = $deserializer.reader.peek().map_err(Error::new_reader)?;
            if !matches!(peeked, Some(byte) if (ZERO..=NINE).contains(&byte)) {
                break peeked;
            }
            $deserializer.reader.discard();
        }
    }};
}
62 
// Consumes the bytes of `$str` from the reader one by one.
//
// On the first mismatch the enclosing function returns an unexpected-character
// error; if the input ends first it returns an unexpected-end-of-JSON error.
// Bytes that matched before the failure have already been discarded.
macro_rules! match_str {
    ($deserializer: expr, $str: expr) => {{
        for expected in $str {
            let peeked = $deserializer.reader.peek().map_err(Error::new_reader)?;
            match peeked {
                None => return unexpected_eoj!($deserializer),
                Some(found) if found != *expected => {
                    return unexpected_character!($deserializer)
                }
                Some(_) => $deserializer.reader.discard(),
            }
        }
    }};
}
74 
check_recursion<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<(), Error>75 pub(crate) fn check_recursion<R: Cacheable>(
76     deserializer: &mut Deserializer<R>,
77 ) -> Result<(), Error> {
78     if deserializer.recursion_depth > RECURSION_LIMIT {
79         Err(Error::ExceedRecursionLimit)
80     } else {
81         Ok(())
82     }
83 }
84 
85 #[inline]
start_parsing<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<JsonValue, Error>86 pub(crate) fn start_parsing<R: Cacheable>(
87     deserializer: &mut Deserializer<R>,
88 ) -> Result<JsonValue, Error> {
89     let value = parse_value(deserializer)?;
90 
91     // If the text is not finished, return TrailingBytes Error.
92     if eat_whitespace_until_not!(deserializer).is_some() {
93         return Err(ParseError::TrailingBytes(deserializer.reader.position().line()).into());
94     }
95     Ok(value)
96 }
97 
98 // Parses value.
parse_value<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error>99 fn parse_value<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error> {
100     match eat_whitespace_until_not!(deserializer) {
101         Some(ZERO..=NINE | MINUS) => Ok(JsonValue::Number(parse_number(deserializer)?)),
102         Some(LEFT_CURLY_BRACKET) => {
103             deserializer.reader.discard();
104             parse_object(deserializer)
105         }
106         Some(LEFT_SQUARE_BRACKET) => {
107             deserializer.reader.discard();
108             parse_array(deserializer)
109         }
110         Some(QUOTATION_MARK) => {
111             deserializer.reader.discard();
112             Ok(JsonValue::String(parse_string(deserializer)?))
113         }
114         Some(T_LOWER) => {
115             deserializer.reader.discard();
116             match_str!(deserializer, TRUE_LEFT_STR);
117             Ok(JsonValue::Boolean(true))
118         }
119         Some(F_LOWER) => {
120             deserializer.reader.discard();
121             match_str!(deserializer, FALSE_LEFT_STR);
122             Ok(JsonValue::Boolean(false))
123         }
124         Some(N_LOWER) => {
125             deserializer.reader.discard();
126             match_str!(deserializer, NULL_LEFT_STR);
127             Ok(JsonValue::Null)
128         }
129         Some(_) => unexpected_character!(deserializer),
130         None => unexpected_eoj!(deserializer),
131     }
132 }
133 
134 // Parses object
parse_object<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error>135 fn parse_object<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error> {
136     // Uses an internal state machine to determine the flow.
137     enum InnerState {
138         Start,      // State at the start of the match.
139         AfterComma, // Comma already exists.
140         NoComma,    // Comma didn't exist before
141     }
142 
143     deserializer.recursion_depth += 1;
144     check_recursion(deserializer)?;
145 
146     // Creates an Object to store key-value pairs.
147     let mut object = Object::new();
148     // The initial status is Start.
149     let mut state = InnerState::Start;
150 
151     loop {
152         match (state, eat_whitespace_until_not!(deserializer)) {
153             // If "}" is encountered in the initial or NoComma state, object is null.
154             (InnerState::Start | InnerState::NoComma, Some(RIGHT_CURLY_BRACKET)) => {
155                 deserializer.reader.discard();
156                 deserializer.recursion_depth -= 1;
157                 break;
158             }
159             // If "\" is encountered in the initial state or
160             // if "," is already present, matches key-value pairs.
161             (InnerState::Start | InnerState::AfterComma, Some(QUOTATION_MARK)) => {
162                 deserializer.reader.discard();
163                 let k = parse_key(deserializer)?;
164 
165                 // Matches ':'
166                 match eat_whitespace_until_not!(deserializer) {
167                     Some(COLON) => deserializer.reader.discard(),
168                     Some(_) => return unexpected_character!(deserializer),
169                     None => return unexpected_eoj!(deserializer),
170                 };
171 
172                 // Inserts into object.
173                 object.insert(k, parse_value(deserializer)?);
174 
175                 // Sets the state to NoComma.
176                 state = InnerState::NoComma;
177             }
178             // In the initial state, it is illegal to encounter any other character.
179             (InnerState::Start, Some(_)) => return unexpected_character!(deserializer),
180             // In the NoComma state, when "," is encountered, converts state to HaveComma.
181             (InnerState::NoComma, Some(COMMA)) => {
182                 deserializer.reader.discard();
183                 state = InnerState::AfterComma;
184             }
185             // In the NoComma state, it's illegal to encounter any other character.
186             (InnerState::NoComma, Some(_)) => return unexpected_character!(deserializer),
187             // In the HaveComma state, it's illegal to encounter any other character.
188             (InnerState::AfterComma, Some(_)) => return unexpected_character!(deserializer),
189             // In all cases, None is illegal.
190             (_, None) => return unexpected_eoj!(deserializer),
191         }
192     }
193     Ok(JsonValue::Object(object))
194 }
195 
196 // Parses string
197 #[cfg(not(feature = "c_adapter"))]
parse_string<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<String, Error>198 pub(crate) fn parse_string<R: Cacheable>(
199     deserializer: &mut Deserializer<R>,
200 ) -> Result<String, Error> {
201     let vec = parse_string_inner(deserializer)?;
202     // Since the vec contents are all checked upon matching, the unchecked method is used directly here.
203     Ok(unsafe { String::from_utf8_unchecked(vec) })
204 }
205 
// Parses string (variant used when the `c_adapter` feature is enabled).
//
// Expects the opening '"' to have been consumed by the caller and returns the
// unescaped contents up to the closing '"' as a `CString`.
//
// Errors: whatever `parse_string_inner` reports, plus
// `ParseError::UnexpectedCharacter` naming '\0' (at the current reader
// position, i.e. after the string) when the contents hold a NUL byte.
#[cfg(feature = "c_adapter")]
pub(crate) fn parse_string<R: Cacheable>(
    deserializer: &mut Deserializer<R>,
) -> Result<CString, Error> {
    let vec = parse_string_inner(deserializer)?;
    // A `\u0000` escape is decoded by `parse_unicode` into a literal 0 byte,
    // so the buffer can contain an interior NUL. `CString` must not, hence the
    // checked constructor instead of `from_vec_unchecked`.
    match CString::new(vec) {
        Ok(c_string) => Ok(c_string),
        Err(_) => {
            let position = deserializer.reader.position();
            Err(ParseError::UnexpectedCharacter(position.line(), position.column(), '\0').into())
        }
    }
}
214 
215 // Parses key
216 #[inline]
parse_key<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<String, Error>217 fn parse_key<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<String, Error> {
218     let vec = parse_string_inner(deserializer)?;
219     // Since the vec contents are all checked upon matching, the unchecked method is used directly here.
220     Ok(unsafe { String::from_utf8_unchecked(vec) })
221 }
222 
parse_string_inner<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<Vec<u8>, Error>223 pub(crate) fn parse_string_inner<R: Cacheable>(
224     deserializer: &mut Deserializer<R>,
225 ) -> Result<Vec<u8>, Error> {
226     // Used to store strings.
227     let mut vec = Vec::new();
228 
229     // Sets the starting position of the string.
230     deserializer.reader.start_caching();
231 
232     loop {
233         match deserializer.reader.peek().map_err(Error::new_reader)? {
234             Some(ch) => {
235                 // Improves character recognition speed (reduce the number of comparisons) by looking up tables.
236                 // If it is an ordinary character, skips it.
237                 if !ESCAPE[ch as usize] {
238                     deserializer.reader.discard();
239                     continue;
240                 }
241                 match ch {
242                     // When '"' is encountered, the string is added to vec.
243                     QUOTATION_MARK => {
244                         vec.extend_from_slice(deserializer.reader.cached_slice().unwrap());
245                         deserializer.reader.end_caching();
246                         deserializer.reader.discard();
247                         break;
248                     }
249                     // When '\\' is encountered, matches escape character.
250                     REVERSE_SOLIDUS => {
251                         vec.extend_from_slice(deserializer.reader.cached_slice().unwrap());
252                         deserializer.reader.discard();
253                         parse_escape_character(deserializer, &mut vec)?;
254                         deserializer.reader.start_caching();
255                     }
256 
257                     _ => {
258                         // Other control characters are not output.
259                         return unexpected_character!(deserializer);
260                     }
261                 }
262             }
263             None => return unexpected_eoj!(deserializer),
264         }
265     }
266     Ok(vec)
267 }
268 
269 // Parses escape characters.
parse_escape_character<R: Cacheable>( deserializer: &mut Deserializer<R>, vec: &mut Vec<u8>, ) -> Result<(), Error>270 fn parse_escape_character<R: Cacheable>(
271     deserializer: &mut Deserializer<R>,
272     vec: &mut Vec<u8>,
273 ) -> Result<(), Error> {
274     vec.push(
275         match deserializer.reader.peek().map_err(Error::new_reader)? {
276             Some(QUOTATION_MARK) => QUOTATION_MARK,
277             Some(REVERSE_SOLIDUS) => REVERSE_SOLIDUS,
278             Some(SOLIDUS) => SOLIDUS,
279             Some(BS) => BS_UNICODE as u8,
280             Some(FF) => FF_UNICODE as u8,
281             Some(LF) => LF_UNICODE as u8,
282             Some(CR) => CR_UNICODE as u8,
283             Some(HT) => HT_UNICODE as u8,
284             Some(UNICODE) => {
285                 deserializer.reader.discard();
286                 return parse_unicode(deserializer, vec);
287             }
288             Some(_) => return unexpected_character!(deserializer),
289             None => return unexpected_eoj!(deserializer),
290         },
291     );
292     deserializer.reader.discard();
293     Ok(())
294 }
295 
296 // Parses unicode
parse_unicode<R: Cacheable>( deserializer: &mut Deserializer<R>, vec: &mut Vec<u8>, ) -> Result<(), Error>297 fn parse_unicode<R: Cacheable>(
298     deserializer: &mut Deserializer<R>,
299     vec: &mut Vec<u8>,
300 ) -> Result<(), Error> {
301     // Reads a hexadecimal number.
302     #[inline]
303     fn get_next_digit<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<u16, Error> {
304         if let Some(ch) = deserializer.reader.peek().map_err(Error::new_reader)? {
305             let result = match ch {
306                 ZERO..=NINE => ch as u16 - ZERO as u16,
307                 A_LOWER..=F_LOWER => ch as u16 - A_LOWER as u16 + 10,
308                 A_UPPER..=F_UPPER => ch as u16 - A_UPPER as u16 + 10,
309                 _ => return unexpected_character!(deserializer),
310             };
311             deserializer.reader.discard();
312             return Ok(result);
313         }
314         unexpected_eoj!(deserializer)
315     }
316 
317     // Reads four hexadecimal digits consecutively.
318     #[inline]
319     fn get_next_four_digits<R: Cacheable>(
320         deserializer: &mut Deserializer<R>,
321     ) -> Result<u16, Error> {
322         Ok(get_next_digit(deserializer)? << 12
323             | get_next_digit(deserializer)? << 8
324             | get_next_digit(deserializer)? << 4
325             | get_next_digit(deserializer)?)
326     }
327 
328     // Unicode character logic: \uXXXX or \uXXXX\uXXXX
329     let unicode1 = get_next_four_digits(deserializer)?;
330     let unicode = match char::try_from(unicode1 as u32) {
331         Ok(code) => code,
332         Err(_) => {
333             match_str!(deserializer, UNICODE_START_STR);
334 
335             match core::char::decode_utf16(
336                 [unicode1, get_next_four_digits(deserializer)?]
337                     .iter()
338                     .copied(),
339             )
340             .next()
341             {
342                 Some(Ok(code)) => code,
343                 _ => return Err(Error::Utf8Transform),
344             }
345         }
346     };
347     vec.extend_from_slice(unicode.encode_utf8(&mut [0; 4]).as_bytes());
348     Ok(())
349 }
350 
351 // Matches number.
parse_number<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<Number, Error>352 pub(crate) fn parse_number<R: Cacheable>(
353     deserializer: &mut Deserializer<R>,
354 ) -> Result<Number, Error> {
355     // Sets the starting position of the string.
356     deserializer.reader.start_caching();
357 
358     // `neg\dot\exp` determines which of `u64\i64\f64` will be used to represent the final number.
359     let mut neg = false;
360     let mut dot = false;
361     let mut exp = false;
362 
363     // Matches '-', JSON syntax does not match '+'
364     if let Some(MINUS) = deserializer.reader.peek().map_err(Error::new_reader)? {
365         deserializer.reader.discard();
366         neg = true;
367     }
368     // `next_ch` temporarily saves unmatched characters after peek.
369     // Used to reduce the number of repeated peeks.
370     let mut next_ch = match deserializer.reader.peek().map_err(Error::new_reader)? {
371         // The integer part cannot have a leading 0, so if it encounters a 0 here,
372         // it enters the value 0 state directly.
373         Some(ZERO) => {
374             deserializer.reader.discard();
375             // The reason to peek here is to compare with
376             // Some(ONE... =NINE) branches keep the same return value.
377             deserializer.reader.peek().map_err(Error::new_reader)?
378         }
379         Some(ONE..=NINE) => {
380             // Matches one digit character first. Ensure that there is at least one digit character.
381             deserializer.reader.discard();
382             // Matches as many numeric characters as possible.
383             eat_digits_until_not!(deserializer)
384         }
385         Some(_) => return unexpected_character!(deserializer),
386         None => return unexpected_eoj!(deserializer),
387     };
388 
389     // If there is a decimal point, matches fractional part.
390     if let Some(DECIMAL_POINT) = next_ch {
391         deserializer.reader.discard();
392         dot = true;
393 
394         // Matches a numeric character.
395         match deserializer.reader.peek().map_err(Error::new_reader)? {
396             Some(ZERO..=NINE) => deserializer.reader.discard(),
397             Some(_) => return unexpected_character!(deserializer),
398             None => return unexpected_eoj!(deserializer),
399         };
400         //Saves the extra characters for the next match.
401         next_ch = eat_digits_until_not!(deserializer)
402     }
403 
404     // If e is present, matches exponential part.
405     if let Some(E_LOWER | E_UPPER) = next_ch {
406         deserializer.reader.discard();
407         exp = true;
408         // Try to match the sign of the exponential part, which can be without the sign.
409         match deserializer.reader.peek().map_err(Error::new_reader)? {
410             Some(PLUS | MINUS) => deserializer.reader.discard(),
411             Some(_) => {}
412             None => return unexpected_eoj!(deserializer),
413         }
414         // Matches a numeric character.
415         match deserializer.reader.peek().map_err(Error::new_reader)? {
416             Some(ZERO..=NINE) => deserializer.reader.discard(),
417             Some(_) => return unexpected_character!(deserializer),
418             None => return unexpected_eoj!(deserializer),
419         };
420         // Matches the remaining numeric characters.
421         eat_digits_until_not!(deserializer);
422     }
423 
424     // The contents of u8 have been checked, so the unchecked method can be used here.
425     let str =
426         unsafe { core::str::from_utf8_unchecked(deserializer.reader.cached_slice().unwrap()) };
427     let number = match (neg, dot, exp) {
428         (false, false, false) => {
429             Number::Unsigned(str.parse::<u64>().map_err(|_| Error::ParseNumber)?)
430         }
431         (true, false, false) => Number::Signed(str.parse::<i64>().map_err(|_| Error::ParseNumber)?),
432         (_, _, _) => Number::Float(str.parse::<f64>().map_err(|_| Error::ParseNumber)?),
433     };
434 
435     deserializer.reader.end_caching();
436     Ok(number)
437 }
438 
parse_array<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error>439 fn parse_array<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error> {
440     enum InnerState {
441         Start,
442         AfterComma,
443         NoComma,
444     }
445 
446     deserializer.recursion_depth += 1;
447     check_recursion(deserializer)?;
448 
449     // Creates an Array to store value.
450     let mut array = Array::new();
451     // The initial status is Start.
452     let mut state = InnerState::Start;
453 
454     loop {
455         match (state, eat_whitespace_until_not!(deserializer)) {
456             // In the initial state, if "]" is encountered, meaning the array is empty.
457             (InnerState::Start, Some(RIGHT_SQUARE_BRACKET)) => break,
458             // If in the initial state or "," has appeared,
459             // matches key-value pairs when any character is encountered.
460             (InnerState::Start | InnerState::AfterComma, _) => {
461                 array.push(parse_value(deserializer)?);
462 
463                 // Here sets the state to NoComma.
464                 state = InnerState::NoComma;
465             }
466             // In NoComma state, the array ends when "]" is encountered.
467             (InnerState::NoComma, Some(RIGHT_SQUARE_BRACKET)) => break,
468             // In the NoComma state, when "," is encountered, converts to the HaveComma state.
469             (InnerState::NoComma, Some(COMMA)) => {
470                 deserializer.reader.discard();
471                 state = InnerState::AfterComma;
472             }
473             // In the NoComma state, it is illegal to encounter any other character.
474             (InnerState::NoComma, Some(_)) => return unexpected_character!(deserializer),
475             // In all cases, None is illegal.
476             (_, None) => return unexpected_eoj!(deserializer),
477         }
478     }
479     deserializer.reader.discard();
480     deserializer.recursion_depth -= 1;
481     Ok(JsonValue::Array(array))
482 }
483 
read_error_char<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<Option<char>, Error>484 pub(crate) fn read_error_char<R: Cacheable>(
485     deserializer: &mut Deserializer<R>,
486 ) -> Result<Option<char>, Error> {
487     const CONT_MASK: u8 = 0b0011_1111;
488 
489     #[inline]
490     fn utf8_first_byte(byte: u8, width: u32) -> u32 {
491         (byte & (0x7F >> width)) as u32
492     }
493 
494     #[inline]
495     fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
496         (ch << 6) | (byte & CONT_MASK) as u32
497     }
498 
499     let x = match deserializer.reader.next().map_err(Error::new_reader)? {
500         Some(x) => x,
501         None => return Ok(None),
502     };
503 
504     let ch = if x < 128 {
505         x as u32
506     } else {
507         let init = utf8_first_byte(x, 2);
508 
509         let y = match deserializer.reader.next().map_err(Error::new_reader)? {
510             Some(y) => y,
511             None => return Ok(None),
512         };
513 
514         let mut ch = utf8_acc_cont_byte(init, y);
515 
516         if x >= 0xE0 {
517             let z = match deserializer.reader.next().map_err(Error::new_reader)? {
518                 Some(z) => z,
519                 None => return Ok(None),
520             };
521 
522             let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
523             ch = init << 12 | y_z;
524 
525             if x >= 0xF0 {
526                 let w = match deserializer.reader.next().map_err(Error::new_reader)? {
527                     Some(w) => w,
528                     None => return Ok(None),
529                 };
530                 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
531             }
532         }
533         ch
534     };
535     unsafe { Ok(Some(char::from_u32_unchecked(ch))) }
536 }
537 
538 #[cfg(test)]
539 mod ut_states {
540     use crate::reader::BytesReader;
541     use crate::states::*;
542     use std::io::{ErrorKind, Read};
543 
544     struct ErrorIo;
545 
546     impl Read for ErrorIo {
read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize>547         fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
548             Err(ErrorKind::AddrInUse.into())
549         }
550     }
551 
552     /// UT test for macro `eat_whitespace_until_not`.
553     ///
554     /// # Title
555     /// ut_macro_eat_whitespace_until_not
556     ///
557     /// # Brief
558     /// 1. Constructs various inputs.
559     /// 2. Uses macro `eat_whitespace_until_not`.
560     /// 3. Checks if the results are correct.
561     #[test]
ut_macro_eat_whitespace_until_not()562     fn ut_macro_eat_whitespace_until_not() {
563         fn test_func<R: BytesReader + Cacheable>(
564             deserializer: &mut Deserializer<R>,
565         ) -> Result<Option<u8>, Error> {
566             Ok(eat_whitespace_until_not!(deserializer))
567         }
568 
569         let mut deserializer = Deserializer::new_from_slice(b"      n");
570         assert_eq!(test_func(&mut deserializer).unwrap(), Some(b'n'));
571 
572         let mut deserializer = Deserializer::new_from_slice(b"      ");
573         assert_eq!(test_func(&mut deserializer).unwrap(), None);
574 
575         let mut deserializer = Deserializer::new_from_io(ErrorIo);
576         assert!(test_func(&mut deserializer).is_err());
577     }
578 
579     /// UT test for macro `eat_digits_until_not`.
580     ///
581     /// # Title
582     /// ut_macro_eat_digits_until_not
583     ///
584     /// # Brief
585     /// 1. Constructs various inputs.
586     /// 2. Uses macro `eat_digits_until_not`.
587     /// 3. Checks if the results are correct.
588     #[test]
ut_macro_eat_digits_until_not()589     fn ut_macro_eat_digits_until_not() {
590         fn test_func<R: BytesReader + Cacheable>(
591             deserializer: &mut Deserializer<R>,
592         ) -> Result<Option<u8>, Error> {
593             Ok(eat_digits_until_not!(deserializer))
594         }
595 
596         let mut deserializer = Deserializer::new_from_slice(b"1234n");
597         assert_eq!(test_func(&mut deserializer).unwrap(), Some(b'n'));
598 
599         let mut deserializer = Deserializer::new_from_slice(b"1234");
600         assert_eq!(test_func(&mut deserializer).unwrap(), None);
601 
602         let mut deserializer = Deserializer::new_from_io(ErrorIo);
603         assert!(test_func(&mut deserializer).is_err());
604     }
605 
606     /// UT test for macro `match_str`.
607     ///
608     /// # Title
609     /// ut_macro_match_str
610     ///
611     /// # Brief
612     /// 1. Constructs various inputs.
613     /// 2. Uses macro `match_str`.
614     /// 3. Checks if the results are correct.
615     #[test]
ut_macro_match_str()616     fn ut_macro_match_str() {
617         #[allow(clippy::unit_arg)]
618         fn test_func<R: Cacheable>(
619             deserializer: &mut Deserializer<R>,
620             target: &[u8],
621         ) -> Result<(), Error> {
622             Ok(match_str!(deserializer, target))
623         }
624 
625         let mut deserializer = Deserializer::new_from_slice(b"1234");
626         assert!(test_func(&mut deserializer, b"1234").is_ok());
627 
628         let mut deserializer = Deserializer::new_from_io(ErrorIo);
629         assert!(test_func(&mut deserializer, b"1234").is_err());
630     }
631 
632     /// UT test for `start_parsing`.
633     ///
634     /// # Title
635     /// ut_start_parsing
636     ///
637     /// # Brief
638     /// 1. Constructs various inputs.
639     /// 2. Calls `start_parsing`.
640     /// 3. Checks if the results are correct.
641     #[test]
ut_start_parsing()642     fn ut_start_parsing() {
643         let mut deserializer = Deserializer::new_from_slice(b"null");
644         assert_eq!(start_parsing(&mut deserializer).unwrap(), JsonValue::Null);
645 
646         let mut deserializer = Deserializer::new_from_slice(b"null      invalid");
647         assert!(start_parsing(&mut deserializer).is_err());
648     }
649 
650     /// UT test for `read_error_char`.
651     ///
652     /// # Title
653     /// ut_read_error_char
654     ///
655     /// # Brief
656     /// 1. Constructs various inputs.
657     /// 2. Calls `read_error_char`.
658     /// 3. Checks if the results are correct.
659     #[test]
ut_read_error_char()660     fn ut_read_error_char() {
661         let mut deserializer = Deserializer::new_from_slice("��".as_bytes());
662         assert_eq!(read_error_char(&mut deserializer).unwrap(), Some('��'));
663 
664         let mut deserializer = Deserializer::new_from_slice(&[]);
665         assert_eq!(read_error_char(&mut deserializer).unwrap(), None);
666 
667         let mut deserializer = Deserializer::new_from_slice(&[0xf0]);
668         assert_eq!(read_error_char(&mut deserializer).unwrap(), None);
669 
670         let mut deserializer = Deserializer::new_from_slice(&[0xf0, 0xa4]);
671         assert_eq!(read_error_char(&mut deserializer).unwrap(), None);
672 
673         let mut deserializer = Deserializer::new_from_slice(&[0xf0, 0xa4, 0xad]);
674         assert_eq!(read_error_char(&mut deserializer).unwrap(), None);
675     }
676 
677     /// UT test for `parse_value`.
678     ///
679     /// # Title
680     /// ut_parse_value
681     ///
682     /// # Brief
683     /// 1. Creates an instance of json.
684     /// 2. Calls the parsing function of State.
685     /// 3. Checks if the results are correct.
686     #[test]
ut_parse_value()687     fn ut_parse_value() {
688         let mut deserializer = Deserializer::new_from_slice(b"null");
689         assert_eq!(parse_value(&mut deserializer).unwrap(), JsonValue::Null);
690 
691         let mut deserializer = Deserializer::new_from_slice(b"true");
692         assert_eq!(
693             parse_value(&mut deserializer).unwrap(),
694             JsonValue::Boolean(true)
695         );
696 
697         let mut deserializer = Deserializer::new_from_slice(b"false");
698         assert_eq!(
699             parse_value(&mut deserializer).unwrap(),
700             JsonValue::Boolean(false)
701         );
702 
703         let mut deserializer = Deserializer::new_from_slice(b"123");
704         assert!(parse_value(&mut deserializer).is_ok());
705 
706         let mut deserializer = Deserializer::new_from_slice(b"\"abc\"");
707         assert!(parse_value(&mut deserializer).is_ok());
708 
709         let mut deserializer = Deserializer::new_from_slice(b"[1, 2, 3]");
710         assert!(parse_value(&mut deserializer).is_ok());
711 
712         let mut deserializer = Deserializer::new_from_slice(b"{\"key\":\"value\"}");
713         assert!(parse_value(&mut deserializer).is_ok());
714 
715         let mut deserializer = Deserializer::new_from_slice(b"\"abc\"");
716         assert!(parse_value(&mut deserializer).is_ok());
717     }
718 
719     /// UT test for `parse_string`.
720     ///
721     /// # Title
722     /// ut_parse_string
723     ///
724     /// # Brief
725     /// 1. Creates an instance of Reader.
726     /// 2. Calls the parsing function of State.
727     /// 3. Checks if the results are correct.
728     #[test]
ut_parse_string()729     fn ut_parse_string() {
730         // 1.Enter a valid key (or String) and return a string.
731         // 2.Enter an invalid key (or string) and return an Error message.
732 
733         #[cfg(feature = "c_adapter")]
734         use std::ffi::CString;
735 
736         // Ensure that the previous '"' has been read before entering parse_string.
737         // Empty string
738         let str = "\"";
739         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
740         #[cfg(not(feature = "c_adapter"))]
741         assert_eq!(parse_string(&mut deserializer).unwrap(), String::from(""));
742         #[cfg(feature = "c_adapter")]
743         assert_eq!(
744             parse_string(&mut deserializer).unwrap(),
745             CString::new("").unwrap()
746         );
747 
748         // General character
749         let str = "abcdefghijklmnopqrstuvwxyz1234567890-=~!@#$%^&*()_+[]{}|<>?:;'\"";
750         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
751         #[cfg(not(feature = "c_adapter"))]
752         assert_eq!(
753             parse_string(&mut deserializer).unwrap(),
754             String::from("abcdefghijklmnopqrstuvwxyz1234567890-=~!@#$%^&*()_+[]{}|<>?:;'"),
755         );
756         #[cfg(feature = "c_adapter")]
757         assert_eq!(
758             parse_string(&mut deserializer).unwrap(),
759             CString::new("abcdefghijklmnopqrstuvwxyz1234567890-=~!@#$%^&*()_+[]{}|<>?:;'").unwrap(),
760         );
761 
762         // Escape character
763         let str = r#"\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t""#;
764         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
765         #[cfg(not(feature = "c_adapter"))]
766         assert_eq!(
767             parse_string(&mut deserializer).unwrap(),
768             String::from(
769                 "/\\\"\u{CAFE}\u{BABE}\u{AB98}\u{FCDE}\u{bcda}\u{ef4A}\u{0008}\u{000c}\n\r\t"
770             ),
771         );
772         #[cfg(feature = "c_adapter")]
773         assert_eq!(
774             parse_string(&mut deserializer).unwrap(),
775             CString::new(
776                 "/\\\"\u{CAFE}\u{BABE}\u{AB98}\u{FCDE}\u{bcda}\u{ef4A}\u{0008}\u{000c}\n\r\t"
777             )
778             .unwrap(),
779         );
780 
781         let str = r#"\uD852\uDF62""#;
782         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
783         #[cfg(not(feature = "c_adapter"))]
784         assert_eq!(parse_string(&mut deserializer).unwrap(), String::from("��"),);
785         #[cfg(feature = "c_adapter")]
786         assert_eq!(
787             parse_string(&mut deserializer).unwrap(),
788             CString::new("��").unwrap(),
789         );
790 
791         // Error scenes
792         // 1.There are no trailing quotes to end a match (or encounter a terminator).
793         let str = "abc";
794         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
795         assert!(parse_string(&mut deserializer).is_err());
796 
797         // 2.Illegal escape character.
798         let str = r#"\g""#;
799         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
800         assert!(parse_string(&mut deserializer).is_err());
801 
802         // 3.A backslash is followed by a terminator.
803         let str = r#"\"#;
804         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
805         assert!(parse_string(&mut deserializer).is_err());
806 
807         // 4.Illegal unicode characters.
808         let str = r#"\uBEEF"#;
809         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
810         assert!(parse_string(&mut deserializer).is_err());
811 
812         let str = r#"\uZ000"#;
813         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
814         assert!(parse_string(&mut deserializer).is_err());
815 
816         let str = r#"\u"#;
817         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
818         assert!(parse_string(&mut deserializer).is_err());
819 
820         let str = r#"\uD852\uDB00""#;
821         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
822         assert!(parse_string(&mut deserializer).is_err());
823 
824         // 5.Control character.
825         let str = "\u{0}";
826         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
827         assert!(parse_string(&mut deserializer).is_err());
828     }
829 
830     /// UT test for `parse_number`.
831     ///
832     /// # Title
833     /// ut_parse_number
834     ///
835     /// # Brief
836     /// 1. Creates an instance of Reader.
837     /// 2. Calls the parsing function of State.
838     /// 3. Checks if the results are correct.
839     #[test]
ut_parse_number()840     fn ut_parse_number() {
841         // 1.Enters a value (legal) and return a numeric value.
842         // 2.Enters a value (illegal) and return the corresponding Error.
843         // 3.Enters a value (text terminated prematurely, illegal) and return the corresponding Error.
844 
845         let str = r#"0"#;
846         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
847         assert_eq!(parse_number(&mut deserializer).unwrap(), 0.into());
848 
849         let str = r#"-0"#;
850         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
851         assert_eq!(parse_number(&mut deserializer).unwrap(), 0.into());
852 
853         let str = r#"0.123e+4"#;
854         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
855         assert_eq!(parse_number(&mut deserializer).unwrap(), 1230.into());
856 
857         // Error scenes.
858         // 1.No number exists.
859         let str = r#""#;
860         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
861         assert!(parse_number(&mut deserializer).is_err());
862 
863         // 2.Non-numeric characters exist.
864         let str = r#"a123"#;
865         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
866         assert!(parse_number(&mut deserializer).is_err());
867 
868         // 3.There is no integer part.
869         let str = r#".123"#;
870         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
871         assert!(parse_number(&mut deserializer).is_err());
872 
873         // 4.Positive numbers appear with a plus sign.
874         let str = r#"+1234"#;
875         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
876         assert!(parse_number(&mut deserializer).is_err());
877 
878         // 5.Integer part in front of a number of 0.
879         let str = r#"00001234"#;
880         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
881         // In this case, only 0 will be read.
882         // The subsequent matching will cause an error when encounter a number.
883         assert_eq!(parse_number(&mut deserializer).unwrap(), 0.into());
884 
885         // 6.The integer part contains other characters.
886         let str = r#"12a34"#;
887         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
888         // In this case, only 12 will be read.
889         // The subsequent matching will cause an error when encounter 'a'.
890         assert_eq!(parse_number(&mut deserializer).unwrap(), 12.into());
891 
892         // 7.The decimal part contains other characters.
893         let str = r#"12.a34"#;
894         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
895         assert!(parse_number(&mut deserializer).is_err());
896 
897         let str = r#"12."#;
898         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
899         assert!(parse_number(&mut deserializer).is_err());
900 
901         let str = r#"12.3a4"#;
902         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
903         // In this case, only 12.3 will be read.
904         // The subsequent matching will cause an error when encounter 'a'.
905         assert_eq!(parse_number(&mut deserializer).unwrap(), (12.3).into());
906 
907         // 8.The exponential part contains other characters.
908         let str = r#"12.34e+2a3"#;
909         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
910         // In this case, only 12.34e+2 will be read.
911         // The subsequent matching will cause an error when encounter 'a'.
912         assert_eq!(parse_number(&mut deserializer).unwrap(), (1234).into());
913 
914         let str = r#"12.34e"#;
915         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
916         assert!(parse_number(&mut deserializer).is_err());
917 
918         let str = r#"12.34ea"#;
919         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
920         assert!(parse_number(&mut deserializer).is_err());
921 
922         let str = r#"12.34e+"#;
923         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
924         assert!(parse_number(&mut deserializer).is_err());
925     }
926 
927     /// UT test for `ut_parse_array`.
928     ///
929     /// # Title
930     /// ut_parse_array
931     ///
932     /// # Brief
933     /// 1. Creates an instance of Reader.
934     /// 2. Calls the parsing function of State.
935     /// 3. Checks if the results are correct.
936     #[test]
ut_parse_array()937     fn ut_parse_array() {
938         // 1.Enters a value (legal) and return a numeric value.
939         // 2.Enters a value (illegal) and return the corresponding Error.
940         // 3.Enters a value (text terminated prematurely, illegal) and return the corresponding Error.
941 
942         // Before entering the parse_array function, needs to match '['.
943         let str = r#"]"#;
944         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
945         assert_eq!(parse_array(&mut deserializer).unwrap(), Array::new().into());
946 
947         let str = r#"              ]"#;
948         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
949         assert_eq!(parse_array(&mut deserializer).unwrap(), Array::new().into());
950 
951         let str = r#"1, 2, 3]"#;
952         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
953         let array = array!(1u8, 2u8, 3u8);
954         assert_eq!(parse_array(&mut deserializer).unwrap(), array.into());
955 
956         let str = "\
957             1,\
958             2,\
959             3\
960         ]";
961         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
962         let array = array!(1u8, 2u8, 3u8);
963         assert_eq!(parse_array(&mut deserializer).unwrap(), array.into());
964 
965         // Error scenes.
966         // 1.Encounter terminator too early.
967         let str = "";
968         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
969         assert!(parse_array(&mut deserializer).is_err());
970 
971         let str = "1  ";
972         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
973         assert!(parse_array(&mut deserializer).is_err());
974 
975         // 2.',' is not used between values.
976         let str = "1 2";
977         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
978         assert!(parse_array(&mut deserializer).is_err());
979 
980         // 3.The extra ',' at the end.
981         let str = "1, 2,]";
982         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
983         assert!(parse_array(&mut deserializer).is_err());
984     }
985 
986     /// UT test for `parse_object`.
987     ///
988     /// # Title
989     /// parse_object
990     ///
991     /// # Brief
992     /// 1. Creates an instance of Reader.
993     /// 2. Calls the parsing function of State.
994     /// 3. Checks if the results are correct.
995     #[test]
ut_parse_object()996     fn ut_parse_object() {
997         // 1.Enters a value (legal) and return a numeric value.
998         // 2.Enters a value (illegal) and return the corresponding Error.
999         // 3.Enters a value (text terminated prematurely, illegal) and return the corresponding Error.
1000 
1001         // Before entering parse_object, needs to match '{'.
1002         let str = "}";
1003         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1004         assert_eq!(
1005             parse_object(&mut deserializer).unwrap(),
1006             Object::new().into()
1007         );
1008 
1009         let str = "\"key1\": \"value\", \"key2\": \"value\"}";
1010         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1011         let object = object!("key1" => "value"; "key2" => "value");
1012         assert_eq!(parse_object(&mut deserializer).unwrap(), object.into());
1013 
1014         let str = "\
1015             \"key1\": \"value\",\
1016             \"key2\": \"value\"\
1017         }";
1018         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1019         let object = object!("key1" => "value"; "key2" => "value");
1020         assert_eq!(parse_object(&mut deserializer).unwrap(), object.into());
1021 
1022         // Error scenes.
1023         // 1.Encounter terminator too early.
1024         let str = "";
1025         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1026         assert!(parse_object(&mut deserializer).is_err());
1027 
1028         // 2.Encounter ',' too early.
1029         let str = ",";
1030         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1031         assert!(parse_object(&mut deserializer).is_err());
1032 
1033         // 3.The extra ',' at the end.
1034         let str = "\"key1\": \"value\", \"key2\": \"value\",}";
1035         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1036         assert!(parse_object(&mut deserializer).is_err());
1037 
1038         // 4.There is no ':'.
1039         let str = "\"key1\"t";
1040         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1041         assert!(parse_object(&mut deserializer).is_err());
1042 
1043         let str = "\"key1\"";
1044         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1045         assert!(parse_object(&mut deserializer).is_err());
1046 
1047         // 5.Extra character.
1048         let str = "\"key1\": 1      t";
1049         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1050         assert!(parse_object(&mut deserializer).is_err());
1051 
1052         let str = "\"key1\": 1, t";
1053         let mut deserializer = Deserializer::new_from_slice(str.as_bytes());
1054         assert!(parse_object(&mut deserializer).is_err());
1055     }
1056     /// UT test for recursion limit.
1057     ///
1058     /// # Title
1059     /// ut_recursion_limit
1060     ///
1061     /// # Brief
1062     /// 1. Creates an instance exceeds recursion limit.
1063     /// 2. Calls the parsing function of State.
1064     /// 3. Checks if the results are correct.
1065     #[test]
ut_recursion_limit()1066     fn ut_recursion_limit() {
1067         // Examples of array.
1068         // This example has 128 layers of recursion(The upper recursion limit).
1069         let text = r#"
1070         [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
1071         [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
1072         [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
1073         [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
1074         ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
1075         ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
1076         ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
1077         ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
1078         "#;
1079         let mut deserializer = Deserializer::new_from_slice(text.as_ref());
1080         assert!(start_parsing(&mut deserializer).is_ok());
1081 
1082         // This example has 129 layers of recursion(The upper recursion limit is 128).
1083         let text = r#"
1084         [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
1085         [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
1086         [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
1087         [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
1088         ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
1089         ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
1090         ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
1091         ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
1092         "#;
1093         let mut deserializer = Deserializer::new_from_slice(text.as_ref());
1094         assert!(start_parsing(&mut deserializer).is_err());
1095 
1096         // Examples of object.
1097         let mut str = String::from(r#"{"key":"value"}"#);
1098         // 128 layers
1099         for _i in 0..RECURSION_LIMIT - 1 {
1100             str = str.replace(r#""value""#, r#"{"key":"value"}"#);
1101         }
1102         let text = str.as_bytes();
1103         let mut deserializer = Deserializer::new_from_slice(text);
1104         assert!(start_parsing(&mut deserializer).is_ok());
1105 
1106         let mut str = String::from(r#"{"key":"value"}"#);
1107         // 129 layers
1108         for _i in 0..RECURSION_LIMIT {
1109             str = str.replace(r#""value""#, r#"{"key":"value"}"#);
1110         }
1111         let text = str.as_bytes();
1112         let mut deserializer = Deserializer::new_from_slice(text);
1113         assert!(start_parsing(&mut deserializer).is_err());
1114     }
1115 }
1116