1 // Copyright (c) 2023 Huawei Device Co., Ltd.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 use crate::reader::Cacheable;
15 use crate::{
16 consts::*, deserializer::Deserializer, Array, Error, JsonValue, Number, Object, ParseError,
17 };
18 use core::convert::TryFrom;
19 #[cfg(feature = "c_adapter")]
20 pub use std::ffi::CString;
21
// Builds an `Err` describing the character found at the reader's current position.
//
// The position is captured before `read_error_char` consumes any bytes, so the reported
// line and column belong to the start of the offending character. When no character can
// be decoded, `ParseError::InvalidUtf8Bytes` is reported instead; an error coming out of
// `read_error_char` itself is passed through unchanged.
macro_rules! unexpected_character {
    ($de: expr) => {{
        let at = $de.reader.position();
        read_error_char($de).and_then(|decoded| match decoded {
            Some(found) => {
                Err(ParseError::UnexpectedCharacter(at.line(), at.column(), found).into())
            }
            None => Err(ParseError::InvalidUtf8Bytes(at.line()).into()),
        })
    }};
}
34
// Builds an `Err` holding `ParseError::UnexpectedEndOfJson` for the reader's current line.
macro_rules! unexpected_eoj {
    ($de: expr) => {{
        let line = $de.reader.position().line();
        Err(ParseError::UnexpectedEndOfJson(line).into())
    }};
}
40
// Skips whitespace and evaluates to the first byte that is not in `WHITE_SPACE_SET`
// (left unconsumed), or to `None` once the reader has nothing more to offer.
//
// A failing `peek` is wrapped with `Error::new_reader` and propagated with `?`, so the
// macro must be expanded inside a function that can return that error.
macro_rules! eat_whitespace_until_not {
    ($de: expr) => {{
        let mut upcoming = $de.reader.peek().map_err(Error::new_reader)?;
        while let Some(byte) = upcoming {
            if !WHITE_SPACE_SET.contains(&byte) {
                break;
            }
            $de.reader.discard();
            upcoming = $de.reader.peek().map_err(Error::new_reader)?;
        }
        upcoming
    }};
}
51
// Consumes consecutive ASCII digits and evaluates to the first non-digit byte (left
// unconsumed), or to `None` when the reader runs out of input.
//
// A failing `peek` is wrapped with `Error::new_reader` and propagated with `?`.
macro_rules! eat_digits_until_not {
    ($de: expr) => {{
        let mut upcoming = $de.reader.peek().map_err(Error::new_reader)?;
        while let Some(digit) = upcoming {
            if !matches!(digit, ZERO..=NINE) {
                break;
            }
            $de.reader.discard();
            upcoming = $de.reader.peek().map_err(Error::new_reader)?;
        }
        upcoming
    }};
}
62
// Consumes the bytes of `$text` from the reader one at a time.
//
// Returns from the *enclosing function* with an unexpected-character error on the first
// mismatching byte, or with an unexpected-end error if the input stops early. Bytes that
// matched before the failure stay consumed.
macro_rules! match_str {
    ($de: expr, $text: expr) => {{
        for expected in $text {
            let found = $de.reader.peek().map_err(Error::new_reader)?;
            match found {
                None => return unexpected_eoj!($de),
                Some(byte) if byte != *expected => return unexpected_character!($de),
                Some(_) => $de.reader.discard(),
            }
        }
    }};
}
74
check_recursion<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<(), Error>75 pub(crate) fn check_recursion<R: Cacheable>(
76 deserializer: &mut Deserializer<R>,
77 ) -> Result<(), Error> {
78 if deserializer.recursion_depth > RECURSION_LIMIT {
79 Err(Error::ExceedRecursionLimit)
80 } else {
81 Ok(())
82 }
83 }
84
85 #[inline]
start_parsing<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<JsonValue, Error>86 pub(crate) fn start_parsing<R: Cacheable>(
87 deserializer: &mut Deserializer<R>,
88 ) -> Result<JsonValue, Error> {
89 let value = parse_value(deserializer)?;
90
91 // If the text is not finished, return TrailingBytes Error.
92 if eat_whitespace_until_not!(deserializer).is_some() {
93 return Err(ParseError::TrailingBytes(deserializer.reader.position().line()).into());
94 }
95 Ok(value)
96 }
97
98 // Parses value.
parse_value<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error>99 fn parse_value<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error> {
100 match eat_whitespace_until_not!(deserializer) {
101 Some(ZERO..=NINE | MINUS) => Ok(JsonValue::Number(parse_number(deserializer)?)),
102 Some(LEFT_CURLY_BRACKET) => {
103 deserializer.reader.discard();
104 parse_object(deserializer)
105 }
106 Some(LEFT_SQUARE_BRACKET) => {
107 deserializer.reader.discard();
108 parse_array(deserializer)
109 }
110 Some(QUOTATION_MARK) => {
111 deserializer.reader.discard();
112 Ok(JsonValue::String(parse_string(deserializer)?))
113 }
114 Some(T_LOWER) => {
115 deserializer.reader.discard();
116 match_str!(deserializer, TRUE_LEFT_STR);
117 Ok(JsonValue::Boolean(true))
118 }
119 Some(F_LOWER) => {
120 deserializer.reader.discard();
121 match_str!(deserializer, FALSE_LEFT_STR);
122 Ok(JsonValue::Boolean(false))
123 }
124 Some(N_LOWER) => {
125 deserializer.reader.discard();
126 match_str!(deserializer, NULL_LEFT_STR);
127 Ok(JsonValue::Null)
128 }
129 Some(_) => unexpected_character!(deserializer),
130 None => unexpected_eoj!(deserializer),
131 }
132 }
133
134 // Parses object
parse_object<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error>135 fn parse_object<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error> {
136 // Uses an internal state machine to determine the flow.
137 enum InnerState {
138 Start, // State at the start of the match.
139 AfterComma, // Comma already exists.
140 NoComma, // Comma didn't exist before
141 }
142
143 deserializer.recursion_depth += 1;
144 check_recursion(deserializer)?;
145
146 // Creates an Object to store key-value pairs.
147 let mut object = Object::new();
148 // The initial status is Start.
149 let mut state = InnerState::Start;
150
151 loop {
152 match (state, eat_whitespace_until_not!(deserializer)) {
153 // If "}" is encountered in the initial or NoComma state, object is null.
154 (InnerState::Start | InnerState::NoComma, Some(RIGHT_CURLY_BRACKET)) => {
155 deserializer.reader.discard();
156 deserializer.recursion_depth -= 1;
157 break;
158 }
159 // If "\" is encountered in the initial state or
160 // if "," is already present, matches key-value pairs.
161 (InnerState::Start | InnerState::AfterComma, Some(QUOTATION_MARK)) => {
162 deserializer.reader.discard();
163 let k = parse_key(deserializer)?;
164
165 // Matches ':'
166 match eat_whitespace_until_not!(deserializer) {
167 Some(COLON) => deserializer.reader.discard(),
168 Some(_) => return unexpected_character!(deserializer),
169 None => return unexpected_eoj!(deserializer),
170 };
171
172 // Inserts into object.
173 object.insert(k, parse_value(deserializer)?);
174
175 // Sets the state to NoComma.
176 state = InnerState::NoComma;
177 }
178 // In the initial state, it is illegal to encounter any other character.
179 (InnerState::Start, Some(_)) => return unexpected_character!(deserializer),
180 // In the NoComma state, when "," is encountered, converts state to HaveComma.
181 (InnerState::NoComma, Some(COMMA)) => {
182 deserializer.reader.discard();
183 state = InnerState::AfterComma;
184 }
185 // In the NoComma state, it's illegal to encounter any other character.
186 (InnerState::NoComma, Some(_)) => return unexpected_character!(deserializer),
187 // In the HaveComma state, it's illegal to encounter any other character.
188 (InnerState::AfterComma, Some(_)) => return unexpected_character!(deserializer),
189 // In all cases, None is illegal.
190 (_, None) => return unexpected_eoj!(deserializer),
191 }
192 }
193 Ok(JsonValue::Object(object))
194 }
195
196 // Parses string
197 #[cfg(not(feature = "c_adapter"))]
parse_string<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<String, Error>198 pub(crate) fn parse_string<R: Cacheable>(
199 deserializer: &mut Deserializer<R>,
200 ) -> Result<String, Error> {
201 let vec = parse_string_inner(deserializer)?;
202 // Since the vec contents are all checked upon matching, the unchecked method is used directly here.
203 Ok(unsafe { String::from_utf8_unchecked(vec) })
204 }
205
// Parses the rest of a JSON string (the caller has already consumed the opening '"') into
// a `CString`.
//
// A `\u0000` escape is decoded to a literal zero byte, so the collected bytes may contain
// an interior NUL. `CString::new` checks for that; such input is rejected with
// `Error::Utf8Transform` instead of violating the precondition of the unchecked
// constructor.
#[cfg(feature = "c_adapter")]
pub(crate) fn parse_string<R: Cacheable>(
    deserializer: &mut Deserializer<R>,
) -> Result<CString, Error> {
    let bytes = parse_string_inner(deserializer)?;
    CString::new(bytes).map_err(|_| Error::Utf8Transform)
}
214
215 // Parses key
216 #[inline]
parse_key<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<String, Error>217 fn parse_key<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<String, Error> {
218 let vec = parse_string_inner(deserializer)?;
219 // Since the vec contents are all checked upon matching, the unchecked method is used directly here.
220 Ok(unsafe { String::from_utf8_unchecked(vec) })
221 }
222
parse_string_inner<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<Vec<u8>, Error>223 pub(crate) fn parse_string_inner<R: Cacheable>(
224 deserializer: &mut Deserializer<R>,
225 ) -> Result<Vec<u8>, Error> {
226 // Used to store strings.
227 let mut vec = Vec::new();
228
229 // Sets the starting position of the string.
230 deserializer.reader.start_caching();
231
232 loop {
233 match deserializer.reader.peek().map_err(Error::new_reader)? {
234 Some(ch) => {
235 // Improves character recognition speed (reduce the number of comparisons) by looking up tables.
236 // If it is an ordinary character, skips it.
237 if !ESCAPE[ch as usize] {
238 deserializer.reader.discard();
239 continue;
240 }
241 match ch {
242 // When '"' is encountered, the string is added to vec.
243 QUOTATION_MARK => {
244 vec.extend_from_slice(deserializer.reader.cached_slice().unwrap());
245 deserializer.reader.end_caching();
246 deserializer.reader.discard();
247 break;
248 }
249 // When '\\' is encountered, matches escape character.
250 REVERSE_SOLIDUS => {
251 vec.extend_from_slice(deserializer.reader.cached_slice().unwrap());
252 deserializer.reader.discard();
253 parse_escape_character(deserializer, &mut vec)?;
254 deserializer.reader.start_caching();
255 }
256
257 _ => {
258 // Other control characters are not output.
259 return unexpected_character!(deserializer);
260 }
261 }
262 }
263 None => return unexpected_eoj!(deserializer),
264 }
265 }
266 Ok(vec)
267 }
268
269 // Parses escape characters.
parse_escape_character<R: Cacheable>( deserializer: &mut Deserializer<R>, vec: &mut Vec<u8>, ) -> Result<(), Error>270 fn parse_escape_character<R: Cacheable>(
271 deserializer: &mut Deserializer<R>,
272 vec: &mut Vec<u8>,
273 ) -> Result<(), Error> {
274 vec.push(
275 match deserializer.reader.peek().map_err(Error::new_reader)? {
276 Some(QUOTATION_MARK) => QUOTATION_MARK,
277 Some(REVERSE_SOLIDUS) => REVERSE_SOLIDUS,
278 Some(SOLIDUS) => SOLIDUS,
279 Some(BS) => BS_UNICODE as u8,
280 Some(FF) => FF_UNICODE as u8,
281 Some(LF) => LF_UNICODE as u8,
282 Some(CR) => CR_UNICODE as u8,
283 Some(HT) => HT_UNICODE as u8,
284 Some(UNICODE) => {
285 deserializer.reader.discard();
286 return parse_unicode(deserializer, vec);
287 }
288 Some(_) => return unexpected_character!(deserializer),
289 None => return unexpected_eoj!(deserializer),
290 },
291 );
292 deserializer.reader.discard();
293 Ok(())
294 }
295
296 // Parses unicode
parse_unicode<R: Cacheable>( deserializer: &mut Deserializer<R>, vec: &mut Vec<u8>, ) -> Result<(), Error>297 fn parse_unicode<R: Cacheable>(
298 deserializer: &mut Deserializer<R>,
299 vec: &mut Vec<u8>,
300 ) -> Result<(), Error> {
301 // Reads a hexadecimal number.
302 #[inline]
303 fn get_next_digit<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<u16, Error> {
304 if let Some(ch) = deserializer.reader.peek().map_err(Error::new_reader)? {
305 let result = match ch {
306 ZERO..=NINE => ch as u16 - ZERO as u16,
307 A_LOWER..=F_LOWER => ch as u16 - A_LOWER as u16 + 10,
308 A_UPPER..=F_UPPER => ch as u16 - A_UPPER as u16 + 10,
309 _ => return unexpected_character!(deserializer),
310 };
311 deserializer.reader.discard();
312 return Ok(result);
313 }
314 unexpected_eoj!(deserializer)
315 }
316
317 // Reads four hexadecimal digits consecutively.
318 #[inline]
319 fn get_next_four_digits<R: Cacheable>(
320 deserializer: &mut Deserializer<R>,
321 ) -> Result<u16, Error> {
322 Ok(get_next_digit(deserializer)? << 12
323 | get_next_digit(deserializer)? << 8
324 | get_next_digit(deserializer)? << 4
325 | get_next_digit(deserializer)?)
326 }
327
328 // Unicode character logic: \uXXXX or \uXXXX\uXXXX
329 let unicode1 = get_next_four_digits(deserializer)?;
330 let unicode = match char::try_from(unicode1 as u32) {
331 Ok(code) => code,
332 Err(_) => {
333 match_str!(deserializer, UNICODE_START_STR);
334
335 match core::char::decode_utf16(
336 [unicode1, get_next_four_digits(deserializer)?]
337 .iter()
338 .copied(),
339 )
340 .next()
341 {
342 Some(Ok(code)) => code,
343 _ => return Err(Error::Utf8Transform),
344 }
345 }
346 };
347 vec.extend_from_slice(unicode.encode_utf8(&mut [0; 4]).as_bytes());
348 Ok(())
349 }
350
351 // Matches number.
parse_number<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<Number, Error>352 pub(crate) fn parse_number<R: Cacheable>(
353 deserializer: &mut Deserializer<R>,
354 ) -> Result<Number, Error> {
355 // Sets the starting position of the string.
356 deserializer.reader.start_caching();
357
358 // `neg\dot\exp` determines which of `u64\i64\f64` will be used to represent the final number.
359 let mut neg = false;
360 let mut dot = false;
361 let mut exp = false;
362
363 // Matches '-', JSON syntax does not match '+'
364 if let Some(MINUS) = deserializer.reader.peek().map_err(Error::new_reader)? {
365 deserializer.reader.discard();
366 neg = true;
367 }
368 // `next_ch` temporarily saves unmatched characters after peek.
369 // Used to reduce the number of repeated peeks.
370 let mut next_ch = match deserializer.reader.peek().map_err(Error::new_reader)? {
371 // The integer part cannot have a leading 0, so if it encounters a 0 here,
372 // it enters the value 0 state directly.
373 Some(ZERO) => {
374 deserializer.reader.discard();
375 // The reason to peek here is to compare with
376 // Some(ONE... =NINE) branches keep the same return value.
377 deserializer.reader.peek().map_err(Error::new_reader)?
378 }
379 Some(ONE..=NINE) => {
380 // Matches one digit character first. Ensure that there is at least one digit character.
381 deserializer.reader.discard();
382 // Matches as many numeric characters as possible.
383 eat_digits_until_not!(deserializer)
384 }
385 Some(_) => return unexpected_character!(deserializer),
386 None => return unexpected_eoj!(deserializer),
387 };
388
389 // If there is a decimal point, matches fractional part.
390 if let Some(DECIMAL_POINT) = next_ch {
391 deserializer.reader.discard();
392 dot = true;
393
394 // Matches a numeric character.
395 match deserializer.reader.peek().map_err(Error::new_reader)? {
396 Some(ZERO..=NINE) => deserializer.reader.discard(),
397 Some(_) => return unexpected_character!(deserializer),
398 None => return unexpected_eoj!(deserializer),
399 };
400 //Saves the extra characters for the next match.
401 next_ch = eat_digits_until_not!(deserializer)
402 }
403
404 // If e is present, matches exponential part.
405 if let Some(E_LOWER | E_UPPER) = next_ch {
406 deserializer.reader.discard();
407 exp = true;
408 // Try to match the sign of the exponential part, which can be without the sign.
409 match deserializer.reader.peek().map_err(Error::new_reader)? {
410 Some(PLUS | MINUS) => deserializer.reader.discard(),
411 Some(_) => {}
412 None => return unexpected_eoj!(deserializer),
413 }
414 // Matches a numeric character.
415 match deserializer.reader.peek().map_err(Error::new_reader)? {
416 Some(ZERO..=NINE) => deserializer.reader.discard(),
417 Some(_) => return unexpected_character!(deserializer),
418 None => return unexpected_eoj!(deserializer),
419 };
420 // Matches the remaining numeric characters.
421 eat_digits_until_not!(deserializer);
422 }
423
424 // The contents of u8 have been checked, so the unchecked method can be used here.
425 let str =
426 unsafe { core::str::from_utf8_unchecked(deserializer.reader.cached_slice().unwrap()) };
427 let number = match (neg, dot, exp) {
428 (false, false, false) => {
429 Number::Unsigned(str.parse::<u64>().map_err(|_| Error::ParseNumber)?)
430 }
431 (true, false, false) => Number::Signed(str.parse::<i64>().map_err(|_| Error::ParseNumber)?),
432 (_, _, _) => Number::Float(str.parse::<f64>().map_err(|_| Error::ParseNumber)?),
433 };
434
435 deserializer.reader.end_caching();
436 Ok(number)
437 }
438
parse_array<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error>439 fn parse_array<R: Cacheable>(deserializer: &mut Deserializer<R>) -> Result<JsonValue, Error> {
440 enum InnerState {
441 Start,
442 AfterComma,
443 NoComma,
444 }
445
446 deserializer.recursion_depth += 1;
447 check_recursion(deserializer)?;
448
449 // Creates an Array to store value.
450 let mut array = Array::new();
451 // The initial status is Start.
452 let mut state = InnerState::Start;
453
454 loop {
455 match (state, eat_whitespace_until_not!(deserializer)) {
456 // In the initial state, if "]" is encountered, meaning the array is empty.
457 (InnerState::Start, Some(RIGHT_SQUARE_BRACKET)) => break,
458 // If in the initial state or "," has appeared,
459 // matches key-value pairs when any character is encountered.
460 (InnerState::Start | InnerState::AfterComma, _) => {
461 array.push(parse_value(deserializer)?);
462
463 // Here sets the state to NoComma.
464 state = InnerState::NoComma;
465 }
466 // In NoComma state, the array ends when "]" is encountered.
467 (InnerState::NoComma, Some(RIGHT_SQUARE_BRACKET)) => break,
468 // In the NoComma state, when "," is encountered, converts to the HaveComma state.
469 (InnerState::NoComma, Some(COMMA)) => {
470 deserializer.reader.discard();
471 state = InnerState::AfterComma;
472 }
473 // In the NoComma state, it is illegal to encounter any other character.
474 (InnerState::NoComma, Some(_)) => return unexpected_character!(deserializer),
475 // In all cases, None is illegal.
476 (_, None) => return unexpected_eoj!(deserializer),
477 }
478 }
479 deserializer.reader.discard();
480 deserializer.recursion_depth -= 1;
481 Ok(JsonValue::Array(array))
482 }
483
read_error_char<R: Cacheable>( deserializer: &mut Deserializer<R>, ) -> Result<Option<char>, Error>484 pub(crate) fn read_error_char<R: Cacheable>(
485 deserializer: &mut Deserializer<R>,
486 ) -> Result<Option<char>, Error> {
487 const CONT_MASK: u8 = 0b0011_1111;
488
489 #[inline]
490 fn utf8_first_byte(byte: u8, width: u32) -> u32 {
491 (byte & (0x7F >> width)) as u32
492 }
493
494 #[inline]
495 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
496 (ch << 6) | (byte & CONT_MASK) as u32
497 }
498
499 let x = match deserializer.reader.next().map_err(Error::new_reader)? {
500 Some(x) => x,
501 None => return Ok(None),
502 };
503
504 let ch = if x < 128 {
505 x as u32
506 } else {
507 let init = utf8_first_byte(x, 2);
508
509 let y = match deserializer.reader.next().map_err(Error::new_reader)? {
510 Some(y) => y,
511 None => return Ok(None),
512 };
513
514 let mut ch = utf8_acc_cont_byte(init, y);
515
516 if x >= 0xE0 {
517 let z = match deserializer.reader.next().map_err(Error::new_reader)? {
518 Some(z) => z,
519 None => return Ok(None),
520 };
521
522 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
523 ch = init << 12 | y_z;
524
525 if x >= 0xF0 {
526 let w = match deserializer.reader.next().map_err(Error::new_reader)? {
527 Some(w) => w,
528 None => return Ok(None),
529 };
530 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
531 }
532 }
533 ch
534 };
535 unsafe { Ok(Some(char::from_u32_unchecked(ch))) }
536 }
537
538 #[cfg(test)]
539 mod ut_states {
540 use crate::reader::BytesReader;
541 use crate::states::*;
542 use std::io::{ErrorKind, Read};
543
// Reader stub whose `read` always fails with `ErrorKind::AddrInUse`; used to drive the
// I/O error paths of the parser.
struct ErrorIo;

impl Read for ErrorIo {
    fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
        let failure = std::io::Error::from(ErrorKind::AddrInUse);
        Err(failure)
    }
}
551
/// UT test for macro `eat_whitespace_until_not`.
///
/// # Title
/// ut_macro_eat_whitespace_until_not
///
/// # Brief
/// 1. Constructs inputs with a trailing byte, with whitespace only, and a failing reader.
/// 2. Expands macro `eat_whitespace_until_not` on each of them.
/// 3. Checks the byte (or error) that comes back.
#[test]
fn ut_macro_eat_whitespace_until_not() {
    // The macro uses `?`, so it has to be expanded inside a function returning `Result`.
    fn skip_blanks<R: BytesReader + Cacheable>(
        deserializer: &mut Deserializer<R>,
    ) -> Result<Option<u8>, Error> {
        let upcoming = eat_whitespace_until_not!(deserializer);
        Ok(upcoming)
    }

    let cases: [(&[u8], Option<u8>); 2] = [(b" n", Some(b'n')), (b" ", None)];
    for &(input, expected) in cases.iter() {
        let mut deserializer = Deserializer::new_from_slice(input);
        assert_eq!(skip_blanks(&mut deserializer).unwrap(), expected);
    }

    // A failing reader must surface as an error.
    let mut broken = Deserializer::new_from_io(ErrorIo);
    assert!(skip_blanks(&mut broken).is_err());
}
578
/// UT test for macro `eat_digits_until_not`.
///
/// # Title
/// ut_macro_eat_digits_until_not
///
/// # Brief
/// 1. Constructs inputs with a trailing byte, with digits only, and a failing reader.
/// 2. Expands macro `eat_digits_until_not` on each of them.
/// 3. Checks the byte (or error) that comes back.
#[test]
fn ut_macro_eat_digits_until_not() {
    // The macro uses `?`, so it has to be expanded inside a function returning `Result`.
    fn skip_digits<R: BytesReader + Cacheable>(
        deserializer: &mut Deserializer<R>,
    ) -> Result<Option<u8>, Error> {
        let upcoming = eat_digits_until_not!(deserializer);
        Ok(upcoming)
    }

    let cases: [(&[u8], Option<u8>); 2] = [(b"1234n", Some(b'n')), (b"1234", None)];
    for &(input, expected) in cases.iter() {
        let mut deserializer = Deserializer::new_from_slice(input);
        assert_eq!(skip_digits(&mut deserializer).unwrap(), expected);
    }

    // A failing reader must surface as an error.
    let mut broken = Deserializer::new_from_io(ErrorIo);
    assert!(skip_digits(&mut broken).is_err());
}
605
/// UT test for macro `match_str`.
///
/// # Title
/// ut_macro_match_str
///
/// # Brief
/// 1. Constructs a matching input and a failing reader.
/// 2. Expands macro `match_str` on each of them.
/// 3. Checks that the first succeeds and the second fails.
#[test]
fn ut_macro_match_str() {
    // The macro returns early on failure, so it is wrapped in a function of its own.
    fn expect_bytes<R: Cacheable>(
        deserializer: &mut Deserializer<R>,
        target: &[u8],
    ) -> Result<(), Error> {
        match_str!(deserializer, target);
        Ok(())
    }

    let mut matching = Deserializer::new_from_slice(b"1234");
    let outcome = expect_bytes(&mut matching, b"1234");
    assert!(outcome.is_ok());

    let mut broken = Deserializer::new_from_io(ErrorIo);
    let outcome = expect_bytes(&mut broken, b"1234");
    assert!(outcome.is_err());
}
631
/// UT test for `start_parsing`.
///
/// # Title
/// ut_start_parsing
///
/// # Brief
/// 1. Constructs a complete document and one with trailing bytes.
/// 2. Calls `start_parsing` on each.
/// 3. Checks that the first parses and the second is rejected.
#[test]
fn ut_start_parsing() {
    fn parse(input: &[u8]) -> Result<JsonValue, Error> {
        let mut deserializer = Deserializer::new_from_slice(input);
        start_parsing(&mut deserializer)
    }

    assert_eq!(parse(b"null").unwrap(), JsonValue::Null);

    // Anything but whitespace after the value is an error.
    assert!(parse(b"null invalid").is_err());
}
649
/// UT test for `read_error_char`.
///
/// # Title
/// ut_read_error_char
///
/// # Brief
/// 1. Constructs a complete four-byte character, an empty input and truncated sequences.
/// 2. Calls `read_error_char`.
/// 3. Checks if the results are correct.
#[test]
fn ut_read_error_char() {
    // U+24B62 is encoded in UTF-8 as the four bytes F0 A4 AD A2, i.e. the complete form
    // of the truncated sequences used below.
    let mut deserializer = Deserializer::new_from_slice("\u{24B62}".as_bytes());
    assert_eq!(
        read_error_char(&mut deserializer).unwrap(),
        Some('\u{24B62}')
    );

    // No input at all.
    let mut deserializer = Deserializer::new_from_slice(&[]);
    assert_eq!(read_error_char(&mut deserializer).unwrap(), None);

    // Input ending after one, two and three bytes of a four-byte sequence.
    let mut deserializer = Deserializer::new_from_slice(&[0xf0]);
    assert_eq!(read_error_char(&mut deserializer).unwrap(), None);

    let mut deserializer = Deserializer::new_from_slice(&[0xf0, 0xa4]);
    assert_eq!(read_error_char(&mut deserializer).unwrap(), None);

    let mut deserializer = Deserializer::new_from_slice(&[0xf0, 0xa4, 0xad]);
    assert_eq!(read_error_char(&mut deserializer).unwrap(), None);
}
676
/// UT test for `parse_value`.
///
/// # Title
/// ut_parse_value
///
/// # Brief
/// 1. Constructs one input per kind of JSON value.
/// 2. Calls `parse_value` on each.
/// 3. Checks the literals for their exact value and the rest for success.
#[test]
fn ut_parse_value() {
    fn parse(input: &[u8]) -> Result<JsonValue, Error> {
        let mut deserializer = Deserializer::new_from_slice(input);
        parse_value(&mut deserializer)
    }

    // Literals have to come back as exactly the matching value.
    assert_eq!(parse(b"null").unwrap(), JsonValue::Null);
    assert_eq!(parse(b"true").unwrap(), JsonValue::Boolean(true));
    assert_eq!(parse(b"false").unwrap(), JsonValue::Boolean(false));

    // Number, string, array, object (and the string once more) only need to parse.
    let accepted: [&[u8]; 5] = [
        b"123",
        b"\"abc\"",
        b"[1, 2, 3]",
        b"{\"key\":\"value\"}",
        b"\"abc\"",
    ];
    for &input in accepted.iter() {
        assert!(parse(input).is_ok());
    }
}
718
/// UT test for `parse_string`.
///
/// # Title
/// ut_parse_string
///
/// # Brief
/// 1. Creates an instance of Reader.
/// 2. Calls the parsing function of State.
/// 3. Checks if the results are correct.
#[test]
fn ut_parse_string() {
    // Builds the expected value in whichever string type `parse_string` returns.
    #[cfg(not(feature = "c_adapter"))]
    fn expected(text: &str) -> String {
        String::from(text)
    }
    #[cfg(feature = "c_adapter")]
    fn expected(text: &str) -> std::ffi::CString {
        std::ffi::CString::new(text).unwrap()
    }

    // The opening '"' is consumed before `parse_string` is entered, so every input starts
    // directly after it. Each pair is (input, expected content).
    let accepted = [
        // Empty string.
        ("\"", ""),
        // General characters.
        (
            "abcdefghijklmnopqrstuvwxyz1234567890-=~!@#$%^&*()_+[]{}|<>?:;'\"",
            "abcdefghijklmnopqrstuvwxyz1234567890-=~!@#$%^&*()_+[]{}|<>?:;'",
        ),
        // Escape characters.
        (
            r#"\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t""#,
            "/\\\"\u{CAFE}\u{BABE}\u{AB98}\u{FCDE}\u{bcda}\u{ef4A}\u{0008}\u{000c}\n\r\t",
        ),
        // Surrogate pair: D852 DF62 combine to U+24B62.
        (r#"\uD852\uDF62""#, "\u{24B62}"),
    ];
    for &(input, content) in accepted.iter() {
        let mut deserializer = Deserializer::new_from_slice(input.as_bytes());
        assert_eq!(parse_string(&mut deserializer).unwrap(), expected(content));
    }

    // Error scenes.
    let rejected = [
        // 1. No trailing quote ends the string.
        "abc",
        // 2. Illegal escape character.
        r#"\g""#,
        // 3. A backslash is followed by the end of input.
        r#"\"#,
        // 4. Unicode escapes: missing closing quote, non-hex digit, missing digits,
        //    and a high surrogate followed by another high surrogate.
        r#"\uBEEF"#,
        r#"\uZ000"#,
        r#"\u"#,
        r#"\uD852\uDB00""#,
        // 5. Raw control character.
        "\u{0}",
    ];
    for &input in rejected.iter() {
        let mut deserializer = Deserializer::new_from_slice(input.as_bytes());
        assert!(parse_string(&mut deserializer).is_err());
    }
}
829
/// UT test for `parse_number`.
///
/// # Title
/// ut_parse_number
///
/// # Brief
/// 1. Builds a table of inputs that must yield a number and a list that must fail.
/// 2. Calls `parse_number` on every input.
/// 3. Checks the parsed value, or that an error is returned.
#[test]
fn ut_parse_number() {
    fn parse(input: &str) -> Result<Number, Error> {
        let mut deserializer = Deserializer::new_from_slice(input.as_bytes());
        parse_number(&mut deserializer)
    }

    // Inputs that yield a number. Where the text continues after the number, only the
    // leading part is read; the rest would make a *subsequent* match fail.
    let accepted = vec![
        ("0", Number::from(0)),
        ("-0", Number::from(0)),
        ("0.123e+4", Number::from(1230)),
        // Leading zeros: only the first 0 is read.
        ("00001234", Number::from(0)),
        // Other character in the integer part: only 12 is read.
        ("12a34", Number::from(12)),
        // Other character in the fraction: only 12.3 is read.
        ("12.3a4", Number::from(12.3)),
        // Other character in the exponent: only 12.34e+2 is read.
        ("12.34e+2a3", Number::from(1234)),
    ];
    for (input, expected) in accepted {
        assert_eq!(parse(input).unwrap(), expected);
    }

    // Inputs that must be rejected.
    let rejected = [
        // No number exists.
        "",
        // Starts with a non-numeric character.
        "a123",
        // There is no integer part.
        ".123",
        // Positive numbers must not carry a plus sign.
        "+1234",
        // The decimal point is not followed by a digit.
        "12.a34",
        "12.",
        // The exponent has no digits.
        "12.34e",
        "12.34ea",
        "12.34e+",
    ];
    for &input in rejected.iter() {
        assert!(parse(input).is_err());
    }
}
926
/// UT test for `parse_array`.
///
/// # Title
/// ut_parse_array
///
/// # Brief
/// 1. Builds a `Deserializer` over a byte slice for each input text.
/// 2. Calls `parse_array` on that deserializer.
/// 3. Checks that well-formed inputs produce the expected array and that
///    malformed or truncated inputs produce an error.
#[test]
fn ut_parse_array() {
    // `parse_array` is entered after the opening '[' has already been matched,
    // so every input below starts right behind that bracket.

    // An array that is closed immediately (with or without leading whitespace) is empty.
    for text in ["]", " ]"].iter() {
        let mut deserializer = Deserializer::new_from_slice(text.as_bytes());
        assert_eq!(parse_array(&mut deserializer).unwrap(), Array::new().into());
    }

    // Three numeric elements, once with spaces after the commas and once fully compact.
    for text in ["1, 2, 3]", "1,2,3]"].iter() {
        let mut deserializer = Deserializer::new_from_slice(text.as_bytes());
        let expected = array!(1u8, 2u8, 3u8);
        assert_eq!(parse_array(&mut deserializer).unwrap(), expected.into());
    }

    // Error scenes.
    // 1. The text ends before the closing ']' is seen.
    for text in ["", "1 "].iter() {
        let mut deserializer = Deserializer::new_from_slice(text.as_bytes());
        assert!(parse_array(&mut deserializer).is_err());
    }

    // 2. Two values that are not separated by ','.
    let mut deserializer = Deserializer::new_from_slice("1 2".as_bytes());
    assert!(parse_array(&mut deserializer).is_err());

    // 3. A trailing ',' directly in front of the closing ']'.
    let mut deserializer = Deserializer::new_from_slice("1, 2,]".as_bytes());
    assert!(parse_array(&mut deserializer).is_err());
}
985
/// UT test for `parse_object`.
///
/// # Title
/// ut_parse_object
///
/// # Brief
/// 1. Builds a `Deserializer` over a byte slice for each input text.
/// 2. Calls `parse_object` on that deserializer.
/// 3. Checks that well-formed inputs produce the expected object and that
///    malformed or truncated inputs produce an error.
#[test]
fn ut_parse_object() {
    // `parse_object` is entered after the opening '{' has already been matched,
    // so every input below starts right behind that brace.

    // An object that is closed immediately is empty.
    let mut deserializer = Deserializer::new_from_slice("}".as_bytes());
    assert_eq!(
        parse_object(&mut deserializer).unwrap(),
        Object::new().into()
    );

    // Two string members, once with a space after the separating ',' and once without.
    let two_member_texts = [
        r#""key1": "value", "key2": "value"}"#,
        r#""key1": "value","key2": "value"}"#,
    ];
    for text in two_member_texts.iter() {
        let mut deserializer = Deserializer::new_from_slice(text.as_bytes());
        let expected = object!("key1" => "value"; "key2" => "value");
        assert_eq!(parse_object(&mut deserializer).unwrap(), expected.into());
    }

    // Error scenes.
    // 1. The text ends before anything can be read.
    let mut deserializer = Deserializer::new_from_slice("".as_bytes());
    assert!(parse_object(&mut deserializer).is_err());

    // 2. A ',' shows up before any member.
    let mut deserializer = Deserializer::new_from_slice(",".as_bytes());
    assert!(parse_object(&mut deserializer).is_err());

    // 3. A trailing ',' directly in front of the closing '}'.
    let trailing_comma = r#""key1": "value", "key2": "value",}"#;
    let mut deserializer = Deserializer::new_from_slice(trailing_comma.as_bytes());
    assert!(parse_object(&mut deserializer).is_err());

    // 4. The key is not followed by ':' (another character, or the end of the text).
    for text in [r#""key1"t"#, r#""key1""#].iter() {
        let mut deserializer = Deserializer::new_from_slice(text.as_bytes());
        assert!(parse_object(&mut deserializer).is_err());
    }

    // 5. A stray character after a value, or where the next key should start.
    for text in [r#""key1": 1 t"#, r#""key1": 1, t"#].iter() {
        let mut deserializer = Deserializer::new_from_slice(text.as_bytes());
        assert!(parse_object(&mut deserializer).is_err());
    }
}
/// UT test for the recursion limit.
///
/// # Title
/// ut_recursion_limit
///
/// # Brief
/// 1. Builds nested arrays and nested objects that sit exactly at the
///    recursion limit and one level beyond it.
/// 2. Calls `start_parsing` on a `Deserializer` over each text.
/// 3. Checks that nesting at the limit is accepted and nesting beyond it is rejected.
#[test]
fn ut_recursion_limit() {
    // Arrays: 4 rows of 32 brackets give 128 levels, which this test expects to be accepted.
    let at_limit = r#"
        [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
        [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
        [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
        [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
        ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
        ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
        ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
        ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
        "#;
    let mut deserializer = Deserializer::new_from_slice(at_limit.as_bytes());
    assert!(start_parsing(&mut deserializer).is_ok());

    // Arrays: one extra bracket pair gives 129 levels, which must be rejected.
    let over_limit = r#"
        [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
        [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
        [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
        [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
        ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
        ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
        ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
        ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
        "#;
    let mut deserializer = Deserializer::new_from_slice(over_limit.as_bytes());
    assert!(start_parsing(&mut deserializer).is_err());

    // Objects: start from one layer and wrap the innermost value in another
    // layer once per fold step, so N steps give N + 1 layers.
    const LEAF: &str = r#""value""#;
    const LAYER: &str = r#"{"key":"value"}"#;

    // RECURSION_LIMIT - 1 extra layers: nesting depth equals the limit.
    let at_limit =
        (0..RECURSION_LIMIT - 1).fold(String::from(LAYER), |nested, _| nested.replace(LEAF, LAYER));
    let mut deserializer = Deserializer::new_from_slice(at_limit.as_bytes());
    assert!(start_parsing(&mut deserializer).is_ok());

    // RECURSION_LIMIT extra layers: nesting depth is one beyond the limit.
    let over_limit =
        (0..RECURSION_LIMIT).fold(String::from(LAYER), |nested, _| nested.replace(LEAF, LAYER));
    let mut deserializer = Deserializer::new_from_slice(over_limit.as_bytes());
    assert!(start_parsing(&mut deserializer).is_err());
}
1115 }
1116