1 //
2 // Copyright (C) 2018 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "tokenizer.h"
18 
19 #include <string>
20 #include <vector>
21 
22 #include <gtest/gtest.h>
23 
24 namespace android {
25 namespace init {
26 
27 namespace {
28 
RunTest(const std::string & data,const std::vector<std::vector<std::string>> & expected_tokens)29 void RunTest(const std::string& data, const std::vector<std::vector<std::string>>& expected_tokens) {
30     auto data_copy = std::string{data};
31     data_copy.push_back('\n');
32     data_copy.push_back('\0');
33 
34     parse_state state;
35     state.line = 0;
36     state.ptr = data_copy.data();
37     state.nexttoken = 0;
38 
39     std::vector<std::string> current_line;
40     std::vector<std::vector<std::string>> tokens;
41 
42     while (true) {
43         switch (next_token(&state)) {
44             case T_EOF:
45                 EXPECT_EQ(expected_tokens, tokens) << data;
46                 return;
47             case T_NEWLINE:
48                 tokens.emplace_back(std::move(current_line));
49                 current_line.clear();
50                 break;
51             case T_TEXT:
52                 current_line.emplace_back(state.text);
53                 break;
54         }
55     }
56 }
57 
58 }  // namespace
59 
// Empty input is expected to produce exactly one line that contains no tokens.
TEST(tokenizer, null) {
    const std::string input;
    const std::vector<std::vector<std::string>> expected = {{}};
    RunTest(input, expected);
}
63 
// A space, a tab and a carriage return each separate tokens that stay on the same line.
TEST(tokenizer, simple_oneline) {
    const std::string input = "one two\tthree\rfour";
    const std::vector<std::vector<std::string>> expected = {{"one", "two", "three", "four"}};
    RunTest(input, expected);
}
67 
// Each newline in the input closes the current line of tokens and starts a new one.
TEST(tokenizer, simple_multiline) {
    const std::string input = "1 2 3\n4 5 6\n7 8 9";
    const std::vector<std::vector<std::string>> expected = {
            {"1", "2", "3"}, {"4", "5", "6"}, {"7", "8", "9"}};
    RunTest(input, expected);
}
71 
TEST(tokenizer, preceding_space) {
    // Spaces, tabs and carriage returns ahead of a line's first token are ignored.
    const std::string input = "    1 2 3\n\t\t\t\t4 5 6\n\r\r\r\r7 8 9";
    const std::vector<std::vector<std::string>> expected = {
            {"1", "2", "3"}, {"4", "5", "6"}, {"7", "8", "9"}};
    RunTest(input, expected);
}
77 
TEST(tokenizer, comments) {
    struct Case {
        std::string input;
        std::vector<std::vector<std::string>> expected;
    };

    // A line that is entirely a comment still yields a T_NEWLINE (seen here as an empty line), so
    // the line count stays correct. A '#' later in a line drops the rest of that line.
    const Case cases[] = {
            // Commented-out line in the middle, at the start, and at the end.
            {"1 2 3\n#4 5 6\n7 8 9", {{"1", "2", "3"}, {}, {"7", "8", "9"}}},
            {"#1 2 3\n4 5 6\n7 8 9", {{}, {"4", "5", "6"}, {"7", "8", "9"}}},
            {"1 2 3\n4 5 6\n#7 8 9", {{"1", "2", "3"}, {"4", "5", "6"}, {}}},
            // Comments beginning part-way through a line.
            {"1 2 #3\n4 #5 6\n#7 8 9", {{"1", "2"}, {"4"}, {}}},
    };

    for (const Case& c : cases) {
        RunTest(c.input, c.expected);
    }
}
88 
TEST(tokenizer, control_chars) {
    struct Case {
        std::string input;
        std::vector<std::vector<std::string>> expected;
    };

    // A backslash followed by n, r, t or another backslash is turned into a newline, carriage
    // return, tab or single backslash. A backslash in front of any other character simply yields
    // that character (shown here with 'b').
    const Case cases[] = {
            // Escape in the middle of a token.
            {R"(1 token\ntoken 2)", {{"1", "token\ntoken", "2"}}},
            {R"(1 token\rtoken 2)", {{"1", "token\rtoken", "2"}}},
            {R"(1 token\ttoken 2)", {{"1", "token\ttoken", "2"}}},
            {R"(1 token\\token 2)", {{"1", "token\\token", "2"}}},
            {R"(1 token\btoken 2)", {{"1", "tokenbtoken", "2"}}},

            // Escape at the end of a token.
            {R"(1 token\n 2)", {{"1", "token\n", "2"}}},
            {R"(1 token\r 2)", {{"1", "token\r", "2"}}},
            {R"(1 token\t 2)", {{"1", "token\t", "2"}}},
            {R"(1 token\\ 2)", {{"1", "token\\", "2"}}},
            {R"(1 token\b 2)", {{"1", "tokenb", "2"}}},

            // Escape at the start of a token.
            {R"(1 \ntoken 2)", {{"1", "\ntoken", "2"}}},
            {R"(1 \rtoken 2)", {{"1", "\rtoken", "2"}}},
            {R"(1 \ttoken 2)", {{"1", "\ttoken", "2"}}},
            {R"(1 \\token 2)", {{"1", "\\token", "2"}}},
            {R"(1 \btoken 2)", {{"1", "btoken", "2"}}},

            // Escape forming a token by itself.
            {R"(1 \n 2)", {{"1", "\n", "2"}}},
            {R"(1 \r 2)", {{"1", "\r", "2"}}},
            {R"(1 \t 2)", {{"1", "\t", "2"}}},
            {R"(1 \\ 2)", {{"1", "\\", "2"}}},
            {R"(1 \b 2)", {{"1", "b", "2"}}},
    };

    for (const Case& c : cases) {
        RunTest(c.input, c.expected);
    }
}
117 
TEST(tokenizer, cr_lf) {
    struct Case {
        std::string input;
        std::vector<std::vector<std::string>> expected;
    };

    // A backslash directly before \n, \r, or \r\n acts as a line continuation.
    // Extra whitespace at the start of the continued line is eaten, with the exception of \r
    // (unlike in the earlier tests).
    const Case cases[] = {
            // Continuation in the middle of a token: both halves are joined into one token.
            {"lf\\\ncont", {{"lfcont"}}},
            {"lf\\\n    \t\t\t\tcont", {{"lfcont"}}},

            {"crlf\\\r\ncont", {{"crlfcont"}}},
            {"crlf\\\r\n    \t\t\t\tcont", {{"crlfcont"}}},

            {"cr\\\rcont", {{"crcont"}}},

            // Continuation after a space: the two tokens stay separate but share one line.
            {"lfspace \\\ncont", {{"lfspace", "cont"}}},
            {"lfspace \\\n    \t\t\t\tcont", {{"lfspace", "cont"}}},

            {"crlfspace \\\r\ncont", {{"crlfspace", "cont"}}},
            {"crlfspace \\\r\n    \t\t\t\tcont", {{"crlfspace", "cont"}}},

            {"crspace \\\rcont", {{"crspace", "cont"}}},
    };

    for (const Case& c : cases) {
        RunTest(c.input, c.expected);
    }
}
138 
TEST(tokenizer, quoted) {
    struct Case {
        std::string input;
        std::vector<std::vector<std::string>> expected;
    };

    const Case cases[] = {
            // Whitespace inside double quotes does not split the token.
            {"\"quoted simple string\"", {{"quoted simple string"}}},

            // Unterminated quotes just return T_EOF without any T_NEWLINE.
            {"\"unterminated quoted string", {}},

            // Lines completed before the unterminated quote are still reported.
            {"\"1 2 3\"\n \"unterminated quoted string", {{"1 2 3"}}},

            // Escaping a quote is not allowed; such input is treated as an unterminated quoted
            // string.
            {"\"quoted escaped quote\\\"\"", {}},
            {"\"quoted escaped\\\" quote\"", {}},
            {"\"\\\"quoted escaped quote\"", {}},

            // Backslash sequences and raw \r / \n inside quotes are kept exactly as written.
            {"\"quoted control characters \\n \\r \\t \\\\ \\b \\\r \\\n \r \n\"",
             {{"quoted control characters \\n \\r \\t \\\\ \\b \\\r \\\n \r \n"}}},

            // Two quoted strings separated by a space are two tokens.
            {"\"quoted simple string\" \"second quoted string\"",
             {{"quoted simple string", "second quoted string"}}},

            // '#' inside quotes does not start a comment.
            {"\"# comment quoted string\"", {{"# comment quoted string"}}},

            // Quoted strings with nothing between them are merged into a single token.
            {"\"Adjacent \"\"quoted strings\"", {{"Adjacent quoted strings"}}},
    };

    for (const Case& c : cases) {
        RunTest(c.input, c.expected);
    }
}
162 
163 }  // namespace init
164 }  // namespace android
165