1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "parser/lexer.h"
17
18 #include <utility>
19 #include <cstdlib>
20 #include <limits>
21 #include "util/string_builder.h"
22
23 namespace OHOS {
24 namespace Idl {
// Constructs a lexer and populates the keyword -> token lookup table.
Lexer::Lexer()
{
    InitializeKeywords();
}
29
~Lexer()30 Lexer::~Lexer()
31 {
32 if (currentFile_ != nullptr) {
33 currentFile_->Close();
34 }
35 }
36
InitializeKeywords()37 void Lexer::InitializeKeywords()
38 {
39 keywords_[String("boolean")] = Token::BOOLEAN;
40 keywords_[String("byte")] = Token::BYTE;
41 keywords_[String("char")] = Token::CHAR;
42 keywords_[String("double")] = Token::DOUBLE;
43 keywords_[String("float")] = Token::FLOAT;
44 keywords_[String("in")] = Token::IN;
45 keywords_[String("inout")] = Token::INOUT;
46 keywords_[String("int")] = Token::INTEGER;
47 keywords_[String("interface")] = Token::INTERFACE;
48 keywords_[String("List")] = Token::LIST;
49 keywords_[String("long")] = Token::LONG;
50 keywords_[String("Map")] = Token::MAP;
51 keywords_[String("oneway")] = Token::ONEWAY;
52 keywords_[String("out")] = Token::OUT;
53 keywords_[String("sequenceable")] = Token::SEQUENCEABLE;
54 keywords_[String("short")] = Token::SHORT;
55 keywords_[String("String")] = Token::STRING;
56 keywords_[String("cacheable")] = Token::CACHEABLE;
57 }
58
OpenSourceFile(const String & filePath)59 bool Lexer::OpenSourceFile(const String& filePath)
60 {
61 currentFile_ = std::make_shared<File>(filePath, File::READ);
62 if (!currentFile_->IsValid()) {
63 return false;
64 }
65
66 return true;
67 }
68
GetToken(bool skipComment)69 Token Lexer::GetToken(bool skipComment)
70 {
71 if (!havePeek_) {
72 currentToken_ = ReadToken(skipComment);
73 }
74 havePeek_ = false;
75 return currentToken_;
76 }
77
PeekToken(bool skipComment)78 Token Lexer::PeekToken(bool skipComment)
79 {
80 if (!havePeek_) {
81 currentToken_ = ReadToken(skipComment);
82 havePeek_ = true;
83 }
84 return currentToken_;
85 }
86
// Scans the next token from the current file.
// Skips whitespace, records the token's start position, and dispatches to
// the identifier/comment sub-readers. Returns END_OF_FILE at EOF.
Token Lexer::ReadToken(bool skipComment)
{
    while (!currentFile_->IsEof()) {
        char c = currentFile_->GetChar();
        // Remember where this token starts, for diagnostics.
        tokenLineNo_ = currentFile_->GetCharLineNumber();
        tokenColumnNo_ = currentFile_->GetCharColumnNumber();
        if (IsSpace(c)) {
            continue;
        } else if (IsAlphabet(c) || c == '_') {
            // Keyword or identifier.
            return ReadIdentifier(c);
        }
        switch (c) {
            // Single-character punctuation maps directly to a token.
            case '<':
            case '>':
            case '{':
            case '}':
            case '[':
            case ']':
            case ',':
            case '(':
            case ')':
            case '.':
            case ';':
                currentToken_ = token_map_[c];
                return currentToken_;
            case '/':
                // Possible comment start. ReadTokenPeek returns true when the
                // resulting token (comment or UNKNOWN) should be surfaced;
                // false means the comment was skipped — keep scanning.
                if (ReadTokenPeek(skipComment, c)) {
                    return currentToken_;
                } else {
                    continue;
                }
            default:
                currentToken_ = Token::UNKNOWN;
                return currentToken_;
        }
    }
    currentToken_ = Token::END_OF_FILE;
    return currentToken_;
}
126
ReadTokenPeek(bool skipComment,char letter)127 bool Lexer::ReadTokenPeek(bool skipComment, char letter)
128 {
129 if (currentFile_->PeekChar() == '/') {
130 ReadLineComment(letter);
131 if (!skipComment) {
132 return true;
133 }
134 return false;
135 } else if (currentFile_->PeekChar() == '*') {
136 ReadBlockComment(letter);
137 if (!skipComment) {
138 return true;
139 }
140 return false;
141 }
142 currentToken_ = Token::UNKNOWN;
143 return true;
144 }
145
ReadIdentifier(char c)146 Token Lexer::ReadIdentifier(char c)
147 {
148 StringBuilder sb;
149
150 sb.Append(c);
151 while (!currentFile_->IsEof()) {
152 c = currentFile_->PeekChar();
153 if (IsAlphabet(c) || c == '_' || IsDecimalDigital(c) || c == '.') {
154 c = currentFile_->GetChar();
155 sb.Append(c);
156 continue;
157 }
158 if (IsSpace(c)) {
159 currentFile_->GetChar();
160 }
161 break;
162 }
163 String key = sb.ToString();
164 auto it = keywords_.find(key);
165 if (it == keywords_.end()) {
166 identifier_ = key;
167 currentToken_ = Token::IDENTIFIER;
168 } else {
169 currentToken_ = it->second;
170 }
171 return currentToken_;
172 }
173
ReadLineComment(char c)174 Token Lexer::ReadLineComment(char c)
175 {
176 StringBuilder sb;
177
178 sb.Append(c);
179 while (!currentFile_->IsEof()) {
180 c = currentFile_->GetChar();
181 if (c == '\n') {
182 break;
183 }
184 sb.Append(c);
185 }
186 comment_ = sb.ToString();
187 currentToken_ = Token::COMMENT_LINE;
188 return currentToken_;
189 }
190
ReadBlockComment(char c)191 Token Lexer::ReadBlockComment(char c)
192 {
193 StringBuilder sb;
194
195 sb.Append(c);
196 while (!currentFile_->IsEof()) {
197 c = currentFile_->GetChar();
198 sb.Append(c);
199 if (c == '*' && currentFile_->PeekChar() == '/') {
200 c = currentFile_->GetChar();
201 sb.Append(c);
202 break;
203 }
204 }
205 comment_ = sb.ToString();
206 currentToken_ = Token::COMMENT_BLOCK;
207 return currentToken_;
208 }
209
SkipCurrentLine()210 void Lexer::SkipCurrentLine()
211 {
212 while (!currentFile_->IsEof()) {
213 char c = currentFile_->GetChar();
214 if (c == '\n') {
215 currentFile_->GetChar();
216 return;
217 }
218 }
219 }
220
SkipCurrentLine(char untilChar)221 bool Lexer::SkipCurrentLine(char untilChar)
222 {
223 while (!currentFile_->IsEof()) {
224 int c = currentFile_->GetChar();
225 if (c == untilChar) {
226 return true;
227 }
228 if (c == '\n') {
229 currentFile_->GetChar();
230 return false;
231 }
232 }
233 return true;
234 }
235
TokenToChar(Token token)236 int Lexer::TokenToChar(Token token)
237 {
238 switch (token) {
239 case Token::ANGLE_BRACKETS_LEFT:
240 return '<';
241 case Token::ANGLE_BRACKETS_RIGHT:
242 return '>';
243 case Token::BRACES_LEFT:
244 return '{';
245 case Token::BRACES_RIGHT:
246 return '}';
247 case Token::BRACKETS_LEFT:
248 return '[';
249 case Token::BRACKETS_RIGHT:
250 return ']';
251 case Token::COMMA:
252 return ',';
253 case Token::DOT:
254 return '.';
255 case Token::PARENTHESES_LEFT:
256 return '(';
257 case Token::PARENTHESES_RIGHT:
258 return ')';
259 case Token::SEMICOLON:
260 return ';';
261 default:
262 return -1;
263 }
264 }
265
DumpToken() const266 String Lexer::DumpToken() const
267 {
268 switch (currentToken_) {
269 case Token::ANGLE_BRACKETS_LEFT:
270 return "<";
271 case Token::ANGLE_BRACKETS_RIGHT:
272 return ">";
273 case Token::BOOLEAN:
274 return "boolean";
275 case Token::BRACES_LEFT:
276 return "{";
277 case Token::BRACES_RIGHT:
278 return "}";
279 case Token::BRACKETS_LEFT:
280 return "[";
281 case Token::BRACKETS_RIGHT:
282 return "]";
283 case Token::BYTE:
284 return "byte";
285 case Token::CHAR:
286 return "char";
287 case Token::COMMA:
288 return ",";
289 case Token::COMMENT_BLOCK:
290 case Token::COMMENT_LINE:
291 return comment_;
292 case Token::DOT:
293 return ".";
294 default:
295 return DumpTokenSecond();
296 }
297 }
298
DumpTokenSecond() const299 String Lexer::DumpTokenSecond() const
300 {
301 switch (currentToken_) {
302 case Token::DOUBLE:
303 return "double";
304 case Token::END_OF_FILE:
305 return "eof";
306 case Token::FLOAT:
307 return "float";
308 case Token::IDENTIFIER:
309 return identifier_;
310 case Token::IN:
311 return "in";
312 case Token::INOUT:
313 return "inout";
314 case Token::INTEGER:
315 return "int";
316 case Token::LIST:
317 return "List";
318 case Token::LONG:
319 return "long";
320 case Token::MAP:
321 return "Map";
322 case Token::ONEWAY:
323 return "oneway";
324 case Token::OUT:
325 return "out";
326 case Token::SEQUENCEABLE:
327 return "sequenceable";
328 case Token::PARENTHESES_LEFT:
329 return "(";
330 case Token::PARENTHESES_RIGHT:
331 return ")";
332 case Token::SEMICOLON:
333 return ";";
334 case Token::SHORT:
335 return "short";
336 case Token::STRING:
337 return "String";
338 default:
339 return "unknown token";
340 }
341 }
342
strToInt(const char * str,int strLen,int & number)343 bool Lexer::strToInt(const char *str, int strLen, int& number)
344 {
345 int result = 0;
346 int positionWeight = 1;
347 const int ten = 10;
348
349 for (int i = strLen - 1; i >= 0; i--) {
350 if (str[i] < '0' || str[i] > '9') {
351 return false;
352 }
353 int digit = str[i] - '0';
354 if (static_cast<int64_t>(digit * positionWeight) > std::numeric_limits<int32_t>::max() - result) {
355 return false;
356 }
357 result += digit * positionWeight;
358 positionWeight *= ten;
359 }
360 number = result;
361 return true;
362 }
363
ParseCacheable(int & cacheTime)364 bool Lexer::ParseCacheable(int& cacheTime)
365 {
366 bool ret = true;
367 StringBuilder numbersb;
368
369 while (!currentFile_->IsEof()) {
370 char c = currentFile_->PeekChar();
371 if (IsSpace(c)) {
372 currentFile_->GetChar();
373 continue;
374 }
375 if (!IsDecimalDigital(c)) {
376 if (c != ']' && c != ',') {
377 ret = false;
378 }
379 break;
380 }
381 numbersb.Append(c);
382 currentFile_->GetChar();
383 }
384
385 if (ret == false) {
386 return ret;
387 }
388 String numberStr = numbersb.ToString();
389 if (numberStr.IsNull()) {
390 return false;
391 }
392
393 ret = strToInt(numberStr.string(), numberStr.GetLength(), cacheTime);
394 return ret;
395 }
396
} // namespace Idl
398 } // namespace OHOS
399