1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer/lexer.h"
17 #include "util/logger.h"
18 #include "util/string_builder.h"
19 #include "util/string_helper.h"
20
21 namespace OHOS {
22 namespace Idl {
23 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
24 {"void", TokenType::VOID },
25 {"boolean", TokenType::BOOLEAN },
26 {"byte", TokenType::BYTE },
27 {"short", TokenType::SHORT },
28 {"int", TokenType::INT },
29 {"long", TokenType::LONG },
30 {"String", TokenType::STRING },
31 {"String16", TokenType::STRING16 },
32 {"float", TokenType::FLOAT },
33 {"double", TokenType::DOUBLE },
34 {"FileDescriptor", TokenType::FD },
35 {"Ashmem", TokenType::ASHMEM },
36 {"NativeBuffer", TokenType::NATIVE_BUFFER},
37 {"Pointer", TokenType::POINTER },
38 {"List", TokenType::LIST },
39 {"Map", TokenType::MAP },
40 {"SharedMemQueue", TokenType::SMQ },
41 {"char", TokenType::CHAR },
42 {"unsigned", TokenType::UNSIGNED },
43 {"enum", TokenType::ENUM },
44 {"struct", TokenType::STRUCT },
45 {"union", TokenType::UNION },
46 {"package", TokenType::PACKAGE },
47 {"interface_token", TokenType::INTERFACE_TOKEN },
48 {"support_delegator", TokenType::SUPPORT_DELEGATOR },
49 {"sequenceable", TokenType::SEQ },
50 {"import", TokenType::IMPORT },
51 {"interface", TokenType::INTERFACE },
52 {"extends", TokenType::EXTENDS },
53 {"oneway", TokenType::ONEWAY },
54 {"callback", TokenType::CALLBACK },
55 {"freezecontrol", TokenType::FREEZECONTROL},
56 {"full", TokenType::FULL },
57 {"lite", TokenType::LITE },
58 {"mini", TokenType::MINI },
59 {"cacheable", TokenType::CACHEABLE },
60 {"in", TokenType::IN },
61 {"out", TokenType::OUT },
62 {"inout", TokenType::INOUT },
63 };
64
65 Lexer::StrTokenTypeMap Lexer::symbols_ = {
66 {".", TokenType::DOT },
67 {",", TokenType::COMMA },
68 {":", TokenType::COLON },
69 {"=", TokenType::ASSIGN },
70 {";", TokenType::SEMICOLON },
71 {"{", TokenType::BRACES_LEFT },
72 {"}", TokenType::BRACES_RIGHT },
73 {"[", TokenType::BRACKETS_LEFT },
74 {"]", TokenType::BRACKETS_RIGHT },
75 {"(", TokenType::PARENTHESES_LEFT },
76 {")", TokenType::PARENTHESES_RIGHT },
77 {"<", TokenType::ANGLE_BRACKETS_LEFT },
78 {">", TokenType::ANGLE_BRACKETS_RIGHT},
79 {"+", TokenType::ADD },
80 {"-", TokenType::SUB },
81 {"*", TokenType::STAR },
82 {"/", TokenType::SLASH },
83 {"%", TokenType::PERCENT_SIGN },
84 {"<<", TokenType::LEFT_SHIFT },
85 {">>", TokenType::RIGHT_SHIFT },
86 {"&", TokenType::AND },
87 {"^", TokenType::XOR },
88 {"|", TokenType::OR },
89 {"~", TokenType::TILDE },
90 {"++", TokenType::PPLUS },
91 {"--", TokenType::MMINUS },
92 };
93
Lexer()94 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
95
Reset(const std::string & filePath)96 bool Lexer::Reset(const std::string &filePath)
97 {
98 file_ = std::make_unique<File>(filePath, int(File::READ));
99 if (!file_->IsValid()) {
100 return false;
101 }
102
103 havePeek_ = false;
104 InitCurToken(curToken_);
105 return true;
106 }
107
PeekToken(bool skipComment)108 Token Lexer::PeekToken(bool skipComment)
109 {
110 if (!havePeek_) {
111 ReadToken(curToken_, skipComment);
112 havePeek_ = true;
113 }
114 return curToken_;
115 }
116
GetToken(bool skipComment)117 Token Lexer::GetToken(bool skipComment)
118 {
119 if (!havePeek_) {
120 ReadToken(curToken_, skipComment);
121 }
122 havePeek_ = false;
123 return curToken_;
124 }
125
SkipCurrentLine()126 void Lexer::SkipCurrentLine()
127 {
128 while (!file_->IsEof()) {
129 char c = file_->GetChar();
130 if (c == '\n') {
131 file_->GetChar();
132 break;
133 }
134 }
135 havePeek_ = false;
136 }
137
SkipCurrentLine(char untilChar)138 bool Lexer::SkipCurrentLine(char untilChar)
139 {
140 bool ret = true;
141 while (!file_->IsEof()) {
142 int c = file_->GetChar();
143 if (c == untilChar) {
144 ret = true;
145 break;
146 }
147 if (c == '\n') {
148 file_->GetChar();
149 ret = false;
150 break;
151 }
152 }
153 havePeek_ = false;
154 return ret;
155 }
156
Skip(char untilChar)157 void Lexer::Skip(char untilChar)
158 {
159 while (!file_->IsEof()) {
160 int c = file_->GetChar();
161 if (c == untilChar) {
162 break;
163 }
164 }
165 havePeek_ = false;
166 }
167
SkipToken(TokenType tokenType)168 void Lexer::SkipToken(TokenType tokenType)
169 {
170 while (curToken_.kind != tokenType) {
171 GetToken(false);
172 }
173 }
174
SkipUntilToken(TokenType tokenType)175 void Lexer::SkipUntilToken(TokenType tokenType)
176 {
177 Token token = PeekToken();
178 while (token.kind != tokenType) {
179 GetToken(false);
180 token = PeekToken();
181 }
182 }
183
SkipEof()184 void Lexer::SkipEof()
185 {
186 while (!file_->IsEof()) {}
187 havePeek_ = false;
188 }
189
ReadCacheableTime(Token & token)190 bool Lexer::ReadCacheableTime(Token &token)
191 {
192 bool ret = true;
193 StringBuilder sb;
194
195 while (!file_->IsEof()) {
196 char c = file_->PeekChar();
197 if (isspace(c)) {
198 file_->GetChar();
199 continue;
200 }
201 if (!isdigit(c)) {
202 if (c != ']' && c != ',') {
203 ret = false;
204 }
205 break;
206 }
207 sb.Append(c);
208 file_->GetChar();
209 }
210
211 if (ret == false) {
212 return ret;
213 }
214
215 token.value = sb.ToString();
216 if (token.value.empty()) {
217 return false;
218 }
219
220 return ret;
221 }
222
ReadToken(Token & token,bool skipComment)223 void Lexer::ReadToken(Token &token, bool skipComment)
224 {
225 if (!file_->IsEof()) {
226 InitCurToken(token);
227 }
228 while (!file_->IsEof()) {
229 char c = file_->PeekChar();
230 if (isspace(c)) {
231 file_->GetChar();
232 continue;
233 }
234 token.location.row = file_->GetCharLineNumber();
235 token.location.col = file_->GetCharColumnNumber();
236 if (isalpha(c) || c == '_') {
237 ReadId(token);
238 return;
239 } else if (isdigit(c)) {
240 ReadNum(token);
241 return;
242 } else if (c == '<') {
243 ReadShiftLeftOp(token);
244 return;
245 } else if (c == '>') {
246 ReadShiftRightOp(token);
247 return;
248 } else if (c == '+') {
249 ReadPPlusOp(token);
250 return;
251 } else if (c == '-') {
252 ReadMMinusOp(token);
253 return;
254 } else if (c == '/') {
255 ReadComment(token);
256 if ((token.kind == TokenType::COMMENT_BLOCK || token.kind == TokenType::COMMENT_LINE) && skipComment) {
257 InitCurToken(token);
258 continue;
259 }
260 return;
261 }
262 ReadSymbolToken(token);
263 return;
264 }
265 token.kind = TokenType::END_OF_FILE;
266 token.value = "";
267 }
268
InitCurToken(Token & token)269 void Lexer::InitCurToken(Token &token)
270 {
271 token.kind = TokenType::UNKNOWN;
272 token.location.filePath = file_->GetPath();
273 token.location.row = 1;
274 token.location.col = 1;
275 token.value = "";
276 }
277
ReadId(Token & token)278 void Lexer::ReadId(Token &token)
279 {
280 char c = file_->GetChar();
281 StringBuilder sb;
282 sb.Append(c);
283 while (!file_->IsEof()) {
284 c = file_->PeekChar();
285 if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
286 c = file_->GetChar();
287 sb.Append(c);
288 continue;
289 }
290 if (isspace(c)) {
291 file_->GetChar();
292 }
293 break;
294 }
295
296 std::string key = sb.ToString();
297 auto it = keyWords_.find(key);
298 token.kind = (it != keyWords_.end()) ? it->second : TokenType::ID;
299 token.value = sb.ToString();
300 }
301
ReadNum(Token & token)302 void Lexer::ReadNum(Token &token)
303 {
304 char c = file_->PeekChar();
305 if (c == '0') {
306 file_->GetChar();
307 c = file_->PeekChar();
308 if (c == 'b' || c == 'B') {
309 // binary number
310 ReadBinaryNum(token);
311 } else if (isdigit(c)) {
312 // octal number
313 return ReadOctNum(token);
314 } else if (c == 'X' || c == 'x') {
315 // hexadecimal number
316 return ReadHexNum(token);
317 } else {
318 // decimal number 0
319 token.kind = TokenType::NUM;
320 token.value = "0";
321 }
322 } else {
323 ReadDecNum(token);
324 }
325 ReadNumSuffix(token);
326 }
327
ReadBinaryNum(Token & token)328 void Lexer::ReadBinaryNum(Token &token)
329 {
330 StringBuilder sb;
331 char c = file_->GetChar(); // read 'b' or 'B'
332 sb.AppendFormat("0%c", c);
333 bool err = true;
334
335 while (!file_->IsEof()) {
336 c = file_->PeekChar();
337 if (c == '0' || c == '1') {
338 sb.Append(c);
339 file_->GetChar();
340 err = false;
341 } else {
342 break;
343 }
344 }
345
346 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
347 token.value = sb.ToString();
348 }
349
ReadOctNum(Token & token)350 void Lexer::ReadOctNum(Token &token)
351 {
352 StringBuilder sb;
353 sb.Append("0");
354 bool err = false;
355
356 while (!file_->IsEof()) {
357 char c = file_->PeekChar();
358 if (!isdigit(c)) {
359 break;
360 }
361
362 if (!(c >= '0' && c <= '7')) {
363 err = true;
364 }
365 sb.Append(c);
366 file_->GetChar();
367 }
368
369 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
370 token.value = sb.ToString();
371 }
372
ReadHexNum(Token & token)373 void Lexer::ReadHexNum(Token &token)
374 {
375 StringBuilder sb;
376 char c = file_->GetChar(); // read 'x' or 'X'
377 sb.AppendFormat("0%c", c);
378 bool err = true;
379
380 while (!file_->IsEof()) {
381 c = file_->PeekChar();
382 if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
383 sb.Append(c);
384 file_->GetChar();
385 err = false;
386 } else {
387 break;
388 }
389 }
390
391 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
392 token.value = sb.ToString();
393 }
394
ReadDecNum(Token & token)395 void Lexer::ReadDecNum(Token &token)
396 {
397 StringBuilder sb;
398 char c = file_->GetChar();
399 sb.Append(c);
400
401 while (!file_->IsEof()) {
402 c = file_->PeekChar();
403 if (!isdigit(c)) {
404 break;
405 }
406
407 sb.Append(c);
408 file_->GetChar();
409 }
410
411 token.kind = TokenType::NUM;
412 token.value = sb.ToString();
413 }
414
ReadNumSuffix(Token & token)415 void Lexer::ReadNumSuffix(Token &token)
416 {
417 while (!file_->IsEof()) {
418 char c = file_->PeekChar();
419 if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
420 token.value += c;
421 file_->GetChar();
422 } else {
423 break;
424 }
425 }
426 }
427
ReadShiftLeftOp(Token & token)428 void Lexer::ReadShiftLeftOp(Token &token)
429 {
430 char c = file_->GetChar();
431 char next = file_->PeekChar();
432 if (next == '<') {
433 file_->GetChar();
434 token.kind = TokenType::LEFT_SHIFT;
435 token.value = "<<";
436 return;
437 }
438
439 std::string symbol = StringHelper::Format("%c", c);
440 auto iter = symbols_.find(symbol);
441 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
442 token.value = symbol;
443 }
444
ReadShiftRightOp(Token & token)445 void Lexer::ReadShiftRightOp(Token &token)
446 {
447 char c = file_->GetChar();
448 char next = file_->PeekChar();
449 if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
450 file_->GetChar();
451 token.kind = TokenType::RIGHT_SHIFT;
452 token.value = ">>";
453 return;
454 }
455
456 std::string symbol = StringHelper::Format("%c", c);
457 auto iter = symbols_.find(symbol);
458 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
459 token.value = symbol;
460 }
461
ReadPPlusOp(Token & token)462 void Lexer::ReadPPlusOp(Token &token)
463 {
464 char c = file_->GetChar();
465 char next = file_->PeekChar();
466 if (next == '+') {
467 file_->GetChar();
468 token.kind = TokenType::PPLUS;
469 token.value = "++";
470 return;
471 }
472
473 std::string symbol = StringHelper::Format("%c", c);
474 auto iter = symbols_.find(symbol);
475 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
476 token.value = symbol;
477 }
478
ReadMMinusOp(Token & token)479 void Lexer::ReadMMinusOp(Token &token)
480 {
481 char c = file_->GetChar();
482 char next = file_->PeekChar();
483 if (next == '-') {
484 file_->GetChar();
485 token.kind = TokenType::MMINUS;
486 token.value = "--";
487 return;
488 }
489
490 std::string symbol = StringHelper::Format("%c", c);
491 auto iter = symbols_.find(symbol);
492 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
493 token.value = symbol;
494 }
495
ReadComment(Token & token)496 void Lexer::ReadComment(Token &token)
497 {
498 char c = file_->GetChar();
499 char next = file_->PeekChar();
500 if (next == '/') {
501 ReadLineComment(token);
502 return;
503 } else if (next == '*') {
504 ReadBlockComment(token);
505 return;
506 }
507
508 std::string symbol = StringHelper::Format("%c", c);
509 auto iter = symbols_.find(symbol);
510 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
511 token.value = symbol;
512 }
513
ReadLineComment(Token & token)514 void Lexer::ReadLineComment(Token &token)
515 {
516 StringBuilder sb;
517 char c = file_->GetChar();
518 sb.AppendFormat("/%c", c);
519
520 while (!file_->IsEof()) {
521 c = file_->GetChar();
522 if (c == '\n') {
523 break;
524 }
525 sb.Append(c);
526 }
527
528 token.kind = TokenType::COMMENT_LINE;
529 token.value = sb.ToString();
530 }
531
ReadBlockComment(Token & token)532 void Lexer::ReadBlockComment(Token &token)
533 {
534 StringBuilder sb;
535 char c = file_->GetChar();
536 sb.AppendFormat("/%c", c);
537
538 while (!file_->IsEof()) {
539 c = file_->GetChar();
540 sb.Append(c);
541
542 if (c == '*' && file_->PeekChar() == '/') {
543 c = file_->GetChar();
544 sb.Append(c);
545 break;
546 }
547 }
548
549 token.kind = TokenType::COMMENT_BLOCK;
550 token.value = sb.ToString();
551 }
552
ReadSymbolToken(Token & token)553 void Lexer::ReadSymbolToken(Token &token)
554 {
555 char c = file_->GetChar();
556 std::string symbol = StringHelper::Format("%c", c);
557 auto iter = symbols_.find(symbol);
558 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
559 token.value = symbol;
560 }
561 } // namespace Idl
562 } // namespace OHOS