1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "lexer/lexer.h"
17 #include "util/logger.h"
18 #include "util/string_builder.h"
19 #include "util/string_helper.h"
20 
21 namespace OHOS {
22 namespace Idl {
23 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
24     {"void",           TokenType::VOID      },
25     {"boolean",        TokenType::BOOLEAN      },
26     {"byte",           TokenType::BYTE         },
27     {"short",          TokenType::SHORT        },
28     {"int",            TokenType::INT          },
29     {"long",           TokenType::LONG         },
30     {"String",         TokenType::STRING       },
31     {"String16",       TokenType::STRING16     },
32     {"float",          TokenType::FLOAT        },
33     {"double",         TokenType::DOUBLE       },
34     {"FileDescriptor", TokenType::FD           },
35     {"Ashmem",         TokenType::ASHMEM       },
36     {"NativeBuffer",   TokenType::NATIVE_BUFFER},
37     {"Pointer",        TokenType::POINTER      },
38     {"List",           TokenType::LIST         },
39     {"Map",            TokenType::MAP          },
40     {"SharedMemQueue", TokenType::SMQ          },
41     {"char",           TokenType::CHAR         },
42     {"unsigned",       TokenType::UNSIGNED     },
43     {"enum",           TokenType::ENUM         },
44     {"struct",         TokenType::STRUCT       },
45     {"union",          TokenType::UNION        },
46     {"package",        TokenType::PACKAGE      },
47     {"interface_token",     TokenType::INTERFACE_TOKEN    },
48     {"support_delegator",   TokenType::SUPPORT_DELEGATOR  },
49     {"sequenceable",   TokenType::SEQ          },
50     {"import",         TokenType::IMPORT       },
51     {"interface",      TokenType::INTERFACE    },
52     {"extends",        TokenType::EXTENDS      },
53     {"oneway",         TokenType::ONEWAY       },
54     {"callback",       TokenType::CALLBACK     },
55     {"freezecontrol",  TokenType::FREEZECONTROL},
56     {"full",           TokenType::FULL         },
57     {"lite",           TokenType::LITE         },
58     {"mini",           TokenType::MINI         },
59     {"cacheable",      TokenType::CACHEABLE    },
60     {"in",             TokenType::IN           },
61     {"out",            TokenType::OUT          },
62     {"inout",          TokenType::INOUT        },
63 };
64 
65 Lexer::StrTokenTypeMap Lexer::symbols_ = {
66     {".",  TokenType::DOT                 },
67     {",",  TokenType::COMMA               },
68     {":",  TokenType::COLON               },
69     {"=",  TokenType::ASSIGN              },
70     {";",  TokenType::SEMICOLON           },
71     {"{",  TokenType::BRACES_LEFT         },
72     {"}",  TokenType::BRACES_RIGHT        },
73     {"[",  TokenType::BRACKETS_LEFT       },
74     {"]",  TokenType::BRACKETS_RIGHT      },
75     {"(",  TokenType::PARENTHESES_LEFT    },
76     {")",  TokenType::PARENTHESES_RIGHT   },
77     {"<",  TokenType::ANGLE_BRACKETS_LEFT },
78     {">",  TokenType::ANGLE_BRACKETS_RIGHT},
79     {"+",  TokenType::ADD                 },
80     {"-",  TokenType::SUB                 },
81     {"*",  TokenType::STAR                },
82     {"/",  TokenType::SLASH               },
83     {"%",  TokenType::PERCENT_SIGN        },
84     {"<<", TokenType::LEFT_SHIFT          },
85     {">>", TokenType::RIGHT_SHIFT         },
86     {"&",  TokenType::AND                 },
87     {"^",  TokenType::XOR                 },
88     {"|",  TokenType::OR                  },
89     {"~",  TokenType::TILDE               },
90     {"++", TokenType::PPLUS               },
91     {"--", TokenType::MMINUS              },
92 };
93 
Lexer()94 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
95 
Reset(const std::string & filePath)96 bool Lexer::Reset(const std::string &filePath)
97 {
98     file_ = std::make_unique<File>(filePath, int(File::READ));
99     if (!file_->IsValid()) {
100         return false;
101     }
102 
103     havePeek_ = false;
104     InitCurToken(curToken_);
105     return true;
106 }
107 
PeekToken(bool skipComment)108 Token Lexer::PeekToken(bool skipComment)
109 {
110     if (!havePeek_) {
111         ReadToken(curToken_, skipComment);
112         havePeek_ = true;
113     }
114     return curToken_;
115 }
116 
GetToken(bool skipComment)117 Token Lexer::GetToken(bool skipComment)
118 {
119     if (!havePeek_) {
120         ReadToken(curToken_, skipComment);
121     }
122     havePeek_ = false;
123     return curToken_;
124 }
125 
SkipCurrentLine()126 void Lexer::SkipCurrentLine()
127 {
128     while (!file_->IsEof()) {
129         char c = file_->GetChar();
130         if (c == '\n') {
131             file_->GetChar();
132             break;
133         }
134     }
135     havePeek_ = false;
136 }
137 
SkipCurrentLine(char untilChar)138 bool Lexer::SkipCurrentLine(char untilChar)
139 {
140     bool ret = true;
141     while (!file_->IsEof()) {
142         int c = file_->GetChar();
143         if (c == untilChar) {
144             ret = true;
145             break;
146         }
147         if (c == '\n') {
148             file_->GetChar();
149             ret = false;
150             break;
151         }
152     }
153     havePeek_ = false;
154     return ret;
155 }
156 
Skip(char untilChar)157 void Lexer::Skip(char untilChar)
158 {
159     while (!file_->IsEof()) {
160         int c = file_->GetChar();
161         if (c == untilChar) {
162             break;
163         }
164     }
165     havePeek_ = false;
166 }
167 
SkipToken(TokenType tokenType)168 void Lexer::SkipToken(TokenType tokenType)
169 {
170     while (curToken_.kind != tokenType) {
171         GetToken(false);
172     }
173 }
174 
SkipUntilToken(TokenType tokenType)175 void Lexer::SkipUntilToken(TokenType tokenType)
176 {
177     Token token = PeekToken();
178     while (token.kind != tokenType) {
179         GetToken(false);
180         token = PeekToken();
181     }
182 }
183 
SkipEof()184 void Lexer::SkipEof()
185 {
186     while (!file_->IsEof()) {}
187     havePeek_ = false;
188 }
189 
ReadCacheableTime(Token & token)190 bool Lexer::ReadCacheableTime(Token &token)
191 {
192     bool ret = true;
193     StringBuilder sb;
194 
195     while (!file_->IsEof()) {
196         char c = file_->PeekChar();
197         if (isspace(c)) {
198             file_->GetChar();
199             continue;
200         }
201         if (!isdigit(c)) {
202             if (c != ']' && c != ',') {
203                 ret = false;
204             }
205             break;
206         }
207         sb.Append(c);
208         file_->GetChar();
209     }
210 
211     if (ret == false) {
212         return ret;
213     }
214 
215     token.value = sb.ToString();
216     if (token.value.empty()) {
217         return false;
218     }
219 
220     return ret;
221 }
222 
ReadToken(Token & token,bool skipComment)223 void Lexer::ReadToken(Token &token, bool skipComment)
224 {
225     if (!file_->IsEof()) {
226         InitCurToken(token);
227     }
228     while (!file_->IsEof()) {
229         char c = file_->PeekChar();
230         if (isspace(c)) {
231             file_->GetChar();
232             continue;
233         }
234         token.location.row = file_->GetCharLineNumber();
235         token.location.col = file_->GetCharColumnNumber();
236         if (isalpha(c) || c == '_') {
237             ReadId(token);
238             return;
239         } else if (isdigit(c)) {
240             ReadNum(token);
241             return;
242         } else if (c == '<') {
243             ReadShiftLeftOp(token);
244             return;
245         } else if (c == '>') {
246             ReadShiftRightOp(token);
247             return;
248         } else if (c == '+') {
249             ReadPPlusOp(token);
250             return;
251         } else if (c == '-') {
252             ReadMMinusOp(token);
253             return;
254         } else if (c == '/') {
255             ReadComment(token);
256             if ((token.kind == TokenType::COMMENT_BLOCK || token.kind == TokenType::COMMENT_LINE) && skipComment) {
257                 InitCurToken(token);
258                 continue;
259             }
260             return;
261         }
262         ReadSymbolToken(token);
263         return;
264     }
265     token.kind = TokenType::END_OF_FILE;
266     token.value = "";
267 }
268 
InitCurToken(Token & token)269 void Lexer::InitCurToken(Token &token)
270 {
271     token.kind = TokenType::UNKNOWN;
272     token.location.filePath = file_->GetPath();
273     token.location.row = 1;
274     token.location.col = 1;
275     token.value = "";
276 }
277 
ReadId(Token & token)278 void Lexer::ReadId(Token &token)
279 {
280     char c = file_->GetChar();
281     StringBuilder sb;
282     sb.Append(c);
283     while (!file_->IsEof()) {
284         c = file_->PeekChar();
285         if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
286             c = file_->GetChar();
287             sb.Append(c);
288             continue;
289         }
290         if (isspace(c)) {
291             file_->GetChar();
292         }
293         break;
294     }
295 
296     std::string key = sb.ToString();
297     auto it = keyWords_.find(key);
298     token.kind = (it != keyWords_.end()) ? it->second : TokenType::ID;
299     token.value = sb.ToString();
300 }
301 
ReadNum(Token & token)302 void Lexer::ReadNum(Token &token)
303 {
304     char c = file_->PeekChar();
305     if (c == '0') {
306         file_->GetChar();
307         c = file_->PeekChar();
308         if (c == 'b' || c == 'B') {
309             // binary number
310             ReadBinaryNum(token);
311         } else if (isdigit(c)) {
312             // octal number
313             return ReadOctNum(token);
314         } else if (c == 'X' || c == 'x') {
315             // hexadecimal number
316             return ReadHexNum(token);
317         } else {
318             // decimal number 0
319             token.kind = TokenType::NUM;
320             token.value = "0";
321         }
322     } else {
323         ReadDecNum(token);
324     }
325     ReadNumSuffix(token);
326 }
327 
ReadBinaryNum(Token & token)328 void Lexer::ReadBinaryNum(Token &token)
329 {
330     StringBuilder sb;
331     char c = file_->GetChar(); // read 'b' or 'B'
332     sb.AppendFormat("0%c", c);
333     bool err = true;
334 
335     while (!file_->IsEof()) {
336         c = file_->PeekChar();
337         if (c == '0' || c == '1') {
338             sb.Append(c);
339             file_->GetChar();
340             err = false;
341         } else {
342             break;
343         }
344     }
345 
346     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
347     token.value = sb.ToString();
348 }
349 
ReadOctNum(Token & token)350 void Lexer::ReadOctNum(Token &token)
351 {
352     StringBuilder sb;
353     sb.Append("0");
354     bool err = false;
355 
356     while (!file_->IsEof()) {
357         char c = file_->PeekChar();
358         if (!isdigit(c)) {
359             break;
360         }
361 
362         if (!(c >= '0' && c <= '7')) {
363             err = true;
364         }
365         sb.Append(c);
366         file_->GetChar();
367     }
368 
369     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
370     token.value = sb.ToString();
371 }
372 
ReadHexNum(Token & token)373 void Lexer::ReadHexNum(Token &token)
374 {
375     StringBuilder sb;
376     char c = file_->GetChar(); // read 'x' or 'X'
377     sb.AppendFormat("0%c", c);
378     bool err = true;
379 
380     while (!file_->IsEof()) {
381         c = file_->PeekChar();
382         if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
383             sb.Append(c);
384             file_->GetChar();
385             err = false;
386         } else {
387             break;
388         }
389     }
390 
391     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
392     token.value = sb.ToString();
393 }
394 
ReadDecNum(Token & token)395 void Lexer::ReadDecNum(Token &token)
396 {
397     StringBuilder sb;
398     char c = file_->GetChar();
399     sb.Append(c);
400 
401     while (!file_->IsEof()) {
402         c = file_->PeekChar();
403         if (!isdigit(c)) {
404             break;
405         }
406 
407         sb.Append(c);
408         file_->GetChar();
409     }
410 
411     token.kind = TokenType::NUM;
412     token.value = sb.ToString();
413 }
414 
ReadNumSuffix(Token & token)415 void Lexer::ReadNumSuffix(Token &token)
416 {
417     while (!file_->IsEof()) {
418         char c = file_->PeekChar();
419         if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
420             token.value += c;
421             file_->GetChar();
422         } else {
423             break;
424         }
425     }
426 }
427 
ReadShiftLeftOp(Token & token)428 void Lexer::ReadShiftLeftOp(Token &token)
429 {
430     char c = file_->GetChar();
431     char next = file_->PeekChar();
432     if (next == '<') {
433         file_->GetChar();
434         token.kind = TokenType::LEFT_SHIFT;
435         token.value = "<<";
436         return;
437     }
438 
439     std::string symbol = StringHelper::Format("%c", c);
440     auto iter = symbols_.find(symbol);
441     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
442     token.value = symbol;
443 }
444 
ReadShiftRightOp(Token & token)445 void Lexer::ReadShiftRightOp(Token &token)
446 {
447     char c = file_->GetChar();
448     char next = file_->PeekChar();
449     if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
450         file_->GetChar();
451         token.kind = TokenType::RIGHT_SHIFT;
452         token.value = ">>";
453         return;
454     }
455 
456     std::string symbol = StringHelper::Format("%c", c);
457     auto iter = symbols_.find(symbol);
458     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
459     token.value = symbol;
460 }
461 
ReadPPlusOp(Token & token)462 void Lexer::ReadPPlusOp(Token &token)
463 {
464     char c = file_->GetChar();
465     char next = file_->PeekChar();
466     if (next == '+') {
467         file_->GetChar();
468         token.kind = TokenType::PPLUS;
469         token.value = "++";
470         return;
471     }
472 
473     std::string symbol = StringHelper::Format("%c", c);
474     auto iter = symbols_.find(symbol);
475     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
476     token.value = symbol;
477 }
478 
ReadMMinusOp(Token & token)479 void Lexer::ReadMMinusOp(Token &token)
480 {
481     char c = file_->GetChar();
482     char next = file_->PeekChar();
483     if (next == '-') {
484         file_->GetChar();
485         token.kind = TokenType::MMINUS;
486         token.value = "--";
487         return;
488     }
489 
490     std::string symbol = StringHelper::Format("%c", c);
491     auto iter = symbols_.find(symbol);
492     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
493     token.value = symbol;
494 }
495 
ReadComment(Token & token)496 void Lexer::ReadComment(Token &token)
497 {
498     char c = file_->GetChar();
499     char next = file_->PeekChar();
500     if (next == '/') {
501         ReadLineComment(token);
502         return;
503     } else if (next == '*') {
504         ReadBlockComment(token);
505         return;
506     }
507 
508     std::string symbol = StringHelper::Format("%c", c);
509     auto iter = symbols_.find(symbol);
510     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
511     token.value = symbol;
512 }
513 
ReadLineComment(Token & token)514 void Lexer::ReadLineComment(Token &token)
515 {
516     StringBuilder sb;
517     char c = file_->GetChar();
518     sb.AppendFormat("/%c", c);
519 
520     while (!file_->IsEof()) {
521         c = file_->GetChar();
522         if (c == '\n') {
523             break;
524         }
525         sb.Append(c);
526     }
527 
528     token.kind = TokenType::COMMENT_LINE;
529     token.value = sb.ToString();
530 }
531 
ReadBlockComment(Token & token)532 void Lexer::ReadBlockComment(Token &token)
533 {
534     StringBuilder sb;
535     char c = file_->GetChar();
536     sb.AppendFormat("/%c", c);
537 
538     while (!file_->IsEof()) {
539         c = file_->GetChar();
540         sb.Append(c);
541 
542         if (c == '*' && file_->PeekChar() == '/') {
543             c = file_->GetChar();
544             sb.Append(c);
545             break;
546         }
547     }
548 
549     token.kind = TokenType::COMMENT_BLOCK;
550     token.value = sb.ToString();
551 }
552 
ReadSymbolToken(Token & token)553 void Lexer::ReadSymbolToken(Token &token)
554 {
555     char c = file_->GetChar();
556     std::string symbol = StringHelper::Format("%c", c);
557     auto iter = symbols_.find(symbol);
558     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
559     token.value = symbol;
560 }
561 } // namespace Idl
562 } // namespace OHOS