1 /*
2  * Copyright (c) 2022-2023 Huawei Device Co., Ltd.
3  *
4  * HDF is dual licensed: you can use it either under the terms of
5  * the GPL, or the BSD license, at your option.
6  * See the LICENSE file in the root of this repository for complete details.
7  */
8 
9 #include "lexer/lexer.h"
10 #include "util/logger.h"
11 #include "util/string_builder.h"
12 #include "util/string_helper.h"
13 
14 namespace OHOS {
15 namespace HDI {
16 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
17     {"boolean",        TokenType::BOOLEAN      },
18     {"byte",           TokenType::BYTE         },
19     {"short",          TokenType::SHORT        },
20     {"int",            TokenType::INT          },
21     {"long",           TokenType::LONG         },
22     {"String",         TokenType::STRING       },
23     {"float",          TokenType::FLOAT        },
24     {"double",         TokenType::DOUBLE       },
25     {"FileDescriptor", TokenType::FD           },
26     {"Ashmem",         TokenType::ASHMEM       },
27     {"NativeBuffer",   TokenType::NATIVE_BUFFER},
28     {"Pointer",        TokenType::POINTER      },
29     {"List",           TokenType::LIST         },
30     {"Map",            TokenType::MAP          },
31     {"SharedMemQueue", TokenType::SMQ          },
32     {"char",           TokenType::CHAR         },
33     {"unsigned",       TokenType::UNSIGNED     },
34     {"enum",           TokenType::ENUM         },
35     {"struct",         TokenType::STRUCT       },
36     {"union",          TokenType::UNION        },
37     {"package",        TokenType::PACKAGE      },
38     {"sequenceable",   TokenType::SEQ          },
39     {"import",         TokenType::IMPORT       },
40     {"interface",      TokenType::INTERFACE    },
41     {"extends",        TokenType::EXTENDS      },
42     {"oneway",         TokenType::ONEWAY       },
43     {"callback",       TokenType::CALLBACK     },
44     {"full",           TokenType::FULL         },
45     {"lite",           TokenType::LITE         },
46     {"mini",           TokenType::MINI         },
47     {"in",             TokenType::IN           },
48     {"out",            TokenType::OUT          },
49 };
50 
51 Lexer::StrTokenTypeMap Lexer::symbols_ = {
52     {".",  TokenType::DOT                 },
53     {",",  TokenType::COMMA               },
54     {":",  TokenType::COLON               },
55     {"=",  TokenType::ASSIGN              },
56     {";",  TokenType::SEMICOLON           },
57     {"{",  TokenType::BRACES_LEFT         },
58     {"}",  TokenType::BRACES_RIGHT        },
59     {"[",  TokenType::BRACKETS_LEFT       },
60     {"]",  TokenType::BRACKETS_RIGHT      },
61     {"(",  TokenType::PARENTHESES_LEFT    },
62     {")",  TokenType::PARENTHESES_RIGHT   },
63     {"<",  TokenType::ANGLE_BRACKETS_LEFT },
64     {">",  TokenType::ANGLE_BRACKETS_RIGHT},
65     {"+",  TokenType::ADD                 },
66     {"-",  TokenType::SUB                 },
67     {"*",  TokenType::STAR                },
68     {"/",  TokenType::SLASH               },
69     {"%",  TokenType::PERCENT_SIGN        },
70     {"<<", TokenType::LEFT_SHIFT          },
71     {">>", TokenType::RIGHT_SHIFT         },
72     {"&",  TokenType::AND                 },
73     {"^",  TokenType::XOR                 },
74     {"|",  TokenType::OR                  },
75     {"~",  TokenType::TILDE               },
76     {"++", TokenType::PPLUS               },
77     {"--", TokenType::MMINUS              },
78 };
79 
Lexer()80 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
81 
Reset(const std::string & filePath)82 bool Lexer::Reset(const std::string &filePath)
83 {
84     file_ = std::make_unique<File>(filePath, int(File::READ));
85     if (file_ == nullptr || !file_->IsValid()) {
86         return false;
87     }
88 
89     havePeek_ = false;
90     InitCurToken(curToken_);
91     return true;
92 }
93 
PeekToken(bool skipComment)94 Token Lexer::PeekToken(bool skipComment)
95 {
96     if (!havePeek_) {
97         ReadToken(curToken_, skipComment);
98         havePeek_ = true;
99     }
100     return curToken_;
101 }
102 
GetToken(bool skipComment)103 Token Lexer::GetToken(bool skipComment)
104 {
105     if (!havePeek_) {
106         ReadToken(curToken_, skipComment);
107     }
108     havePeek_ = false;
109     return curToken_;
110 }
111 
SkipCurrentLine()112 void Lexer::SkipCurrentLine()
113 {
114     while (!file_->IsEof()) {
115         char c = file_->GetChar();
116         if (c == '\n') {
117             file_->GetChar();
118             break;
119         }
120     }
121     havePeek_ = false;
122 }
123 
SkipCurrentLine(char untilChar)124 bool Lexer::SkipCurrentLine(char untilChar)
125 {
126     bool ret = true;
127     while (!file_->IsEof()) {
128         int c = file_->GetChar();
129         if (c == untilChar) {
130             ret = true;
131             break;
132         }
133         if (c == '\n') {
134             file_->GetChar();
135             ret = false;
136             break;
137         }
138     }
139     havePeek_ = false;
140     return ret;
141 }
142 
Skip(char untilChar)143 void Lexer::Skip(char untilChar)
144 {
145     while (!file_->IsEof()) {
146         int c = file_->GetChar();
147         if (c == untilChar) {
148             break;
149         }
150     }
151     havePeek_ = false;
152 }
153 
SkipToken(TokenType tokenType)154 void Lexer::SkipToken(TokenType tokenType)
155 {
156     while (curToken_.kind != tokenType) {
157         GetToken(false);
158     }
159 }
160 
SkipUntilToken(TokenType tokenType)161 void Lexer::SkipUntilToken(TokenType tokenType)
162 {
163     Token token = PeekToken();
164     while (token.kind != tokenType) {
165         GetToken(false);
166         token = PeekToken();
167     }
168 }
169 
SkipEof()170 void Lexer::SkipEof()
171 {
172     while (!file_->IsEof()) {}
173     havePeek_ = false;
174 }
175 
ReadToken(Token & token,bool skipComment)176 void Lexer::ReadToken(Token &token, bool skipComment)
177 {
178     if (!file_->IsEof()) {
179         InitCurToken(token);
180     }
181     while (!file_->IsEof()) {
182         char c = file_->PeekChar();
183         if (isspace(c)) {
184             file_->GetChar();
185             continue;
186         }
187         token.location.row = file_->GetCharLineNumber();
188         token.location.col = file_->GetCharColumnNumber();
189         if (isalpha(c) || c == '_') {
190             ReadId(token);
191             return;
192         } else if (isdigit(c)) {
193             ReadNum(token);
194             return;
195         } else if (c == '<') {
196             ReadShiftLeftOp(token);
197             return;
198         } else if (c == '>') {
199             ReadShiftRightOp(token);
200             return;
201         } else if (c == '+') {
202             ReadPPlusOp(token);
203             return;
204         } else if (c == '-') {
205             ReadMMinusOp(token);
206             return;
207         } else if (c == '/') {
208             ReadComment(token);
209             if ((token.kind == TokenType::COMMENT_BLOCK || token.kind == TokenType::COMMENT_LINE) && skipComment) {
210                 InitCurToken(token);
211                 continue;
212             }
213             return;
214         }
215         ReadSymbolToken(token);
216         return;
217     }
218     token.kind = TokenType::END_OF_FILE;
219     token.value = "";
220 }
221 
InitCurToken(Token & token)222 void Lexer::InitCurToken(Token &token)
223 {
224     token.kind = TokenType::UNKNOWN;
225     token.location.filePath = file_->GetPath();
226     token.location.row = 1;
227     token.location.col = 1;
228     token.value = "";
229 }
230 
ReadId(Token & token)231 void Lexer::ReadId(Token &token)
232 {
233     char c = file_->GetChar();
234     StringBuilder sb;
235     sb.Append(c);
236     while (!file_->IsEof()) {
237         c = file_->PeekChar();
238         if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
239             c = file_->GetChar();
240             sb.Append(c);
241             continue;
242         }
243         if (isspace(c)) {
244             file_->GetChar();
245         }
246         break;
247     }
248 
249     std::string key = sb.ToString();
250     auto it = keyWords_.find(key);
251     token.kind = (it != keyWords_.end()) ? it->second : TokenType::ID;
252     token.value = sb.ToString();
253 }
254 
ReadNum(Token & token)255 void Lexer::ReadNum(Token &token)
256 {
257     char c = file_->PeekChar();
258     if (c == '0') {
259         file_->GetChar();
260         c = file_->PeekChar();
261         if (c == 'b' || c == 'B') {
262             // binary number
263             ReadBinaryNum(token);
264         } else if (isdigit(c)) {
265             // octal number
266             return ReadOctNum(token);
267         } else if (c == 'X' || c == 'x') {
268             // hexadecimal number
269             return ReadHexNum(token);
270         } else {
271             // decimal number 0
272             token.kind = TokenType::NUM;
273             token.value = "0";
274         }
275     } else {
276         ReadDecNum(token);
277     }
278     ReadNumSuffix(token);
279 }
280 
ReadBinaryNum(Token & token)281 void Lexer::ReadBinaryNum(Token &token)
282 {
283     StringBuilder sb;
284     char c = file_->GetChar(); // read 'b' or 'B'
285     sb.AppendFormat("0%c", c);
286     bool err = true;
287 
288     while (!file_->IsEof()) {
289         c = file_->PeekChar();
290         if (c == '0' || c == '1') {
291             sb.Append(c);
292             file_->GetChar();
293             err = false;
294         } else {
295             break;
296         }
297     }
298 
299     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
300     token.value = sb.ToString();
301 }
302 
ReadOctNum(Token & token)303 void Lexer::ReadOctNum(Token &token)
304 {
305     StringBuilder sb;
306     sb.Append("0");
307     bool err = false;
308 
309     while (!file_->IsEof()) {
310         char c = file_->PeekChar();
311         if (!isdigit(c)) {
312             break;
313         }
314 
315         if (!(c >= '0' && c <= '7')) {
316             err = true;
317         }
318         sb.Append(c);
319         file_->GetChar();
320     }
321 
322     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
323     token.value = sb.ToString();
324 }
325 
ReadHexNum(Token & token)326 void Lexer::ReadHexNum(Token &token)
327 {
328     StringBuilder sb;
329     char c = file_->GetChar(); // read 'x' or 'X'
330     sb.AppendFormat("0%c", c);
331     bool err = true;
332 
333     while (!file_->IsEof()) {
334         c = file_->PeekChar();
335         if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
336             sb.Append(c);
337             file_->GetChar();
338             err = false;
339         } else {
340             break;
341         }
342     }
343 
344     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
345     token.value = sb.ToString();
346 }
347 
ReadDecNum(Token & token)348 void Lexer::ReadDecNum(Token &token)
349 {
350     StringBuilder sb;
351     char c = file_->GetChar();
352     sb.Append(c);
353 
354     while (!file_->IsEof()) {
355         c = file_->PeekChar();
356         if (!isdigit(c)) {
357             break;
358         }
359 
360         sb.Append(c);
361         file_->GetChar();
362     }
363 
364     token.kind = TokenType::NUM;
365     token.value = sb.ToString();
366 }
367 
ReadNumSuffix(Token & token)368 void Lexer::ReadNumSuffix(Token &token)
369 {
370     while (!file_->IsEof()) {
371         char c = file_->PeekChar();
372         if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
373             token.value += c;
374             file_->GetChar();
375         } else {
376             break;
377         }
378     }
379 }
380 
ReadShiftLeftOp(Token & token)381 void Lexer::ReadShiftLeftOp(Token &token)
382 {
383     char c = file_->GetChar();
384     char next = file_->PeekChar();
385     if (next == '<') {
386         file_->GetChar();
387         token.kind = TokenType::LEFT_SHIFT;
388         token.value = "<<";
389         return;
390     }
391 
392     std::string symbol = StringHelper::Format("%c", c);
393     auto iter = symbols_.find(symbol);
394     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
395     token.value = symbol;
396 }
397 
ReadShiftRightOp(Token & token)398 void Lexer::ReadShiftRightOp(Token &token)
399 {
400     char c = file_->GetChar();
401     char next = file_->PeekChar();
402     if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
403         file_->GetChar();
404         token.kind = TokenType::RIGHT_SHIFT;
405         token.value = ">>";
406         return;
407     }
408 
409     std::string symbol = StringHelper::Format("%c", c);
410     auto iter = symbols_.find(symbol);
411     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
412     token.value = symbol;
413 }
414 
ReadPPlusOp(Token & token)415 void Lexer::ReadPPlusOp(Token &token)
416 {
417     char c = file_->GetChar();
418     char next = file_->PeekChar();
419     if (next == '+') {
420         file_->GetChar();
421         token.kind = TokenType::PPLUS;
422         token.value = "++";
423         return;
424     }
425 
426     std::string symbol = StringHelper::Format("%c", c);
427     auto iter = symbols_.find(symbol);
428     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
429     token.value = symbol;
430 }
431 
ReadMMinusOp(Token & token)432 void Lexer::ReadMMinusOp(Token &token)
433 {
434     char c = file_->GetChar();
435     char next = file_->PeekChar();
436     if (next == '-') {
437         file_->GetChar();
438         token.kind = TokenType::MMINUS;
439         token.value = "--";
440         return;
441     }
442 
443     std::string symbol = StringHelper::Format("%c", c);
444     auto iter = symbols_.find(symbol);
445     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
446     token.value = symbol;
447 }
448 
ReadComment(Token & token)449 void Lexer::ReadComment(Token &token)
450 {
451     char c = file_->GetChar();
452     char next = file_->PeekChar();
453     if (next == '/') {
454         ReadLineComment(token);
455         return;
456     } else if (next == '*') {
457         ReadBlockComment(token);
458         return;
459     }
460 
461     std::string symbol = StringHelper::Format("%c", c);
462     auto iter = symbols_.find(symbol);
463     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
464     token.value = symbol;
465 }
466 
ReadLineComment(Token & token)467 void Lexer::ReadLineComment(Token &token)
468 {
469     StringBuilder sb;
470     char c = file_->GetChar();
471     sb.AppendFormat("/%c", c);
472 
473     while (!file_->IsEof()) {
474         c = file_->GetChar();
475         if (c == '\n') {
476             break;
477         }
478         sb.Append(c);
479     }
480 
481     token.kind = TokenType::COMMENT_LINE;
482     token.value = sb.ToString();
483 }
484 
ReadBlockComment(Token & token)485 void Lexer::ReadBlockComment(Token &token)
486 {
487     StringBuilder sb;
488     char c = file_->GetChar();
489     sb.AppendFormat("/%c", c);
490 
491     while (!file_->IsEof()) {
492         c = file_->GetChar();
493         sb.Append(c);
494 
495         if (c == '*' && file_->PeekChar() == '/') {
496             c = file_->GetChar();
497             sb.Append(c);
498             break;
499         }
500     }
501 
502     token.kind = TokenType::COMMENT_BLOCK;
503     token.value = sb.ToString();
504 }
505 
ReadSymbolToken(Token & token)506 void Lexer::ReadSymbolToken(Token &token)
507 {
508     char c = file_->GetChar();
509     std::string symbol = StringHelper::Format("%c", c);
510     auto iter = symbols_.find(symbol);
511     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
512     token.value = symbol;
513 }
514 } // namespace HDI
515 } // namespace OHOS