1 /*
2 * Copyright (c) 2022-2023 Huawei Device Co., Ltd.
3 *
4 * HDF is dual licensed: you can use it either under the terms of
5 * the GPL, or the BSD license, at your option.
6 * See the LICENSE file in the root of this repository for complete details.
7 */
8
9 #include "lexer/lexer.h"
10 #include "util/logger.h"
11 #include "util/string_builder.h"
12 #include "util/string_helper.h"
13
14 namespace OHOS {
15 namespace HDI {
16 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
17 {"boolean", TokenType::BOOLEAN },
18 {"byte", TokenType::BYTE },
19 {"short", TokenType::SHORT },
20 {"int", TokenType::INT },
21 {"long", TokenType::LONG },
22 {"String", TokenType::STRING },
23 {"float", TokenType::FLOAT },
24 {"double", TokenType::DOUBLE },
25 {"FileDescriptor", TokenType::FD },
26 {"Ashmem", TokenType::ASHMEM },
27 {"NativeBuffer", TokenType::NATIVE_BUFFER},
28 {"Pointer", TokenType::POINTER },
29 {"List", TokenType::LIST },
30 {"Map", TokenType::MAP },
31 {"SharedMemQueue", TokenType::SMQ },
32 {"char", TokenType::CHAR },
33 {"unsigned", TokenType::UNSIGNED },
34 {"enum", TokenType::ENUM },
35 {"struct", TokenType::STRUCT },
36 {"union", TokenType::UNION },
37 {"package", TokenType::PACKAGE },
38 {"sequenceable", TokenType::SEQ },
39 {"import", TokenType::IMPORT },
40 {"interface", TokenType::INTERFACE },
41 {"extends", TokenType::EXTENDS },
42 {"oneway", TokenType::ONEWAY },
43 {"callback", TokenType::CALLBACK },
44 {"full", TokenType::FULL },
45 {"lite", TokenType::LITE },
46 {"mini", TokenType::MINI },
47 {"in", TokenType::IN },
48 {"out", TokenType::OUT },
49 };
50
51 Lexer::StrTokenTypeMap Lexer::symbols_ = {
52 {".", TokenType::DOT },
53 {",", TokenType::COMMA },
54 {":", TokenType::COLON },
55 {"=", TokenType::ASSIGN },
56 {";", TokenType::SEMICOLON },
57 {"{", TokenType::BRACES_LEFT },
58 {"}", TokenType::BRACES_RIGHT },
59 {"[", TokenType::BRACKETS_LEFT },
60 {"]", TokenType::BRACKETS_RIGHT },
61 {"(", TokenType::PARENTHESES_LEFT },
62 {")", TokenType::PARENTHESES_RIGHT },
63 {"<", TokenType::ANGLE_BRACKETS_LEFT },
64 {">", TokenType::ANGLE_BRACKETS_RIGHT},
65 {"+", TokenType::ADD },
66 {"-", TokenType::SUB },
67 {"*", TokenType::STAR },
68 {"/", TokenType::SLASH },
69 {"%", TokenType::PERCENT_SIGN },
70 {"<<", TokenType::LEFT_SHIFT },
71 {">>", TokenType::RIGHT_SHIFT },
72 {"&", TokenType::AND },
73 {"^", TokenType::XOR },
74 {"|", TokenType::OR },
75 {"~", TokenType::TILDE },
76 {"++", TokenType::PPLUS },
77 {"--", TokenType::MMINUS },
78 };
79
Lexer()80 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
81
Reset(const std::string & filePath)82 bool Lexer::Reset(const std::string &filePath)
83 {
84 file_ = std::make_unique<File>(filePath, int(File::READ));
85 if (file_ == nullptr || !file_->IsValid()) {
86 return false;
87 }
88
89 havePeek_ = false;
90 InitCurToken(curToken_);
91 return true;
92 }
93
PeekToken(bool skipComment)94 Token Lexer::PeekToken(bool skipComment)
95 {
96 if (!havePeek_) {
97 ReadToken(curToken_, skipComment);
98 havePeek_ = true;
99 }
100 return curToken_;
101 }
102
GetToken(bool skipComment)103 Token Lexer::GetToken(bool skipComment)
104 {
105 if (!havePeek_) {
106 ReadToken(curToken_, skipComment);
107 }
108 havePeek_ = false;
109 return curToken_;
110 }
111
SkipCurrentLine()112 void Lexer::SkipCurrentLine()
113 {
114 while (!file_->IsEof()) {
115 char c = file_->GetChar();
116 if (c == '\n') {
117 file_->GetChar();
118 break;
119 }
120 }
121 havePeek_ = false;
122 }
123
SkipCurrentLine(char untilChar)124 bool Lexer::SkipCurrentLine(char untilChar)
125 {
126 bool ret = true;
127 while (!file_->IsEof()) {
128 int c = file_->GetChar();
129 if (c == untilChar) {
130 ret = true;
131 break;
132 }
133 if (c == '\n') {
134 file_->GetChar();
135 ret = false;
136 break;
137 }
138 }
139 havePeek_ = false;
140 return ret;
141 }
142
Skip(char untilChar)143 void Lexer::Skip(char untilChar)
144 {
145 while (!file_->IsEof()) {
146 int c = file_->GetChar();
147 if (c == untilChar) {
148 break;
149 }
150 }
151 havePeek_ = false;
152 }
153
SkipToken(TokenType tokenType)154 void Lexer::SkipToken(TokenType tokenType)
155 {
156 while (curToken_.kind != tokenType) {
157 GetToken(false);
158 }
159 }
160
SkipUntilToken(TokenType tokenType)161 void Lexer::SkipUntilToken(TokenType tokenType)
162 {
163 Token token = PeekToken();
164 while (token.kind != tokenType) {
165 GetToken(false);
166 token = PeekToken();
167 }
168 }
169
SkipEof()170 void Lexer::SkipEof()
171 {
172 while (!file_->IsEof()) {}
173 havePeek_ = false;
174 }
175
ReadToken(Token & token,bool skipComment)176 void Lexer::ReadToken(Token &token, bool skipComment)
177 {
178 if (!file_->IsEof()) {
179 InitCurToken(token);
180 }
181 while (!file_->IsEof()) {
182 char c = file_->PeekChar();
183 if (isspace(c)) {
184 file_->GetChar();
185 continue;
186 }
187 token.location.row = file_->GetCharLineNumber();
188 token.location.col = file_->GetCharColumnNumber();
189 if (isalpha(c) || c == '_') {
190 ReadId(token);
191 return;
192 } else if (isdigit(c)) {
193 ReadNum(token);
194 return;
195 } else if (c == '<') {
196 ReadShiftLeftOp(token);
197 return;
198 } else if (c == '>') {
199 ReadShiftRightOp(token);
200 return;
201 } else if (c == '+') {
202 ReadPPlusOp(token);
203 return;
204 } else if (c == '-') {
205 ReadMMinusOp(token);
206 return;
207 } else if (c == '/') {
208 ReadComment(token);
209 if ((token.kind == TokenType::COMMENT_BLOCK || token.kind == TokenType::COMMENT_LINE) && skipComment) {
210 InitCurToken(token);
211 continue;
212 }
213 return;
214 }
215 ReadSymbolToken(token);
216 return;
217 }
218 token.kind = TokenType::END_OF_FILE;
219 token.value = "";
220 }
221
InitCurToken(Token & token)222 void Lexer::InitCurToken(Token &token)
223 {
224 token.kind = TokenType::UNKNOWN;
225 token.location.filePath = file_->GetPath();
226 token.location.row = 1;
227 token.location.col = 1;
228 token.value = "";
229 }
230
ReadId(Token & token)231 void Lexer::ReadId(Token &token)
232 {
233 char c = file_->GetChar();
234 StringBuilder sb;
235 sb.Append(c);
236 while (!file_->IsEof()) {
237 c = file_->PeekChar();
238 if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
239 c = file_->GetChar();
240 sb.Append(c);
241 continue;
242 }
243 if (isspace(c)) {
244 file_->GetChar();
245 }
246 break;
247 }
248
249 std::string key = sb.ToString();
250 auto it = keyWords_.find(key);
251 token.kind = (it != keyWords_.end()) ? it->second : TokenType::ID;
252 token.value = sb.ToString();
253 }
254
ReadNum(Token & token)255 void Lexer::ReadNum(Token &token)
256 {
257 char c = file_->PeekChar();
258 if (c == '0') {
259 file_->GetChar();
260 c = file_->PeekChar();
261 if (c == 'b' || c == 'B') {
262 // binary number
263 ReadBinaryNum(token);
264 } else if (isdigit(c)) {
265 // octal number
266 return ReadOctNum(token);
267 } else if (c == 'X' || c == 'x') {
268 // hexadecimal number
269 return ReadHexNum(token);
270 } else {
271 // decimal number 0
272 token.kind = TokenType::NUM;
273 token.value = "0";
274 }
275 } else {
276 ReadDecNum(token);
277 }
278 ReadNumSuffix(token);
279 }
280
ReadBinaryNum(Token & token)281 void Lexer::ReadBinaryNum(Token &token)
282 {
283 StringBuilder sb;
284 char c = file_->GetChar(); // read 'b' or 'B'
285 sb.AppendFormat("0%c", c);
286 bool err = true;
287
288 while (!file_->IsEof()) {
289 c = file_->PeekChar();
290 if (c == '0' || c == '1') {
291 sb.Append(c);
292 file_->GetChar();
293 err = false;
294 } else {
295 break;
296 }
297 }
298
299 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
300 token.value = sb.ToString();
301 }
302
ReadOctNum(Token & token)303 void Lexer::ReadOctNum(Token &token)
304 {
305 StringBuilder sb;
306 sb.Append("0");
307 bool err = false;
308
309 while (!file_->IsEof()) {
310 char c = file_->PeekChar();
311 if (!isdigit(c)) {
312 break;
313 }
314
315 if (!(c >= '0' && c <= '7')) {
316 err = true;
317 }
318 sb.Append(c);
319 file_->GetChar();
320 }
321
322 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
323 token.value = sb.ToString();
324 }
325
ReadHexNum(Token & token)326 void Lexer::ReadHexNum(Token &token)
327 {
328 StringBuilder sb;
329 char c = file_->GetChar(); // read 'x' or 'X'
330 sb.AppendFormat("0%c", c);
331 bool err = true;
332
333 while (!file_->IsEof()) {
334 c = file_->PeekChar();
335 if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
336 sb.Append(c);
337 file_->GetChar();
338 err = false;
339 } else {
340 break;
341 }
342 }
343
344 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
345 token.value = sb.ToString();
346 }
347
ReadDecNum(Token & token)348 void Lexer::ReadDecNum(Token &token)
349 {
350 StringBuilder sb;
351 char c = file_->GetChar();
352 sb.Append(c);
353
354 while (!file_->IsEof()) {
355 c = file_->PeekChar();
356 if (!isdigit(c)) {
357 break;
358 }
359
360 sb.Append(c);
361 file_->GetChar();
362 }
363
364 token.kind = TokenType::NUM;
365 token.value = sb.ToString();
366 }
367
ReadNumSuffix(Token & token)368 void Lexer::ReadNumSuffix(Token &token)
369 {
370 while (!file_->IsEof()) {
371 char c = file_->PeekChar();
372 if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
373 token.value += c;
374 file_->GetChar();
375 } else {
376 break;
377 }
378 }
379 }
380
ReadShiftLeftOp(Token & token)381 void Lexer::ReadShiftLeftOp(Token &token)
382 {
383 char c = file_->GetChar();
384 char next = file_->PeekChar();
385 if (next == '<') {
386 file_->GetChar();
387 token.kind = TokenType::LEFT_SHIFT;
388 token.value = "<<";
389 return;
390 }
391
392 std::string symbol = StringHelper::Format("%c", c);
393 auto iter = symbols_.find(symbol);
394 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
395 token.value = symbol;
396 }
397
ReadShiftRightOp(Token & token)398 void Lexer::ReadShiftRightOp(Token &token)
399 {
400 char c = file_->GetChar();
401 char next = file_->PeekChar();
402 if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
403 file_->GetChar();
404 token.kind = TokenType::RIGHT_SHIFT;
405 token.value = ">>";
406 return;
407 }
408
409 std::string symbol = StringHelper::Format("%c", c);
410 auto iter = symbols_.find(symbol);
411 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
412 token.value = symbol;
413 }
414
ReadPPlusOp(Token & token)415 void Lexer::ReadPPlusOp(Token &token)
416 {
417 char c = file_->GetChar();
418 char next = file_->PeekChar();
419 if (next == '+') {
420 file_->GetChar();
421 token.kind = TokenType::PPLUS;
422 token.value = "++";
423 return;
424 }
425
426 std::string symbol = StringHelper::Format("%c", c);
427 auto iter = symbols_.find(symbol);
428 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
429 token.value = symbol;
430 }
431
ReadMMinusOp(Token & token)432 void Lexer::ReadMMinusOp(Token &token)
433 {
434 char c = file_->GetChar();
435 char next = file_->PeekChar();
436 if (next == '-') {
437 file_->GetChar();
438 token.kind = TokenType::MMINUS;
439 token.value = "--";
440 return;
441 }
442
443 std::string symbol = StringHelper::Format("%c", c);
444 auto iter = symbols_.find(symbol);
445 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
446 token.value = symbol;
447 }
448
ReadComment(Token & token)449 void Lexer::ReadComment(Token &token)
450 {
451 char c = file_->GetChar();
452 char next = file_->PeekChar();
453 if (next == '/') {
454 ReadLineComment(token);
455 return;
456 } else if (next == '*') {
457 ReadBlockComment(token);
458 return;
459 }
460
461 std::string symbol = StringHelper::Format("%c", c);
462 auto iter = symbols_.find(symbol);
463 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
464 token.value = symbol;
465 }
466
ReadLineComment(Token & token)467 void Lexer::ReadLineComment(Token &token)
468 {
469 StringBuilder sb;
470 char c = file_->GetChar();
471 sb.AppendFormat("/%c", c);
472
473 while (!file_->IsEof()) {
474 c = file_->GetChar();
475 if (c == '\n') {
476 break;
477 }
478 sb.Append(c);
479 }
480
481 token.kind = TokenType::COMMENT_LINE;
482 token.value = sb.ToString();
483 }
484
ReadBlockComment(Token & token)485 void Lexer::ReadBlockComment(Token &token)
486 {
487 StringBuilder sb;
488 char c = file_->GetChar();
489 sb.AppendFormat("/%c", c);
490
491 while (!file_->IsEof()) {
492 c = file_->GetChar();
493 sb.Append(c);
494
495 if (c == '*' && file_->PeekChar() == '/') {
496 c = file_->GetChar();
497 sb.Append(c);
498 break;
499 }
500 }
501
502 token.kind = TokenType::COMMENT_BLOCK;
503 token.value = sb.ToString();
504 }
505
ReadSymbolToken(Token & token)506 void Lexer::ReadSymbolToken(Token &token)
507 {
508 char c = file_->GetChar();
509 std::string symbol = StringHelper::Format("%c", c);
510 auto iter = symbols_.find(symbol);
511 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
512 token.value = symbol;
513 }
514 } // namespace HDI
515 } // namespace OHOS