1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "parser/lexer.h"
17 
18 #include <utility>
19 #include <cstdlib>
20 #include <limits>
21 #include "util/string_builder.h"
22 
23 namespace OHOS {
24 namespace Idl {
Lexer()25 Lexer::Lexer()
26 {
27     InitializeKeywords();
28 }
29 
~Lexer()30 Lexer::~Lexer()
31 {
32     if (currentFile_ != nullptr) {
33         currentFile_->Close();
34     }
35 }
36 
InitializeKeywords()37 void Lexer::InitializeKeywords()
38 {
39     keywords_[String("boolean")] = Token::BOOLEAN;
40     keywords_[String("byte")] = Token::BYTE;
41     keywords_[String("char")] = Token::CHAR;
42     keywords_[String("double")] = Token::DOUBLE;
43     keywords_[String("float")] = Token::FLOAT;
44     keywords_[String("in")] = Token::IN;
45     keywords_[String("inout")] = Token::INOUT;
46     keywords_[String("int")] = Token::INTEGER;
47     keywords_[String("interface")] = Token::INTERFACE;
48     keywords_[String("List")] = Token::LIST;
49     keywords_[String("long")] = Token::LONG;
50     keywords_[String("Map")] = Token::MAP;
51     keywords_[String("oneway")] = Token::ONEWAY;
52     keywords_[String("out")] = Token::OUT;
53     keywords_[String("sequenceable")] = Token::SEQUENCEABLE;
54     keywords_[String("short")] = Token::SHORT;
55     keywords_[String("String")] = Token::STRING;
56     keywords_[String("cacheable")] = Token::CACHEABLE;
57 }
58 
OpenSourceFile(const String & filePath)59 bool Lexer::OpenSourceFile(const String& filePath)
60 {
61     currentFile_ = std::make_shared<File>(filePath, File::READ);
62     if (!currentFile_->IsValid()) {
63         return false;
64     }
65 
66     return true;
67 }
68 
GetToken(bool skipComment)69 Token Lexer::GetToken(bool skipComment)
70 {
71     if (!havePeek_) {
72         currentToken_ = ReadToken(skipComment);
73     }
74     havePeek_ = false;
75     return currentToken_;
76 }
77 
PeekToken(bool skipComment)78 Token Lexer::PeekToken(bool skipComment)
79 {
80     if (!havePeek_) {
81         currentToken_ = ReadToken(skipComment);
82         havePeek_ = true;
83     }
84     return currentToken_;
85 }
86 
ReadToken(bool skipComment)87 Token Lexer::ReadToken(bool skipComment)
88 {
89     while (!currentFile_->IsEof()) {
90         char c = currentFile_->GetChar();
91         tokenLineNo_ = currentFile_->GetCharLineNumber();
92         tokenColumnNo_ = currentFile_->GetCharColumnNumber();
93         if (IsSpace(c)) {
94             continue;
95         } else if (IsAlphabet(c) || c == '_') {
96             return ReadIdentifier(c);
97         }
98         switch (c) {
99             case '<':
100             case '>':
101             case '{':
102             case '}':
103             case '[':
104             case ']':
105             case ',':
106             case '(':
107             case ')':
108             case '.':
109             case ';':
110                 currentToken_ = token_map_[c];
111                 return currentToken_;
112             case '/':
113                 if (ReadTokenPeek(skipComment, c)) {
114                     return currentToken_;
115                 } else {
116                     continue;
117                 }
118             default:
119                 currentToken_ = Token::UNKNOWN;
120                 return currentToken_;
121         }
122     }
123     currentToken_ = Token::END_OF_FILE;
124     return currentToken_;
125 }
126 
ReadTokenPeek(bool skipComment,char letter)127 bool Lexer::ReadTokenPeek(bool skipComment, char letter)
128 {
129     if (currentFile_->PeekChar() == '/') {
130         ReadLineComment(letter);
131         if (!skipComment) {
132             return true;
133         }
134         return false;
135     } else if (currentFile_->PeekChar() == '*') {
136         ReadBlockComment(letter);
137         if (!skipComment) {
138             return true;
139         }
140         return false;
141     }
142     currentToken_ = Token::UNKNOWN;
143     return true;
144 }
145 
ReadIdentifier(char c)146 Token Lexer::ReadIdentifier(char c)
147 {
148     StringBuilder sb;
149 
150     sb.Append(c);
151     while (!currentFile_->IsEof()) {
152         c = currentFile_->PeekChar();
153         if (IsAlphabet(c) || c == '_' || IsDecimalDigital(c) || c == '.') {
154             c = currentFile_->GetChar();
155             sb.Append(c);
156             continue;
157         }
158         if (IsSpace(c)) {
159             currentFile_->GetChar();
160         }
161         break;
162     }
163     String key = sb.ToString();
164     auto it = keywords_.find(key);
165     if (it == keywords_.end()) {
166         identifier_ = key;
167         currentToken_ = Token::IDENTIFIER;
168     } else {
169         currentToken_ = it->second;
170     }
171     return currentToken_;
172 }
173 
ReadLineComment(char c)174 Token Lexer::ReadLineComment(char c)
175 {
176     StringBuilder sb;
177 
178     sb.Append(c);
179     while (!currentFile_->IsEof()) {
180         c = currentFile_->GetChar();
181         if (c == '\n') {
182             break;
183         }
184         sb.Append(c);
185     }
186     comment_ = sb.ToString();
187     currentToken_ = Token::COMMENT_LINE;
188     return currentToken_;
189 }
190 
ReadBlockComment(char c)191 Token Lexer::ReadBlockComment(char c)
192 {
193     StringBuilder sb;
194 
195     sb.Append(c);
196     while (!currentFile_->IsEof()) {
197         c = currentFile_->GetChar();
198         sb.Append(c);
199         if (c == '*' && currentFile_->PeekChar() == '/') {
200             c = currentFile_->GetChar();
201             sb.Append(c);
202             break;
203         }
204     }
205     comment_ = sb.ToString();
206     currentToken_ = Token::COMMENT_BLOCK;
207     return currentToken_;
208 }
209 
SkipCurrentLine()210 void Lexer::SkipCurrentLine()
211 {
212     while (!currentFile_->IsEof()) {
213         char c = currentFile_->GetChar();
214         if (c == '\n') {
215             currentFile_->GetChar();
216             return;
217         }
218     }
219 }
220 
SkipCurrentLine(char untilChar)221 bool Lexer::SkipCurrentLine(char untilChar)
222 {
223     while (!currentFile_->IsEof()) {
224         int c = currentFile_->GetChar();
225         if (c == untilChar) {
226             return true;
227         }
228         if (c == '\n') {
229             currentFile_->GetChar();
230             return false;
231         }
232     }
233     return true;
234 }
235 
TokenToChar(Token token)236 int Lexer::TokenToChar(Token token)
237 {
238     switch (token) {
239         case Token::ANGLE_BRACKETS_LEFT:
240             return '<';
241         case Token::ANGLE_BRACKETS_RIGHT:
242             return '>';
243         case Token::BRACES_LEFT:
244             return '{';
245         case Token::BRACES_RIGHT:
246             return '}';
247         case Token::BRACKETS_LEFT:
248             return '[';
249         case Token::BRACKETS_RIGHT:
250             return ']';
251         case Token::COMMA:
252             return ',';
253         case Token::DOT:
254             return '.';
255         case Token::PARENTHESES_LEFT:
256             return '(';
257         case Token::PARENTHESES_RIGHT:
258             return ')';
259         case Token::SEMICOLON:
260             return ';';
261         default:
262             return -1;
263     }
264 }
265 
DumpToken() const266 String Lexer::DumpToken() const
267 {
268     switch (currentToken_) {
269         case Token::ANGLE_BRACKETS_LEFT:
270             return "<";
271         case Token::ANGLE_BRACKETS_RIGHT:
272             return ">";
273         case Token::BOOLEAN:
274             return "boolean";
275         case Token::BRACES_LEFT:
276             return "{";
277         case Token::BRACES_RIGHT:
278             return "}";
279         case Token::BRACKETS_LEFT:
280             return "[";
281         case Token::BRACKETS_RIGHT:
282             return "]";
283         case Token::BYTE:
284             return "byte";
285         case Token::CHAR:
286             return "char";
287         case Token::COMMA:
288             return ",";
289         case Token::COMMENT_BLOCK:
290         case Token::COMMENT_LINE:
291             return comment_;
292         case Token::DOT:
293             return ".";
294         default:
295             return DumpTokenSecond();
296     }
297 }
298 
DumpTokenSecond() const299 String Lexer::DumpTokenSecond() const
300 {
301     switch (currentToken_) {
302         case Token::DOUBLE:
303             return "double";
304         case Token::END_OF_FILE:
305             return "eof";
306         case Token::FLOAT:
307             return "float";
308         case Token::IDENTIFIER:
309             return identifier_;
310         case Token::IN:
311             return "in";
312         case Token::INOUT:
313             return "inout";
314         case Token::INTEGER:
315             return "int";
316         case Token::LIST:
317             return "List";
318         case Token::LONG:
319             return "long";
320         case Token::MAP:
321             return "Map";
322         case Token::ONEWAY:
323             return "oneway";
324         case Token::OUT:
325             return "out";
326         case Token::SEQUENCEABLE:
327             return "sequenceable";
328         case Token::PARENTHESES_LEFT:
329             return "(";
330         case Token::PARENTHESES_RIGHT:
331             return ")";
332         case Token::SEMICOLON:
333             return ";";
334         case Token::SHORT:
335             return "short";
336         case Token::STRING:
337             return "String";
338         default:
339             return "unknown token";
340     }
341 }
342 
strToInt(const char * str,int strLen,int & number)343 bool Lexer::strToInt(const char *str, int strLen, int& number)
344 {
345     int result = 0;
346     int positionWeight = 1;
347     const int ten = 10;
348 
349     for (int i = strLen - 1; i >= 0; i--) {
350         if (str[i] < '0' || str[i] > '9') {
351             return false;
352         }
353         int digit = str[i] - '0';
354         if (static_cast<int64_t>(digit * positionWeight) > std::numeric_limits<int32_t>::max() - result) {
355             return false;
356         }
357         result += digit * positionWeight;
358         positionWeight *= ten;
359     }
360     number = result;
361     return true;
362 }
363 
ParseCacheable(int & cacheTime)364 bool Lexer::ParseCacheable(int& cacheTime)
365 {
366     bool ret = true;
367     StringBuilder numbersb;
368 
369     while (!currentFile_->IsEof()) {
370         char c = currentFile_->PeekChar();
371         if (IsSpace(c)) {
372             currentFile_->GetChar();
373             continue;
374         }
375         if (!IsDecimalDigital(c)) {
376             if (c != ']' && c != ',') {
377                 ret = false;
378             }
379             break;
380         }
381         numbersb.Append(c);
382         currentFile_->GetChar();
383     }
384 
385     if (ret == false) {
386         return ret;
387     }
388     String numberStr = numbersb.ToString();
389     if (numberStr.IsNull()) {
390         return false;
391     }
392 
393     ret = strToInt(numberStr.string(), numberStr.GetLength(), cacheTime);
394     return ret;
395 }
396 
} // namespace Idl
398 } // namespace OHOS
399