1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  *
4  * HDF is dual licensed: you can use it either under the terms of
5  * the GPL, or the BSD license, at your option.
6  * See the LICENSE file in the root of this repository for complete details.
7  */
8 
9 #include "lexer.h"
10 
11 #include <sstream>
12 #include <string>
13 
14 #include "logger.h"
15 
16 using namespace OHOS::Hardware;
17 
// Radix values handed to the string-to-integer conversions, one per numeric literal form.
namespace {
constexpr int BINARY_NUM = 2;
constexpr int OCTAL_NUM = 8;
constexpr int DECIMAL_NUM = 10;
constexpr int HEX_NUM = 16;
} // namespace
22 
Lexer()23 Lexer::Lexer() : lineno_(0), lineLoc_(0) {}
24 
25 std::map<std::string, TokenType> Lexer::keyWords_ = {
26     {"#include", INCLUDE },
27     {"root",     ROOT    },
28     {"delete",   DELETE  },
29     {"template", TEMPLATE},
30 };
31 
Initialize(const std::string & sourceName)32 bool Lexer::Initialize(const std::string &sourceName)
33 {
34     srcName_ = std::make_shared<std::string>(sourceName);
35 
36     if (src_.is_open()) {
37         src_.close();
38     }
39     bufferStart_ = nullptr;
40     bufferEnd_ = nullptr;
41     lineno_ = 1;
42     lineLoc_ = 1;
43     src_.open(srcName_->c_str(), std::ifstream::binary);
44     if (!src_.is_open()) {
45         Logger().Error() << "Failed to open source file: " << srcName_->data();
46         return false;
47     }
48     return true;
49 }
50 
SetTokenCharacter(char c,Token & token)51 bool Lexer::SetTokenCharacter(char c, Token &token)
52 {
53     switch (c) {
54         case ';': /* fall-through */
55         case ',': /* fall-through */
56         case '[': /* fall-through */
57         case ']': /* fall-through */
58         case '{': /* fall-through */
59         case '}': /* fall-through */
60         case '=': /* fall-through */
61         case '&': /* fall-through */
62         case ':':
63             ConsumeChar();
64             token.type = c;
65             token.lineNo = lineno_;
66             break;
67         case '"':
68             return LexFromString(token);
69         case '+': /* fall-through */
70         case '-':
71             return LexFromNumber(token);
72         case EOF:
73             token.type = EOF;
74             break;
75         default:
76             Logger().Error() << *this << "can not recognized character '" << c << "'";
77             return false;
78     }
79     return true;
80 }
81 
Lex(Token & token)82 bool Lexer::Lex(Token &token)
83 {
84     char c;
85     InitToken(token);
86     do {
87         if (!PeekChar(c, true)) {
88             token.type = EOF;
89             return true;
90         }
91         if (c == '#') {
92             return LexInclude(token);
93         }
94         if (isalpha(c)) {
95             LexFromLiteral(token);
96             return true;
97         }
98 
99         if (IsNum(c)) {
100             return LexFromNumber(token);
101         }
102 
103         if (c == '/') {
104             if (!ProcessComment()) {
105                 return false;
106             }
107             continue;
108         }
109 
110         return SetTokenCharacter(c, token);
111     } while (true);
112 
113     return true;
114 }
115 
GetRawChar()116 char Lexer::GetRawChar()
117 {
118     if (!FillBuffer()) {
119         return EOF;
120     }
121     lineLoc_++;
122     return *bufferStart_++;
123 }
124 
GetChar(char & c,bool skipSpace)125 bool Lexer::GetChar(char &c, bool skipSpace)
126 {
127     char chr = GetRawChar();
128     if (skipSpace) {
129         while (IsSpace(chr)) {
130             chr = GetRawChar();
131         }
132     }
133 
134     if (chr == '\n') {
135         lineno_++;
136         lineLoc_ = 0;
137     }
138     c = chr;
139     return chr != EOF;
140 }
141 
PeekChar(char & c,bool skipSpace)142 bool Lexer::PeekChar(char &c, bool skipSpace)
143 {
144     if (!FillBuffer()) {
145         return false;
146     }
147 
148     if (skipSpace) {
149         while (bufferStart_ <= bufferEnd_ && (IsSpace(*bufferStart_) || *bufferStart_ == '\n')) {
150             lineLoc_++;
151             if (*bufferStart_ == '\n') {
152                 lineLoc_ = 0;
153                 lineno_++;
154             }
155             bufferStart_++;
156         }
157     }
158 
159     if (bufferStart_ > bufferEnd_) {
160         return false;
161     }
162     c = *bufferStart_;
163     return true;
164 }
165 
IsSpace(char c)166 bool Lexer::IsSpace(char c)
167 {
168     return c == ' ' || c == '\t' || c == '\r';
169 }
170 
FillBuffer()171 bool Lexer::FillBuffer()
172 {
173     if (bufferStart_ != nullptr && bufferStart_ <= bufferEnd_) {
174         return true;
175     }
176     auto size = src_.readsome(buffer_, BUFFER_SIZE);
177     if (size == 0) {
178         return false;
179     }
180     bufferStart_ = buffer_;
181     bufferEnd_ = bufferStart_ + size - 1;
182     return true;
183 }
184 
ProcessComment()185 bool Lexer::ProcessComment()
186 {
187     char c = 0;
188     ConsumeChar(); // skip first '/'
189     if (!GetChar(c)) {
190         Logger().Error() << *this << "unterminated comment";
191         return false;
192     }
193 
194     if (c == '/') {
195         while (c != '\n' && GetChar(c)) {}
196         if (c != '\n' && c != EOF) {
197             Logger().Error() << *this << "unterminated signal line comment";
198             return false;
199         }
200     } else if (c == '*') {
201         while (GetChar(c)) {
202             if (c == '*' && GetChar(c) && c == '/') {
203                 return true;
204             }
205         }
206         if (c != '/') {
207             Logger().Error() << *this << "unterminated multi-line comment";
208             return false;
209         }
210     } else {
211         Logger().Error() << *this << "invalid character";
212         return false;
213     }
214 
215     return true;
216 }
217 
GetSourceName() const218 std::shared_ptr<std::string> Lexer::GetSourceName() const
219 {
220     return srcName_;
221 }
222 
GetLineno() const223 int32_t Lexer::GetLineno() const
224 {
225     return lineno_;
226 }
227 
GetLineLoc() const228 int32_t Lexer::GetLineLoc() const
229 {
230     return lineLoc_;
231 }
232 
operator <<(std::ostream & stream,const Lexer & p)233 std::ostream &OHOS::Hardware::operator<<(std::ostream &stream, const Lexer &p)
234 {
235     return stream << p.GetSourceName()->data() << ":" << p.GetLineno() << ":" << p.GetLineLoc() << ": ";
236 }
237 
InitToken(Token & token) const238 void Lexer::InitToken(Token &token) const
239 {
240     token.type = 0;
241     token.numval = 0;
242     token.strval.clear();
243     token.src = srcName_;
244     token.lineNo = lineno_;
245 }
246 
LexFromString(Token & token)247 bool Lexer::LexFromString(Token &token)
248 {
249     char c;
250     GetChar(c, false); // skip first '"'
251     std::string value;
252     while (GetChar(c, false) && c != '"') {
253         if (c == '\\') {
254             GetChar(c, false);
255             if (c != '\"') {
256                 Logger().Error() << *this << "Invalid escape character.";
257                 return false;
258             }
259         }
260         value.push_back(c);
261     }
262 
263     if (c != '"') {
264         Logger().Error() << *this << "unterminated string";
265         return false;
266     }
267     token.type = STRING;
268     token.strval = std::move(value);
269     token.lineNo = lineno_;
270     return true;
271 }
272 
LexHexAndBinaryNum(std::string & value,char & c,uint64_t & v)273 void Lexer::LexHexAndBinaryNum(std::string &value, char &c, uint64_t &v)
274 {
275     switch (c) {
276         case 'x': // fall-through
277         case 'X': // hex number
278             ConsumeChar();
279             while (PeekChar(c, false) && (IsNum(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
280                 value.push_back(c);
281                 ConsumeChar();
282             }
283             v = static_cast<uint64_t>(strtoll(value.data(), nullptr, HEX_NUM));
284             break;
285         case 'b': // binary number
286             ConsumeChar();
287             while (PeekChar(c, false) && (c == '0' || c == '1')) {
288                 value.push_back(c);
289                 ConsumeChar();
290             }
291             v = static_cast<uint64_t>(strtoll(value.data(), nullptr, BINARY_NUM));
292             break;
293         default:; // fall-through
294     }
295 }
296 
LexFromNumber(Token & token)297 bool Lexer::LexFromNumber(Token &token)
298 {
299     std::string value;
300     char c = 0;
301     uint64_t v = 0;
302     errno = 0;
303 
304     GetChar(c, false);
305     switch (c) {
306         case '0':
307             if (!PeekChar(c, true)) {
308                 break;
309             }
310 
311             if (IsNum(c)) { // Octal number
312                 while (PeekChar(c) && IsNum(c)) {
313                     ConsumeChar();
314                     value.push_back(c);
315                 }
316                 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, OCTAL_NUM));
317                 break;
318             }
319             LexHexAndBinaryNum(value, c, v);
320             break;
321         case '+': // fall-through
322         case '-': // fall-through, signed decimal number
323         default:  // unsigned decimal number
324             value.push_back(c);
325             while (PeekChar(c, true) && IsNum(c)) {
326                 ConsumeChar();
327                 value.push_back(c);
328             }
329             v = static_cast<uint64_t>(strtoll(value.data(), nullptr, DECIMAL_NUM));
330             break;
331     }
332 
333     if (errno != 0) {
334         Logger().Error() << *this << "illegal number: " << value.data();
335         return false;
336     }
337     token.type = NUMBER;
338     token.numval = v;
339     token.lineNo = lineno_;
340     return true;
341 }
342 
LexFromLiteral(Token & token)343 void Lexer::LexFromLiteral(Token &token)
344 {
345     std::string value;
346     char c;
347 
348     while (PeekChar(c, false) && !IsSpace(c)) {
349         if (!isalnum(c) && c != '_' && c != '.' && c != '\\') {
350             break;
351         }
352         value.push_back(c);
353         ConsumeChar();
354     }
355 
356     do {
357         if (value == "true") {
358             token.type = NUMBER;
359             token.numval = 1;
360             break;
361         } else if (value == "false") {
362             token.type = NUMBER;
363             token.numval = 0;
364             break;
365         }
366         auto keyword = keyWords_.find(value);
367         if (keyword != keyWords_.end()) {
368             token.type = keyword->second;
369             break;
370         }
371 
372         if (value.find('.') != std::string::npos) {
373             token.type = REF_PATH;
374         } else {
375             token.type = LITERAL;
376         }
377     } while (false);
378 
379     token.strval = std::move(value);
380     token.lineNo = lineno_;
381 }
382 
ConsumeChar()383 void Lexer::ConsumeChar()
384 {
385     char c;
386     (void)GetChar(c, false);
387 }
388 
IsNum(char c)389 bool Lexer::IsNum(char c)
390 {
391     return c >= '0' && c <= '9';
392 }
393 
LexInclude(Token & token)394 bool Lexer::LexInclude(Token &token)
395 {
396     ConsumeChar();
397     LexFromLiteral(token);
398     if (token.strval != "include") {
399         return false;
400     }
401 
402     token.type = INCLUDE;
403     return true;
404 }
405