1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 *
4 * HDF is dual licensed: you can use it either under the terms of
5 * the GPL, or the BSD license, at your option.
6 * See the LICENSE file in the root of this repository for complete details.
7 */
8
#include "lexer.h"

#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <sstream>
#include <string>

#include "logger.h"
15
16 using namespace OHOS::Hardware;
17
/* Radix values handed to the strto* conversion routines when parsing numeric literals. */
namespace {
constexpr int BINARY_NUM = 2;
constexpr int OCTAL_NUM = 8;
constexpr int DECIMAL_NUM = 10;
constexpr int HEX_NUM = 16;
} // namespace
22
Lexer()23 Lexer::Lexer() : lineno_(0), lineLoc_(0) {}
24
25 std::map<std::string, TokenType> Lexer::keyWords_ = {
26 {"#include", INCLUDE },
27 {"root", ROOT },
28 {"delete", DELETE },
29 {"template", TEMPLATE},
30 };
31
Initialize(const std::string & sourceName)32 bool Lexer::Initialize(const std::string &sourceName)
33 {
34 srcName_ = std::make_shared<std::string>(sourceName);
35
36 if (src_.is_open()) {
37 src_.close();
38 }
39 bufferStart_ = nullptr;
40 bufferEnd_ = nullptr;
41 lineno_ = 1;
42 lineLoc_ = 1;
43 src_.open(srcName_->c_str(), std::ifstream::binary);
44 if (!src_.is_open()) {
45 Logger().Error() << "Failed to open source file: " << srcName_->data();
46 return false;
47 }
48 return true;
49 }
50
SetTokenCharacter(char c,Token & token)51 bool Lexer::SetTokenCharacter(char c, Token &token)
52 {
53 switch (c) {
54 case ';': /* fall-through */
55 case ',': /* fall-through */
56 case '[': /* fall-through */
57 case ']': /* fall-through */
58 case '{': /* fall-through */
59 case '}': /* fall-through */
60 case '=': /* fall-through */
61 case '&': /* fall-through */
62 case ':':
63 ConsumeChar();
64 token.type = c;
65 token.lineNo = lineno_;
66 break;
67 case '"':
68 return LexFromString(token);
69 case '+': /* fall-through */
70 case '-':
71 return LexFromNumber(token);
72 case EOF:
73 token.type = EOF;
74 break;
75 default:
76 Logger().Error() << *this << "can not recognized character '" << c << "'";
77 return false;
78 }
79 return true;
80 }
81
Lex(Token & token)82 bool Lexer::Lex(Token &token)
83 {
84 char c;
85 InitToken(token);
86 do {
87 if (!PeekChar(c, true)) {
88 token.type = EOF;
89 return true;
90 }
91 if (c == '#') {
92 return LexInclude(token);
93 }
94 if (isalpha(c)) {
95 LexFromLiteral(token);
96 return true;
97 }
98
99 if (IsNum(c)) {
100 return LexFromNumber(token);
101 }
102
103 if (c == '/') {
104 if (!ProcessComment()) {
105 return false;
106 }
107 continue;
108 }
109
110 return SetTokenCharacter(c, token);
111 } while (true);
112
113 return true;
114 }
115
GetRawChar()116 char Lexer::GetRawChar()
117 {
118 if (!FillBuffer()) {
119 return EOF;
120 }
121 lineLoc_++;
122 return *bufferStart_++;
123 }
124
GetChar(char & c,bool skipSpace)125 bool Lexer::GetChar(char &c, bool skipSpace)
126 {
127 char chr = GetRawChar();
128 if (skipSpace) {
129 while (IsSpace(chr)) {
130 chr = GetRawChar();
131 }
132 }
133
134 if (chr == '\n') {
135 lineno_++;
136 lineLoc_ = 0;
137 }
138 c = chr;
139 return chr != EOF;
140 }
141
PeekChar(char & c,bool skipSpace)142 bool Lexer::PeekChar(char &c, bool skipSpace)
143 {
144 if (!FillBuffer()) {
145 return false;
146 }
147
148 if (skipSpace) {
149 while (bufferStart_ <= bufferEnd_ && (IsSpace(*bufferStart_) || *bufferStart_ == '\n')) {
150 lineLoc_++;
151 if (*bufferStart_ == '\n') {
152 lineLoc_ = 0;
153 lineno_++;
154 }
155 bufferStart_++;
156 }
157 }
158
159 if (bufferStart_ > bufferEnd_) {
160 return false;
161 }
162 c = *bufferStart_;
163 return true;
164 }
165
IsSpace(char c)166 bool Lexer::IsSpace(char c)
167 {
168 return c == ' ' || c == '\t' || c == '\r';
169 }
170
FillBuffer()171 bool Lexer::FillBuffer()
172 {
173 if (bufferStart_ != nullptr && bufferStart_ <= bufferEnd_) {
174 return true;
175 }
176 auto size = src_.readsome(buffer_, BUFFER_SIZE);
177 if (size == 0) {
178 return false;
179 }
180 bufferStart_ = buffer_;
181 bufferEnd_ = bufferStart_ + size - 1;
182 return true;
183 }
184
ProcessComment()185 bool Lexer::ProcessComment()
186 {
187 char c = 0;
188 ConsumeChar(); // skip first '/'
189 if (!GetChar(c)) {
190 Logger().Error() << *this << "unterminated comment";
191 return false;
192 }
193
194 if (c == '/') {
195 while (c != '\n' && GetChar(c)) {}
196 if (c != '\n' && c != EOF) {
197 Logger().Error() << *this << "unterminated signal line comment";
198 return false;
199 }
200 } else if (c == '*') {
201 while (GetChar(c)) {
202 if (c == '*' && GetChar(c) && c == '/') {
203 return true;
204 }
205 }
206 if (c != '/') {
207 Logger().Error() << *this << "unterminated multi-line comment";
208 return false;
209 }
210 } else {
211 Logger().Error() << *this << "invalid character";
212 return false;
213 }
214
215 return true;
216 }
217
GetSourceName() const218 std::shared_ptr<std::string> Lexer::GetSourceName() const
219 {
220 return srcName_;
221 }
222
GetLineno() const223 int32_t Lexer::GetLineno() const
224 {
225 return lineno_;
226 }
227
GetLineLoc() const228 int32_t Lexer::GetLineLoc() const
229 {
230 return lineLoc_;
231 }
232
operator <<(std::ostream & stream,const Lexer & p)233 std::ostream &OHOS::Hardware::operator<<(std::ostream &stream, const Lexer &p)
234 {
235 return stream << p.GetSourceName()->data() << ":" << p.GetLineno() << ":" << p.GetLineLoc() << ": ";
236 }
237
InitToken(Token & token) const238 void Lexer::InitToken(Token &token) const
239 {
240 token.type = 0;
241 token.numval = 0;
242 token.strval.clear();
243 token.src = srcName_;
244 token.lineNo = lineno_;
245 }
246
LexFromString(Token & token)247 bool Lexer::LexFromString(Token &token)
248 {
249 char c;
250 GetChar(c, false); // skip first '"'
251 std::string value;
252 while (GetChar(c, false) && c != '"') {
253 if (c == '\\') {
254 GetChar(c, false);
255 if (c != '\"') {
256 Logger().Error() << *this << "Invalid escape character.";
257 return false;
258 }
259 }
260 value.push_back(c);
261 }
262
263 if (c != '"') {
264 Logger().Error() << *this << "unterminated string";
265 return false;
266 }
267 token.type = STRING;
268 token.strval = std::move(value);
269 token.lineNo = lineno_;
270 return true;
271 }
272
LexHexAndBinaryNum(std::string & value,char & c,uint64_t & v)273 void Lexer::LexHexAndBinaryNum(std::string &value, char &c, uint64_t &v)
274 {
275 switch (c) {
276 case 'x': // fall-through
277 case 'X': // hex number
278 ConsumeChar();
279 while (PeekChar(c, false) && (IsNum(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
280 value.push_back(c);
281 ConsumeChar();
282 }
283 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, HEX_NUM));
284 break;
285 case 'b': // binary number
286 ConsumeChar();
287 while (PeekChar(c, false) && (c == '0' || c == '1')) {
288 value.push_back(c);
289 ConsumeChar();
290 }
291 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, BINARY_NUM));
292 break;
293 default:; // fall-through
294 }
295 }
296
LexFromNumber(Token & token)297 bool Lexer::LexFromNumber(Token &token)
298 {
299 std::string value;
300 char c = 0;
301 uint64_t v = 0;
302 errno = 0;
303
304 GetChar(c, false);
305 switch (c) {
306 case '0':
307 if (!PeekChar(c, true)) {
308 break;
309 }
310
311 if (IsNum(c)) { // Octal number
312 while (PeekChar(c) && IsNum(c)) {
313 ConsumeChar();
314 value.push_back(c);
315 }
316 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, OCTAL_NUM));
317 break;
318 }
319 LexHexAndBinaryNum(value, c, v);
320 break;
321 case '+': // fall-through
322 case '-': // fall-through, signed decimal number
323 default: // unsigned decimal number
324 value.push_back(c);
325 while (PeekChar(c, true) && IsNum(c)) {
326 ConsumeChar();
327 value.push_back(c);
328 }
329 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, DECIMAL_NUM));
330 break;
331 }
332
333 if (errno != 0) {
334 Logger().Error() << *this << "illegal number: " << value.data();
335 return false;
336 }
337 token.type = NUMBER;
338 token.numval = v;
339 token.lineNo = lineno_;
340 return true;
341 }
342
LexFromLiteral(Token & token)343 void Lexer::LexFromLiteral(Token &token)
344 {
345 std::string value;
346 char c;
347
348 while (PeekChar(c, false) && !IsSpace(c)) {
349 if (!isalnum(c) && c != '_' && c != '.' && c != '\\') {
350 break;
351 }
352 value.push_back(c);
353 ConsumeChar();
354 }
355
356 do {
357 if (value == "true") {
358 token.type = NUMBER;
359 token.numval = 1;
360 break;
361 } else if (value == "false") {
362 token.type = NUMBER;
363 token.numval = 0;
364 break;
365 }
366 auto keyword = keyWords_.find(value);
367 if (keyword != keyWords_.end()) {
368 token.type = keyword->second;
369 break;
370 }
371
372 if (value.find('.') != std::string::npos) {
373 token.type = REF_PATH;
374 } else {
375 token.type = LITERAL;
376 }
377 } while (false);
378
379 token.strval = std::move(value);
380 token.lineNo = lineno_;
381 }
382
ConsumeChar()383 void Lexer::ConsumeChar()
384 {
385 char c;
386 (void)GetChar(c, false);
387 }
388
IsNum(char c)389 bool Lexer::IsNum(char c)
390 {
391 return c >= '0' && c <= '9';
392 }
393
LexInclude(Token & token)394 bool Lexer::LexInclude(Token &token)
395 {
396 ConsumeChar();
397 LexFromLiteral(token);
398 if (token.strval != "include") {
399 return false;
400 }
401
402 token.type = INCLUDE;
403 return true;
404 }
405