1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "schema_utils.h"
17
18 #include <cctype>
19 #include <cfloat>
20 #include <cmath>
21 #include <map>
22
23 #include "db_errno.h"
24 #include "log_print.h"
25 #include "schema_constant.h"
26
27 namespace DistributedDB {
28 namespace {
// Returns true when `character` is allowed inside a schema field name, i.e. it is
// classified as alphanumeric by std::isalnum or it is an underscore.
bool IsLegalFieldCharacter(char character)
{
    // <cctype> classifiers have undefined behavior for negative values other than EOF,
    // so the character is converted to unsigned char before it is classified.
    return (std::isalnum(static_cast<unsigned char>(character)) != 0 || character == '_');
}
// Removes leading and trailing '\r', '\t' and ' ' characters from inString in place.
// Other whitespace (for example '\n') is left untouched.
void TrimFiled(std::string &inString)
{
    static const char *const BLANKS = "\r\t ";
    const size_t firstKept = inString.find_first_not_of(BLANKS);
    if (firstKept == std::string::npos) {
        // Empty input or nothing but blanks: the result is the empty string
        inString.clear();
        return;
    }
    // At least one non-blank character exists, so lastKept is a valid index >= firstKept
    const size_t lastKept = inString.find_last_not_of(BLANKS);
    inString.erase(lastKept + 1);
    inString.erase(0, firstKept);
}
41
// Attribute grammar: TYPE, [NOT NULL,] [DEFAULT X]
// DEFAULT, when present, is always the last clause.
//
// State transition matrix of the DFA used by SplitSchemaAttribute:
//   row    = current state (8 states, 0..7)
//   column = input class, see StateTransferColNum (5 classes, 0..4)
//   cell   = next state, -1 marks an illegal transition
// The array is declared with 6 columns; the sixth one is never listed below, is
// zero-initialized and is not addressed by any input class.
constexpr int STATE_TRANSFER[8][6] = {
    // BLANK, NOT NULL, DEFAULT, OTHER ALNUM, COMMA
    {0, -1, -1, 1, -1},  // state 0: empty, nothing but blanks seen so far
    {1, -1, -1, 1, 2},   // state 1: only type
    {2, 3, 5, -1, -1},   // state 2: type followed by a comma
    {3, -1, -1, -1, 4},  // state 3: type, comma, NOT NULL
    {4, -1, 5, -1, -1},  // state 4: type, comma, NOT NULL, comma
    {6, -1, -1, -1, -1}, // state 5: DEFAULT keyword just read
    {6, -1, -1, 7, -1},  // state 6: DEFAULT keyword followed by blank
    {7, 7, 7, 7, 7},     // state 7: inside the default value, any recognized input is accepted
};
// Input classes returned by MakeTrans. Every non-negative value is used as a column
// index into STATE_TRANSFER.
enum StateTransferColNum {
    COLUMN_ILLEGAL = -1,    // symbol that belongs to no input class
    COLUMN_BLANK = 0,       // whitespace
    COLUMN_NOT_NULL = 1,    // NOT NULL keyword
    COLUMN_DEFAULT = 2,     // DEFAULT keyword
    COLUMN_OTHER_ALNUM = 3, // alphanumeric character, quote or sign
    COLUMN_COMMA = 4,       // ','
};
64 } // namespace
65
66 // compare function can make sure not to cross the border, pos < oriContent.size() - 1
67 // Get symbol type and Converts to the corresponding column of the state transition matrix
MakeTrans(const std::string & oriContent,size_t & pos)68 int SchemaUtils::MakeTrans(const std::string &oriContent, size_t &pos)
69 {
70 if (isspace(oriContent[pos])) {
71 return COLUMN_BLANK;
72 } else if (oriContent.compare(pos, SchemaConstant::KEYWORD_ATTR_NOT_NULL.size(),
73 SchemaConstant::KEYWORD_ATTR_NOT_NULL) == 0) {
74 pos = pos + SchemaConstant::KEYWORD_ATTR_NOT_NULL.size() - 1;
75 return COLUMN_NOT_NULL;
76 } else if (oriContent.compare(pos, SchemaConstant::KEYWORD_ATTR_DEFAULT.size(),
77 SchemaConstant::KEYWORD_ATTR_DEFAULT) == 0) {
78 pos = pos + SchemaConstant::KEYWORD_ATTR_DEFAULT.size() - 1;
79 return COLUMN_DEFAULT;
80 } else if (std::isalnum(oriContent[pos]) || oriContent[pos] == '\'' ||
81 oriContent[pos] == '+' || oriContent[pos] == '-') {
82 return COLUMN_OTHER_ALNUM;
83 } else if (oriContent[pos] == ',') {
84 return COLUMN_COMMA;
85 } else {
86 return COLUMN_ILLEGAL;
87 }
88 }
89
90 // Use DFA to check and Parsing
91 // You can get the corresponding state meaning in the state transition matrix STATE_TRANSFER
SplitSchemaAttribute(const std::string & inAttrString,std::vector<std::string> & outAttrString)92 int SchemaUtils::SplitSchemaAttribute(const std::string &inAttrString, std::vector<std::string> &outAttrString)
93 {
94 int state = 0;
95 outAttrString.resize(3); // attribute have 3 type keywords
96 for (size_t i = 0; i < inAttrString.size(); i++) {
97 int id = MakeTrans(inAttrString, i);
98 if (id < 0) {
99 LOGD("Split Schema Attribute err, Contains unrecognized content [%c]", inAttrString[i]);
100 return -E_SCHEMA_PARSE_FAIL;
101 }
102 state = STATE_TRANSFER[state][id];
103 if (state < 0) {
104 LOGD("Split Schema Attribute err, err state [%d]", state);
105 return -E_SCHEMA_PARSE_FAIL;
106 }
107 switch (state) {
108 case 1: // state 1 :Indicates that only type information is currently available
109 outAttrString[0].push_back(inAttrString[i]);
110 break;
111 case 3: // state 3 :Gets the NOT_NULL keyword
112 outAttrString[1] = SchemaConstant::KEYWORD_ATTR_NOT_NULL;
113 break;
114 case 7: // state 7 :Contains complete information
115 // Get default string. Now transfer matrix can ensure > 1, but you should pay attention when fix it
116 if (i <= 1) {
117 LOGE("default string size must be over 1.");
118 return -E_SCHEMA_PARSE_FAIL;
119 }
120 outAttrString[2] = inAttrString.substr(i - 1); // 2 is index
121 return E_OK;
122 default:
123 break;
124 }
125 }
126 // Only these states are legal, The meaning of the state can be seen in the matrix STATE_TRANSFER explanation
127 if (!(state == 1 || state == 3 || state == 7)) { // 1 is state; 3 is state; 7 is state;
128 LOGD("Split Schema Attribute err, err state [%d]", state);
129 return -E_SCHEMA_PARSE_FAIL;
130 }
131 return E_OK;
132 }
133
TransToBool(const std::string & defaultContent,SchemaAttribute & outAttr)134 int SchemaUtils::TransToBool(const std::string &defaultContent, SchemaAttribute &outAttr)
135 {
136 // Have been trim
137 if (defaultContent.compare(SchemaConstant::KEYWORD_ATTR_VALUE_TRUE) == 0) {
138 outAttr.defaultValue.boolValue = true;
139 return E_OK;
140 } else if (defaultContent.compare(SchemaConstant::KEYWORD_ATTR_VALUE_FALSE) == 0) {
141 outAttr.defaultValue.boolValue = false;
142 return E_OK;
143 }
144 LOGE("Default value can not transform to bool!!");
145 return -E_SCHEMA_PARSE_FAIL;
146 }
147
TransToString(const std::string & defaultContent,SchemaAttribute & outAttr)148 int SchemaUtils::TransToString(const std::string &defaultContent, SchemaAttribute &outAttr)
149 {
150 // Have been trim, Strip leading and trailing '
151 if (defaultContent.size() > 1 && defaultContent.front() == '\'' && defaultContent.back() == '\'') {
152 outAttr.defaultValue.stringValue = defaultContent.substr(1, defaultContent.size() - 2); // 2: trim trailing
153 if (outAttr.defaultValue.stringValue.size() > SchemaConstant::SCHEMA_DEFAULT_STRING_SIZE_LIMIT) {
154 return -E_SCHEMA_PARSE_FAIL;
155 }
156 return E_OK;
157 }
158 LOGE("Substandard format! Default value can not transform to string!!");
159 return -E_SCHEMA_PARSE_FAIL;
160 }
161
TransToInteger(const std::string & defaultContent,SchemaAttribute & outAttr)162 int SchemaUtils::TransToInteger(const std::string &defaultContent, SchemaAttribute &outAttr)
163 {
164 // defaultContent can not be null
165 if (defaultContent.empty()) {
166 return -E_SCHEMA_PARSE_FAIL;
167 }
168 int transRes = strtol(defaultContent.c_str(), nullptr, 10); // 10: decimal
169 std::string resReview = std::to_string(transRes);
170 if (defaultContent.compare(defaultContent.find_first_not_of("+- "), defaultContent.size(),
171 resReview, resReview.find_first_not_of("+- "), resReview.size()) == 0) {
172 // Check the sign of the number
173 if ((defaultContent[0] == '-' && resReview[0] == '-') ||
174 (defaultContent[0] != '-' && resReview[0] != '-') ||
175 transRes == 0) {
176 outAttr.defaultValue.integerValue = transRes;
177 return E_OK;
178 }
179 }
180 LOGE("Default value can not transform to Integer!!");
181 return -E_SCHEMA_PARSE_FAIL;
182 }
183
TransToLong(const std::string & defaultContent,SchemaAttribute & outAttr)184 int SchemaUtils::TransToLong(const std::string &defaultContent, SchemaAttribute &outAttr)
185 {
186 // defaultContent can not be null
187 if (defaultContent.empty()) {
188 return -E_SCHEMA_PARSE_FAIL;
189 }
190 int64_t transRes = strtoll(defaultContent.c_str(), nullptr, 10); // 10: decimal
191 std::string resReview = std::to_string(transRes);
192 if (defaultContent.compare(defaultContent.find_first_not_of("+- "), defaultContent.size(),
193 resReview, resReview.find_first_not_of("+- "), resReview.size()) == 0) {
194 // Check the sign of the number
195 if ((defaultContent[0] == '-' && resReview[0] == '-') ||
196 (defaultContent[0] != '-' && resReview[0] != '-') ||
197 transRes == 0) {
198 outAttr.defaultValue.longValue = transRes;
199 return E_OK;
200 }
201 }
202
203 LOGE("Default value[%s] can not transform to LONG!!", resReview.c_str());
204 return -E_SCHEMA_PARSE_FAIL;
205 }
206
TransToDouble(const std::string & defaultContent,SchemaAttribute & outAttr)207 int SchemaUtils::TransToDouble(const std::string &defaultContent, SchemaAttribute &outAttr)
208 {
209 // defaultContent can not be null
210 if (defaultContent.empty()) {
211 return -E_SCHEMA_PARSE_FAIL;
212 }
213
214 // Disable scientific notation
215 int dotCount = 0;
216 for (const auto &iter : defaultContent) {
217 if (!(std::isdigit(iter) || iter == '.' || iter == '-' || iter == '+')) {
218 LOGE("Default value to double, exist invalid symbol[%c]", iter);
219 return -E_SCHEMA_PARSE_FAIL;
220 }
221 if (iter == '.') {
222 dotCount++;
223 }
224 if (dotCount > 1) {
225 LOGE("Default value to double, exist invalid extra dot");
226 return -E_SCHEMA_PARSE_FAIL;
227 }
228 }
229
230 char *end = nullptr;
231 double transRes = std::strtod(defaultContent.c_str(), &end);
232 // Double exist problems with accuracy, overflow is subject to the legality of the c++ conversion.
233 if (transRes > -HUGE_VAL && transRes < HUGE_VAL && std::isfinite(transRes)) {
234 // Cleared blank
235 if (end != &defaultContent.back() + 1) {
236 LOGD("Termination of parsing due to exception symbol");
237 return -E_SCHEMA_PARSE_FAIL;
238 }
239 outAttr.defaultValue.doubleValue = transRes;
240 return E_OK;
241 }
242 LOGE("Default value can not transform to double, overflow double max!");
243 return -E_SCHEMA_PARSE_FAIL;
244 }
245
TransformDefaultValue(std::string & defaultContent,SchemaAttribute & outAttr)246 int SchemaUtils::TransformDefaultValue(std::string &defaultContent, SchemaAttribute &outAttr)
247 {
248 TrimFiled(defaultContent);
249 if (defaultContent.compare(SchemaConstant::KEYWORD_ATTR_VALUE_NULL) == 0 && outAttr.hasNotNullConstraint) {
250 LOGE("NOT NULL and DEFAULT null Simultaneously");
251 return -E_SCHEMA_PARSE_FAIL;
252 } else if (defaultContent.compare(SchemaConstant::KEYWORD_ATTR_VALUE_NULL) == 0) {
253 outAttr.hasDefaultValue = false;
254 return E_OK;
255 }
256
257 int errCode = E_OK;
258 switch (outAttr.type) {
259 case FieldType::LEAF_FIELD_BOOL:
260 errCode = TransToBool(defaultContent, outAttr);
261 break;
262 case FieldType::LEAF_FIELD_INTEGER:
263 errCode = TransToInteger(defaultContent, outAttr);
264 break;
265 case FieldType::LEAF_FIELD_LONG:
266 errCode = TransToLong(defaultContent, outAttr);
267 break;
268 case FieldType::LEAF_FIELD_DOUBLE:
269 errCode = TransToDouble(defaultContent, outAttr);
270 break;
271 case FieldType::LEAF_FIELD_STRING:
272 errCode = TransToString(defaultContent, outAttr);
273 break;
274 default:
275 LOGE("Unrecognized or unsupported type, please check!!");
276 errCode = -E_SCHEMA_PARSE_FAIL;
277 break;
278 }
279
280 LOGD("SchemaAttribute type is [%d], transfer result is [%d]", static_cast<int>(outAttr.type), errCode);
281 return errCode;
282 }
283
ParseAndCheckSchemaAttribute(const std::string & inAttrString,SchemaAttribute & outAttr,bool useAffinity)284 int SchemaUtils::ParseAndCheckSchemaAttribute(const std::string &inAttrString, SchemaAttribute &outAttr,
285 bool useAffinity)
286 {
287 if (inAttrString.empty()) {
288 return -E_SCHEMA_PARSE_FAIL;
289 }
290 std::string tempinAttrString = inAttrString;
291 TrimFiled(tempinAttrString);
292
293 std::vector<std::string> attrContext;
294 int errCode = SplitSchemaAttribute(inAttrString, attrContext);
295 if (errCode != E_OK) {
296 LOGD("Syntax error, please check!");
297 return errCode;
298 }
299 errCode = ParseSchemaAttribute(attrContext, outAttr, useAffinity);
300 if (errCode != E_OK) {
301 LOGD("Grammatical error, please check!");
302 return errCode;
303 }
304
305 return E_OK;
306 }
307
ParseSchemaAttribute(std::vector<std::string> & attrContext,SchemaAttribute & outAttr,bool useAffinity)308 int SchemaUtils::ParseSchemaAttribute(std::vector<std::string> &attrContext, SchemaAttribute &outAttr, bool useAffinity)
309 {
310 // Currently supported types
311 static const std::map<std::string, FieldType> FIELD_TYPE_DIC = {
312 {SchemaConstant::KEYWORD_TYPE_BOOL, FieldType::LEAF_FIELD_BOOL},
313 {SchemaConstant::KEYWORD_TYPE_INTEGER, FieldType::LEAF_FIELD_INTEGER},
314 {SchemaConstant::KEYWORD_TYPE_LONG, FieldType::LEAF_FIELD_LONG},
315 {SchemaConstant::KEYWORD_TYPE_DOUBLE, FieldType::LEAF_FIELD_DOUBLE},
316 {SchemaConstant::KEYWORD_TYPE_STRING, FieldType::LEAF_FIELD_STRING},
317 };
318
319 // After split attribute? attrContext include 3 type field
320 if (attrContext.size() < 3) {
321 LOGE("No parsing preprocessing!!");
322 return -E_SCHEMA_PARSE_FAIL;
323 }
324 TrimFiled(attrContext[0]);
325 if (!useAffinity) {
326 if (FIELD_TYPE_DIC.find(attrContext[0]) == FIELD_TYPE_DIC.end()) {
327 LOGE("Errno schema field type [%s]!!", attrContext[0].c_str());
328 return -E_SCHEMA_PARSE_FAIL;
329 } else {
330 outAttr.type = FIELD_TYPE_DIC.at(attrContext[0]);
331 }
332 } else {
333 outAttr.type = FieldType::LEAF_FIELD_NULL;
334 outAttr.customFieldType = attrContext[0];
335 }
336
337 outAttr.hasNotNullConstraint = !attrContext[1].empty();
338
339 // if DEFAULT value context exist, fix hasDefaultValue flag, 2nd represents the default value
340 if (attrContext[2].empty()) {
341 outAttr.hasDefaultValue = false;
342 } else {
343 outAttr.hasDefaultValue = true;
344 int errCode = TransformDefaultValue(attrContext[2], outAttr); // 2nd element is DEFAULT value
345 if (errCode != E_OK) {
346 LOGE("Default value is malformed!!");
347 return -E_SCHEMA_PARSE_FAIL;
348 }
349 }
350 return E_OK;
351 }
352
353 namespace {
354 // Check prefix and attempt to find any illegal, returns E_OK if nothing illegal and an hasPrefix indicator.
CheckDollarDotPrefix(const std::string & inPathStr,bool & hasPrefix)355 int CheckDollarDotPrefix(const std::string &inPathStr, bool &hasPrefix)
356 {
357 if (inPathStr.empty()) {
358 return -E_SCHEMA_PARSE_FAIL;
359 }
360 if (inPathStr.size() >= std::string("$.").size()) {
361 // In this case, $. prefix may exist, but also may not exist.
362 if (inPathStr[0] == '$' && inPathStr[1] == '.') { // 1 for second char
363 // $. prefix may exist
364 hasPrefix = true;
365 return E_OK;
366 }
367 if (inPathStr[0] == '$' && inPathStr[1] != '.') { // 1 for second char
368 return -E_SCHEMA_PARSE_FAIL;
369 }
370 if (inPathStr[1] == '$') { // 1 for second char
371 return -E_SCHEMA_PARSE_FAIL;
372 }
373 }
374 // here, inPathStr not empty, has at least one char, should not begin with '.'
375 if (inPathStr[0] == '.') {
376 return -E_SCHEMA_PARSE_FAIL;
377 }
378 hasPrefix = false;
379 return E_OK;
380 }
381 }
382
ParseAndCheckFieldPath(const std::string & inPathString,FieldPath & outPath,bool permitPrefix)383 int SchemaUtils::ParseAndCheckFieldPath(const std::string &inPathString, FieldPath &outPath, bool permitPrefix)
384 {
385 std::string tempInPathString = inPathString;
386 TrimFiled(tempInPathString);
387 bool hasPrefix = false;
388 int errCode = CheckDollarDotPrefix(tempInPathString, hasPrefix);
389 if (errCode != E_OK) {
390 LOGE("CheckDollarDotPrefix Fail.");
391 return errCode;
392 }
393
394 if (!permitPrefix && hasPrefix) {
395 LOGE("Not permit $. prefix.");
396 return -E_SCHEMA_PARSE_FAIL;
397 }
398
399 if (!hasPrefix) {
400 tempInPathString = std::string("$.") + tempInPathString;
401 }
402
403 for (size_t curPos = 1; curPos < tempInPathString.size();) {
404 if (curPos + 1 == tempInPathString.size()) {
405 LOGE("Dot at end will generate empty illegal path!");
406 return -E_SCHEMA_PARSE_FAIL;
407 }
408 size_t nextPointPos = tempInPathString.find_first_of(".", curPos + 1);
409 outPath.push_back(tempInPathString.substr(curPos + 1, nextPointPos - curPos - 1));
410 curPos = nextPointPos;
411 }
412
413 if (outPath.size() > SchemaConstant::SCHEMA_FEILD_PATH_DEPTH_MAX) {
414 LOGE("Parse Schema Index depth illegality!");
415 return -E_SCHEMA_PARSE_FAIL;
416 }
417
418 for (const auto &iter : outPath) {
419 if (CheckFieldName(iter) != E_OK) {
420 LOGE("Parse Schema Index field illegality!");
421 return -E_SCHEMA_PARSE_FAIL;
422 }
423 }
424 return E_OK;
425 }
426
CheckFieldName(const FieldName & inName)427 int SchemaUtils::CheckFieldName(const FieldName &inName)
428 {
429 if (inName.empty() || inName.size() > SchemaConstant::SCHEMA_FEILD_NAME_LENGTH_MAX) {
430 LOGE("Schema FieldName have invalid size!");
431 return -E_SCHEMA_PARSE_FAIL;
432 }
433
434 // The first letter must be a number or an underscore
435 if (!(std::isalpha(inName[0]) || inName[0] == '_')) {
436 LOGE("Schema FieldName begin with un support symbol!");
437 return -E_SCHEMA_PARSE_FAIL;
438 }
439
440 // Must consist of numeric underscore letters
441 for (const auto &iter : inName) {
442 if (!(IsLegalFieldCharacter(iter))) {
443 LOGE("Schema FieldName exist un support symbol!");
444 return -E_SCHEMA_PARSE_FAIL;
445 }
446 }
447
448 return E_OK;
449 }
450
Strip(const std::string & inString)451 std::string SchemaUtils::Strip(const std::string &inString)
452 {
453 std::string stripRes = inString;
454 TrimFiled(stripRes);
455 return stripRes;
456 }
457
StripNameSpace(const std::string & inFullName)458 std::string SchemaUtils::StripNameSpace(const std::string &inFullName)
459 {
460 auto pos = inFullName.find_last_of('.');
461 if (pos == std::string::npos) { // No '.', so no namespace
462 return inFullName;
463 }
464 return inFullName.substr(pos + 1);
465 }
466
FieldTypeString(FieldType inType)467 std::string SchemaUtils::FieldTypeString(FieldType inType)
468 {
469 static std::map<FieldType, std::string> fieldTypeMapString = {
470 {FieldType::LEAF_FIELD_NULL, "NULL"},
471 {FieldType::LEAF_FIELD_BOOL, "BOOL"},
472 {FieldType::LEAF_FIELD_INTEGER, "INTEGER"},
473 {FieldType::LEAF_FIELD_LONG, "LONG"},
474 {FieldType::LEAF_FIELD_DOUBLE, "DOUBLE"},
475 {FieldType::LEAF_FIELD_STRING, "STRING"},
476 {FieldType::LEAF_FIELD_ARRAY, "ARRAY"},
477 {FieldType::LEAF_FIELD_OBJECT, "LEAF_OBJECT"},
478 {FieldType::INTERNAL_FIELD_OBJECT, "INTERNAL_OBJECT"},
479 };
480 return fieldTypeMapString[inType];
481 }
482
SchemaTypeString(SchemaType inType)483 std::string SchemaUtils::SchemaTypeString(SchemaType inType)
484 {
485 static std::map<SchemaType, std::string> schemaTypeMapString {
486 {SchemaType::NONE, "NONE"},
487 {SchemaType::JSON, "JSON-SCHEMA"},
488 {SchemaType::FLATBUFFER, "FLATBUFFER-SCHEMA"},
489 {SchemaType::RELATIVE, "RELATIVE"},
490 {SchemaType::UNRECOGNIZED, "UNRECOGNIZED"},
491 };
492 return schemaTypeMapString[inType];
493 }
494
FieldPathString(const FieldPath & inPath)495 std::string SchemaUtils::FieldPathString(const FieldPath &inPath)
496 {
497 std::string outString = "$";
498 for (const auto &entry : inPath) {
499 outString += ".";
500 outString += entry;
501 }
502 return outString;
503 }
504
TransTrackerSchemaToLower(const TrackerSchema & srcSchema,TrackerSchema & destSchema)505 void SchemaUtils::TransTrackerSchemaToLower(const TrackerSchema &srcSchema, TrackerSchema &destSchema)
506 {
507 std::string tableName(srcSchema.tableName.length(), ' ');
508 std::transform(srcSchema.tableName.begin(), srcSchema.tableName.end(), tableName.begin(), ::tolower);
509 destSchema.tableName = tableName;
510 std::string extendName(srcSchema.extendColName.length(), ' ');
511 std::transform(srcSchema.extendColName.begin(), srcSchema.extendColName.end(), extendName.begin(), ::tolower);
512 destSchema.extendColName = extendName;
513 for (const auto &srcName : srcSchema.trackerColNames) {
514 std::string colName(srcName.length(), ' ');
515 std::transform(srcName.begin(), srcName.end(), colName.begin(), ::tolower);
516 destSchema.trackerColNames.insert(colName);
517 }
518 }
519 } // namespace DistributedDB
520