1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "schema_utils.h"
17 
18 #include <cctype>
19 #include <cfloat>
20 #include <cmath>
21 #include <map>
22 
23 #include "db_errno.h"
24 #include "log_print.h"
25 #include "schema_constant.h"
26 
27 namespace DistributedDB {
28 namespace {
// Returns true when `character` is an alphanumeric character (as classified by std::isalnum)
// or an underscore.
bool IsLegalFieldCharacter(char character)
{
    // The <cctype> classifiers have undefined behavior for negative arguments other than EOF.
    // Plain char may be signed, so it is converted to unsigned char before classification.
    return (std::isalnum(static_cast<unsigned char>(character)) != 0) || (character == '_');
}
// Strips leading and trailing '\r', '\t' and ' ' characters from inString in place.
// A string consisting solely of those characters (or an empty string) ends up empty.
// Other whitespace such as '\n' is left untouched.
void TrimFiled(std::string &inString)
{
    static const char * const BLANK_SET = "\r\t ";
    const size_t firstKept = inString.find_first_not_of(BLANK_SET);
    if (firstKept == std::string::npos) {
        // Nothing but blanks: the whole string goes away.
        inString.clear();
        return;
    }
    // At least one non-blank character exists, so lastKept is a valid index.
    const size_t lastKept = inString.find_last_not_of(BLANK_SET);
    inString.erase(lastKept + 1);
    inString.erase(0, firstKept);
}
41 
// Attribute grammar: TYPE [, NOT NULL] [, DEFAULT X]; DEFAULT, when present, comes last.
//
// State transition matrix of the DFA used to split an attribute string.
// STATE_TRANSFER[current state][input column] is the next state; -1 marks an illegal transition.
// The matrix has 8 states (rows) and 5 input kinds (columns). The columns are indexed by the
// non-negative values of StateTransferColNum below.
const int STATE_TRANSFER[8][5] = {
    // blank, NOT NULL, DEFAULT, OTHER ALNUM, COMMA
    {0, -1, -1, 1, -1},     // state 0: nothing read yet
    {1, -1, -1, 1, 2},      // state 1: type only
    {2, 3, 5, -1, -1},      // state 2: type followed by a comma
    {3, -1, -1, -1, 4},     // state 3: type, comma, NOT NULL
    {4, -1, 5, -1, -1},     // state 4: type, comma, NOT NULL, comma
    {6, -1, -1, -1, -1},    // state 5: DEFAULT keyword read
    {6, -1, -1, 7, -1},     // state 6: DEFAULT keyword followed by blank
    {7, 7, 7, 7, 7},        // state 7: inside the default value, every input is accepted
};
// Column indexes of STATE_TRANSFER; COLUMN_ILLEGAL marks a symbol that has no column.
enum StateTransferColNum {
    COLUMN_ILLEGAL = -1,
    COLUMN_BLANK,
    COLUMN_NOT_NULL,
    COLUMN_DEFAULT,
    COLUMN_OTHER_ALNUM,
    COLUMN_COMMA,
};
64 } // namespace
65 
66 // compare function can make sure not to cross the border, pos < oriContent.size() - 1
67 // Get symbol type and Converts to the corresponding column of the state transition matrix
MakeTrans(const std::string & oriContent,size_t & pos)68 int SchemaUtils::MakeTrans(const std::string &oriContent, size_t &pos)
69 {
70     if (isspace(oriContent[pos])) {
71         return COLUMN_BLANK;
72     } else if (oriContent.compare(pos, SchemaConstant::KEYWORD_ATTR_NOT_NULL.size(),
73         SchemaConstant::KEYWORD_ATTR_NOT_NULL) == 0) {
74         pos = pos + SchemaConstant::KEYWORD_ATTR_NOT_NULL.size() - 1;
75         return COLUMN_NOT_NULL;
76     } else if (oriContent.compare(pos, SchemaConstant::KEYWORD_ATTR_DEFAULT.size(),
77         SchemaConstant::KEYWORD_ATTR_DEFAULT) == 0) {
78         pos = pos + SchemaConstant::KEYWORD_ATTR_DEFAULT.size() - 1;
79         return COLUMN_DEFAULT;
80     } else if (std::isalnum(oriContent[pos]) || oriContent[pos] == '\'' ||
81         oriContent[pos] == '+' || oriContent[pos] == '-') {
82         return COLUMN_OTHER_ALNUM;
83     } else if (oriContent[pos] == ',') {
84         return COLUMN_COMMA;
85     } else {
86         return COLUMN_ILLEGAL;
87     }
88 }
89 
90 // Use DFA to check and Parsing
91 // You can get the corresponding state meaning in the state transition matrix STATE_TRANSFER
SplitSchemaAttribute(const std::string & inAttrString,std::vector<std::string> & outAttrString)92 int SchemaUtils::SplitSchemaAttribute(const std::string &inAttrString, std::vector<std::string> &outAttrString)
93 {
94     int state = 0;
95     outAttrString.resize(3); // attribute have 3 type keywords
96     for (size_t i = 0; i < inAttrString.size(); i++) {
97         int id = MakeTrans(inAttrString, i);
98         if (id < 0) {
99             LOGD("Split Schema Attribute err, Contains unrecognized content [%c]", inAttrString[i]);
100             return -E_SCHEMA_PARSE_FAIL;
101         }
102         state = STATE_TRANSFER[state][id];
103         if (state < 0) {
104             LOGD("Split Schema Attribute err, err state [%d]", state);
105             return -E_SCHEMA_PARSE_FAIL;
106         }
107         switch (state) {
108             case 1: // state 1 :Indicates that only type information is currently available
109                 outAttrString[0].push_back(inAttrString[i]);
110                 break;
111             case 3: // state 3 :Gets the NOT_NULL keyword
112                 outAttrString[1] = SchemaConstant::KEYWORD_ATTR_NOT_NULL;
113                 break;
114             case 7: // state 7 :Contains complete information
115                 // Get default string. Now transfer matrix can ensure > 1, but you should pay attention when fix it
116                 if (i <= 1) {
117                     LOGE("default string size must be over 1.");
118                     return -E_SCHEMA_PARSE_FAIL;
119                 }
120                 outAttrString[2] = inAttrString.substr(i - 1); // 2 is index
121                 return E_OK;
122             default:
123                 break;
124         }
125     }
126     // Only these states are legal, The meaning of the state can be seen in the matrix STATE_TRANSFER explanation
127     if (!(state == 1 || state == 3 || state == 7)) { // 1 is state; 3 is state; 7 is state;
128         LOGD("Split Schema Attribute err, err state [%d]", state);
129         return -E_SCHEMA_PARSE_FAIL;
130     }
131     return E_OK;
132 }
133 
TransToBool(const std::string & defaultContent,SchemaAttribute & outAttr)134 int SchemaUtils::TransToBool(const std::string &defaultContent, SchemaAttribute &outAttr)
135 {
136     // Have been trim
137     if (defaultContent.compare(SchemaConstant::KEYWORD_ATTR_VALUE_TRUE) == 0) {
138         outAttr.defaultValue.boolValue = true;
139         return E_OK;
140     } else if (defaultContent.compare(SchemaConstant::KEYWORD_ATTR_VALUE_FALSE) == 0) {
141         outAttr.defaultValue.boolValue = false;
142         return E_OK;
143     }
144     LOGE("Default value can not transform to bool!!");
145     return -E_SCHEMA_PARSE_FAIL;
146 }
147 
TransToString(const std::string & defaultContent,SchemaAttribute & outAttr)148 int SchemaUtils::TransToString(const std::string &defaultContent, SchemaAttribute &outAttr)
149 {
150     // Have been trim, Strip leading and trailing '
151     if (defaultContent.size() > 1 && defaultContent.front() == '\'' && defaultContent.back() == '\'') {
152         outAttr.defaultValue.stringValue = defaultContent.substr(1, defaultContent.size() - 2); // 2: trim trailing
153         if (outAttr.defaultValue.stringValue.size() > SchemaConstant::SCHEMA_DEFAULT_STRING_SIZE_LIMIT) {
154             return -E_SCHEMA_PARSE_FAIL;
155         }
156         return E_OK;
157     }
158     LOGE("Substandard format! Default value can not transform to string!!");
159     return -E_SCHEMA_PARSE_FAIL;
160 }
161 
TransToInteger(const std::string & defaultContent,SchemaAttribute & outAttr)162 int SchemaUtils::TransToInteger(const std::string &defaultContent, SchemaAttribute &outAttr)
163 {
164     // defaultContent can not be null
165     if (defaultContent.empty()) {
166         return -E_SCHEMA_PARSE_FAIL;
167     }
168     int transRes = strtol(defaultContent.c_str(), nullptr, 10); // 10: decimal
169     std::string resReview = std::to_string(transRes);
170     if (defaultContent.compare(defaultContent.find_first_not_of("+- "), defaultContent.size(),
171         resReview, resReview.find_first_not_of("+- "), resReview.size()) == 0) {
172         // Check the sign of the number
173         if ((defaultContent[0] == '-' && resReview[0] == '-') ||
174             (defaultContent[0] != '-' && resReview[0] != '-') ||
175             transRes == 0) {
176             outAttr.defaultValue.integerValue = transRes;
177             return E_OK;
178         }
179     }
180     LOGE("Default value can not transform to Integer!!");
181     return -E_SCHEMA_PARSE_FAIL;
182 }
183 
TransToLong(const std::string & defaultContent,SchemaAttribute & outAttr)184 int SchemaUtils::TransToLong(const std::string &defaultContent, SchemaAttribute &outAttr)
185 {
186     // defaultContent can not be null
187     if (defaultContent.empty()) {
188         return -E_SCHEMA_PARSE_FAIL;
189     }
190     int64_t transRes = strtoll(defaultContent.c_str(), nullptr, 10); // 10: decimal
191     std::string resReview = std::to_string(transRes);
192     if (defaultContent.compare(defaultContent.find_first_not_of("+- "), defaultContent.size(),
193         resReview, resReview.find_first_not_of("+- "), resReview.size()) == 0) {
194         // Check the sign of the number
195         if ((defaultContent[0] == '-' && resReview[0] == '-') ||
196             (defaultContent[0] != '-' && resReview[0] != '-') ||
197             transRes == 0) {
198             outAttr.defaultValue.longValue = transRes;
199             return E_OK;
200         }
201     }
202 
203     LOGE("Default value[%s] can not transform to LONG!!", resReview.c_str());
204     return -E_SCHEMA_PARSE_FAIL;
205 }
206 
TransToDouble(const std::string & defaultContent,SchemaAttribute & outAttr)207 int SchemaUtils::TransToDouble(const std::string &defaultContent, SchemaAttribute &outAttr)
208 {
209     // defaultContent can not be null
210     if (defaultContent.empty()) {
211         return -E_SCHEMA_PARSE_FAIL;
212     }
213 
214     // Disable scientific notation
215     int dotCount = 0;
216     for (const auto &iter : defaultContent) {
217         if (!(std::isdigit(iter) || iter == '.' || iter == '-' || iter == '+')) {
218             LOGE("Default value to double, exist invalid symbol[%c]", iter);
219             return -E_SCHEMA_PARSE_FAIL;
220         }
221         if (iter == '.') {
222             dotCount++;
223         }
224         if (dotCount > 1) {
225             LOGE("Default value to double, exist invalid extra dot");
226             return -E_SCHEMA_PARSE_FAIL;
227         }
228     }
229 
230     char *end = nullptr;
231     double transRes = std::strtod(defaultContent.c_str(), &end);
232     // Double exist problems with accuracy, overflow is subject to the legality of the c++ conversion.
233     if (transRes > -HUGE_VAL && transRes < HUGE_VAL && std::isfinite(transRes)) {
234         // Cleared blank
235         if (end != &defaultContent.back() + 1) {
236             LOGD("Termination of parsing due to exception symbol");
237             return -E_SCHEMA_PARSE_FAIL;
238         }
239         outAttr.defaultValue.doubleValue = transRes;
240         return E_OK;
241     }
242     LOGE("Default value can not transform to double, overflow double max!");
243     return -E_SCHEMA_PARSE_FAIL;
244 }
245 
TransformDefaultValue(std::string & defaultContent,SchemaAttribute & outAttr)246 int SchemaUtils::TransformDefaultValue(std::string &defaultContent, SchemaAttribute &outAttr)
247 {
248     TrimFiled(defaultContent);
249     if (defaultContent.compare(SchemaConstant::KEYWORD_ATTR_VALUE_NULL) == 0 && outAttr.hasNotNullConstraint) {
250         LOGE("NOT NULL and DEFAULT null Simultaneously");
251         return -E_SCHEMA_PARSE_FAIL;
252     } else if (defaultContent.compare(SchemaConstant::KEYWORD_ATTR_VALUE_NULL) == 0) {
253         outAttr.hasDefaultValue = false;
254         return E_OK;
255     }
256 
257     int errCode = E_OK;
258     switch (outAttr.type) {
259         case FieldType::LEAF_FIELD_BOOL:
260             errCode = TransToBool(defaultContent, outAttr);
261             break;
262         case FieldType::LEAF_FIELD_INTEGER:
263             errCode = TransToInteger(defaultContent, outAttr);
264             break;
265         case FieldType::LEAF_FIELD_LONG:
266             errCode = TransToLong(defaultContent, outAttr);
267             break;
268         case FieldType::LEAF_FIELD_DOUBLE:
269             errCode = TransToDouble(defaultContent, outAttr);
270             break;
271         case FieldType::LEAF_FIELD_STRING:
272             errCode = TransToString(defaultContent, outAttr);
273             break;
274         default:
275             LOGE("Unrecognized or unsupported type, please check!!");
276             errCode = -E_SCHEMA_PARSE_FAIL;
277             break;
278     }
279 
280     LOGD("SchemaAttribute type is [%d], transfer result is [%d]", static_cast<int>(outAttr.type), errCode);
281     return errCode;
282 }
283 
ParseAndCheckSchemaAttribute(const std::string & inAttrString,SchemaAttribute & outAttr,bool useAffinity)284 int SchemaUtils::ParseAndCheckSchemaAttribute(const std::string &inAttrString, SchemaAttribute &outAttr,
285     bool useAffinity)
286 {
287     if (inAttrString.empty()) {
288         return -E_SCHEMA_PARSE_FAIL;
289     }
290     std::string tempinAttrString = inAttrString;
291     TrimFiled(tempinAttrString);
292 
293     std::vector<std::string> attrContext;
294     int errCode = SplitSchemaAttribute(inAttrString, attrContext);
295     if (errCode != E_OK) {
296         LOGD("Syntax error, please check!");
297         return errCode;
298     }
299     errCode = ParseSchemaAttribute(attrContext, outAttr, useAffinity);
300     if (errCode != E_OK) {
301         LOGD("Grammatical error, please check!");
302         return errCode;
303     }
304 
305     return E_OK;
306 }
307 
ParseSchemaAttribute(std::vector<std::string> & attrContext,SchemaAttribute & outAttr,bool useAffinity)308 int SchemaUtils::ParseSchemaAttribute(std::vector<std::string> &attrContext, SchemaAttribute &outAttr, bool useAffinity)
309 {
310     // Currently supported types
311     static const std::map<std::string, FieldType> FIELD_TYPE_DIC = {
312         {SchemaConstant::KEYWORD_TYPE_BOOL, FieldType::LEAF_FIELD_BOOL},
313         {SchemaConstant::KEYWORD_TYPE_INTEGER, FieldType::LEAF_FIELD_INTEGER},
314         {SchemaConstant::KEYWORD_TYPE_LONG, FieldType::LEAF_FIELD_LONG},
315         {SchemaConstant::KEYWORD_TYPE_DOUBLE, FieldType::LEAF_FIELD_DOUBLE},
316         {SchemaConstant::KEYWORD_TYPE_STRING, FieldType::LEAF_FIELD_STRING},
317     };
318 
319     // After split attribute? attrContext include 3 type field
320     if (attrContext.size() < 3) {
321         LOGE("No parsing preprocessing!!");
322         return -E_SCHEMA_PARSE_FAIL;
323     }
324     TrimFiled(attrContext[0]);
325     if (!useAffinity) {
326         if (FIELD_TYPE_DIC.find(attrContext[0]) == FIELD_TYPE_DIC.end()) {
327             LOGE("Errno schema field type [%s]!!", attrContext[0].c_str());
328             return -E_SCHEMA_PARSE_FAIL;
329         } else {
330             outAttr.type = FIELD_TYPE_DIC.at(attrContext[0]);
331         }
332     } else {
333         outAttr.type = FieldType::LEAF_FIELD_NULL;
334         outAttr.customFieldType = attrContext[0];
335     }
336 
337     outAttr.hasNotNullConstraint = !attrContext[1].empty();
338 
339     // if DEFAULT value context exist, fix hasDefaultValue flag, 2nd represents the default value
340     if (attrContext[2].empty()) {
341         outAttr.hasDefaultValue = false;
342     } else {
343         outAttr.hasDefaultValue = true;
344         int errCode = TransformDefaultValue(attrContext[2], outAttr); // 2nd element is DEFAULT value
345         if (errCode != E_OK) {
346             LOGE("Default value is malformed!!");
347             return -E_SCHEMA_PARSE_FAIL;
348         }
349     }
350     return E_OK;
351 }
352 
353 namespace {
354 // Check prefix and attempt to find any illegal, returns E_OK if nothing illegal and an hasPrefix indicator.
CheckDollarDotPrefix(const std::string & inPathStr,bool & hasPrefix)355 int CheckDollarDotPrefix(const std::string &inPathStr, bool &hasPrefix)
356 {
357     if (inPathStr.empty()) {
358         return -E_SCHEMA_PARSE_FAIL;
359     }
360     if (inPathStr.size() >= std::string("$.").size()) {
361         // In this case, $. prefix may exist, but also may not exist.
362         if (inPathStr[0] == '$' && inPathStr[1] == '.') { // 1 for second char
363             // $. prefix may exist
364             hasPrefix = true;
365             return E_OK;
366         }
367         if (inPathStr[0] == '$' && inPathStr[1] != '.') { // 1 for second char
368             return -E_SCHEMA_PARSE_FAIL;
369         }
370         if (inPathStr[1] == '$') { // 1 for second char
371             return -E_SCHEMA_PARSE_FAIL;
372         }
373     }
374     // here, inPathStr not empty, has at least one char, should not begin with '.'
375     if (inPathStr[0] == '.') {
376         return -E_SCHEMA_PARSE_FAIL;
377     }
378     hasPrefix = false;
379     return E_OK;
380 }
381 }
382 
ParseAndCheckFieldPath(const std::string & inPathString,FieldPath & outPath,bool permitPrefix)383 int SchemaUtils::ParseAndCheckFieldPath(const std::string &inPathString, FieldPath &outPath, bool permitPrefix)
384 {
385     std::string tempInPathString = inPathString;
386     TrimFiled(tempInPathString);
387     bool hasPrefix = false;
388     int errCode = CheckDollarDotPrefix(tempInPathString, hasPrefix);
389     if (errCode != E_OK) {
390         LOGE("CheckDollarDotPrefix Fail.");
391         return errCode;
392     }
393 
394     if (!permitPrefix && hasPrefix) {
395         LOGE("Not permit $. prefix.");
396         return -E_SCHEMA_PARSE_FAIL;
397     }
398 
399     if (!hasPrefix) {
400         tempInPathString = std::string("$.") + tempInPathString;
401     }
402 
403     for (size_t curPos = 1; curPos < tempInPathString.size();) {
404         if (curPos + 1 == tempInPathString.size()) {
405             LOGE("Dot at end will generate empty illegal path!");
406             return -E_SCHEMA_PARSE_FAIL;
407         }
408         size_t nextPointPos = tempInPathString.find_first_of(".", curPos + 1);
409         outPath.push_back(tempInPathString.substr(curPos + 1, nextPointPos - curPos - 1));
410         curPos = nextPointPos;
411     }
412 
413     if (outPath.size() > SchemaConstant::SCHEMA_FEILD_PATH_DEPTH_MAX) {
414         LOGE("Parse Schema Index  depth illegality!");
415         return -E_SCHEMA_PARSE_FAIL;
416     }
417 
418     for (const auto &iter : outPath) {
419         if (CheckFieldName(iter) != E_OK) {
420             LOGE("Parse Schema Index field illegality!");
421             return -E_SCHEMA_PARSE_FAIL;
422         }
423     }
424     return E_OK;
425 }
426 
CheckFieldName(const FieldName & inName)427 int SchemaUtils::CheckFieldName(const FieldName &inName)
428 {
429     if (inName.empty() || inName.size() > SchemaConstant::SCHEMA_FEILD_NAME_LENGTH_MAX) {
430         LOGE("Schema FieldName have invalid size!");
431         return -E_SCHEMA_PARSE_FAIL;
432     }
433 
434     // The first letter must be a number or an underscore
435     if (!(std::isalpha(inName[0]) || inName[0] == '_')) {
436         LOGE("Schema FieldName begin with un support symbol!");
437         return -E_SCHEMA_PARSE_FAIL;
438     }
439 
440     // Must consist of numeric underscore letters
441     for (const auto &iter : inName) {
442         if (!(IsLegalFieldCharacter(iter))) {
443             LOGE("Schema FieldName exist un support symbol!");
444             return -E_SCHEMA_PARSE_FAIL;
445         }
446     }
447 
448     return E_OK;
449 }
450 
Strip(const std::string & inString)451 std::string SchemaUtils::Strip(const std::string &inString)
452 {
453     std::string stripRes = inString;
454     TrimFiled(stripRes);
455     return stripRes;
456 }
457 
StripNameSpace(const std::string & inFullName)458 std::string SchemaUtils::StripNameSpace(const std::string &inFullName)
459 {
460     auto pos = inFullName.find_last_of('.');
461     if (pos == std::string::npos) { // No '.', so no namespace
462         return inFullName;
463     }
464     return inFullName.substr(pos + 1);
465 }
466 
FieldTypeString(FieldType inType)467 std::string SchemaUtils::FieldTypeString(FieldType inType)
468 {
469     static std::map<FieldType, std::string> fieldTypeMapString = {
470         {FieldType::LEAF_FIELD_NULL, "NULL"},
471         {FieldType::LEAF_FIELD_BOOL, "BOOL"},
472         {FieldType::LEAF_FIELD_INTEGER, "INTEGER"},
473         {FieldType::LEAF_FIELD_LONG, "LONG"},
474         {FieldType::LEAF_FIELD_DOUBLE, "DOUBLE"},
475         {FieldType::LEAF_FIELD_STRING, "STRING"},
476         {FieldType::LEAF_FIELD_ARRAY, "ARRAY"},
477         {FieldType::LEAF_FIELD_OBJECT, "LEAF_OBJECT"},
478         {FieldType::INTERNAL_FIELD_OBJECT, "INTERNAL_OBJECT"},
479     };
480     return fieldTypeMapString[inType];
481 }
482 
SchemaTypeString(SchemaType inType)483 std::string SchemaUtils::SchemaTypeString(SchemaType inType)
484 {
485     static std::map<SchemaType, std::string> schemaTypeMapString {
486         {SchemaType::NONE, "NONE"},
487         {SchemaType::JSON, "JSON-SCHEMA"},
488         {SchemaType::FLATBUFFER, "FLATBUFFER-SCHEMA"},
489         {SchemaType::RELATIVE, "RELATIVE"},
490         {SchemaType::UNRECOGNIZED, "UNRECOGNIZED"},
491     };
492     return schemaTypeMapString[inType];
493 }
494 
FieldPathString(const FieldPath & inPath)495 std::string SchemaUtils::FieldPathString(const FieldPath &inPath)
496 {
497     std::string outString = "$";
498     for (const auto &entry : inPath) {
499         outString += ".";
500         outString += entry;
501     }
502     return outString;
503 }
504 
TransTrackerSchemaToLower(const TrackerSchema & srcSchema,TrackerSchema & destSchema)505 void SchemaUtils::TransTrackerSchemaToLower(const TrackerSchema &srcSchema, TrackerSchema &destSchema)
506 {
507     std::string tableName(srcSchema.tableName.length(), ' ');
508     std::transform(srcSchema.tableName.begin(), srcSchema.tableName.end(), tableName.begin(), ::tolower);
509     destSchema.tableName = tableName;
510     std::string extendName(srcSchema.extendColName.length(), ' ');
511     std::transform(srcSchema.extendColName.begin(), srcSchema.extendColName.end(), extendName.begin(), ::tolower);
512     destSchema.extendColName = extendName;
513     for (const auto &srcName : srcSchema.trackerColNames) {
514         std::string colName(srcName.length(), ' ');
515         std::transform(srcName.begin(), srcName.end(), colName.begin(), ::tolower);
516         destSchema.trackerColNames.insert(colName);
517     }
518 }
519 } // namespace DistributedDB
520