1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <random>
16 #include <openssl/rand.h>
17 #include "distributeddb_data_generator.h"
18 
19 namespace DistributedDBDataGenerator {
GenerateRecord(unsigned int keyNo,DistributedDB::Entry & entry,std::vector<uint8_t> keyPrefix)20 void GenerateRecord(unsigned int keyNo, DistributedDB::Entry &entry,
21     std::vector<uint8_t> keyPrefix)
22 {
23     std::string cntStr = std::to_string(keyNo);
24     entry.key = keyPrefix;
25     entry.value = { 'v' };
26     for (auto cntStrIt = cntStr.begin(); cntStrIt != cntStr.end(); ++cntStrIt) {
27         entry.key.push_back(*cntStrIt);
28         entry.value.push_back(*cntStrIt);
29     }
30 }
31 
// Appends the 62 alphanumeric characters to charSet in the order '0'-'9', 'A'-'Z', 'a'-'z'.
// Elements already present in charSet are kept.
void GenerateCharSet(std::vector<uint8_t> &charSet)
{
    // Appends every character code in the inclusive range [first, last].
    const auto appendRange = [&charSet](uint8_t first, uint8_t last) {
        for (uint8_t code = first; code <= last; ++code) {
            charSet.push_back(code);
        }
    };
    appendRange('0', '9');
    appendRange('A', 'Z');
    appendRange('a', 'z');
}
44 
GenerateAlphaNumUnderlineCharSet(std::vector<uint8_t> & charSet)45 void GenerateAlphaNumUnderlineCharSet(std::vector<uint8_t> &charSet)
46 {
47     GenerateCharSet(charSet);
48     charSet.push_back('_');
49 }
50 
// Appends the special characters \ / & ^ % # - (in that order) to charSet.
// Elements already present in charSet are kept.
void GenerateSpecialCharSet(std::vector<uint8_t> &charSet)
{
    const char specials[] = "\\/&^%#-";
    // Walk the literal up to (not including) its terminating NUL.
    for (const char *cursor = specials; *cursor != '\0'; ++cursor) {
        charSet.push_back(static_cast<uint8_t>(*cursor));
    }
}
61 
GenerateFixedLenRandString(unsigned int neededLen,RandType randType,std::string & genString)62 void GenerateFixedLenRandString(unsigned int neededLen, RandType randType, std::string &genString)
63 {
64     genString.clear();
65     std::vector<uint8_t> charSet;
66     if (randType == RandType::ALPHA_NUM) {
67         GenerateCharSet(charSet);
68     } else if (randType == RandType::ALPHA_NUM_UNDERLINE) {
69         GenerateAlphaNumUnderlineCharSet(charSet);
70     } else if (randType == RandType::SPECIAL) {
71         GenerateSpecialCharSet(charSet);
72     }
73 
74     for (unsigned int index = 0; index < neededLen; ++index) {
75         // the randIdx range is from 0 to (charSet.length() - 1) which is the elements quantity of charSet
76         int randIdx = GetRandInt(0, charSet.size() - 1);
77         genString.push_back(charSet[randIdx]);
78     }
79 }
80 
GenerateRandRecord(DistributedDB::Entry & entry,EntrySize & entrySize,unsigned int keyNo)81 void GenerateRandRecord(DistributedDB::Entry &entry, EntrySize &entrySize, unsigned int keyNo)
82 {
83     std::string cntStr = std::to_string(keyNo);
84     unsigned int len = cntStr.length();
85     if ((entrySize.keySize < len) || (entrySize.valSize < len)) {
86         MST_LOG("ERROR:The size of key or value given is too small!");
87         return;
88     }
89     std::vector<uint8_t> charSet;
90     GenerateCharSet(charSet);
91     for (unsigned int i = 0; i < entrySize.keySize - len; i++) {
92         int seed = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
93         entry.key.push_back(charSet[seed]);
94     }
95     if (entrySize.valSize < ONE_K_LONG_STRING) {
96         for (unsigned int i = 0; i < entrySize.valSize - len; i++) {
97             int seed = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
98             entry.value.assign(entrySize.valSize - len, charSet[seed]);
99         }
100     } else {
101         int seed = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
102         entry.value.assign(entrySize.valSize - len, charSet[seed]);
103     }
104     for (auto ch = cntStr.begin(); ch != cntStr.end(); ch++) {
105         entry.key.push_back(*ch);
106         entry.value.push_back(*ch);
107     }
108 }
109 
GenerateLongRecord(unsigned int keyNo,DistributedDB::Entry & entry,const std::vector<uint8_t> & keyPrefix)110 void GenerateLongRecord(unsigned int keyNo, DistributedDB::Entry &entry,
111     const std::vector<uint8_t> &keyPrefix)
112 {
113     std::string cntStr = std::to_string(keyNo);
114     entry.key = keyPrefix;
115     entry.value.assign(ONE_K_LONG_STRING, 'v');
116     for (auto cntStrIt = cntStr.begin(); cntStrIt != cntStr.end(); ++cntStrIt) {
117         entry.key.push_back(*cntStrIt);
118         entry.value.push_back(*cntStrIt);
119     }
120 }
121 
// Generates recordNum small records numbered start, start + 1, ... via GenerateRecord.
// Each record is appended to entriesBatch and its key to allKeys; neither container is cleared.
void GenerateRecords(unsigned int recordNum, unsigned int start, std::vector<DistributedDB::Key> &allKeys,
    std::vector<DistributedDB::Entry> &entriesBatch, const std::vector<uint8_t> keyPrifix)
{
    const unsigned int stop = start + recordNum;
    DistributedDB::Entry record;
    unsigned int seq = start;
    while (seq < stop) {
        GenerateRecord(seq, record, keyPrifix);
        allKeys.push_back(record.key);
        entriesBatch.push_back(record);
        ++seq;
    }
}
132 
GenerateMaxBigRecord(unsigned int keyNo,DistributedDB::Entry & entry,const std::vector<uint8_t> & keyPrefix,unsigned int num)133 void GenerateMaxBigRecord(unsigned int keyNo, DistributedDB::Entry &entry,
134     const std::vector<uint8_t> &keyPrefix, unsigned int num)
135 {
136     std::string cntStr = std::to_string(keyNo);
137     entry.key = keyPrefix;
138     entry.value.assign(FOUR_M_LONG_STRING, ('v' - num));
139     for (auto cntStrIt = cntStr.begin(); cntStrIt != cntStr.end(); ++cntStrIt) {
140         entry.key.push_back(*cntStrIt);
141     }
142     std::vector<uint8_t> keyTail;
143     keyTail.assign(ONE_K_LONG_STRING - entry.key.size(), ('k' + num));
144     for (auto iter = keyTail.begin(); iter != keyTail.end(); ++iter) {
145         entry.key.push_back(*iter);
146     }
147 }
148 
// Generates recordNum very large records numbered start, start + 1, ... with key prefix
// K_SEARCH_3, appending each record to entriesBatch and its key to allKeys.
// Returns false without generating anything when recordNum exceeds (255 - 'k'), because the
// per-record key fill character 'k' + offset would no longer fit in one byte; true otherwise.
bool GenerateMaxBigRecords(unsigned int recordNum, unsigned int start,
    std::vector<DistributedDB::Key> &allKeys, std::vector<DistributedDB::Entry> &entriesBatch)
{
    if (recordNum > (255 - 'k')) { // acs ii code has 255 element
        MST_LOG("Record generate failed, character is over ASCII, please use other method !");
        return false;
    }

    const unsigned int stop = start + recordNum;
    DistributedDB::Entry record;
    for (unsigned int seq = start; seq < stop; ++seq) {
        // (seq - start) is the zero-based offset used to vary the fill characters.
        GenerateMaxBigRecord(seq, record, K_SEARCH_3, (seq - start));
        allKeys.push_back(record.key);
        entriesBatch.push_back(record);
    }
    return true;
}
165 
// Generates recordNum long-value records numbered start, start + 1, ... via GenerateLongRecord
// with key prefix K_SEARCH_5. Each record is appended to entriesBatch and its key to allKeys.
void GenerateTenThousandRecords(unsigned int recordNum, unsigned int start,
    std::vector<DistributedDB::Key> &allKeys, std::vector<DistributedDB::Entry> &entriesBatch)
{
    const unsigned int stop = start + recordNum;
    DistributedDB::Entry record;
    unsigned int seq = start;
    while (seq < stop) {
        GenerateLongRecord(seq, record, K_SEARCH_5);
        allKeys.push_back(record.key);
        entriesBatch.push_back(record);
        ++seq;
    }
}
176 
GenerateNormalAsciiRecords(DistributedDB::Entry & entry)177 void GenerateNormalAsciiRecords(DistributedDB::Entry &entry)
178 {
179     entry.key.clear();
180     entry.value.clear();
181     for (uint8_t lowc = 'a'; lowc <= 'z'; ++lowc) {
182         entry.key.push_back(lowc);
183         entry.value.push_back(lowc);
184     }
185     for (uint8_t bigc = 'A'; bigc <= 'Z'; ++bigc) {
186         entry.key.push_back(bigc);
187         entry.value.push_back(bigc);
188     }
189     for (uint8_t numc = '0'; numc <= '9'; ++numc) {
190         entry.key.push_back(numc);
191         entry.value.push_back(numc);
192     }
193 }
194 
GenerateFullAsciiRecords(DistributedDB::Entry & entry)195 void GenerateFullAsciiRecords(DistributedDB::Entry &entry)
196 {
197     entry.key.clear();
198     entry.value.clear();
199     for (uint8_t lowc = 255; lowc > 0; --lowc) { // acs ii code has 255 element
200         entry.key.push_back(lowc);
201         entry.value.push_back(lowc);
202     }
203 }
204 
GenerateBiggistKeyRecords(DistributedDB::Entry & entry)205 void GenerateBiggistKeyRecords(DistributedDB::Entry &entry)
206 {
207     entry.key.clear();
208     entry.value.clear();
209     for (auto lowc = ONE_K_LONG_STRING; lowc > 0; --lowc) {
210         entry.key.push_back(lowc);
211     }
212     entry.value.push_back('v');
213 }
214 
GenerateFixedLenKVRecord(unsigned int serialNo,unsigned int keyLen,uint8_t keyFilledChr,unsigned int valueLen,uint8_t valueFilledChr)215 DistributedDB::Entry GenerateFixedLenKVRecord(unsigned int serialNo, unsigned int keyLen, uint8_t keyFilledChr,
216     unsigned int valueLen, uint8_t valueFilledChr)
217 {
218     DistributedDB::Entry entry;
219     std::string serialNoStr = std::to_string(serialNo);
220     entry.key.assign(keyLen - serialNoStr.length(), keyFilledChr);
221     entry.value.assign(valueLen - serialNoStr.length(), valueFilledChr);
222     for (unsigned int index = 0; index < serialNoStr.size(); ++index) {
223         entry.key.push_back(serialNoStr[index]);
224         entry.value.push_back(serialNoStr[index]);
225     }
226     return entry;
227 }
228 
GenerateFixedRecords(std::vector<DistributedDB::Entry> & entries,std::vector<DistributedDB::Key> & allKeys,int recordNum,unsigned int keySize,unsigned int valSize)229 void GenerateFixedRecords(std::vector<DistributedDB::Entry> &entries,
230     std::vector<DistributedDB::Key> &allKeys, int recordNum, unsigned int keySize, unsigned int valSize)
231 {
232     DistributedDB::Entry entry;
233     for (int cnt = DEFAULT_START; cnt <= recordNum; ++cnt) {
234         std::string cntStr = std::to_string(cnt);
235         int len = cntStr.length();
236         entry.key.assign((keySize - len), 'k');
237         entry.value.assign((valSize - len), 'v');
238         for (auto cntIt = cntStr.begin(); cntIt != cntStr.end(); ++cntIt) {
239             entry.key.push_back(*cntIt);
240             entry.value.push_back(*cntIt);
241         }
242         allKeys.push_back(entry.key);
243         entries.push_back(entry);
244         entry.key.clear();
245         entry.value.clear();
246     }
247 }
248 
GenerateOneRecordForImage(int entryNo,const EntrySize & entrySize,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & val,DistributedDB::Entry & entry)249 void GenerateOneRecordForImage(int entryNo, const EntrySize &entrySize,
250     const std::vector<uint8_t> &keyPrefix, const std::vector<uint8_t> &val, DistributedDB::Entry &entry)
251 {
252     std::vector<uint8_t> charSet;
253     GenerateCharSet(charSet);
254     std::string ind = std::to_string(entryNo);
255     unsigned int len = ind.length();
256     for (auto ch = IMAGE_VALUE_PRE.begin(); ch != IMAGE_VALUE_PRE.end(); ch++) {
257         entry.value.push_back(*ch);
258     }
259     if ((entrySize.keySize < len) || (entrySize.valSize < (len + IMAGE_VALUE_PRE.size()))) {
260         MST_LOG("ERROR:The size of key or value given is too small!");
261         return;
262     }
263     entry.key = keyPrefix;
264     for (unsigned int cnt = 0; cnt < (entrySize.keySize - len - keyPrefix.size()); cnt++) {
265         entry.key.push_back(charSet[GetRandInt(0, 61)]); // randrom in 61 of 0-9,A-Z,a-z.
266     }
267     for (unsigned int it = 0; it < (entrySize.valSize - len - IMAGE_VALUE_PRE.size()); it++) {
268         entry.value.push_back(val[0]);
269     }
270     entry.key.insert(entry.key.end(), ind.begin(), ind.end());
271     entry.value.insert(entry.value.end(), ind.begin(), ind.end());
272 }
273 
GenerateRecordsForImage(std::vector<DistributedDB::Entry> & entries,EntrySize & entrySize,int num,std::vector<uint8_t> keyPrefix,std::vector<uint8_t> val)274 void GenerateRecordsForImage(std::vector<DistributedDB::Entry> &entries, EntrySize &entrySize,
275     int num, std::vector<uint8_t> keyPrefix, std::vector<uint8_t> val)
276 {
277     for (int index = 1; index <= num; index++) {
278         DistributedDB::Entry entry;
279         GenerateOneRecordForImage(index, entrySize, keyPrefix, val, entry);
280         entries.push_back(entry);
281     }
282 }
283 
GenerateAppointPrefixAndSizeRecord(int recordNo,const EntrySize & entrySize,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & valPrefix,DistributedDB::Entry & entry)284 void GenerateAppointPrefixAndSizeRecord(int recordNo, const EntrySize &entrySize,
285     const std::vector<uint8_t> &keyPrefix, const std::vector<uint8_t> &valPrefix, DistributedDB::Entry &entry)
286 {
287     std::string recNo = std::to_string(recordNo);
288     unsigned int len = recNo.length();
289     if ((entrySize.keySize < keyPrefix.size() + len) || (entrySize.valSize < valPrefix.size() + len)) {
290         MST_LOG("ERROR:The size of key or value given is too small!");
291         return;
292     }
293     entry.key = keyPrefix;
294     entry.value = valPrefix;
295     entry.key.insert(entry.key.end(), entrySize.keySize - keyPrefix.size() - len, '0');
296     entry.value.insert(entry.value.end(), entrySize.valSize - valPrefix.size() - len, '0');
297 
298     entry.key.insert(entry.key.end(), recNo.begin(), recNo.end());
299     entry.value.insert(entry.value.end(), recNo.begin(), recNo.end());
300 }
301 
GenerateAppointPrefixAndSizeRecords(std::vector<DistributedDB::Entry> & entries,const EntrySize & entrySize,int num,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & valPrefix)302 void GenerateAppointPrefixAndSizeRecords(std::vector<DistributedDB::Entry> &entries, const EntrySize &entrySize,
303     int num, const std::vector<uint8_t> &keyPrefix, const std::vector<uint8_t> &valPrefix)
304 {
305     entries.clear();
306     DistributedDB::Entry entry;
307     for (int index = 1; index <= num; index++) {
308         GenerateAppointPrefixAndSizeRecord(index, entrySize, keyPrefix, valPrefix, entry);
309         entries.push_back(entry);
310     }
311 }
312 
GenerateAppointPrefixAndSizeRecords(std::vector<DistributedDB::Entry> & entries,int startpoint,const NumberSize param,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & valPrefix)313 void GenerateAppointPrefixAndSizeRecords(std::vector<DistributedDB::Entry> &entries, int startpoint,
314     const NumberSize param, const std::vector<uint8_t> &keyPrefix, const std::vector<uint8_t> &valPrefix)
315 {
316     entries.clear();
317     DistributedDB::Entry entry;
318     for (int index = startpoint; index < startpoint + param.recordsNumber; index++) {
319         GenerateAppointPrefixAndSizeRecord(index, param.entrySize, keyPrefix, valPrefix, entry);
320         entries.push_back(entry);
321     }
322 }
323 
// Returns a uniformly distributed random integer in the closed range [randMin, randMax].
// If the bounds are given in the wrong order they are swapped, because
// std::uniform_int_distribution requires its lower bound not to exceed its upper bound.
int GetRandInt(const int randMin, const int randMax)
{
    // One engine per thread, seeded once from the random device: constructing a random_device
    // and a Mersenne Twister on every call is expensive, and callers draw one value per byte.
    static thread_local std::mt19937 genRand{std::random_device{}()};
    const int low = (randMin <= randMax) ? randMin : randMax;
    const int high = (randMin <= randMax) ? randMax : randMin;
    std::uniform_int_distribution<int> disRand(low, high);
    return disRand(genRand);
}
331 
GenerateFixedLenRandRecords(std::vector<DistributedDB::Entry> & entries,std::vector<DistributedDB::Key> & allKeys,int recordNum,unsigned int keySize,unsigned int valSize)332 void GenerateFixedLenRandRecords(std::vector<DistributedDB::Entry> &entries,
333     std::vector<DistributedDB::Key> &allKeys, int recordNum, unsigned int keySize, unsigned int valSize)
334 {
335     entries.clear();
336     allKeys.clear();
337     int idx = 0;
338     DistributedDB::Entry entry;
339     std::vector<uint8_t> charSet;
340     GenerateCharSet(charSet);
341     for (int cnt = DEFAULT_START; cnt <= recordNum; ++cnt) {
342         std::string cntStr = std::to_string(cnt);
343         int len = cntStr.length();
344         entry.key.push_back('k');
345         entry.value.push_back('v');
346         for (unsigned int operCnt = 0; operCnt < keySize - len - 1; ++operCnt) {
347             idx = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
348             entry.key.push_back(charSet[idx]);
349         }
350         for (unsigned int operCnt = 0; operCnt < valSize - len - 1; ++operCnt) {
351             idx = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
352             entry.value.push_back(charSet[idx]);
353         }
354         entry.key.insert(entry.key.end(), cntStr.begin(), cntStr.end());
355         entry.value.insert(entry.value.end(), cntStr.begin(), cntStr.end());
356 
357         allKeys.push_back(entry.key);
358         entries.push_back(entry);
359         entry.key.clear();
360         entry.value.clear();
361     }
362 }
363 
GenerateFixedLenRandRecords(std::vector<DistributedDB::Key> & allKeys,int recordNum,const EntrySize & entrySize,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & valPrefix)364 std::vector<DistributedDB::Entry> GenerateFixedLenRandRecords(std::vector<DistributedDB::Key> &allKeys,
365     int recordNum, const EntrySize &entrySize, const std::vector<uint8_t> &keyPrefix,
366     const std::vector<uint8_t> &valPrefix)
367 {
368     std::vector<DistributedDB::Entry> entries;
369     allKeys.clear();
370     int idx = 0;
371     DistributedDB::Entry entry;
372     std::vector<uint8_t> charSet;
373     GenerateCharSet(charSet);
374     for (int cnt = DEFAULT_START; cnt <= recordNum; ++cnt) {
375         std::string cntStr = std::to_string(cnt);
376         int len = cntStr.length();
377         entry.key = keyPrefix;
378         entry.value = valPrefix;
379 
380         entry.key.insert(entry.key.end(), entrySize.keySize - keyPrefix.size() - len, '0');
381 
382         for (unsigned int operCnt = 0; operCnt < entrySize.valSize - valPrefix.size() - len; ++operCnt) {
383             idx = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
384             entry.value.push_back(charSet[idx]);
385         }
386 
387         entry.key.insert(entry.key.end(), cntStr.begin(), cntStr.end());
388         entry.value.insert(entry.value.end(), cntStr.begin(), cntStr.end());
389 
390         allKeys.push_back(entry.key);
391         entries.push_back(entry);
392         entry.key.clear();
393         entry.value.clear();
394     }
395     return entries;
396 }
397 
GetDbType(const int type)398 const std::string GetDbType(const int type)
399 {
400     switch(type) {
401         case UNENCRYPTED_DISK_DB:
402             return std::string("UnencrpytedDiskDB");
403         case ENCRYPTED_DISK_DB:
404             return std::string("EncrpytedDiskDB");
405         case MEMORY_DB:
406             return std::string("MemoryDB");
407         default:
408             return std::string("ErrorType");
409     }
410 }
411 
GenerateRandomRecords(std::vector<DistributedDB::Entry> & entries,EntrySize & entrySize,int num)412 void GenerateRandomRecords(std::vector<DistributedDB::Entry> &entries, EntrySize &entrySize, int num)
413 {
414     for (int index = 0; index < num; index++) {
415         DistributedDB::Entry entry;
416         entry.key.resize(entrySize.keySize);
417         RAND_bytes(entry.key.data(), entrySize.keySize);
418         entry.value.resize(entrySize.valSize);
419         RAND_bytes(entry.value.data(), entrySize.valSize);
420         entries.push_back(entry);
421     }
422 }
423 // Get long schema define with long default x or fields' num
GetLongSchemaDefine(LongDefine & param,std::string & longDefine)424 void GetLongSchemaDefine(LongDefine &param, std::string &longDefine)
425 {
426     std::vector<std::string> defaultStr;
427     std::string longString;
428     for (int index = 1; index <= param.recordNum; index++) {
429         std::string ind = std::to_string(index);
430         int len = ind.length();
431         if (param.recordSize < (len + 1)) {
432             MST_LOG("ERROR:The size of key or value given is too small!");
433             return;
434         }
435         longString.assign(param.recordSize - len, param.prefix);
436         longString.append(ind);
437         defaultStr.push_back(longString);
438         longString.clear();
439     }
440     longDefine.append("{");
441     for (int index = 0; index < param.recordNum; index++) {
442         longDefine = longDefine + "\"field" + std::to_string(index) + "\":" + "\"STRING,NOT NULL,DEFAULT " + \
443             "'" + defaultStr[index] + "'\",";
444     }
445     longDefine.erase(longDefine.size() - 1, 1);
446     longDefine.append("}");
447     MST_LOG("longDefine.size() is %zu", longDefine.size());
448 }
// Splices the given parts into one schema JSON string:
//   {"SCHEMA_VERSION":"<version>","SCHEMA_MODE":"<mode>","SCHEMA_DEFINE":<define>
//    [,"SCHEMA_INDEXES":<index>][,"SCHEMA_SKIPSIZE":<skipSize>]}
// version and mode are wrapped in double quotes; define, index and skipSize are inserted
// verbatim. The SCHEMA_INDEXES / SCHEMA_SKIPSIZE members are emitted only when the
// corresponding argument is not empty.
const std::string SpliceToSchema(const std::string &version, const std::string &mode,
    const std::string &define, const std::string &index, const std::string &skipSize)
{
    std::string schema = "{";
    schema += "\"SCHEMA_VERSION\":\"" + version + "\",";
    schema += "\"SCHEMA_MODE\":\"" + mode + "\",";
    schema += "\"SCHEMA_DEFINE\":" + define;
    if (!index.empty()) {
        schema += ",\"SCHEMA_INDEXES\":" + index;
    }
    if (!skipSize.empty()) {
        schema += ",\"SCHEMA_SKIPSIZE\":" + skipSize;
    }
    schema += "}";
    return schema;
}
469 // the size of field is 64B, of DEFAULT x is 4K, all of them is valid
GenerateLongValidSchema(Schema & validSchema,std::vector<std::string> & schema)470 void GenerateLongValidSchema(Schema &validSchema, std::vector<std::string> &schema)
471 {
472     std::string validLongSchema;
473     LongDefine param;
474     param.recordNum = ONE_RECORD;
475     param.recordSize = FOUR_K_LONG_STRING;
476     param.prefix = 'k';
477     GetLongSchemaDefine(param, validLongSchema);
478     validLongSchema.replace(2, 6, KEY_SIXTYFOUR_BYTE, 'a'); // the 6 str starting at 2 is being replaced.
479     std::string splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
480         validLongSchema, validSchema.index.at(0));
481     schema.push_back(splicSchema);
482 }
483 // the num of field is 257 with repeat field name, the num of index is 32, but it is valid
GenerateLargeValidSchema(Schema & validSchema,std::vector<std::string> & schema)484 void GenerateLargeValidSchema(Schema &validSchema, std::vector<std::string> &schema)
485 {
486     std::string validLargeSchema;
487     LongDefine param;
488     param.recordNum = TWO_FIVE_SIX_RECORDS;
489     param.recordSize = KEY_SIX_BYTE;
490     param.prefix = 'k';
491     GetLongSchemaDefine(param, validLargeSchema);
492     validLargeSchema.erase(validLargeSchema.size() - 1, 1);
493     validLargeSchema.append(",\"field0\":\"STRING,NOT NULL,DEFAULT 'kkkkk1'\"}");
494     std::string splicSchema, largeIndexRes, largeIndex;
495     for (int index = 0; index < KEY_THIRTYTWO_BYTE; index++) {
496         largeIndexRes = largeIndexRes + "\"$.field" + std::to_string(index) + "\",";
497     }
498     largeIndexRes.erase(largeIndexRes.size() - 1, 1);
499     largeIndex = largeIndex + "[" + largeIndexRes + "]";
500     splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
501         validLargeSchema, largeIndex);
502     schema.push_back(splicSchema);
503 }
504 
GetValidSchema(Schema & validSchema,bool hasIndex)505 std::vector<std::string> GetValidSchema(Schema &validSchema, bool hasIndex)
506 {
507     std::vector<std::string> schema;
508     for (auto iter1 = validSchema.version.begin(); iter1 != validSchema.version.end(); iter1++) {
509         for (auto iter2 = validSchema.mode.begin(); iter2 != validSchema.mode.end(); iter2++) {
510             std::string splicSchema;
511             if (hasIndex) {
512                 for (auto iter3 = validSchema.index.begin(); iter3 != validSchema.index.end(); iter3++) {
513                     splicSchema = SpliceToSchema(*iter1, *iter2, validSchema.define.at(0), *iter3);
514                     schema.push_back(splicSchema);
515                 }
516             } else {
517                 for (auto iter3 = validSchema.define.begin(); iter3 != validSchema.define.end(); iter3++) {
518                     splicSchema = SpliceToSchema(*iter1, *iter2, *iter3, validSchema.index.at(0));
519                     schema.push_back(splicSchema);
520                 }
521             }
522         }
523     }
524     GenerateLongValidSchema(validSchema, schema);
525     GenerateLargeValidSchema(validSchema, schema);
526     std::string schemaWithoutIndex;
527     schemaWithoutIndex = schemaWithoutIndex + "{" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
528         "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "," +
529         "\"SCHEMA_DEFINE\"" + ":" + validSchema.define.at(0) + "}";
530     schema.push_back(schemaWithoutIndex);
531     MST_LOG("The number of valid schema is %zd", schema.size());
532     return schema;
533 }
534 
GetLongIndex(Schema & validSchema,std::vector<std::string> & schema)535 void GetLongIndex(Schema &validSchema, std::vector<std::string> &schema)
536 {
537     std::string validLargeSchema, largeIndexRes, largeIndex, splicSchema;
538     LongDefine param;
539     param.recordNum = FIFTY_RECORDS;
540     param.recordSize = KEY_SIX_BYTE;
541     param.prefix = 'k';
542     GetLongSchemaDefine(param, validLargeSchema);
543     for (int index = 0; index <= KEY_THIRTYTWO_BYTE; index++) {
544         largeIndexRes = largeIndexRes + "\"$.field" + std::to_string(index) + "\",";
545     }
546     largeIndexRes.erase(largeIndexRes.size() - 1, 1);
547     largeIndex = largeIndex + "[" + largeIndexRes + "]";
548     splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
549         validLargeSchema, largeIndex);
550     schema.push_back(splicSchema);
551 }
GenarateOtherInvalidSchema(Schema & validSchema,std::map<int,std::vector<std::string>> & result)552 void GenarateOtherInvalidSchema(Schema &validSchema, std::map<int, std::vector<std::string>> &result)
553 {
554     // exist no Metafield or lack of Metafield
555     std::string invalidSchema;
556     std::vector<std::string> schema;
557     invalidSchema = invalidSchema + "{" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
558         "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "," +
559         "\"SCHEMA_DEFINE\"" + ":"  + validSchema.define.at(0) + "," +
560         "\"SCHEMA_NOTE\"" + ":" + "[]" + "}";
561     schema.push_back(invalidSchema);
562     invalidSchema.clear();
563     invalidSchema = invalidSchema + "{" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
564         "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "}";
565     schema.push_back(invalidSchema);
566 
567     // the schema is invalid that is a Json object
568     invalidSchema.clear();
569     invalidSchema = invalidSchema + "[" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
570         "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "]";
571     schema.push_back(invalidSchema);
572 
573     // if the schema is \",nullptr,space,tab or enter or other not match Json, it is invalid
574     schema.push_back("\"");
575     schema.push_back(" ");
576     schema.push_back("\t");
577     schema.push_back("\r");
578     invalidSchema.clear();
579     invalidSchema = invalidSchema + "{" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
580         "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "," +
581         "\"SCHEMA_DEFINE\"" + ":"  + validSchema.define.at(0) + "," + "}";
582     schema.push_back(invalidSchema);
583 
584     // if the schema's size is over 512K, it is invalid
585     invalidSchema.clear();
586     LongDefine param;
587     param.recordNum = TWO_FIVE_SIX_RECORDS;
588     param.recordSize = TWO_K_LONG_STRING;
589     param.prefix = 'k';
590     GetLongSchemaDefine(param, invalidSchema);
591     std::string splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
592         invalidSchema, validSchema.index.at(0));
593     schema.push_back(splicSchema);
594 
595     // if the num of index is over 32, it is invalid
596     GetLongIndex(validSchema, schema);
597     result[4] = schema; // this invalid scenes' index is 4.
598 }
599 
GetInvalidSchema(Schema & invalidSchema,Schema & validSchema,bool hasIndex)600 std::map<int, std::vector<std::string>> GetInvalidSchema(Schema &invalidSchema, Schema &validSchema, bool hasIndex)
601 {
602     std::map<int, std::vector<std::string>> result;
603     std::vector<std::string> schema;
604     std::string splicSchema;
605     for (auto iter = invalidSchema.version.begin(); iter != invalidSchema.version.end(); iter++) {
606         splicSchema = SpliceToSchema(*iter, validSchema.mode.at(0), validSchema.define.at(0), validSchema.index.at(0));
607         schema.push_back(splicSchema);
608     }
609     result[0] = schema;
610     schema.clear();
611     for (auto iter = invalidSchema.mode.begin(); iter != invalidSchema.mode.end(); iter++) {
612         splicSchema = SpliceToSchema(validSchema.version.at(0), *iter, validSchema.define.at(0),
613             validSchema.index.at(0));
614         schema.push_back(splicSchema);
615     }
616     result[1] = schema;
617     schema.clear();
618     if (hasIndex) {
619         for (auto iter = invalidSchema.index.begin(); iter != invalidSchema.index.end(); iter++) {
620             splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
621                 validSchema.define.at(0), *iter);
622             schema.push_back(splicSchema);
623         }
624         result[3] = schema; // 3 is the invalid SCHEMA_INDEX.
625     } else {
626         for (auto iter = invalidSchema.define.begin(); iter != invalidSchema.define.end(); iter++) {
627             splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0), *iter, "[]");
628             schema.push_back(splicSchema);
629         }
630         result[2] = schema; // 2 is the invalid SCHEMA_DEFINE.
631     }
632     GenarateOtherInvalidSchema(validSchema, result);
633     MST_LOG("The number of invalid schema is %zd", result[0].size() + result[1].size() +
634         result[2].size() + result[3].size() + result[4].size()); // 1, 2, 3, 4 are the index of invalid field.
635     return result;
636 }
637 } // end of namespace DistributedDBDataGenerator