1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include <random>
16 #include <openssl/rand.h>
17 #include "distributeddb_data_generator.h"
18
19 namespace DistributedDBDataGenerator {
GenerateRecord(unsigned int keyNo,DistributedDB::Entry & entry,std::vector<uint8_t> keyPrefix)20 void GenerateRecord(unsigned int keyNo, DistributedDB::Entry &entry,
21 std::vector<uint8_t> keyPrefix)
22 {
23 std::string cntStr = std::to_string(keyNo);
24 entry.key = keyPrefix;
25 entry.value = { 'v' };
26 for (auto cntStrIt = cntStr.begin(); cntStrIt != cntStr.end(); ++cntStrIt) {
27 entry.key.push_back(*cntStrIt);
28 entry.value.push_back(*cntStrIt);
29 }
30 }
31
// Appends the 62 alphanumeric ASCII bytes to charSet in the order '0'-'9', 'A'-'Z', 'a'-'z'.
// Existing elements of charSet are kept; the new bytes are added after them.
void GenerateCharSet(std::vector<uint8_t> &charSet)
{
    // Appends every byte of the inclusive range [first, last]. The counter is wider than
    // uint8_t so the loop condition stays simple.
    auto appendRange = [&charSet](uint8_t first, uint8_t last) {
        for (unsigned int code = first; code <= last; ++code) {
            charSet.push_back(static_cast<uint8_t>(code));
        }
    };
    appendRange('0', '9');
    appendRange('A', 'Z');
    appendRange('a', 'z');
}
44
GenerateAlphaNumUnderlineCharSet(std::vector<uint8_t> & charSet)45 void GenerateAlphaNumUnderlineCharSet(std::vector<uint8_t> &charSet)
46 {
47 GenerateCharSet(charSet);
48 charSet.push_back('_');
49 }
50
// Appends the seven punctuation bytes \ / & ^ % # - to charSet, in that order.
// Existing elements of charSet are kept.
void GenerateSpecialCharSet(std::vector<uint8_t> &charSet)
{
    static const uint8_t specialChars[] = { '\\', '/', '&', '^', '%', '#', '-' };
    for (const uint8_t special : specialChars) {
        charSet.push_back(special);
    }
}
61
GenerateFixedLenRandString(unsigned int neededLen,RandType randType,std::string & genString)62 void GenerateFixedLenRandString(unsigned int neededLen, RandType randType, std::string &genString)
63 {
64 genString.clear();
65 std::vector<uint8_t> charSet;
66 if (randType == RandType::ALPHA_NUM) {
67 GenerateCharSet(charSet);
68 } else if (randType == RandType::ALPHA_NUM_UNDERLINE) {
69 GenerateAlphaNumUnderlineCharSet(charSet);
70 } else if (randType == RandType::SPECIAL) {
71 GenerateSpecialCharSet(charSet);
72 }
73
74 for (unsigned int index = 0; index < neededLen; ++index) {
75 // the randIdx range is from 0 to (charSet.length() - 1) which is the elements quantity of charSet
76 int randIdx = GetRandInt(0, charSet.size() - 1);
77 genString.push_back(charSet[randIdx]);
78 }
79 }
80
GenerateRandRecord(DistributedDB::Entry & entry,EntrySize & entrySize,unsigned int keyNo)81 void GenerateRandRecord(DistributedDB::Entry &entry, EntrySize &entrySize, unsigned int keyNo)
82 {
83 std::string cntStr = std::to_string(keyNo);
84 unsigned int len = cntStr.length();
85 if ((entrySize.keySize < len) || (entrySize.valSize < len)) {
86 MST_LOG("ERROR:The size of key or value given is too small!");
87 return;
88 }
89 std::vector<uint8_t> charSet;
90 GenerateCharSet(charSet);
91 for (unsigned int i = 0; i < entrySize.keySize - len; i++) {
92 int seed = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
93 entry.key.push_back(charSet[seed]);
94 }
95 if (entrySize.valSize < ONE_K_LONG_STRING) {
96 for (unsigned int i = 0; i < entrySize.valSize - len; i++) {
97 int seed = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
98 entry.value.assign(entrySize.valSize - len, charSet[seed]);
99 }
100 } else {
101 int seed = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
102 entry.value.assign(entrySize.valSize - len, charSet[seed]);
103 }
104 for (auto ch = cntStr.begin(); ch != cntStr.end(); ch++) {
105 entry.key.push_back(*ch);
106 entry.value.push_back(*ch);
107 }
108 }
109
GenerateLongRecord(unsigned int keyNo,DistributedDB::Entry & entry,const std::vector<uint8_t> & keyPrefix)110 void GenerateLongRecord(unsigned int keyNo, DistributedDB::Entry &entry,
111 const std::vector<uint8_t> &keyPrefix)
112 {
113 std::string cntStr = std::to_string(keyNo);
114 entry.key = keyPrefix;
115 entry.value.assign(ONE_K_LONG_STRING, 'v');
116 for (auto cntStrIt = cntStr.begin(); cntStrIt != cntStr.end(); ++cntStrIt) {
117 entry.key.push_back(*cntStrIt);
118 entry.value.push_back(*cntStrIt);
119 }
120 }
121
// Generates recordNum short records numbered start, start + 1, ... via GenerateRecord and appends
// each record to entriesBatch and its key to allKeys. Neither output vector is cleared first.
void GenerateRecords(unsigned int recordNum, unsigned int start, std::vector<DistributedDB::Key> &allKeys,
    std::vector<DistributedDB::Entry> &entriesBatch, const std::vector<uint8_t> keyPrifix)
{
    const unsigned int stop = start + recordNum;
    DistributedDB::Entry record;
    unsigned int recordNo = start;
    while (recordNo < stop) {
        GenerateRecord(recordNo, record, keyPrifix);
        allKeys.push_back(record.key);
        entriesBatch.push_back(record);
        ++recordNo;
    }
}
132
GenerateMaxBigRecord(unsigned int keyNo,DistributedDB::Entry & entry,const std::vector<uint8_t> & keyPrefix,unsigned int num)133 void GenerateMaxBigRecord(unsigned int keyNo, DistributedDB::Entry &entry,
134 const std::vector<uint8_t> &keyPrefix, unsigned int num)
135 {
136 std::string cntStr = std::to_string(keyNo);
137 entry.key = keyPrefix;
138 entry.value.assign(FOUR_M_LONG_STRING, ('v' - num));
139 for (auto cntStrIt = cntStr.begin(); cntStrIt != cntStr.end(); ++cntStrIt) {
140 entry.key.push_back(*cntStrIt);
141 }
142 std::vector<uint8_t> keyTail;
143 keyTail.assign(ONE_K_LONG_STRING - entry.key.size(), ('k' + num));
144 for (auto iter = keyTail.begin(); iter != keyTail.end(); ++iter) {
145 entry.key.push_back(*iter);
146 }
147 }
148
// Generates recordNum maximum-size records numbered from `start` (key prefix K_SEARCH_3) and
// appends each record to entriesBatch and its key to allKeys.
// Each record's position in the batch is handed to GenerateMaxBigRecord, which adds it to 'k' to
// pick the key fill byte, so recordNum is limited to 255 - 'k'.
// Returns false (after logging) when recordNum exceeds that limit, true otherwise.
bool GenerateMaxBigRecords(unsigned int recordNum, unsigned int start,
    std::vector<DistributedDB::Key> &allKeys, std::vector<DistributedDB::Entry> &entriesBatch)
{
    if (recordNum > (255 - 'k')) { // 255 is the largest value one byte can hold
        MST_LOG("Record generate failed, character is over ASCII, please use other method !");
        return false;
    }

    const unsigned int stop = start + recordNum;
    DistributedDB::Entry record;
    for (unsigned int recordNo = start; recordNo < stop; ++recordNo) {
        GenerateMaxBigRecord(recordNo, record, K_SEARCH_3, (recordNo - start));
        allKeys.push_back(record.key);
        entriesBatch.push_back(record);
    }
    return true;
}
165
// Generates recordNum long-value records numbered from `start` via GenerateLongRecord (key prefix
// K_SEARCH_5) and appends each record to entriesBatch and its key to allKeys.
void GenerateTenThousandRecords(unsigned int recordNum, unsigned int start,
    std::vector<DistributedDB::Key> &allKeys, std::vector<DistributedDB::Entry> &entriesBatch)
{
    const unsigned int stop = start + recordNum;
    DistributedDB::Entry record;
    unsigned int recordNo = start;
    while (recordNo < stop) {
        GenerateLongRecord(recordNo, record, K_SEARCH_5);
        allKeys.push_back(record.key);
        entriesBatch.push_back(record);
        ++recordNo;
    }
}
176
GenerateNormalAsciiRecords(DistributedDB::Entry & entry)177 void GenerateNormalAsciiRecords(DistributedDB::Entry &entry)
178 {
179 entry.key.clear();
180 entry.value.clear();
181 for (uint8_t lowc = 'a'; lowc <= 'z'; ++lowc) {
182 entry.key.push_back(lowc);
183 entry.value.push_back(lowc);
184 }
185 for (uint8_t bigc = 'A'; bigc <= 'Z'; ++bigc) {
186 entry.key.push_back(bigc);
187 entry.value.push_back(bigc);
188 }
189 for (uint8_t numc = '0'; numc <= '9'; ++numc) {
190 entry.key.push_back(numc);
191 entry.value.push_back(numc);
192 }
193 }
194
GenerateFullAsciiRecords(DistributedDB::Entry & entry)195 void GenerateFullAsciiRecords(DistributedDB::Entry &entry)
196 {
197 entry.key.clear();
198 entry.value.clear();
199 for (uint8_t lowc = 255; lowc > 0; --lowc) { // acs ii code has 255 element
200 entry.key.push_back(lowc);
201 entry.value.push_back(lowc);
202 }
203 }
204
GenerateBiggistKeyRecords(DistributedDB::Entry & entry)205 void GenerateBiggistKeyRecords(DistributedDB::Entry &entry)
206 {
207 entry.key.clear();
208 entry.value.clear();
209 for (auto lowc = ONE_K_LONG_STRING; lowc > 0; --lowc) {
210 entry.key.push_back(lowc);
211 }
212 entry.value.push_back('v');
213 }
214
GenerateFixedLenKVRecord(unsigned int serialNo,unsigned int keyLen,uint8_t keyFilledChr,unsigned int valueLen,uint8_t valueFilledChr)215 DistributedDB::Entry GenerateFixedLenKVRecord(unsigned int serialNo, unsigned int keyLen, uint8_t keyFilledChr,
216 unsigned int valueLen, uint8_t valueFilledChr)
217 {
218 DistributedDB::Entry entry;
219 std::string serialNoStr = std::to_string(serialNo);
220 entry.key.assign(keyLen - serialNoStr.length(), keyFilledChr);
221 entry.value.assign(valueLen - serialNoStr.length(), valueFilledChr);
222 for (unsigned int index = 0; index < serialNoStr.size(); ++index) {
223 entry.key.push_back(serialNoStr[index]);
224 entry.value.push_back(serialNoStr[index]);
225 }
226 return entry;
227 }
228
GenerateFixedRecords(std::vector<DistributedDB::Entry> & entries,std::vector<DistributedDB::Key> & allKeys,int recordNum,unsigned int keySize,unsigned int valSize)229 void GenerateFixedRecords(std::vector<DistributedDB::Entry> &entries,
230 std::vector<DistributedDB::Key> &allKeys, int recordNum, unsigned int keySize, unsigned int valSize)
231 {
232 DistributedDB::Entry entry;
233 for (int cnt = DEFAULT_START; cnt <= recordNum; ++cnt) {
234 std::string cntStr = std::to_string(cnt);
235 int len = cntStr.length();
236 entry.key.assign((keySize - len), 'k');
237 entry.value.assign((valSize - len), 'v');
238 for (auto cntIt = cntStr.begin(); cntIt != cntStr.end(); ++cntIt) {
239 entry.key.push_back(*cntIt);
240 entry.value.push_back(*cntIt);
241 }
242 allKeys.push_back(entry.key);
243 entries.push_back(entry);
244 entry.key.clear();
245 entry.value.clear();
246 }
247 }
248
GenerateOneRecordForImage(int entryNo,const EntrySize & entrySize,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & val,DistributedDB::Entry & entry)249 void GenerateOneRecordForImage(int entryNo, const EntrySize &entrySize,
250 const std::vector<uint8_t> &keyPrefix, const std::vector<uint8_t> &val, DistributedDB::Entry &entry)
251 {
252 std::vector<uint8_t> charSet;
253 GenerateCharSet(charSet);
254 std::string ind = std::to_string(entryNo);
255 unsigned int len = ind.length();
256 for (auto ch = IMAGE_VALUE_PRE.begin(); ch != IMAGE_VALUE_PRE.end(); ch++) {
257 entry.value.push_back(*ch);
258 }
259 if ((entrySize.keySize < len) || (entrySize.valSize < (len + IMAGE_VALUE_PRE.size()))) {
260 MST_LOG("ERROR:The size of key or value given is too small!");
261 return;
262 }
263 entry.key = keyPrefix;
264 for (unsigned int cnt = 0; cnt < (entrySize.keySize - len - keyPrefix.size()); cnt++) {
265 entry.key.push_back(charSet[GetRandInt(0, 61)]); // randrom in 61 of 0-9,A-Z,a-z.
266 }
267 for (unsigned int it = 0; it < (entrySize.valSize - len - IMAGE_VALUE_PRE.size()); it++) {
268 entry.value.push_back(val[0]);
269 }
270 entry.key.insert(entry.key.end(), ind.begin(), ind.end());
271 entry.value.insert(entry.value.end(), ind.begin(), ind.end());
272 }
273
GenerateRecordsForImage(std::vector<DistributedDB::Entry> & entries,EntrySize & entrySize,int num,std::vector<uint8_t> keyPrefix,std::vector<uint8_t> val)274 void GenerateRecordsForImage(std::vector<DistributedDB::Entry> &entries, EntrySize &entrySize,
275 int num, std::vector<uint8_t> keyPrefix, std::vector<uint8_t> val)
276 {
277 for (int index = 1; index <= num; index++) {
278 DistributedDB::Entry entry;
279 GenerateOneRecordForImage(index, entrySize, keyPrefix, val, entry);
280 entries.push_back(entry);
281 }
282 }
283
GenerateAppointPrefixAndSizeRecord(int recordNo,const EntrySize & entrySize,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & valPrefix,DistributedDB::Entry & entry)284 void GenerateAppointPrefixAndSizeRecord(int recordNo, const EntrySize &entrySize,
285 const std::vector<uint8_t> &keyPrefix, const std::vector<uint8_t> &valPrefix, DistributedDB::Entry &entry)
286 {
287 std::string recNo = std::to_string(recordNo);
288 unsigned int len = recNo.length();
289 if ((entrySize.keySize < keyPrefix.size() + len) || (entrySize.valSize < valPrefix.size() + len)) {
290 MST_LOG("ERROR:The size of key or value given is too small!");
291 return;
292 }
293 entry.key = keyPrefix;
294 entry.value = valPrefix;
295 entry.key.insert(entry.key.end(), entrySize.keySize - keyPrefix.size() - len, '0');
296 entry.value.insert(entry.value.end(), entrySize.valSize - valPrefix.size() - len, '0');
297
298 entry.key.insert(entry.key.end(), recNo.begin(), recNo.end());
299 entry.value.insert(entry.value.end(), recNo.begin(), recNo.end());
300 }
301
GenerateAppointPrefixAndSizeRecords(std::vector<DistributedDB::Entry> & entries,const EntrySize & entrySize,int num,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & valPrefix)302 void GenerateAppointPrefixAndSizeRecords(std::vector<DistributedDB::Entry> &entries, const EntrySize &entrySize,
303 int num, const std::vector<uint8_t> &keyPrefix, const std::vector<uint8_t> &valPrefix)
304 {
305 entries.clear();
306 DistributedDB::Entry entry;
307 for (int index = 1; index <= num; index++) {
308 GenerateAppointPrefixAndSizeRecord(index, entrySize, keyPrefix, valPrefix, entry);
309 entries.push_back(entry);
310 }
311 }
312
GenerateAppointPrefixAndSizeRecords(std::vector<DistributedDB::Entry> & entries,int startpoint,const NumberSize param,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & valPrefix)313 void GenerateAppointPrefixAndSizeRecords(std::vector<DistributedDB::Entry> &entries, int startpoint,
314 const NumberSize param, const std::vector<uint8_t> &keyPrefix, const std::vector<uint8_t> &valPrefix)
315 {
316 entries.clear();
317 DistributedDB::Entry entry;
318 for (int index = startpoint; index < startpoint + param.recordsNumber; index++) {
319 GenerateAppointPrefixAndSizeRecord(index, param.entrySize, keyPrefix, valPrefix, entry);
320 entries.push_back(entry);
321 }
322 }
323
// Returns a uniformly distributed random integer in the closed range [randMin, randMax].
// The bounds may be given in either order: std::uniform_int_distribution requires low <= high,
// so a reversed pair is swapped rather than passed through as undefined behaviour.
int GetRandInt(const int randMin, const int randMax)
{
    // Seed one engine per thread on first use. The generators in this file call this once per
    // generated byte, so constructing a random_device and a fresh mt19937 on every call is
    // needlessly expensive.
    thread_local std::mt19937 genRand(std::random_device{}());

    const int low = (randMin <= randMax) ? randMin : randMax;
    const int high = (randMin <= randMax) ? randMax : randMin;
    std::uniform_int_distribution<int> disRand(low, high);
    return disRand(genRand);
}
331
GenerateFixedLenRandRecords(std::vector<DistributedDB::Entry> & entries,std::vector<DistributedDB::Key> & allKeys,int recordNum,unsigned int keySize,unsigned int valSize)332 void GenerateFixedLenRandRecords(std::vector<DistributedDB::Entry> &entries,
333 std::vector<DistributedDB::Key> &allKeys, int recordNum, unsigned int keySize, unsigned int valSize)
334 {
335 entries.clear();
336 allKeys.clear();
337 int idx = 0;
338 DistributedDB::Entry entry;
339 std::vector<uint8_t> charSet;
340 GenerateCharSet(charSet);
341 for (int cnt = DEFAULT_START; cnt <= recordNum; ++cnt) {
342 std::string cntStr = std::to_string(cnt);
343 int len = cntStr.length();
344 entry.key.push_back('k');
345 entry.value.push_back('v');
346 for (unsigned int operCnt = 0; operCnt < keySize - len - 1; ++operCnt) {
347 idx = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
348 entry.key.push_back(charSet[idx]);
349 }
350 for (unsigned int operCnt = 0; operCnt < valSize - len - 1; ++operCnt) {
351 idx = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
352 entry.value.push_back(charSet[idx]);
353 }
354 entry.key.insert(entry.key.end(), cntStr.begin(), cntStr.end());
355 entry.value.insert(entry.value.end(), cntStr.begin(), cntStr.end());
356
357 allKeys.push_back(entry.key);
358 entries.push_back(entry);
359 entry.key.clear();
360 entry.value.clear();
361 }
362 }
363
GenerateFixedLenRandRecords(std::vector<DistributedDB::Key> & allKeys,int recordNum,const EntrySize & entrySize,const std::vector<uint8_t> & keyPrefix,const std::vector<uint8_t> & valPrefix)364 std::vector<DistributedDB::Entry> GenerateFixedLenRandRecords(std::vector<DistributedDB::Key> &allKeys,
365 int recordNum, const EntrySize &entrySize, const std::vector<uint8_t> &keyPrefix,
366 const std::vector<uint8_t> &valPrefix)
367 {
368 std::vector<DistributedDB::Entry> entries;
369 allKeys.clear();
370 int idx = 0;
371 DistributedDB::Entry entry;
372 std::vector<uint8_t> charSet;
373 GenerateCharSet(charSet);
374 for (int cnt = DEFAULT_START; cnt <= recordNum; ++cnt) {
375 std::string cntStr = std::to_string(cnt);
376 int len = cntStr.length();
377 entry.key = keyPrefix;
378 entry.value = valPrefix;
379
380 entry.key.insert(entry.key.end(), entrySize.keySize - keyPrefix.size() - len, '0');
381
382 for (unsigned int operCnt = 0; operCnt < entrySize.valSize - valPrefix.size() - len; ++operCnt) {
383 idx = GetRandInt(0, 61); // the seed range is from 0 to 61 which is the elements quantity of charSet
384 entry.value.push_back(charSet[idx]);
385 }
386
387 entry.key.insert(entry.key.end(), cntStr.begin(), cntStr.end());
388 entry.value.insert(entry.value.end(), cntStr.begin(), cntStr.end());
389
390 allKeys.push_back(entry.key);
391 entries.push_back(entry);
392 entry.key.clear();
393 entry.value.clear();
394 }
395 return entries;
396 }
397
GetDbType(const int type)398 const std::string GetDbType(const int type)
399 {
400 switch(type) {
401 case UNENCRYPTED_DISK_DB:
402 return std::string("UnencrpytedDiskDB");
403 case ENCRYPTED_DISK_DB:
404 return std::string("EncrpytedDiskDB");
405 case MEMORY_DB:
406 return std::string("MemoryDB");
407 default:
408 return std::string("ErrorType");
409 }
410 }
411
GenerateRandomRecords(std::vector<DistributedDB::Entry> & entries,EntrySize & entrySize,int num)412 void GenerateRandomRecords(std::vector<DistributedDB::Entry> &entries, EntrySize &entrySize, int num)
413 {
414 for (int index = 0; index < num; index++) {
415 DistributedDB::Entry entry;
416 entry.key.resize(entrySize.keySize);
417 RAND_bytes(entry.key.data(), entrySize.keySize);
418 entry.value.resize(entrySize.valSize);
419 RAND_bytes(entry.value.data(), entrySize.valSize);
420 entries.push_back(entry);
421 }
422 }
423 // Get long schema define with long default x or fields' num
GetLongSchemaDefine(LongDefine & param,std::string & longDefine)424 void GetLongSchemaDefine(LongDefine ¶m, std::string &longDefine)
425 {
426 std::vector<std::string> defaultStr;
427 std::string longString;
428 for (int index = 1; index <= param.recordNum; index++) {
429 std::string ind = std::to_string(index);
430 int len = ind.length();
431 if (param.recordSize < (len + 1)) {
432 MST_LOG("ERROR:The size of key or value given is too small!");
433 return;
434 }
435 longString.assign(param.recordSize - len, param.prefix);
436 longString.append(ind);
437 defaultStr.push_back(longString);
438 longString.clear();
439 }
440 longDefine.append("{");
441 for (int index = 0; index < param.recordNum; index++) {
442 longDefine = longDefine + "\"field" + std::to_string(index) + "\":" + "\"STRING,NOT NULL,DEFAULT " + \
443 "'" + defaultStr[index] + "'\",";
444 }
445 longDefine.erase(longDefine.size() - 1, 1);
446 longDefine.append("}");
447 MST_LOG("longDefine.size() is %zu", longDefine.size());
448 }
// Splices the given parts into one schema JSON string:
//   {"SCHEMA_VERSION":"<version>","SCHEMA_MODE":"<mode>","SCHEMA_DEFINE":<define>
//    [,"SCHEMA_INDEXES":<index>][,"SCHEMA_SKIPSIZE":<skipSize>]}
// `version` and `mode` are wrapped in quotes; `define`, `index` and `skipSize` are inserted
// verbatim. The INDEXES / SKIPSIZE members are left out when the matching argument is empty.
const std::string SpliceToSchema(const std::string &version, const std::string &mode,
    const std::string &define, const std::string &index, const std::string &skipSize)
{
    std::string schema = "{\"SCHEMA_VERSION\":\"";
    schema += version;
    schema += "\",\"SCHEMA_MODE\":\"";
    schema += mode;
    schema += "\",\"SCHEMA_DEFINE\":";
    schema += define;
    if (!index.empty()) {
        schema += ",\"SCHEMA_INDEXES\":";
        schema += index;
    }
    if (!skipSize.empty()) {
        schema += ",\"SCHEMA_SKIPSIZE\":";
        schema += skipSize;
    }
    schema += "}";
    return schema;
}
469 // the size of field is 64B, of DEFAULT x is 4K, all of them is valid
GenerateLongValidSchema(Schema & validSchema,std::vector<std::string> & schema)470 void GenerateLongValidSchema(Schema &validSchema, std::vector<std::string> &schema)
471 {
472 std::string validLongSchema;
473 LongDefine param;
474 param.recordNum = ONE_RECORD;
475 param.recordSize = FOUR_K_LONG_STRING;
476 param.prefix = 'k';
477 GetLongSchemaDefine(param, validLongSchema);
478 validLongSchema.replace(2, 6, KEY_SIXTYFOUR_BYTE, 'a'); // the 6 str starting at 2 is being replaced.
479 std::string splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
480 validLongSchema, validSchema.index.at(0));
481 schema.push_back(splicSchema);
482 }
483 // the num of field is 257 with repeat field name, the num of index is 32, but it is valid
GenerateLargeValidSchema(Schema & validSchema,std::vector<std::string> & schema)484 void GenerateLargeValidSchema(Schema &validSchema, std::vector<std::string> &schema)
485 {
486 std::string validLargeSchema;
487 LongDefine param;
488 param.recordNum = TWO_FIVE_SIX_RECORDS;
489 param.recordSize = KEY_SIX_BYTE;
490 param.prefix = 'k';
491 GetLongSchemaDefine(param, validLargeSchema);
492 validLargeSchema.erase(validLargeSchema.size() - 1, 1);
493 validLargeSchema.append(",\"field0\":\"STRING,NOT NULL,DEFAULT 'kkkkk1'\"}");
494 std::string splicSchema, largeIndexRes, largeIndex;
495 for (int index = 0; index < KEY_THIRTYTWO_BYTE; index++) {
496 largeIndexRes = largeIndexRes + "\"$.field" + std::to_string(index) + "\",";
497 }
498 largeIndexRes.erase(largeIndexRes.size() - 1, 1);
499 largeIndex = largeIndex + "[" + largeIndexRes + "]";
500 splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
501 validLargeSchema, largeIndex);
502 schema.push_back(splicSchema);
503 }
504
GetValidSchema(Schema & validSchema,bool hasIndex)505 std::vector<std::string> GetValidSchema(Schema &validSchema, bool hasIndex)
506 {
507 std::vector<std::string> schema;
508 for (auto iter1 = validSchema.version.begin(); iter1 != validSchema.version.end(); iter1++) {
509 for (auto iter2 = validSchema.mode.begin(); iter2 != validSchema.mode.end(); iter2++) {
510 std::string splicSchema;
511 if (hasIndex) {
512 for (auto iter3 = validSchema.index.begin(); iter3 != validSchema.index.end(); iter3++) {
513 splicSchema = SpliceToSchema(*iter1, *iter2, validSchema.define.at(0), *iter3);
514 schema.push_back(splicSchema);
515 }
516 } else {
517 for (auto iter3 = validSchema.define.begin(); iter3 != validSchema.define.end(); iter3++) {
518 splicSchema = SpliceToSchema(*iter1, *iter2, *iter3, validSchema.index.at(0));
519 schema.push_back(splicSchema);
520 }
521 }
522 }
523 }
524 GenerateLongValidSchema(validSchema, schema);
525 GenerateLargeValidSchema(validSchema, schema);
526 std::string schemaWithoutIndex;
527 schemaWithoutIndex = schemaWithoutIndex + "{" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
528 "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "," +
529 "\"SCHEMA_DEFINE\"" + ":" + validSchema.define.at(0) + "}";
530 schema.push_back(schemaWithoutIndex);
531 MST_LOG("The number of valid schema is %zd", schema.size());
532 return schema;
533 }
534
GetLongIndex(Schema & validSchema,std::vector<std::string> & schema)535 void GetLongIndex(Schema &validSchema, std::vector<std::string> &schema)
536 {
537 std::string validLargeSchema, largeIndexRes, largeIndex, splicSchema;
538 LongDefine param;
539 param.recordNum = FIFTY_RECORDS;
540 param.recordSize = KEY_SIX_BYTE;
541 param.prefix = 'k';
542 GetLongSchemaDefine(param, validLargeSchema);
543 for (int index = 0; index <= KEY_THIRTYTWO_BYTE; index++) {
544 largeIndexRes = largeIndexRes + "\"$.field" + std::to_string(index) + "\",";
545 }
546 largeIndexRes.erase(largeIndexRes.size() - 1, 1);
547 largeIndex = largeIndex + "[" + largeIndexRes + "]";
548 splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
549 validLargeSchema, largeIndex);
550 schema.push_back(splicSchema);
551 }
GenarateOtherInvalidSchema(Schema & validSchema,std::map<int,std::vector<std::string>> & result)552 void GenarateOtherInvalidSchema(Schema &validSchema, std::map<int, std::vector<std::string>> &result)
553 {
554 // exist no Metafield or lack of Metafield
555 std::string invalidSchema;
556 std::vector<std::string> schema;
557 invalidSchema = invalidSchema + "{" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
558 "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "," +
559 "\"SCHEMA_DEFINE\"" + ":" + validSchema.define.at(0) + "," +
560 "\"SCHEMA_NOTE\"" + ":" + "[]" + "}";
561 schema.push_back(invalidSchema);
562 invalidSchema.clear();
563 invalidSchema = invalidSchema + "{" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
564 "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "}";
565 schema.push_back(invalidSchema);
566
567 // the schema is invalid that is a Json object
568 invalidSchema.clear();
569 invalidSchema = invalidSchema + "[" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
570 "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "]";
571 schema.push_back(invalidSchema);
572
573 // if the schema is \",nullptr,space,tab or enter or other not match Json, it is invalid
574 schema.push_back("\"");
575 schema.push_back(" ");
576 schema.push_back("\t");
577 schema.push_back("\r");
578 invalidSchema.clear();
579 invalidSchema = invalidSchema + "{" + "\"SCHEMA_VERSION\"" + ":" + "\"" + validSchema.version.at(0) +
580 "\"" + "," + "\"SCHEMA_MODE\"" + ":" + "\"" + validSchema.mode.at(0) + "\"" + "," +
581 "\"SCHEMA_DEFINE\"" + ":" + validSchema.define.at(0) + "," + "}";
582 schema.push_back(invalidSchema);
583
584 // if the schema's size is over 512K, it is invalid
585 invalidSchema.clear();
586 LongDefine param;
587 param.recordNum = TWO_FIVE_SIX_RECORDS;
588 param.recordSize = TWO_K_LONG_STRING;
589 param.prefix = 'k';
590 GetLongSchemaDefine(param, invalidSchema);
591 std::string splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
592 invalidSchema, validSchema.index.at(0));
593 schema.push_back(splicSchema);
594
595 // if the num of index is over 32, it is invalid
596 GetLongIndex(validSchema, schema);
597 result[4] = schema; // this invalid scenes' index is 4.
598 }
599
GetInvalidSchema(Schema & invalidSchema,Schema & validSchema,bool hasIndex)600 std::map<int, std::vector<std::string>> GetInvalidSchema(Schema &invalidSchema, Schema &validSchema, bool hasIndex)
601 {
602 std::map<int, std::vector<std::string>> result;
603 std::vector<std::string> schema;
604 std::string splicSchema;
605 for (auto iter = invalidSchema.version.begin(); iter != invalidSchema.version.end(); iter++) {
606 splicSchema = SpliceToSchema(*iter, validSchema.mode.at(0), validSchema.define.at(0), validSchema.index.at(0));
607 schema.push_back(splicSchema);
608 }
609 result[0] = schema;
610 schema.clear();
611 for (auto iter = invalidSchema.mode.begin(); iter != invalidSchema.mode.end(); iter++) {
612 splicSchema = SpliceToSchema(validSchema.version.at(0), *iter, validSchema.define.at(0),
613 validSchema.index.at(0));
614 schema.push_back(splicSchema);
615 }
616 result[1] = schema;
617 schema.clear();
618 if (hasIndex) {
619 for (auto iter = invalidSchema.index.begin(); iter != invalidSchema.index.end(); iter++) {
620 splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0),
621 validSchema.define.at(0), *iter);
622 schema.push_back(splicSchema);
623 }
624 result[3] = schema; // 3 is the invalid SCHEMA_INDEX.
625 } else {
626 for (auto iter = invalidSchema.define.begin(); iter != invalidSchema.define.end(); iter++) {
627 splicSchema = SpliceToSchema(validSchema.version.at(0), validSchema.mode.at(0), *iter, "[]");
628 schema.push_back(splicSchema);
629 }
630 result[2] = schema; // 2 is the invalid SCHEMA_DEFINE.
631 }
632 GenarateOtherInvalidSchema(validSchema, result);
633 MST_LOG("The number of invalid schema is %zd", result[0].size() + result[1].size() +
634 result[2].size() + result[3].size() + result[4].size()); // 1, 2, 3, 4 are the index of invalid field.
635 return result;
636 }
637 } // end of namespace DistributedDBDataGenerator