1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "blocks_diff.h"
17 #include "scope_guard.h"
18 #include <cstdio>
19 #include <iostream>
20 #include <vector>
21 #include "update_diff.h"
22
23 using namespace Hpackage;
24 using namespace std;
25 using namespace Updater;
26
27 namespace UpdatePatch {
28 #define SWAP(a, b) auto swapTmp = (a); (a) = (b); (b) = swapTmp
29 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
30 #define SET_BUFFER(y, buffer, index) \
31 (buffer)[index] = static_cast<uint8_t>((y) % 256); (y) -= (buffer)[index]; (y) = (y) / 256
32
33 constexpr uint32_t BUCKET_SIZE = 256;
34 constexpr uint32_t MULTIPLE_TWO = 2;
35 constexpr int64_t BLOCK_SCORE = 8;
36 constexpr int64_t MIN_LENGTH = 16;
37
WriteLE64(const BlockBuffer & buffer,int64_t value)38 static void WriteLE64(const BlockBuffer &buffer, int64_t value)
39 {
40 int32_t index = 0;
41 auto y = static_cast<uint64_t>((value < 0) ? -value : value);
42 SET_BUFFER(y, buffer.buffer, index);
43 index++;
44 SET_BUFFER(y, buffer.buffer, index);
45 index++;
46 SET_BUFFER(y, buffer.buffer, index);
47 index++;
48 SET_BUFFER(y, buffer.buffer, index);
49 index++;
50 SET_BUFFER(y, buffer.buffer, index);
51 index++;
52 SET_BUFFER(y, buffer.buffer, index);
53 index++;
54 SET_BUFFER(y, buffer.buffer, index);
55 index++;
56 SET_BUFFER(y, buffer.buffer, index);
57 if (value < 0) {
58 buffer.buffer[index] |= 0x80;
59 }
60 }
61
MakePatch(const std::string & oldFileName,const std::string & newFileName,const std::string & patchFileName)62 int32_t BlocksDiff::MakePatch(const std::string &oldFileName, const std::string &newFileName,
63 const std::string &patchFileName)
64 {
65 PATCH_LOGI("BlocksDiff::MakePatch %s ", patchFileName.c_str());
66 std::fstream patchFile(patchFileName, std::ios::out | std::ios::trunc | std::ios::binary);
67 if (patchFile.fail()) {
68 PATCH_LOGE("Failed to open %s", strerror(errno));
69 return -1;
70 }
71
72 MemMapInfo oldBuffer {};
73 MemMapInfo newBuffer {};
74 int32_t ret = PatchMapFile(oldFileName, oldBuffer);
75 int32_t ret1 = PatchMapFile(newFileName, newBuffer);
76 if (ret != 0 || ret1 != 0) {
77 PATCH_LOGE("Failed to open %s", newFileName.c_str());
78 return -1;
79 }
80 BlockBuffer newInfo = {newBuffer.memory, newBuffer.length};
81 BlockBuffer oldInfo = {oldBuffer.memory, oldBuffer.length};
82 std::unique_ptr<BlocksDiff> blockdiff = std::make_unique<BlocksStreamDiff>(patchFile, 0);
83 size_t patchSize = 0;
84 ret = blockdiff->MakePatch(newInfo, oldInfo, patchSize);
85 if (ret != PATCH_SUCCESS) {
86 PATCH_LOGE("Failed to generate patch");
87 return ret;
88 }
89 PATCH_DEBUG("MakePatch %zu", static_cast<size_t>(patchFile.tellp()));
90 patchFile.close();
91 PATCH_LOGI("BlocksDiff::MakePatch success %zu", patchSize);
92 return ret;
93 }
94
MakePatch(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,std::vector<uint8_t> & patchData,size_t offset,size_t & patchSize)95 int32_t BlocksDiff::MakePatch(const BlockBuffer &newInfo,
96 const BlockBuffer &oldInfo, std::vector<uint8_t> &patchData, size_t offset, size_t &patchSize)
97 {
98 if (patchData.empty()) {
99 patchData.resize(IGMDIFF_LIMIT_UNIT);
100 }
101 std::unique_ptr<BlocksDiff> blockdiff = std::make_unique<BlocksBufferDiff>(patchData, offset);
102 int32_t ret = blockdiff->MakePatch(newInfo, oldInfo, patchSize);
103 if (ret != PATCH_SUCCESS) {
104 PATCH_LOGE("Failed to generate patch");
105 return ret;
106 }
107 if (patchData.size() < patchSize) {
108 PATCH_LOGE("Failed to make block patch");
109 return -1;
110 }
111 patchData.resize(patchSize);
112 PATCH_LOGI("BlocksDiff::MakePatch success %zu", patchSize);
113 return ret;
114 }
115
MakePatch(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,std::fstream & patchFile,size_t & patchSize)116 int32_t BlocksDiff::MakePatch(const BlockBuffer &newInfo,
117 const BlockBuffer &oldInfo, std::fstream &patchFile, size_t &patchSize)
118 {
119 std::unique_ptr<BlocksDiff> blockdiff = std::make_unique<BlocksStreamDiff>(
120 patchFile, static_cast<size_t>(patchFile.tellp()));
121 int32_t ret = blockdiff->MakePatch(newInfo, oldInfo, patchSize);
122 if (ret != PATCH_SUCCESS) {
123 PATCH_LOGE("Failed to generate patch");
124 return ret;
125 }
126 PATCH_LOGI("BlocksDiff::MakePatch success %zu patchFile %zu",
127 patchSize, static_cast<size_t>(patchFile.tellp()));
128 return ret;
129 }
130
MakePatch(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,size_t & patchSize)131 int32_t BlocksDiff::MakePatch(const BlockBuffer &newInfo, const BlockBuffer &oldInfo, size_t &patchSize)
132 {
133 if (suffixArray_ == nullptr) {
134 suffixArray_.reset(new SuffixArray<int32_t>());
135 if (suffixArray_ == nullptr) {
136 PATCH_LOGE("Failed to create SuffixArray");
137 return -1;
138 }
139 suffixArray_->Init(oldInfo);
140 }
141
142 std::vector<ControlData> controlDatas;
143 int32_t ret = GetCtrlDatas(newInfo, oldInfo, controlDatas);
144 if (ret != 0) {
145 PATCH_LOGE("Failed to get control data");
146 return ret;
147 }
148
149 return WritePatchData(controlDatas, newInfo, patchSize);
150 }
151
WritePatchData(const std::vector<ControlData> & controlDatas,const BlockBuffer & newInfo,size_t & patchSize)152 int32_t BlocksDiff::WritePatchData(const std::vector<ControlData> &controlDatas,
153 const BlockBuffer &newInfo, size_t &patchSize)
154 {
155 patchSize = 0;
156 int32_t ret = WritePatchHeader(0, 0, 0, patchSize);
157 if (ret != 0) {
158 PATCH_LOGE("Failed to write patch header");
159 return ret;
160 }
161
162 size_t controlSize = patchSize;
163 ret = WriteControlData(controlDatas, patchSize);
164 if (ret != BZ_OK) {
165 PATCH_LOGE("Failed to write control data");
166 return ret;
167 }
168 controlSize = patchSize - controlSize;
169
170 // 写diff数据
171 size_t diffDataSize = patchSize;
172 ret = WriteDiffData(controlDatas, patchSize);
173 if (ret != BZ_OK) {
174 PATCH_LOGE("Failed to write diff data");
175 return ret;
176 }
177 diffDataSize = patchSize - diffDataSize;
178
179 size_t extraDataSize = patchSize;
180 ret = WriteExtraData(controlDatas, patchSize);
181 if (ret != BZ_OK) {
182 PATCH_LOGE("Failed to write extra data");
183 return ret;
184 }
185 extraDataSize = patchSize - extraDataSize;
186
187 // write real data
188 size_t headerLen = 0;
189 ret = WritePatchHeader(controlSize, diffDataSize, newInfo.length, headerLen);
190 if (ret != 0) {
191 PATCH_LOGE("Failed to write patch header");
192 return ret;
193 }
194 PATCH_DEBUG("MakePatch success patchSize:%zu controlSize:%zu diffDataSize:%zu, extraDataSize:%zu",
195 patchSize, controlSize, diffDataSize, extraDataSize);
196 return 0;
197 }
198
CreateBZip2Adapter(size_t patchOffset)199 std::unique_ptr<DeflateAdapter> BlocksBufferDiff::CreateBZip2Adapter(size_t patchOffset)
200 {
201 std::unique_ptr<DeflateAdapter> bzip2Adapter = std::make_unique<BZipBuffer2Adapter>(patchData_,
202 offset_ + patchOffset);
203 if (bzip2Adapter == nullptr) {
204 PATCH_LOGE("Failed to create bzip2Adapter");
205 return nullptr;
206 }
207 bzip2Adapter->Open();
208 return bzip2Adapter;
209 }
210
CreateBZip2Adapter(size_t patchOffset)211 std::unique_ptr<DeflateAdapter> BlocksStreamDiff::CreateBZip2Adapter(size_t patchOffset)
212 {
213 std::unique_ptr<DeflateAdapter> bzip2Adapter = std::make_unique<BZip2StreamAdapter>(stream_);
214 if (bzip2Adapter == nullptr) {
215 PATCH_LOGE("Failed to create bzip2Adapter");
216 return nullptr;
217 }
218 bzip2Adapter->Open();
219 return bzip2Adapter;
220 }
221
WritePatchHeader(int64_t controlSize,int64_t diffDataSize,int64_t newSize,size_t & headerLen)222 int32_t BlocksBufferDiff::WritePatchHeader(int64_t controlSize,
223 int64_t diffDataSize, int64_t newSize, size_t &headerLen)
224 {
225 headerLen = std::char_traits<char>::length(BSDIFF_MAGIC) + sizeof(int64_t) + sizeof(int64_t) + sizeof(int64_t);
226 if (patchData_.size() <= headerLen + offset_) {
227 PATCH_LOGE("Invalid patch size");
228 return -1;
229 }
230
231 int32_t ret = memcpy_s(patchData_.data() + offset_, patchData_.size(), BSDIFF_MAGIC,
232 std::char_traits<char>::length(BSDIFF_MAGIC));
233 if (ret != 0) {
234 PATCH_LOGE("Failed to copy magic");
235 return ret;
236 }
237 headerLen = std::char_traits<char>::length(BSDIFF_MAGIC);
238 BlockBuffer data = {patchData_.data() + offset_ + headerLen, patchData_.size()};
239 WriteLE64(data, controlSize);
240 headerLen += sizeof(int64_t);
241 BlockBuffer diffData = {patchData_.data() + offset_ + headerLen, patchData_.size()};
242 WriteLE64(diffData, diffDataSize);
243 headerLen += sizeof(int64_t);
244 BlockBuffer newData = {patchData_.data() + offset_ + headerLen, patchData_.size()};
245 WriteLE64(newData, newSize);
246 headerLen += sizeof(int64_t);
247 return 0;
248 }
249
WritePatchHeader(int64_t controlSize,int64_t diffDataSize,int64_t newSize,size_t & headerLen)250 int32_t BlocksStreamDiff::WritePatchHeader(int64_t controlSize,
251 int64_t diffDataSize, int64_t newSize, size_t &headerLen)
252 {
253 PATCH_DEBUG("WritePatchHeader %zu", static_cast<size_t>(stream_.tellp()));
254 #ifdef __aarch64__
255 if (offset_ > static_cast<size_t>(numeric_limits<std::fstream::off_type>::max())) {
256 PATCH_LOGE("offset_ error");
257 return -1;
258 }
259 #endif
260 stream_.seekp(offset_, std::ios::beg);
261 stream_.write(BSDIFF_MAGIC, std::char_traits<char>::length(BSDIFF_MAGIC));
262 PkgBuffer buffer(sizeof(int64_t));
263 WriteLE64(buffer, controlSize);
264 stream_.write(reinterpret_cast<const char*>(buffer.buffer), sizeof(int64_t));
265 WriteLE64(buffer, diffDataSize);
266 stream_.write(reinterpret_cast<const char*>(buffer.buffer), sizeof(int64_t));
267 WriteLE64(buffer, newSize);
268 stream_.write(reinterpret_cast<const char*>(buffer.buffer), sizeof(int64_t));
269 headerLen = std::char_traits<char>::length(BSDIFF_MAGIC) + sizeof(int64_t) + sizeof(int64_t) + sizeof(int64_t);
270 stream_.seekp(0, std::ios::end);
271 return 0;
272 }
273
ComputeOldScore(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,int64_t & oldScore,int64_t & matchLen)274 void BlocksDiff::ComputeOldScore(const BlockBuffer &newInfo,
275 const BlockBuffer &oldInfo, int64_t &oldScore, int64_t &matchLen)
276 {
277 int64_t newSize = static_cast<int64_t>(newInfo.length);
278 for (int64_t begin = currentOffset_ += matchLen; currentOffset_ < newSize; currentOffset_++) {
279 BlockBuffer newBuff = {newInfo.buffer + currentOffset_, newInfo.length - currentOffset_};
280 matchLen = suffixArray_->Search(newBuff, { oldInfo.buffer, oldInfo.length }, 0, oldInfo.length, matchPos_);
281 for (; begin < currentOffset_ + matchLen; begin++) {
282 if ((begin + lastOffset_ < static_cast<int64_t>(oldInfo.length))
283 && (oldInfo.buffer[begin + lastOffset_] == newInfo.buffer[begin])) {
284 oldScore++;
285 }
286 }
287 if (((matchLen == oldScore) && (matchLen != 0)) || (matchLen > (oldScore + BLOCK_SCORE))) {
288 break;
289 }
290 if ((currentOffset_ + lastOffset_ < static_cast<int64_t>(oldInfo.length)) &&
291 (oldInfo.buffer[currentOffset_ + lastOffset_] == newInfo.buffer[currentOffset_])) {
292 oldScore--;
293 }
294 }
295 }
296
ComputeLength(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,int64_t & lengthFront,int64_t & lengthBack)297 void BlocksDiff::ComputeLength(const BlockBuffer &newInfo,
298 const BlockBuffer &oldInfo, int64_t &lengthFront, int64_t &lengthBack)
299 {
300 lengthFront = 0;
301 lengthBack = 0;
302 int64_t i = 0;
303 int64_t s = 0;
304 int64_t tmp = 0;
305 while (((lastScan_ + i) < currentOffset_) && ((lastPos_ + i) < static_cast<int64_t>(oldInfo.length))) {
306 if (oldInfo.buffer[lastPos_ + i] == newInfo.buffer[lastScan_ + i]) {
307 s++;
308 }
309 i++;
310 if ((s * MULTIPLE_TWO - i) > (tmp * MULTIPLE_TWO - lengthFront)) {
311 tmp = s;
312 lengthFront = i;
313 }
314 }
315 s = 0;
316 tmp = 0;
317 if (currentOffset_ < static_cast<int64_t>(newInfo.length)) {
318 for (i = 1; (currentOffset_ >= lastScan_ + i) && (matchPos_ >= i); i++) {
319 if (oldInfo.buffer[matchPos_ - i] == newInfo.buffer[currentOffset_ - i]) {
320 s++;
321 }
322 if ((s * MULTIPLE_TWO - i) > (tmp * MULTIPLE_TWO - lengthBack)) {
323 tmp = s;
324 lengthBack = i;
325 }
326 }
327 }
328
329 if (lastScan_ + lengthFront > currentOffset_ - lengthBack) {
330 int64_t lens = 0;
331 int64_t overlap = (lastScan_ + lengthFront) - (currentOffset_ - lengthBack);
332 s = 0;
333 tmp = 0;
334 for (i = 0; i < overlap; i++) {
335 if (newInfo.buffer[lastScan_ + lengthFront - overlap + i] ==
336 oldInfo.buffer[lastPos_ + lengthFront - overlap + i]) {
337 s++;
338 }
339 if (newInfo.buffer[currentOffset_ - lengthBack + i] == oldInfo.buffer[matchPos_ - lengthBack + i]) {
340 s--;
341 }
342 if (s > tmp) {
343 tmp = s;
344 lens = i + 1;
345 }
346 }
347 lengthFront += lens - overlap;
348 lengthBack -= lens;
349 }
350 }
351
GetCtrlDatas(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,std::vector<ControlData> & controlDatas)352 int32_t BlocksDiff::GetCtrlDatas(const BlockBuffer &newInfo,
353 const BlockBuffer &oldInfo, std::vector<ControlData> &controlDatas)
354 {
355 int64_t matchLen = 0;
356 while (currentOffset_ < static_cast<int64_t>(newInfo.length)) {
357 int64_t oldScore = 0;
358 int64_t lenFront = 0;
359 int64_t lenBack = 0;
360 ComputeOldScore(newInfo, oldInfo, oldScore, matchLen);
361 if ((matchLen == oldScore) && (currentOffset_ != static_cast<int64_t>(newInfo.length))) {
362 continue;
363 }
364 ComputeLength(newInfo, oldInfo, lenFront, lenBack);
365
366 // save ctrl data
367 ControlData ctrlData;
368 ctrlData.diffLength = lenFront;
369 ctrlData.extraLength = (currentOffset_ - lenBack) - (lastScan_ + lenFront);
370 ctrlData.offsetIncrement = (matchPos_ - lenBack) - (lastPos_ + lenFront);
371 ctrlData.diffNewStart = &newInfo.buffer[lastScan_];
372 ctrlData.diffOldStart = &oldInfo.buffer[lastPos_];
373 ctrlData.extraNewStart = &newInfo.buffer[lastScan_ + lenFront];
374 controlDatas.push_back(ctrlData);
375 lastScan_ = currentOffset_ - lenBack;
376 lastPos_ = matchPos_ - lenBack;
377 lastOffset_ = matchPos_ - currentOffset_;
378 }
379 return 0;
380 }
381
WriteControlData(const std::vector<ControlData> controlDatas,size_t & patchSize)382 int32_t BlocksDiff::WriteControlData(const std::vector<ControlData> controlDatas, size_t &patchSize)
383 {
384 std::unique_ptr<DeflateAdapter> bzip2Adapter = CreateBZip2Adapter(patchSize);
385 if (bzip2Adapter == nullptr) {
386 PATCH_LOGE("Failed to create bzip2Adapter");
387 return -1;
388 }
389 bzip2Adapter->Open();
390 int32_t ret = 0;
391 uint8_t buffer[sizeof(int64_t)] = {0};
392 BlockBuffer srcData = {buffer, sizeof(int64_t)};
393 PATCH_DEBUG("WriteControlData patchSize %zu controlDatas %zu", patchSize, controlDatas.size());
394 std::vector<int64_t> data;
395 ON_SCOPE_EXIT(closeBzip2Adapter) {
396 bzip2Adapter->Close();
397 };
398 for (size_t i = 0; i < controlDatas.size(); i++) {
399 WriteLE64(srcData, controlDatas[i].diffLength);
400 ret = bzip2Adapter->WriteData(srcData);
401 if (ret != 0) {
402 PATCH_LOGE("Failed to write data");
403 return ret;
404 }
405 WriteLE64(srcData, controlDatas[i].extraLength);
406 ret = bzip2Adapter->WriteData(srcData);
407 if (ret != 0) {
408 PATCH_LOGE("Failed to write data");
409 return ret;
410 }
411 WriteLE64(srcData, controlDatas[i].offsetIncrement);
412 ret = bzip2Adapter->WriteData(srcData);
413 if (ret != 0) {
414 PATCH_LOGE("WriteControlData : Failed to write data");
415 return ret;
416 }
417 }
418 size_t dataSize = 0;
419 ret = bzip2Adapter->FlushData(dataSize);
420 if (ret != 0) {
421 PATCH_LOGE("Failed to FlushData %d", ret);
422 return ret;
423 }
424 patchSize += dataSize;
425 PATCH_DEBUG("WriteControlData exit patchSize %zu", patchSize);
426 return 0;
427 }
428
WriteDiffData(const std::vector<ControlData> controlDatas,size_t & patchSize)429 int32_t BlocksDiff::WriteDiffData(const std::vector<ControlData> controlDatas, size_t &patchSize)
430 {
431 PATCH_DEBUG("WriteDiffData patchSize %zu", patchSize);
432 std::unique_ptr<DeflateAdapter> bzip2Adapter = CreateBZip2Adapter(patchSize);
433 if (bzip2Adapter == nullptr) {
434 PATCH_LOGE("Failed to create bzip2Adapter");
435 return -1;
436 }
437 bzip2Adapter->Open();
438 ON_SCOPE_EXIT(closeBzip2Adapter) {
439 bzip2Adapter->Close();
440 };
441
442 std::vector<uint8_t> diffData(IGMDIFF_LIMIT_UNIT, 0);
443 int32_t ret = 0;
444 for (size_t i = 0; i < controlDatas.size(); i++) {
445 if (controlDatas[i].diffLength <= 0) {
446 continue;
447 }
448
449 int64_t offset = 0;
450 while (offset < controlDatas[i].diffLength) {
451 int64_t cpyLen = static_cast<int64_t>(IGMDIFF_LIMIT_UNIT);
452 cpyLen = (offset + cpyLen > controlDatas[i].diffLength) ? (controlDatas[i].diffLength - offset) : cpyLen;
453 for (int64_t k = 0; k < cpyLen; k++) {
454 diffData[k] = controlDatas[i].diffNewStart[offset + k] - controlDatas[i].diffOldStart[offset + k];
455 }
456
457 BlockBuffer srcData = {reinterpret_cast<uint8_t*>(diffData.data()), static_cast<size_t>(cpyLen)};
458 ret = bzip2Adapter->WriteData(srcData);
459 if (ret != 0) {
460 PATCH_LOGE("Failed to write data");
461 return ret;
462 }
463 offset += cpyLen;
464 }
465 }
466 size_t dataSize = 0;
467 ret = bzip2Adapter->FlushData(dataSize);
468 if (ret != 0) {
469 PATCH_LOGE("Failed to FlushData %d", ret);
470 return ret;
471 }
472 patchSize += dataSize;
473 PATCH_DEBUG("WriteDiffData exit patchSize %zu dataSize %zu ", patchSize, dataSize);
474 return 0;
475 }
476
WriteExtraData(const std::vector<ControlData> controlDatas,size_t & patchSize)477 int32_t BlocksDiff::WriteExtraData(const std::vector<ControlData> controlDatas, size_t &patchSize)
478 {
479 PATCH_DEBUG("WriteExtraData patchSize %zu ", patchSize);
480 std::unique_ptr<DeflateAdapter> bzip2Adapter = CreateBZip2Adapter(patchSize);
481 if (bzip2Adapter == nullptr) {
482 PATCH_LOGE("Failed to create bzip2Adapter");
483 return -1;
484 }
485 bzip2Adapter->Open();
486 ON_SCOPE_EXIT(closeBzip2Adapter) {
487 bzip2Adapter->Close();
488 };
489 int32_t ret = 0;
490 for (size_t i = 0; i < controlDatas.size(); i++) {
491 if (controlDatas[i].extraLength <= 0) {
492 continue;
493 }
494 BlockBuffer srcData = {controlDatas[i].extraNewStart, static_cast<size_t>(controlDatas[i].extraLength)};
495 ret = bzip2Adapter->WriteData(srcData);
496 if (ret != 0) {
497 PATCH_LOGE("WriteExtraData : Failed to write data");
498 return ret;
499 }
500 }
501 size_t dataSize = 0;
502 ret = bzip2Adapter->FlushData(dataSize);
503 if (ret != 0) {
504 PATCH_LOGE("Failed to FlushData %d", ret);
505 return ret;
506 }
507 patchSize += dataSize;
508 PATCH_DEBUG("WriteExtraData exit patchSize %zu", patchSize);
509 return 0;
510 }
511
512 template<class DataType>
Init(const BlockBuffer & oldInfo)513 void SuffixArray<DataType>::Init(const BlockBuffer &oldInfo)
514 {
515 std::vector<DataType> suffixArrayTemp;
516 std::vector<DataType> buckets;
517 InitBuckets(oldInfo, buckets, suffixArrayTemp);
518 DataType i = 0;
519 DataType h = 0;
520 DataType len = 0;
521 for (h = 1; suffixArray_[0] != -(static_cast<DataType>(oldInfo.length) + 1); h += h) {
522 len = 0;
523 for (i = 0; i < (static_cast<DataType>(oldInfo.length) + 1);) {
524 if (suffixArray_[i] < 0) {
525 len -= suffixArray_[i];
526 i -= suffixArray_[i];
527 } else {
528 suffixArray_[i - len] = len != 0 ? -len : suffixArray_[i - len];
529 len = suffixArrayTemp[suffixArray_[i]] + 1 - i;
530 Split(suffixArrayTemp, i, len, h);
531 i += len;
532 len = 0;
533 }
534 }
535 if (len != 0) {
536 suffixArray_[i - len] = -len;
537 }
538 }
539
540 for (i = 0; i < static_cast<DataType>(oldInfo.length) + 1; i++) {
541 suffixArray_[suffixArrayTemp[i]] = i;
542 }
543
544 PATCH_DEBUG("SuffixArray::Init %d finish", static_cast<int>(oldInfo.length));
545 }
546
547 template<class DataType>
SplitForLess(std::vector<DataType> & suffixArrayTemp,DataType start,DataType len,DataType h)548 void SuffixArray<DataType>::SplitForLess(std::vector<DataType> &suffixArrayTemp,
549 DataType start, DataType len, DataType h)
550 {
551 DataType j = 0;
552 for (DataType k = start; k < start + len; k += j) {
553 j = 1;
554 DataType x = suffixArrayTemp[suffixArray_[k] + h];
555 for (DataType i = 1; k + i < start + len; i++) {
556 if (suffixArrayTemp[suffixArray_[k + i] + h] < x) {
557 x = suffixArrayTemp[suffixArray_[k + i] + h];
558 j = 0;
559 }
560 if (suffixArrayTemp[suffixArray_[k + i] + h] == x) {
561 SWAP(suffixArray_[k + j], suffixArray_[k + i]);
562 j++;
563 }
564 }
565 for (DataType i = 0; i < j; i++) {
566 suffixArrayTemp[suffixArray_[k + i]] = k + j - 1;
567 }
568 if (j == 1) {
569 suffixArray_[k] = -1;
570 }
571 }
572 }
573
574 template<class DataType>
Split(std::vector<DataType> & suffixArrayTemp,DataType start,DataType len,DataType h)575 void SuffixArray<DataType>::Split(std::vector<DataType> &suffixArrayTemp, DataType start, DataType len, DataType h)
576 {
577 if (len < MIN_LENGTH) {
578 SplitForLess(suffixArrayTemp, start, len, h);
579 return;
580 }
581
582 DataType x = suffixArrayTemp[suffixArray_[start + len / MULTIPLE_TWO] + h];
583 DataType jj = 0;
584 DataType kk = 0;
585 for (DataType i = start; i < (start + len); i++) {
586 jj = (suffixArrayTemp[suffixArray_[i] + h] < x) ? (jj + 1) : jj;
587 kk = (suffixArrayTemp[suffixArray_[i] + h] == x) ? (kk + 1) : kk;
588 }
589 jj += start;
590 kk += jj;
591 DataType i = start;
592 DataType j = 0;
593 DataType k = 0;
594 while (i < jj) {
595 if (suffixArrayTemp[suffixArray_[i] + h] < x) {
596 i++;
597 } else if (suffixArrayTemp[suffixArray_[i] + h] == x) {
598 SWAP(suffixArray_[i], suffixArray_[jj + j]);
599 j++;
600 } else {
601 SWAP(suffixArray_[i], suffixArray_[kk + k]);
602 k++;
603 }
604 }
605 while (jj + j < kk) {
606 if (suffixArrayTemp[suffixArray_[jj + j] + h] == x) {
607 j++;
608 } else {
609 SWAP(suffixArray_[jj + j], suffixArray_[kk + k]);
610 k++;
611 }
612 }
613 if (jj > start) {
614 Split(suffixArrayTemp, start, jj - start, h);
615 }
616
617 for (i = 0; i < kk - jj; i++) {
618 suffixArrayTemp[suffixArray_[jj + i]] = kk - 1;
619 }
620 if (jj == kk - 1) {
621 suffixArray_[jj] = -1;
622 }
623 if (start + len > kk) {
624 Split(suffixArrayTemp, kk, start + len - kk, h);
625 }
626 }
627
628 template<class DataType>
MatchLength(const BlockBuffer & oldBuffer,const BlockBuffer & newBuffer) const629 int64_t SuffixArray<DataType>::MatchLength(const BlockBuffer &oldBuffer, const BlockBuffer &newBuffer) const
630 {
631 int64_t i = 0;
632 for (; (i < static_cast<int64_t>(oldBuffer.length)) && (i < static_cast<int64_t>(newBuffer.length)); i++) {
633 if (oldBuffer.buffer[i] != newBuffer.buffer[i]) {
634 break;
635 }
636 }
637 return i;
638 }
639
640 template<class DataType>
Search(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,int64_t start,int64_t end,int64_t & pos) const641 int64_t SuffixArray<DataType>::Search(const BlockBuffer &newInfo,
642 const BlockBuffer &oldInfo, int64_t start, int64_t end, int64_t &pos) const
643 {
644 int64_t x = 0;
645 int64_t y = 0;
646 if ((end - start) < MULTIPLE_TWO) {
647 BlockBuffer oldStart = {oldInfo.buffer + suffixArray_[start], oldInfo.length - suffixArray_[start]};
648 BlockBuffer oldEnd = {oldInfo.buffer + suffixArray_[end], oldInfo.length - suffixArray_[end]};
649 x = MatchLength(oldStart, newInfo);
650 y = MatchLength(oldEnd, newInfo);
651 if (x > y) {
652 pos = suffixArray_[start];
653 return x;
654 } else {
655 pos = suffixArray_[end];
656 return y;
657 }
658 }
659 x = start + (end - start) / MULTIPLE_TWO;
660 if (memcmp(oldInfo.buffer + suffixArray_[x],
661 newInfo.buffer, MIN(oldInfo.length - suffixArray_[x], newInfo.length)) < 0) {
662 return Search(newInfo, oldInfo, x, end, pos);
663 } else {
664 return Search(newInfo, oldInfo, start, x, pos);
665 }
666 }
667
668 template<class DataType>
InitBuckets(const BlockBuffer & oldInfo,std::vector<DataType> & buckets,std::vector<DataType> & suffixArrayTemp)669 void SuffixArray<DataType>::InitBuckets(const BlockBuffer &oldInfo,
670 std::vector<DataType> &buckets, std::vector<DataType> &suffixArrayTemp)
671 {
672 suffixArray_.resize(oldInfo.length + 1, 0);
673 suffixArrayTemp.resize(oldInfo.length + 1, 0);
674 buckets.resize(BUCKET_SIZE, 0);
675
676 for (size_t i = 0; i < oldInfo.length; i++) {
677 buckets[oldInfo.buffer[i]]++;
678 }
679 for (size_t i = 1; i < buckets.size(); i++) {
680 buckets[i] += buckets[i - 1];
681 }
682 for (size_t i = buckets.size() - 1; i > 0; i--) {
683 buckets[i] = buckets[i - 1];
684 }
685 buckets[0] = 0;
686
687 DataType i;
688 for (i = 0; i < static_cast<DataType>(oldInfo.length); i++) {
689 suffixArray_[++buckets[oldInfo.buffer[i]]] = i;
690 }
691 suffixArray_[0] = oldInfo.length;
692
693 for (i = 0; i < static_cast<DataType>(oldInfo.length); i++) {
694 suffixArrayTemp[i] = buckets[oldInfo.buffer[i]];
695 }
696 suffixArrayTemp[oldInfo.length] = 0;
697
698 for (i = 1; i < static_cast<DataType>(BUCKET_SIZE); i++) {
699 if (buckets[i] == buckets[i - 1] + 1) {
700 suffixArray_[buckets[i]] = -1;
701 }
702 }
703 suffixArray_[0] = -1;
704 }
705 } // namespace UpdatePatch
706