1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "blocks_diff.h"
17 #include "scope_guard.h"
18 #include <cstdio>
19 #include <iostream>
20 #include <vector>
21 #include "update_diff.h"
22 
23 using namespace Hpackage;
24 using namespace std;
25 using namespace Updater;
26 
27 namespace UpdatePatch {
28 #define SWAP(a, b) auto swapTmp = (a); (a) = (b); (b) = swapTmp
29 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
30 #define SET_BUFFER(y, buffer, index) \
31     (buffer)[index] = static_cast<uint8_t>((y) % 256); (y) -= (buffer)[index]; (y) = (y) / 256
32 
33 constexpr uint32_t BUCKET_SIZE = 256;
34 constexpr uint32_t MULTIPLE_TWO = 2;
35 constexpr int64_t BLOCK_SCORE = 8;
36 constexpr int64_t MIN_LENGTH = 16;
37 
WriteLE64(const BlockBuffer & buffer,int64_t value)38 static void WriteLE64(const BlockBuffer &buffer, int64_t value)
39 {
40     int32_t index = 0;
41     auto y = static_cast<uint64_t>((value < 0) ? -value : value);
42     SET_BUFFER(y, buffer.buffer, index);
43     index++;
44     SET_BUFFER(y, buffer.buffer, index);
45     index++;
46     SET_BUFFER(y, buffer.buffer, index);
47     index++;
48     SET_BUFFER(y, buffer.buffer, index);
49     index++;
50     SET_BUFFER(y, buffer.buffer, index);
51     index++;
52     SET_BUFFER(y, buffer.buffer, index);
53     index++;
54     SET_BUFFER(y, buffer.buffer, index);
55     index++;
56     SET_BUFFER(y, buffer.buffer, index);
57     if (value < 0) {
58         buffer.buffer[index] |= 0x80;
59     }
60 }
61 
MakePatch(const std::string & oldFileName,const std::string & newFileName,const std::string & patchFileName)62 int32_t BlocksDiff::MakePatch(const std::string &oldFileName, const std::string &newFileName,
63     const std::string &patchFileName)
64 {
65     PATCH_LOGI("BlocksDiff::MakePatch %s ", patchFileName.c_str());
66     std::fstream patchFile(patchFileName, std::ios::out | std::ios::trunc | std::ios::binary);
67     if (patchFile.fail()) {
68         PATCH_LOGE("Failed to open %s", strerror(errno));
69         return -1;
70     }
71 
72     MemMapInfo oldBuffer {};
73     MemMapInfo newBuffer {};
74     int32_t ret = PatchMapFile(oldFileName, oldBuffer);
75     int32_t ret1 = PatchMapFile(newFileName, newBuffer);
76     if (ret != 0 || ret1 != 0) {
77         PATCH_LOGE("Failed to open %s", newFileName.c_str());
78         return -1;
79     }
80     BlockBuffer newInfo = {newBuffer.memory, newBuffer.length};
81     BlockBuffer oldInfo = {oldBuffer.memory, oldBuffer.length};
82     std::unique_ptr<BlocksDiff> blockdiff = std::make_unique<BlocksStreamDiff>(patchFile, 0);
83     size_t patchSize = 0;
84     ret = blockdiff->MakePatch(newInfo, oldInfo, patchSize);
85     if (ret != PATCH_SUCCESS) {
86         PATCH_LOGE("Failed to generate patch");
87         return ret;
88     }
89     PATCH_DEBUG("MakePatch %zu", static_cast<size_t>(patchFile.tellp()));
90     patchFile.close();
91     PATCH_LOGI("BlocksDiff::MakePatch success %zu", patchSize);
92     return ret;
93 }
94 
MakePatch(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,std::vector<uint8_t> & patchData,size_t offset,size_t & patchSize)95 int32_t BlocksDiff::MakePatch(const BlockBuffer &newInfo,
96     const BlockBuffer &oldInfo, std::vector<uint8_t> &patchData, size_t offset, size_t &patchSize)
97 {
98     if (patchData.empty()) {
99         patchData.resize(IGMDIFF_LIMIT_UNIT);
100     }
101     std::unique_ptr<BlocksDiff> blockdiff = std::make_unique<BlocksBufferDiff>(patchData, offset);
102     int32_t ret = blockdiff->MakePatch(newInfo, oldInfo, patchSize);
103     if (ret != PATCH_SUCCESS) {
104         PATCH_LOGE("Failed to generate patch");
105         return ret;
106     }
107     if (patchData.size() < patchSize) {
108         PATCH_LOGE("Failed to make block patch");
109         return -1;
110     }
111     patchData.resize(patchSize);
112     PATCH_LOGI("BlocksDiff::MakePatch success %zu", patchSize);
113     return ret;
114 }
115 
MakePatch(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,std::fstream & patchFile,size_t & patchSize)116 int32_t BlocksDiff::MakePatch(const BlockBuffer &newInfo,
117     const BlockBuffer &oldInfo, std::fstream &patchFile, size_t &patchSize)
118 {
119     std::unique_ptr<BlocksDiff> blockdiff = std::make_unique<BlocksStreamDiff>(
120         patchFile, static_cast<size_t>(patchFile.tellp()));
121     int32_t ret = blockdiff->MakePatch(newInfo, oldInfo, patchSize);
122     if (ret != PATCH_SUCCESS) {
123         PATCH_LOGE("Failed to generate patch");
124         return ret;
125     }
126     PATCH_LOGI("BlocksDiff::MakePatch success %zu patchFile %zu",
127         patchSize, static_cast<size_t>(patchFile.tellp()));
128     return ret;
129 }
130 
MakePatch(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,size_t & patchSize)131 int32_t BlocksDiff::MakePatch(const BlockBuffer &newInfo, const BlockBuffer &oldInfo, size_t &patchSize)
132 {
133     if (suffixArray_ == nullptr) {
134         suffixArray_.reset(new SuffixArray<int32_t>());
135         if (suffixArray_ == nullptr) {
136             PATCH_LOGE("Failed to create SuffixArray");
137             return -1;
138         }
139         suffixArray_->Init(oldInfo);
140     }
141 
142     std::vector<ControlData> controlDatas;
143     int32_t ret = GetCtrlDatas(newInfo, oldInfo, controlDatas);
144     if (ret != 0) {
145         PATCH_LOGE("Failed to get control data");
146         return ret;
147     }
148 
149     return WritePatchData(controlDatas, newInfo, patchSize);
150 }
151 
WritePatchData(const std::vector<ControlData> & controlDatas,const BlockBuffer & newInfo,size_t & patchSize)152 int32_t BlocksDiff::WritePatchData(const std::vector<ControlData> &controlDatas,
153     const BlockBuffer &newInfo, size_t &patchSize)
154 {
155     patchSize = 0;
156     int32_t ret = WritePatchHeader(0, 0, 0, patchSize);
157     if (ret != 0) {
158         PATCH_LOGE("Failed to write patch header");
159         return ret;
160     }
161 
162     size_t controlSize = patchSize;
163     ret = WriteControlData(controlDatas, patchSize);
164     if (ret != BZ_OK) {
165         PATCH_LOGE("Failed to write control data");
166         return ret;
167     }
168     controlSize = patchSize - controlSize;
169 
170     // 写diff数据
171     size_t diffDataSize = patchSize;
172     ret = WriteDiffData(controlDatas, patchSize);
173     if (ret != BZ_OK) {
174         PATCH_LOGE("Failed to write diff data");
175         return ret;
176     }
177     diffDataSize = patchSize - diffDataSize;
178 
179     size_t extraDataSize = patchSize;
180     ret = WriteExtraData(controlDatas, patchSize);
181     if (ret != BZ_OK) {
182         PATCH_LOGE("Failed to write extra data");
183         return ret;
184     }
185     extraDataSize = patchSize - extraDataSize;
186 
187     // write real data
188     size_t headerLen = 0;
189     ret = WritePatchHeader(controlSize, diffDataSize, newInfo.length, headerLen);
190     if (ret != 0) {
191         PATCH_LOGE("Failed to write patch header");
192         return ret;
193     }
194     PATCH_DEBUG("MakePatch success patchSize:%zu controlSize:%zu diffDataSize:%zu, extraDataSize:%zu",
195         patchSize, controlSize, diffDataSize, extraDataSize);
196     return 0;
197 }
198 
CreateBZip2Adapter(size_t patchOffset)199 std::unique_ptr<DeflateAdapter> BlocksBufferDiff::CreateBZip2Adapter(size_t patchOffset)
200 {
201     std::unique_ptr<DeflateAdapter> bzip2Adapter = std::make_unique<BZipBuffer2Adapter>(patchData_,
202         offset_ + patchOffset);
203     if (bzip2Adapter == nullptr) {
204         PATCH_LOGE("Failed to create bzip2Adapter");
205         return nullptr;
206     }
207     bzip2Adapter->Open();
208     return bzip2Adapter;
209 }
210 
CreateBZip2Adapter(size_t patchOffset)211 std::unique_ptr<DeflateAdapter> BlocksStreamDiff::CreateBZip2Adapter(size_t patchOffset)
212 {
213     std::unique_ptr<DeflateAdapter> bzip2Adapter = std::make_unique<BZip2StreamAdapter>(stream_);
214     if (bzip2Adapter == nullptr) {
215         PATCH_LOGE("Failed to create bzip2Adapter");
216         return nullptr;
217     }
218     bzip2Adapter->Open();
219     return bzip2Adapter;
220 }
221 
WritePatchHeader(int64_t controlSize,int64_t diffDataSize,int64_t newSize,size_t & headerLen)222 int32_t BlocksBufferDiff::WritePatchHeader(int64_t controlSize,
223     int64_t diffDataSize, int64_t newSize, size_t &headerLen)
224 {
225     headerLen = std::char_traits<char>::length(BSDIFF_MAGIC) + sizeof(int64_t) + sizeof(int64_t) + sizeof(int64_t);
226     if (patchData_.size() <= headerLen + offset_) {
227         PATCH_LOGE("Invalid patch size");
228         return -1;
229     }
230 
231     int32_t ret = memcpy_s(patchData_.data() + offset_, patchData_.size(), BSDIFF_MAGIC,
232         std::char_traits<char>::length(BSDIFF_MAGIC));
233     if (ret != 0) {
234         PATCH_LOGE("Failed to copy magic");
235         return ret;
236     }
237     headerLen = std::char_traits<char>::length(BSDIFF_MAGIC);
238     BlockBuffer data = {patchData_.data() + offset_ + headerLen, patchData_.size()};
239     WriteLE64(data, controlSize);
240     headerLen += sizeof(int64_t);
241     BlockBuffer diffData = {patchData_.data() + offset_ + headerLen, patchData_.size()};
242     WriteLE64(diffData, diffDataSize);
243     headerLen += sizeof(int64_t);
244     BlockBuffer newData = {patchData_.data() + offset_ + headerLen, patchData_.size()};
245     WriteLE64(newData, newSize);
246     headerLen += sizeof(int64_t);
247     return 0;
248 }
249 
WritePatchHeader(int64_t controlSize,int64_t diffDataSize,int64_t newSize,size_t & headerLen)250 int32_t BlocksStreamDiff::WritePatchHeader(int64_t controlSize,
251     int64_t diffDataSize, int64_t newSize, size_t &headerLen)
252 {
253     PATCH_DEBUG("WritePatchHeader %zu", static_cast<size_t>(stream_.tellp()));
254 #ifdef __aarch64__
255     if (offset_ > static_cast<size_t>(numeric_limits<std::fstream::off_type>::max())) {
256         PATCH_LOGE("offset_ error");
257         return -1;
258     }
259 #endif
260     stream_.seekp(offset_, std::ios::beg);
261     stream_.write(BSDIFF_MAGIC, std::char_traits<char>::length(BSDIFF_MAGIC));
262     PkgBuffer buffer(sizeof(int64_t));
263     WriteLE64(buffer, controlSize);
264     stream_.write(reinterpret_cast<const char*>(buffer.buffer), sizeof(int64_t));
265     WriteLE64(buffer, diffDataSize);
266     stream_.write(reinterpret_cast<const char*>(buffer.buffer), sizeof(int64_t));
267     WriteLE64(buffer, newSize);
268     stream_.write(reinterpret_cast<const char*>(buffer.buffer), sizeof(int64_t));
269     headerLen = std::char_traits<char>::length(BSDIFF_MAGIC) + sizeof(int64_t)  + sizeof(int64_t)  + sizeof(int64_t);
270     stream_.seekp(0, std::ios::end);
271     return 0;
272 }
273 
ComputeOldScore(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,int64_t & oldScore,int64_t & matchLen)274 void BlocksDiff::ComputeOldScore(const BlockBuffer &newInfo,
275     const BlockBuffer &oldInfo, int64_t &oldScore, int64_t &matchLen)
276 {
277     int64_t newSize = static_cast<int64_t>(newInfo.length);
278     for (int64_t begin = currentOffset_ += matchLen; currentOffset_ < newSize; currentOffset_++) {
279         BlockBuffer newBuff = {newInfo.buffer + currentOffset_, newInfo.length - currentOffset_};
280         matchLen = suffixArray_->Search(newBuff, { oldInfo.buffer, oldInfo.length }, 0, oldInfo.length, matchPos_);
281         for (; begin < currentOffset_ + matchLen; begin++) {
282             if ((begin + lastOffset_ < static_cast<int64_t>(oldInfo.length))
283                 && (oldInfo.buffer[begin + lastOffset_] == newInfo.buffer[begin])) {
284                 oldScore++;
285             }
286         }
287         if (((matchLen == oldScore) && (matchLen != 0)) || (matchLen > (oldScore + BLOCK_SCORE))) {
288             break;
289         }
290         if ((currentOffset_ + lastOffset_ < static_cast<int64_t>(oldInfo.length)) &&
291             (oldInfo.buffer[currentOffset_ + lastOffset_] == newInfo.buffer[currentOffset_])) {
292             oldScore--;
293         }
294     }
295 }
296 
ComputeLength(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,int64_t & lengthFront,int64_t & lengthBack)297 void BlocksDiff::ComputeLength(const BlockBuffer &newInfo,
298     const BlockBuffer &oldInfo, int64_t &lengthFront, int64_t &lengthBack)
299 {
300     lengthFront = 0;
301     lengthBack = 0;
302     int64_t i = 0;
303     int64_t s = 0;
304     int64_t tmp = 0;
305     while (((lastScan_ + i) < currentOffset_) && ((lastPos_ + i) < static_cast<int64_t>(oldInfo.length))) {
306         if (oldInfo.buffer[lastPos_ + i] == newInfo.buffer[lastScan_ + i]) {
307             s++;
308         }
309         i++;
310         if ((s * MULTIPLE_TWO - i) > (tmp * MULTIPLE_TWO - lengthFront)) {
311             tmp = s;
312             lengthFront = i;
313         }
314     }
315     s = 0;
316     tmp = 0;
317     if (currentOffset_ < static_cast<int64_t>(newInfo.length)) {
318         for (i = 1; (currentOffset_ >= lastScan_ + i) && (matchPos_ >= i); i++) {
319             if (oldInfo.buffer[matchPos_ - i] == newInfo.buffer[currentOffset_ - i]) {
320                 s++;
321             }
322             if ((s * MULTIPLE_TWO - i) > (tmp * MULTIPLE_TWO - lengthBack)) {
323                 tmp = s;
324                 lengthBack = i;
325             }
326         }
327     }
328 
329     if (lastScan_ + lengthFront > currentOffset_ - lengthBack) {
330         int64_t lens = 0;
331         int64_t overlap = (lastScan_ + lengthFront) - (currentOffset_ - lengthBack);
332         s = 0;
333         tmp = 0;
334         for (i = 0; i < overlap; i++) {
335             if (newInfo.buffer[lastScan_ + lengthFront - overlap + i] ==
336                 oldInfo.buffer[lastPos_ + lengthFront - overlap + i]) {
337                 s++;
338             }
339             if (newInfo.buffer[currentOffset_ - lengthBack + i] == oldInfo.buffer[matchPos_ - lengthBack + i]) {
340                 s--;
341             }
342             if (s > tmp) {
343                 tmp = s;
344                 lens = i + 1;
345             }
346         }
347         lengthFront += lens - overlap;
348         lengthBack -= lens;
349     }
350 }
351 
GetCtrlDatas(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,std::vector<ControlData> & controlDatas)352 int32_t BlocksDiff::GetCtrlDatas(const BlockBuffer &newInfo,
353     const BlockBuffer &oldInfo, std::vector<ControlData> &controlDatas)
354 {
355     int64_t matchLen = 0;
356     while (currentOffset_ < static_cast<int64_t>(newInfo.length)) {
357         int64_t oldScore = 0;
358         int64_t lenFront = 0;
359         int64_t lenBack = 0;
360         ComputeOldScore(newInfo, oldInfo, oldScore, matchLen);
361         if ((matchLen == oldScore) && (currentOffset_ != static_cast<int64_t>(newInfo.length))) {
362             continue;
363         }
364         ComputeLength(newInfo, oldInfo, lenFront, lenBack);
365 
366         // save ctrl data
367         ControlData ctrlData;
368         ctrlData.diffLength = lenFront;
369         ctrlData.extraLength = (currentOffset_ - lenBack) - (lastScan_ + lenFront);
370         ctrlData.offsetIncrement = (matchPos_ - lenBack) - (lastPos_ + lenFront);
371         ctrlData.diffNewStart = &newInfo.buffer[lastScan_];
372         ctrlData.diffOldStart = &oldInfo.buffer[lastPos_];
373         ctrlData.extraNewStart = &newInfo.buffer[lastScan_ + lenFront];
374         controlDatas.push_back(ctrlData);
375         lastScan_ = currentOffset_ - lenBack;
376         lastPos_ = matchPos_ - lenBack;
377         lastOffset_ = matchPos_ - currentOffset_;
378     }
379     return 0;
380 }
381 
WriteControlData(const std::vector<ControlData> controlDatas,size_t & patchSize)382 int32_t BlocksDiff::WriteControlData(const std::vector<ControlData> controlDatas, size_t &patchSize)
383 {
384     std::unique_ptr<DeflateAdapter> bzip2Adapter = CreateBZip2Adapter(patchSize);
385     if (bzip2Adapter == nullptr) {
386         PATCH_LOGE("Failed to create bzip2Adapter");
387         return -1;
388     }
389     bzip2Adapter->Open();
390     int32_t ret = 0;
391     uint8_t buffer[sizeof(int64_t)] = {0};
392     BlockBuffer srcData = {buffer, sizeof(int64_t)};
393     PATCH_DEBUG("WriteControlData patchSize %zu controlDatas %zu", patchSize, controlDatas.size());
394     std::vector<int64_t> data;
395     ON_SCOPE_EXIT(closeBzip2Adapter) {
396         bzip2Adapter->Close();
397     };
398     for (size_t i = 0; i < controlDatas.size(); i++) {
399         WriteLE64(srcData, controlDatas[i].diffLength);
400         ret = bzip2Adapter->WriteData(srcData);
401         if (ret != 0) {
402             PATCH_LOGE("Failed to write data");
403             return ret;
404         }
405         WriteLE64(srcData, controlDatas[i].extraLength);
406         ret = bzip2Adapter->WriteData(srcData);
407         if (ret != 0) {
408             PATCH_LOGE("Failed to write data");
409             return ret;
410         }
411         WriteLE64(srcData, controlDatas[i].offsetIncrement);
412         ret = bzip2Adapter->WriteData(srcData);
413         if (ret != 0) {
414             PATCH_LOGE("WriteControlData : Failed to write data");
415             return ret;
416         }
417     }
418     size_t dataSize = 0;
419     ret = bzip2Adapter->FlushData(dataSize);
420     if (ret != 0) {
421         PATCH_LOGE("Failed to FlushData %d", ret);
422         return ret;
423     }
424     patchSize += dataSize;
425     PATCH_DEBUG("WriteControlData exit patchSize %zu", patchSize);
426     return 0;
427 }
428 
WriteDiffData(const std::vector<ControlData> controlDatas,size_t & patchSize)429 int32_t BlocksDiff::WriteDiffData(const std::vector<ControlData> controlDatas, size_t &patchSize)
430 {
431     PATCH_DEBUG("WriteDiffData patchSize %zu", patchSize);
432     std::unique_ptr<DeflateAdapter> bzip2Adapter = CreateBZip2Adapter(patchSize);
433     if (bzip2Adapter == nullptr) {
434         PATCH_LOGE("Failed to create bzip2Adapter");
435         return -1;
436     }
437     bzip2Adapter->Open();
438     ON_SCOPE_EXIT(closeBzip2Adapter) {
439         bzip2Adapter->Close();
440     };
441 
442     std::vector<uint8_t> diffData(IGMDIFF_LIMIT_UNIT, 0);
443     int32_t ret = 0;
444     for (size_t i = 0; i < controlDatas.size(); i++) {
445         if (controlDatas[i].diffLength <= 0) {
446             continue;
447         }
448 
449         int64_t offset = 0;
450         while (offset < controlDatas[i].diffLength) {
451             int64_t cpyLen = static_cast<int64_t>(IGMDIFF_LIMIT_UNIT);
452             cpyLen = (offset + cpyLen > controlDatas[i].diffLength) ? (controlDatas[i].diffLength - offset) : cpyLen;
453             for (int64_t k = 0; k < cpyLen; k++) {
454                 diffData[k] = controlDatas[i].diffNewStart[offset + k] - controlDatas[i].diffOldStart[offset + k];
455             }
456 
457             BlockBuffer srcData = {reinterpret_cast<uint8_t*>(diffData.data()), static_cast<size_t>(cpyLen)};
458             ret = bzip2Adapter->WriteData(srcData);
459             if (ret != 0) {
460                 PATCH_LOGE("Failed to write data");
461                 return ret;
462             }
463             offset += cpyLen;
464         }
465     }
466     size_t dataSize = 0;
467     ret = bzip2Adapter->FlushData(dataSize);
468     if (ret != 0) {
469         PATCH_LOGE("Failed to FlushData %d", ret);
470         return ret;
471     }
472     patchSize += dataSize;
473     PATCH_DEBUG("WriteDiffData exit patchSize %zu dataSize %zu ", patchSize, dataSize);
474     return 0;
475 }
476 
WriteExtraData(const std::vector<ControlData> controlDatas,size_t & patchSize)477 int32_t BlocksDiff::WriteExtraData(const std::vector<ControlData> controlDatas, size_t &patchSize)
478 {
479     PATCH_DEBUG("WriteExtraData patchSize %zu ", patchSize);
480     std::unique_ptr<DeflateAdapter> bzip2Adapter = CreateBZip2Adapter(patchSize);
481     if (bzip2Adapter == nullptr) {
482         PATCH_LOGE("Failed to create bzip2Adapter");
483         return -1;
484     }
485     bzip2Adapter->Open();
486     ON_SCOPE_EXIT(closeBzip2Adapter) {
487         bzip2Adapter->Close();
488     };
489     int32_t ret = 0;
490     for (size_t i = 0; i < controlDatas.size(); i++) {
491         if (controlDatas[i].extraLength <= 0) {
492             continue;
493         }
494         BlockBuffer srcData = {controlDatas[i].extraNewStart, static_cast<size_t>(controlDatas[i].extraLength)};
495         ret = bzip2Adapter->WriteData(srcData);
496         if (ret != 0) {
497             PATCH_LOGE("WriteExtraData : Failed to write data");
498             return ret;
499         }
500     }
501     size_t dataSize = 0;
502     ret = bzip2Adapter->FlushData(dataSize);
503     if (ret != 0) {
504         PATCH_LOGE("Failed to FlushData %d", ret);
505         return ret;
506     }
507     patchSize += dataSize;
508     PATCH_DEBUG("WriteExtraData exit patchSize %zu", patchSize);
509     return 0;
510 }
511 
512 template<class DataType>
Init(const BlockBuffer & oldInfo)513 void SuffixArray<DataType>::Init(const BlockBuffer &oldInfo)
514 {
515     std::vector<DataType> suffixArrayTemp;
516     std::vector<DataType> buckets;
517     InitBuckets(oldInfo, buckets, suffixArrayTemp);
518     DataType i = 0;
519     DataType h = 0;
520     DataType len = 0;
521     for (h = 1; suffixArray_[0] != -(static_cast<DataType>(oldInfo.length) + 1); h += h) {
522         len = 0;
523         for (i = 0; i < (static_cast<DataType>(oldInfo.length) + 1);) {
524             if (suffixArray_[i] < 0) {
525                 len -= suffixArray_[i];
526                 i -= suffixArray_[i];
527             } else {
528                 suffixArray_[i - len] = len != 0 ? -len : suffixArray_[i - len];
529                 len = suffixArrayTemp[suffixArray_[i]] + 1 - i;
530                 Split(suffixArrayTemp, i, len, h);
531                 i += len;
532                 len = 0;
533             }
534         }
535         if (len != 0) {
536             suffixArray_[i - len] = -len;
537         }
538     }
539 
540     for (i = 0; i < static_cast<DataType>(oldInfo.length) + 1; i++) {
541         suffixArray_[suffixArrayTemp[i]] = i;
542     }
543 
544     PATCH_DEBUG("SuffixArray::Init %d finish", static_cast<int>(oldInfo.length));
545 }
546 
547 template<class DataType>
SplitForLess(std::vector<DataType> & suffixArrayTemp,DataType start,DataType len,DataType h)548 void SuffixArray<DataType>::SplitForLess(std::vector<DataType> &suffixArrayTemp,
549     DataType start, DataType len, DataType h)
550 {
551     DataType j = 0;
552     for (DataType k = start; k < start + len; k += j) {
553         j = 1;
554         DataType x = suffixArrayTemp[suffixArray_[k] + h];
555         for (DataType i = 1; k + i < start + len; i++) {
556             if (suffixArrayTemp[suffixArray_[k + i] + h] < x) {
557                 x = suffixArrayTemp[suffixArray_[k + i] + h];
558                 j = 0;
559             }
560             if (suffixArrayTemp[suffixArray_[k + i] + h] == x) {
561                 SWAP(suffixArray_[k + j], suffixArray_[k + i]);
562                 j++;
563             }
564         }
565         for (DataType i = 0; i < j; i++) {
566             suffixArrayTemp[suffixArray_[k + i]] = k + j - 1;
567         }
568         if (j == 1) {
569             suffixArray_[k] = -1;
570         }
571     }
572 }
573 
574 template<class DataType>
Split(std::vector<DataType> & suffixArrayTemp,DataType start,DataType len,DataType h)575 void SuffixArray<DataType>::Split(std::vector<DataType> &suffixArrayTemp, DataType start, DataType len, DataType h)
576 {
577     if (len < MIN_LENGTH) {
578         SplitForLess(suffixArrayTemp, start, len, h);
579         return;
580     }
581 
582     DataType x = suffixArrayTemp[suffixArray_[start + len / MULTIPLE_TWO] + h];
583     DataType jj = 0;
584     DataType kk = 0;
585     for (DataType i = start; i < (start + len); i++) {
586         jj = (suffixArrayTemp[suffixArray_[i] + h] < x) ? (jj + 1) : jj;
587         kk = (suffixArrayTemp[suffixArray_[i] + h] == x) ? (kk + 1) : kk;
588     }
589     jj += start;
590     kk += jj;
591     DataType i = start;
592     DataType j = 0;
593     DataType k = 0;
594     while (i < jj) {
595         if (suffixArrayTemp[suffixArray_[i] + h] < x) {
596             i++;
597         } else if (suffixArrayTemp[suffixArray_[i] + h] == x) {
598             SWAP(suffixArray_[i], suffixArray_[jj + j]);
599             j++;
600         } else {
601             SWAP(suffixArray_[i], suffixArray_[kk + k]);
602             k++;
603         }
604     }
605     while (jj + j < kk) {
606         if (suffixArrayTemp[suffixArray_[jj + j] + h] == x) {
607             j++;
608         } else {
609             SWAP(suffixArray_[jj + j], suffixArray_[kk + k]);
610             k++;
611         }
612     }
613     if (jj > start) {
614         Split(suffixArrayTemp, start, jj - start, h);
615     }
616 
617     for (i = 0; i < kk - jj; i++) {
618         suffixArrayTemp[suffixArray_[jj + i]] = kk - 1;
619     }
620     if (jj == kk - 1) {
621         suffixArray_[jj] = -1;
622     }
623     if (start + len > kk) {
624         Split(suffixArrayTemp, kk, start + len - kk, h);
625     }
626 }
627 
628 template<class DataType>
MatchLength(const BlockBuffer & oldBuffer,const BlockBuffer & newBuffer) const629 int64_t SuffixArray<DataType>::MatchLength(const BlockBuffer &oldBuffer, const BlockBuffer &newBuffer) const
630 {
631     int64_t i = 0;
632     for (; (i < static_cast<int64_t>(oldBuffer.length)) && (i < static_cast<int64_t>(newBuffer.length)); i++) {
633         if (oldBuffer.buffer[i] != newBuffer.buffer[i]) {
634             break;
635         }
636     }
637     return i;
638 }
639 
640 template<class DataType>
Search(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,int64_t start,int64_t end,int64_t & pos) const641 int64_t SuffixArray<DataType>::Search(const BlockBuffer &newInfo,
642     const BlockBuffer &oldInfo, int64_t start, int64_t end, int64_t &pos) const
643 {
644     int64_t x = 0;
645     int64_t y = 0;
646     if ((end - start) < MULTIPLE_TWO) {
647         BlockBuffer oldStart = {oldInfo.buffer + suffixArray_[start], oldInfo.length - suffixArray_[start]};
648         BlockBuffer oldEnd = {oldInfo.buffer + suffixArray_[end], oldInfo.length - suffixArray_[end]};
649         x = MatchLength(oldStart, newInfo);
650         y = MatchLength(oldEnd, newInfo);
651         if (x > y) {
652             pos = suffixArray_[start];
653             return x;
654         } else {
655             pos = suffixArray_[end];
656             return y;
657         }
658     }
659     x = start + (end - start) / MULTIPLE_TWO;
660     if (memcmp(oldInfo.buffer + suffixArray_[x],
661         newInfo.buffer, MIN(oldInfo.length - suffixArray_[x], newInfo.length)) < 0) {
662         return Search(newInfo, oldInfo, x, end, pos);
663     } else {
664         return Search(newInfo, oldInfo, start, x, pos);
665     }
666 }
667 
668 template<class DataType>
InitBuckets(const BlockBuffer & oldInfo,std::vector<DataType> & buckets,std::vector<DataType> & suffixArrayTemp)669 void SuffixArray<DataType>::InitBuckets(const BlockBuffer &oldInfo,
670     std::vector<DataType> &buckets, std::vector<DataType> &suffixArrayTemp)
671 {
672     suffixArray_.resize(oldInfo.length + 1, 0);
673     suffixArrayTemp.resize(oldInfo.length + 1, 0);
674     buckets.resize(BUCKET_SIZE, 0);
675 
676     for (size_t i = 0; i < oldInfo.length; i++) {
677         buckets[oldInfo.buffer[i]]++;
678     }
679     for (size_t i = 1; i < buckets.size(); i++) {
680         buckets[i] += buckets[i - 1];
681     }
682     for (size_t i = buckets.size() - 1; i > 0; i--) {
683         buckets[i] = buckets[i - 1];
684     }
685     buckets[0] = 0;
686 
687     DataType i;
688     for (i = 0; i < static_cast<DataType>(oldInfo.length); i++) {
689         suffixArray_[++buckets[oldInfo.buffer[i]]] = i;
690     }
691     suffixArray_[0] = oldInfo.length;
692 
693     for (i = 0; i < static_cast<DataType>(oldInfo.length); i++) {
694         suffixArrayTemp[i] = buckets[oldInfo.buffer[i]];
695     }
696     suffixArrayTemp[oldInfo.length] = 0;
697 
698     for (i = 1; i < static_cast<DataType>(BUCKET_SIZE); i++) {
699         if (buckets[i] == buckets[i - 1] + 1) {
700             suffixArray_[buckets[i]] = -1;
701         }
702     }
703     suffixArray_[0] = -1;
704 }
705 } // namespace UpdatePatch
706