1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef OHOS_ABILITY_BASE_ZIP_FILE_H
17 #define OHOS_ABILITY_BASE_ZIP_FILE_H
18 
#include <cstddef>
#include <cstdint>
#include <iosfwd>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include "file_mapper.h"
#include "unzip.h"
28 
29 namespace OHOS {
30 namespace AbilityBase {
31 class ZipFileReader;
32 struct CentralDirEntry;
33 struct ZipEntry;
34 using ZipPos = ZPOS64_T;
35 using ZipEntryMap = std::unordered_map<std::string, ZipEntry>;
36 using BytePtr = Byte *;
37 
// Local file header, as described in APPNOTE-6.3.4.
// The number at the end of some rows is the running byte offset after that field.
//    local file header signature     4 bytes  (0x04034b50)
//    version needed to extract       2 bytes
//    general purpose bit flag        2 bytes
//    compression method              2 bytes  10
//    last mod file time              2 bytes
//    last mod file date              2 bytes
//    crc-32                          4 bytes
//    compressed size                 4 bytes  22
//    uncompressed size               4 bytes
//    file name length                2 bytes
//    extra field length              2 bytes  30
// The struct is packed so that its in-memory layout matches the table above byte for byte.
struct __attribute__((packed)) LocalHeader {
    uint32_t signature = 0;          // local file header signature
    uint16_t versionNeeded = 0;      // version needed to extract
    uint16_t flags = 0;              // general purpose bit flag
    uint16_t compressionMethod = 0;  // compression method
    uint16_t modifiedTime = 0;       // last mod file time
    uint16_t modifiedDate = 0;       // last mod file date
    uint32_t crc = 0;                // crc-32
    uint32_t compressedSize = 0;     // compressed size
    uint32_t uncompressedSize = 0;   // uncompressed size
    uint16_t nameSize = 0;           // file name length
    uint16_t extraSize = 0;          // extra field length
};
// Guard the layout: any padding or field change would silently break header parsing.
static_assert(sizeof(LocalHeader) == 30, "LocalHeader must match the 30-byte zip local file header");
63 
// Central directory file header.
// The number at the end of some rows is the running byte offset after that field.
//    central file header signature   4 bytes  (0x02014b50)
//    version made by                 2 bytes
//    version needed to extract       2 bytes
//    general purpose bit flag        2 bytes  10
//    compression method              2 bytes
//    last mod file time              2 bytes
//    last mod file date              2 bytes
//    crc-32                          4 bytes  20
//    compressed size                 4 bytes
//    uncompressed size               4 bytes
//    file name length                2 bytes  30
//    extra field length              2 bytes
//    file comment length             2 bytes
//    disk number start               2 bytes
//    internal file attributes        2 bytes
//    external file attributes        4 bytes
//    relative offset of local header 4 bytes  46
// The struct is packed so that its in-memory layout matches the table above byte for byte.
struct __attribute__((packed)) CentralDirEntry {
    uint32_t signature = 0;          // central file header signature
    uint16_t versionMade = 0;        // version made by
    uint16_t versionNeeded = 0;      // version needed to extract
    uint16_t flags = 0;              // general purpose bit flag
    uint16_t compressionMethod = 0;  // compression method
    uint16_t modifiedTime = 0;       // last mod file time
    uint16_t modifiedDate = 0;       // last mod file date
    uint32_t crc = 0;                // crc-32
    uint32_t compressedSize = 0;     // compressed size
    uint32_t uncompressedSize = 0;   // uncompressed size
    uint16_t nameSize = 0;           // file name length
    uint16_t extraSize = 0;          // extra field length
    uint16_t commentSize = 0;        // file comment length
    uint16_t diskNumStart = 0;       // disk number start
    uint16_t internalAttr = 0;       // internal file attributes
    uint32_t externalAttr = 0;       // external file attributes
    uint32_t localHeaderOffset = 0;  // relative offset of local header
};
// Guard the layout: any padding or field change would silently break central directory parsing.
static_assert(sizeof(CentralDirEntry) == 46, "CentralDirEntry must match the 46-byte zip central file header");
102 
// End of central directory record (packed structure).
//    end of central dir signature    4 bytes  (0x06054b50)
//    number of this disk             2 bytes
//    number of the disk with the
//    start of the central directory  2 bytes
//    total number of entries in the
//    central directory on this disk  2 bytes
//    total number of entries in
//    the central directory           2 bytes
//    size of the central directory   4 bytes
//    offset of start of central
//    directory with respect to
//    the starting disk number        4 bytes
//    .ZIP file comment length        2 bytes
// The struct is packed so that its in-memory layout matches the table above byte for byte.
struct __attribute__((packed)) EndDir {
    uint32_t signature = 0;               // end of central dir signature
    uint16_t numDisk = 0;                 // number of this disk
    uint16_t startDiskOfCentralDir = 0;   // number of the disk with the start of the central directory
    uint16_t totalEntriesInThisDisk = 0;  // total number of entries in the central directory on this disk
    uint16_t totalEntries = 0;            // total number of entries in the central directory
    uint32_t sizeOfCentralDir = 0;        // size of the central directory
    uint32_t offset = 0;                  // offset of start of central directory w.r.t. the starting disk number
    uint16_t commentLen = 0;              // .ZIP file comment length
};
// Guard the layout: any padding or field change would silently break end-of-directory parsing.
static_assert(sizeof(EndDir) == 22, "EndDir must match the 22-byte zip end of central directory record");
127 
// Data descriptor:
//    data descriptor signature       4 bytes  (0x08074b50)
//    crc-32                          4 bytes
//    compressed size                 4 bytes
//    uncompressed size               4 bytes
// This descriptor MUST exist if bit 3 of the general purpose bit flag is set.
// It is byte aligned and immediately follows the last byte of compressed data.
// Note: 0x06054b50 is the end-of-central-directory signature, not the data descriptor one.
// The struct is packed so that its in-memory layout matches the table above byte for byte.
struct __attribute__((packed)) DataDesc {
    uint32_t signature = 0;         // data descriptor signature
    uint32_t crc = 0;               // crc-32
    uint32_t compressedSize = 0;    // compressed size
    uint32_t uncompressedSize = 0;  // uncompressed size
};
// Guard the layout: any padding or field change would silently break data descriptor parsing.
static_assert(sizeof(DataDesc) == 16, "DataDesc must match the 16-byte zip data descriptor");
141 
142 struct ZipEntry {
143     ZipEntry() = default;
144     explicit ZipEntry(const CentralDirEntry &centralEntry);
145     ~ZipEntry() = default;  // for CodeDEX warning
146 
147     uint16_t compressionMethod = 0;
148     uint32_t uncompressedSize = 0;
149     uint32_t compressedSize = 0;
150     uint32_t localHeaderOffset = 0;
151     uint32_t crc = 0;
152     uint16_t flags = 0;
153     uint16_t modifiedTime = 0;
154     uint16_t modifiedDate = 0;
155     std::string fileName;
156 };
157 
// One node of the directory tree held by ZipFile (see dirRoot_).
struct DirTreeNode {
    // Defaults to false; set by whoever builds the tree.
    bool isDir{false};
    // Child nodes, keyed by a string (presumably the child's name - confirm in MakeDirTree).
    std::unordered_map<std::string, std::shared_ptr<DirTreeNode>> children{};
};
162 
// Cache mode selector accepted by ZipFile::SetCacheMode.
enum class CacheMode : uint32_t {
    CACHE_NONE = 0,
    CACHE_CASE = 1,  // This mode depends on the number of files in the hap.
    CACHE_ALL = 2
};
168 
169 // zip file extract class for bundle format.
170 class ZipFile {
171 public:
172     explicit ZipFile(const std::string &pathName);
173     ~ZipFile();
174     /**
175      * @brief Open zip file.
176      * @return Returns true if the zip file is successfully opened; returns false otherwise.
177      */
178     bool Open();
179     /**
180      * @brief Close zip file.
181      */
182     void Close();
183     /**
184      * @brief Set this zip content start offset and length in the zip file form pathName.
185      * @param start Indicates the zip content location start position.
186      * @param length Indicates the zip content length.
187      */
188     void SetContentLocation(const ZipPos start, const size_t length);
189     /**
190      * @brief Get all entries in the zip file.
191      * @param start Indicates the zip content location start position.
192      * @param length Indicates the zip content length.
193      * @return Returns the ZipEntryMap object cotain all entries.
194      */
195     const ZipEntryMap &GetAllEntries() const;
196     /**
197      * @brief Has entry by name.
198      * @param entryName Indicates the entry name.
199      * @return Returns true if the ZipEntry is successfully finded; returns false otherwise.
200      */
201     bool HasEntry(const std::string &entryName) const;
202 
203     bool IsDirExist(const std::string &dir);
204     void GetAllFileList(const std::string &srcPath, std::vector<std::string> &assetList);
205     void GetChildNames(const std::string &srcPath, std::set<std::string> &fileSet);
206 
207     /**
208      * @brief Get entry by name.
209      * @param entryName Indicates the entry name.
210      * @param resultEntry Indicates the obtained ZipEntry object.
211      * @return Returns true if the ZipEntry is successfully finded; returns false otherwise.
212      */
213     bool GetEntry(const std::string &entryName, ZipEntry &resultEntry) const;
214     /**
215      * @brief Get data relative offset for file.
216      * @param file Indicates the entry name.
217      * @param offset Indicates the obtained offset.
218      * @param length Indicates the length.
219      * @return Returns true if this function is successfully called; returns false otherwise.
220      */
221     bool GetDataOffsetRelative(const std::string &file, ZipPos &offset, uint32_t &length) const;
222     /**
223      * @brief Get data relative offset for file.
224      * @param file Indicates the entry name.
225      * @param dest Indicates the obtained ostream object.
226      * @return Returns true if file is successfully extracted; returns false otherwise.
227      */
228     bool ExtractFile(const std::string &file, std::ostream &dest) const;
229 
230     bool ExtractFileFromMMap(const std::string &file, void *mmapDataPtr,
231         std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const;
232 
233     std::unique_ptr<FileMapper> CreateFileMapper(const std::string &fileName, FileMapperType type) const;
234     bool ExtractToBufByName(const std::string &fileName, std::unique_ptr<uint8_t[]> &dataPtr,
235         size_t &len) const;
236     void SetCacheMode(CacheMode cacheMode);
237     bool UseDirCache() const;
238 private:
239     bool GetDataOffsetRelative(const ZipEntry &zipEntry, ZipPos &offset, uint32_t &length) const;
240     /**
241      * @brief Check the EndDir object.
242      * @param endDir Indicates the EndDir object to check.
243      * @return Returns true if  successfully checked; returns false otherwise.
244      */
245     bool CheckEndDir(const EndDir &endDir) const;
246     /**
247      * @brief Parse the EndDir.
248      * @return Returns true if  successfully Parsed; returns false otherwise.
249      */
250     bool ParseEndDirectory();
251     /**
252      * @brief Parse one entry.
253      * @return Returns true if successfully parsed; returns false otherwise.
254      */
255     bool ParseOneEntry(uint8_t* &entryPtr);
256     /**
257      * @brief Parse all Entries.
258      * @return Returns true if successfully parsed; returns false otherwise.
259      */
260     bool ParseAllEntries();
261     /**
262      * @brief Get LocalHeader object size.
263      * @param nameSize Indicates the nameSize.
264      * @param extraSize Indicates the extraSize.
265      * @return Returns size of LocalHeader.
266      */
267     size_t GetLocalHeaderSize(const uint16_t nameSize = 0, const uint16_t extraSize = 0) const;
268     /**
269      * @brief Get entry data offset.
270      * @param zipEntry Indicates the ZipEntry object.
271      * @param extraSize Indicates the extraSize.
272      * @return Returns position.
273      */
274     ZipPos GetEntryDataOffset(const ZipEntry &zipEntry, const uint16_t extraSize) const;
275     /**
276      * @brief Check data description.
277      * @param zipEntry Indicates the ZipEntry object.
278      * @param localHeader Indicates the localHeader object.
279      * @return Returns true if successfully checked; returns false otherwise.
280      */
281     bool CheckDataDesc(const ZipEntry &zipEntry, const LocalHeader &localHeader) const;
282     /**
283      * @brief Check coherency LocalHeader object.
284      * @param zipEntry Indicates the ZipEntry object.
285      * @param extraSize Indicates the obtained size.
286      * @return Returns true if successfully checked; returns false otherwise.
287      */
288     bool CheckCoherencyLocalHeader(const ZipEntry &zipEntry, uint16_t &extraSize) const;
289     /**
290      * @brief Unzip ZipEntry object to ostream.
291      * @param zipEntry Indicates the ZipEntry object.
292      * @param extraSize Indicates the size.
293      * @param dest Indicates the obtained ostream object.
294      * @return Returns true if successfully Unzip; returns false otherwise.
295      */
296     bool UnzipWithStore(const ZipEntry &zipEntry, const uint16_t extraSize, std::ostream &dest) const;
297     /**
298      * @brief Unzip ZipEntry object to ostream.
299      * @param zipEntry Indicates the ZipEntry object.
300      * @param extraSize Indicates the size.
301      * @param dest Indicates the obtained ostream object.
302      * @return Returns true if successfully Unzip; returns false otherwise.
303      */
304     bool UnzipWithInflated(const ZipEntry &zipEntry, const uint16_t extraSize, std::ostream &dest) const;
305     /**
306      * @brief Get Entry start.
307      * @param zipEntry Indicates the ZipEntry object.
308      * @param extraSize Indicates the extra size.
309      * @return Returns true if successfully Seeked; returns false otherwise.
310      */
311     size_t GetEntryStart(const ZipEntry &zipEntry, const uint16_t extraSize) const;
312     /**
313      * @brief Init zlib stream.
314      * @param zstream Indicates the obtained z_stream object.
315      * @return Returns true if successfully init; returns false otherwise.
316      */
317     bool InitZStream(z_stream &zstream) const;
318     /**
319      * @brief Read zlib stream.
320      * @param buffer Indicates the buffer to read.
321      * @param zstream Indicates the obtained z_stream object.
322      * @param remainCompressedSize Indicates the obtained size.
323      * @return Returns true if successfully read; returns false otherwise.
324      */
325     bool ReadZStream(const BytePtr &buffer, z_stream &zstream, uint32_t &remainCompressedSize, size_t &startPos) const;
326 
327     bool UnzipWithInflatedFromMMap(const ZipEntry &zipEntry, const uint16_t extraSize,
328         void *mmapDataPtr, std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const;
329     bool CopyInflateOut(z_stream &zstream, size_t inflateLen, uint8_t** dstDataPtr,
330         BytePtr bufOut, uint8_t &errorTimes) const;
331     bool ReadZStreamFromMMap(const BytePtr &buffer, void* &dataPtr,
332         z_stream &zstream, uint32_t &remainCompressedSize) const;
333 
334     std::shared_ptr<DirTreeNode> GetDirRoot();
335     std::shared_ptr<DirTreeNode> MakeDirTree() const;
336 
337     bool IsDirExistCache(const std::string &dir);
338     void GetAllFileListCache(const std::string &srcPath, std::vector<std::string> &assetList);
339     void GetChildNamesCache(const std::string &srcPath, std::set<std::string> &fileSet);
340 
341     bool IsDirExistNormal(const std::string &dir);
342     void GetAllFileListNormal(const std::string &srcPath, std::vector<std::string> &assetList);
343     void GetChildNamesNormal(const std::string &srcPath, std::set<std::string> &fileSet);
344 
345 private:
346     std::string pathName_;
347     std::shared_ptr<ZipFileReader> zipFileReader_;
348     EndDir endDir_;
349     ZipEntryMap entriesMap_;
350     std::mutex dirRootMutex_;
351     std::shared_ptr<DirTreeNode> dirRoot_;
352     // offset of central directory relative to zip file.
353     ZipPos centralDirPos_ = 0;
354     // this zip content start offset relative to zip file.
355     ZipPos fileStartPos_ = 0;
356     // this zip content length in the zip file.
357     ZipPos fileLength_ = 0;
358     bool isOpen_ = false;
359     CacheMode cacheMode_ = CacheMode::CACHE_CASE;
360 };
361 }  // namespace AbilityBase
362 }  // namespace OHOS
363 #endif  // OHOS_ABILITY_BASE_ZIP_FILE_H
364