1 /*
2  * Copyright (c) 2022-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "b_filesystem/b_dir.h"
17 
18 #include <algorithm>
19 #include <dirent.h>
20 #include <fnmatch.h>
21 #include <functional>
22 #include <filesystem>
23 #include <glob.h>
24 #include <memory>
25 #include <set>
26 #include <string>
27 #include <tuple>
28 #include <vector>
29 
30 #include "b_anony/b_anony.h"
31 #include "b_error/b_error.h"
32 #include "b_filesystem/b_file_hash.h"
33 #include "b_resources/b_constants.h"
34 #include "directory_ex.h"
35 #include "errors.h"
36 #include "filemgmt_libhilog.h"
37 
38 namespace OHOS::FileManagement::Backup {
39 using namespace std;
40 const int32_t PATH_MAX_LEN = 4096;
41 const size_t TOP_ELE = 0;
42 const std::string APP_DATA_DIR = BConstants::PATH_PUBLIC_HOME +
43     BConstants::PATH_APP_DATA + BConstants::FILE_SEPARATOR_CHAR;
44 
IsEmptyDirectory(const string & path)45 static bool IsEmptyDirectory(const string &path)
46 {
47     DIR *dir = opendir(path.c_str());
48     if (dir == nullptr) {
49         HILOGE("Opendir failed, errno:%{public}d", errno);
50         return false;
51     }
52     bool isEmpty = true;
53     struct dirent *entry = nullptr;
54     while ((entry = readdir(dir)) != nullptr) {
55         if (entry->d_type != DT_DIR || (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0)) {
56             isEmpty = false;
57             break;
58         }
59     }
60     closedir(dir);
61     return isEmpty;
62 }
63 
GetFile(const string & path,off_t size=-1)64 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetFile(const string &path, off_t size = -1)
65 {
66     map<string, struct stat> files;
67     map<string, size_t> smallFiles;
68     struct stat sta = {};
69     if (stat(path.data(), &sta) == -1) {
70         HILOGE("File not exist, errno:%{public}d, fileName:%{private}s.", errno, path.c_str());
71         return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
72     }
73     if (path == "/") {
74         return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
75     }
76     if (sta.st_size <= size) {
77         smallFiles.insert(make_pair(path, sta.st_size));
78     } else {
79         files.try_emplace(path, sta);
80     }
81     return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
82 }
83 
CheckOverLongPath(const string & path)84 static uint32_t CheckOverLongPath(const string &path)
85 {
86     uint32_t len = path.length();
87     if (len >= PATH_MAX_LEN) {
88         size_t found = path.find_last_of(BConstants::FILE_SEPARATOR_CHAR);
89         string sub = path.substr(found + 1);
90         HILOGE("Path over long, length:%{public}d, fileName:%{public}s.", len, sub.c_str());
91     }
92     return len;
93 }
94 
GetDirFilesDetail(const string & path,bool recursion,off_t size=-1)95 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetDirFilesDetail(const string &path,
96                                                                                        bool recursion,
97                                                                                        off_t size = -1)
98 {
99     map<string, struct stat> files;
100     map<string, size_t> smallFiles;
101 
102     if (IsEmptyDirectory(path)) {
103         string newPath = path;
104         if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
105             newPath += BConstants::FILE_SEPARATOR_CHAR;
106         }
107         smallFiles.insert(make_pair(newPath, 0));
108         return {ERR_OK, files, smallFiles};
109     }
110 
111     unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
112     if (!dir) {
113         HILOGE("Invalid directory path: %{private}s", path.c_str());
114         return GetFile(path, size);
115     }
116     struct dirent *ptr = nullptr;
117     while (!!(ptr = readdir(dir.get()))) {
118         // current dir OR parent dir
119         if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
120             continue;
121         } else if (ptr->d_type == DT_REG) {
122             struct stat sta = {};
123             string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
124             if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
125                 continue;
126             }
127             if (sta.st_size <= size) {
128                 smallFiles.insert(make_pair(fileName, sta.st_size));
129                 continue;
130             }
131 
132             files.try_emplace(fileName, sta);
133             continue;
134         } else if (ptr->d_type != DT_DIR) {
135             HILOGE("Not support file type");
136             continue;
137         }
138         // DT_DIR type
139         if (!recursion) {
140             continue;
141         }
142         auto [errCode, subFiles, subSmallFiles] =
143             GetDirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), recursion, size);
144         if (errCode != 0) {
145             return {errCode, files, smallFiles};
146         }
147         files.merge(subFiles);
148         smallFiles.insert(subSmallFiles.begin(), subSmallFiles.end());
149     }
150     return {ERR_OK, files, smallFiles};
151 }
152 
PreDealExcludes(std::vector<std::string> & excludes)153 static void PreDealExcludes(std::vector<std::string> &excludes)
154 {
155     size_t lenEx = excludes.size();
156     int j = 0;
157     for (size_t i = 0; i < lenEx; ++i) {
158         if (!excludes[i].empty()) {
159             if (excludes[i].at(excludes[i].size() - 1) == BConstants::FILE_SEPARATOR_CHAR) {
160                 excludes[i] += "*";
161             }
162             if (excludes[i].find(BConstants::FILE_SEPARATOR_CHAR) != string::npos &&
163                 excludes[i].at(TOP_ELE) != BConstants::FILE_SEPARATOR_CHAR) {
164                 excludes[i] = BConstants::FILE_SEPARATOR_CHAR + excludes[i];
165             }
166             excludes[j++] = excludes[i];
167         }
168     }
169     excludes.resize(j);
170 }
171 
GetDirFiles(const string & path)172 tuple<ErrCode, vector<string>> BDir::GetDirFiles(const string &path)
173 {
174     vector<string> files;
175     unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
176     if (!dir) {
177         HILOGE("Invalid directory path: %{private}s", path.c_str());
178         return {BError(errno).GetCode(), files};
179     }
180 
181     struct dirent *ptr = nullptr;
182     while (!!(ptr = readdir(dir.get()))) {
183         // current dir OR parent dir
184         if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
185             continue;
186         } else if (ptr->d_type == DT_DIR) {
187             continue;
188         } else {
189             files.push_back(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
190         }
191     }
192 
193     return {ERR_OK, files};
194 }
195 
GetSubDir(const std::string & path)196 static std::set<std::string> GetSubDir(const std::string &path)
197 {
198     if (path.empty()) {
199         return {};
200     }
201     std::set<std::string> result;
202     unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
203     if (!dir) {
204         HILOGE("Invalid directory path: %{private}s", path.c_str());
205         return {};
206     }
207 
208     struct dirent *ptr = nullptr;
209     while (!!(ptr = readdir(dir.get()))) {
210         // current dir OR parent dir
211         if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
212             continue;
213         } else if (ptr->d_type == DT_DIR) {
214             std::string tmpPath = IncludeTrailingPathDelimiter(path) +
215                 string(ptr->d_name) + BConstants::FILE_SEPARATOR_CHAR;
216             if (tmpPath == APP_DATA_DIR) {
217                 HILOGI("Filter appdata successfully");
218                 continue;
219             }
220             result.emplace(tmpPath);
221         } else {
222             result.emplace(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
223         }
224     }
225     return result;
226 }
227 
RmForceExcludePath(set<string> & expandPath)228 static void RmForceExcludePath(set<string> &expandPath)
229 {
230     set<string> addPaths;
231     for (auto it = expandPath.begin(); it != expandPath.end();) {
232         if (*it == BConstants::PATH_PUBLIC_HOME) {
233             addPaths = GetSubDir(*it);
234         }
235         if ((*it).find(APP_DATA_DIR) == 0) {
236             it = expandPath.erase(it);
237             continue;
238         }
239         ++it;
240     }
241     if (!addPaths.empty()) {
242         expandPath.erase(BConstants::PATH_PUBLIC_HOME);
243         expandPath.merge(addPaths);
244     }
245 }
246 
ExpandPathWildcard(const vector<string> & vec,bool onlyPath)247 static set<string> ExpandPathWildcard(const vector<string> &vec, bool onlyPath)
248 {
249     unique_ptr<glob_t, function<void(glob_t *)>> gl {new glob_t, [](glob_t *ptr) { globfree(ptr); }};
250     *gl = {};
251 
252     unsigned int flags = GLOB_DOOFFS | GLOB_MARK;
253     for (const string &pattern : vec) {
254         if (!pattern.empty()) {
255             glob(pattern.data(), static_cast<int>(flags), NULL, gl.get());
256             flags |= GLOB_APPEND;
257         }
258     }
259 
260     set<string> expandPath, filteredPath;
261     for (size_t i = 0; i < gl->gl_pathc; ++i) {
262         std::string tmpPath = gl->gl_pathv[i];
263         auto pos = tmpPath.find(BConstants::FILE_SEPARATOR_CHAR);
264         if (pos != 0 && pos != std::string::npos) {
265             tmpPath = BConstants::FILE_SEPARATOR_CHAR + tmpPath;
266         }
267         expandPath.emplace(tmpPath);
268     }
269     RmForceExcludePath(expandPath);
270     for (auto it = expandPath.begin(); it != expandPath.end(); ++it) {
271         filteredPath.insert(*it);
272         if (onlyPath && *it->rbegin() != BConstants::FILE_SEPARATOR_CHAR) {
273             continue;
274         }
275         auto jt = it;
276         for (++jt; jt != expandPath.end() && (jt->find(*it) == 0); ++jt) {
277         }
278 
279         it = --jt;
280     }
281 
282     return filteredPath;
283 }
284 
GetBigFiles(const vector<string> & includes,const vector<string> & excludes)285 tuple<ErrCode, map<string, struct stat>, map<string, size_t>> BDir::GetBigFiles(const vector<string> &includes,
286                                                                                 const vector<string> &excludes)
287 {
288     set<string> inc = ExpandPathWildcard(includes, true);
289 
290     map<string, struct stat> incFiles;
291     map<string, size_t> incSmallFiles;
292     for (const auto &item : inc) {
293         HILOGW("GetBigFiles, path = %{public}s", item.c_str());
294         auto [errCode, files, smallFiles] = GetDirFilesDetail(item, true, BConstants::BIG_FILE_BOUNDARY);
295         if (errCode == 0) {
296             incFiles.merge(move(files));
297             HILOGW("big files: %{public}zu; small files: %{public}zu", files.size(), smallFiles.size());
298             incSmallFiles.insert(smallFiles.begin(), smallFiles.end());
299         }
300     }
301     vector<string> endExcludes = excludes;
302     PreDealExcludes(endExcludes);
303     auto isMatch = [](const vector<string> &s, const string &str) -> bool {
304         if (str.empty()) {
305             return false;
306         }
307         for (const string &item : s) {
308             if (fnmatch(item.data(), str.data(), FNM_LEADING_DIR) == 0) {
309                 return true;
310             }
311         }
312         return false;
313     };
314 
315     map<string, size_t> resSmallFiles;
316     for (const auto &item : incSmallFiles) {
317         if (!isMatch(endExcludes, item.first)) {
318             resSmallFiles.insert(make_pair(item.first, item.second));
319         }
320     }
321 
322     map<string, struct stat> bigFiles;
323     for (const auto &item : incFiles) {
324         if (!isMatch(endExcludes, item.first)) {
325             bigFiles[item.first] = item.second;
326         }
327     }
328     HILOGW("total number of big files is %{public}zu", bigFiles.size());
329     HILOGW("total number of small files is %{public}zu", resSmallFiles.size());
330     return {ERR_OK, move(bigFiles), move(resSmallFiles)};
331 }
332 
GetUser0FileStat(vector<string> bigFile,vector<string> smallFile,vector<struct ReportFileInfo> & allFiles,vector<struct ReportFileInfo> & smallFiles,vector<struct ReportFileInfo> & bigFiles)333 void BDir::GetUser0FileStat(vector<string> bigFile,
334                             vector<string> smallFile,
335                             vector<struct ReportFileInfo> &allFiles,
336                             vector<struct ReportFileInfo> &smallFiles,
337                             vector<struct ReportFileInfo> &bigFiles)
338 {
339     for (const auto &item : smallFile) {
340         struct ReportFileInfo storageFiles;
341         storageFiles.filePath = item;
342         if (filesystem::is_directory(item)) {
343             storageFiles.isDir = 1;
344             storageFiles.userTar = 0;
345         } else {
346             storageFiles.isDir = 0;
347             auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
348             if (fileHash.empty()) {
349                 continue;
350             }
351             storageFiles.hash = fileHash;
352             storageFiles.userTar = 1;
353         }
354         struct stat sta = {};
355         if (stat(item.c_str(), &sta) != 0) {
356             throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
357         }
358         storageFiles.size = sta.st_size;
359         storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
360         int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
361         storageFiles.mtime = lastUpdateTime;
362         allFiles.push_back(storageFiles);
363         smallFiles.push_back(storageFiles);
364     }
365     for (const auto &item : bigFile) {
366         struct ReportFileInfo storageFiles;
367         storageFiles.filePath = item;
368         auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
369         if (fileHash.empty()) {
370             continue;
371         }
372         storageFiles.hash = fileHash;
373         struct stat sta = {};
374         if (stat(item.c_str(), &sta) != 0) {
375             throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
376         }
377         storageFiles.size = sta.st_size;
378         storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
379         int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
380         storageFiles.mtime = lastUpdateTime;
381         storageFiles.userTar = 1;
382         allFiles.push_back(storageFiles);
383         bigFiles.push_back(storageFiles);
384     }
385     HILOGI("get FileStat end, bigfiles = %{public}zu, smallFiles = %{public}zu, allFiles = %{public}zu,",
386         bigFiles.size(), smallFiles.size(), allFiles.size());
387 }
388 
IsNotPath(const string & path,vector<string> & bigFiles,vector<string> & smallFiles,off_t size)389 static tuple<vector<string>, vector<string>> IsNotPath(const string &path, vector<string> &bigFiles,
390     vector<string> &smallFiles, off_t size)
391 {
392     struct stat sta = {};
393     if (CheckOverLongPath(path) >= PATH_MAX_LEN || stat(path.data(), &sta) == -1) {
394         return {};
395     }
396     if (sta.st_size <= size) {
397         smallFiles.push_back(path);
398         HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
399         return {bigFiles, smallFiles};
400     }
401     bigFiles.push_back(path);
402     HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
403     return {bigFiles, smallFiles};
404 }
405 
GetUser0DirFilesDetail(const string & path,off_t size=-1)406 static tuple<vector<string>, vector<string>> GetUser0DirFilesDetail(const string &path, off_t size = -1)
407 {
408     vector<string> bigFiles;
409     vector<string> smallFiles;
410     if (IsEmptyDirectory(path)) {
411         string newPath = path;
412         if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
413             newPath += BConstants::FILE_SEPARATOR_CHAR;
414         }
415         smallFiles.push_back(newPath);
416         return {bigFiles, smallFiles};
417     }
418     if (filesystem::is_regular_file(path)) {
419         return IsNotPath(path, bigFiles, smallFiles, size);
420     }
421     unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
422     if (!dir) {
423         HILOGE("Invalid directory path: %{private}s", path.c_str());
424         return {};
425     }
426     struct dirent *ptr = nullptr;
427     while (!!(ptr = readdir(dir.get()))) {
428         // current dir OR parent dir
429         if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
430             continue;
431         } else if (ptr->d_type == DT_REG) {
432             struct stat sta = {};
433             string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
434             if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
435                 continue;
436             }
437             if (sta.st_size <= size) {
438                 smallFiles.push_back(fileName);
439                 continue;
440             }
441 
442             bigFiles.push_back(fileName);
443             continue;
444         } else if (ptr->d_type != DT_DIR) {
445             HILOGE("Not support file type");
446             continue;
447         }
448         // DT_DIR type
449         auto [subBigFiles, subSmallFiles] =
450             GetUser0DirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), size);
451         bigFiles.insert(bigFiles.end(), subBigFiles.begin(), subBigFiles.end());
452         smallFiles.insert(smallFiles.end(), subSmallFiles.begin(), subSmallFiles.end());
453     }
454     HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
455     return {bigFiles, smallFiles};
456 }
457 
GetBackupList(const vector<string> & includes,const vector<string> & excludes)458 tuple<vector<string>, vector<string>> BDir::GetBackupList(const vector<string> &includes,
459                                                           const vector<string> &excludes)
460 {
461     HILOGI("start get bigfiles and smallfiles");
462     set<string> inc = ExpandPathWildcard(includes, true);
463     vector<string> bigFiles;
464     vector<string> smallFiles;
465     for (const auto &item : inc) {
466         auto [bigFile, smallFile] = GetUser0DirFilesDetail(item, BConstants::BIG_FILE_BOUNDARY);
467         bigFiles.insert(bigFiles.end(), bigFile.begin(), bigFile.end());
468         smallFiles.insert(smallFiles.end(), smallFile.begin(), smallFile.end());
469     }
470     HILOGI("end bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
471     vector<string> endExcludes = excludes;
472     PreDealExcludes(endExcludes);
473     auto isMatch = [](const vector<string> &s, const string &str) -> bool {
474         if (str.empty()) {
475             return false;
476         }
477         for (const string &item : s) {
478             if (fnmatch(item.data(), str.data(), FNM_LEADING_DIR) == 0) {
479                 return true;
480             }
481         }
482         return false;
483     };
484 
485     for (auto item = bigFiles.begin(); item != bigFiles.end();) {
486         if (isMatch(endExcludes, *item)) {
487             item = bigFiles.erase(item);
488         } else {
489             ++item;
490         }
491     }
492     for (auto item = smallFiles.begin(); item != smallFiles.end();) {
493         if (isMatch(endExcludes, *item)) {
494             item = smallFiles.erase(item);
495         } else {
496             ++item;
497         }
498     }
499     HILOGI("End compare bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
500     return {bigFiles, smallFiles};
501 }
502 
/**
 * @brief Expand wildcard paths into a sorted, de-duplicated list of paths.
 *
 * @param paths Glob patterns to expand.
 * @return The paths produced by ExpandPathWildcard(patterns, true), in set order.
 */
vector<string> BDir::GetDirs(const vector<string_view> &paths)
{
    vector<string> patterns;
    patterns.reserve(paths.size());
    for (const string_view &rawPattern : paths) {
        patterns.emplace_back(rawPattern);
    }
    const set<string> expanded = ExpandPathWildcard(patterns, true);
    return vector<string>(expanded.begin(), expanded.end());
}
510 
CheckFilePathInvalid(const std::string & filePath)511 bool BDir::CheckFilePathInvalid(const std::string &filePath)
512 {
513     size_t pos = filePath.find(BConstants::PATH_ABSOLUTE);
514     while (pos != string::npos) {
515         if (pos == 0 || filePath[pos - 1] == BConstants::FILE_SEPARATOR_CHAR) {
516             HILOGE("Relative path is not allowed, path = %{public}s", GetAnonyPath(filePath).c_str());
517             return true;
518         }
519         pos = filePath.find(BConstants::PATH_ABSOLUTE, pos + BConstants::PATH_ABSOLUTE.size());
520     }
521     return false;
522 }
523 } // namespace OHOS::FileManagement::Backup