1 /*
2 * Copyright (c) 2022-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "b_filesystem/b_dir.h"
17
18 #include <algorithm>
19 #include <dirent.h>
20 #include <fnmatch.h>
21 #include <functional>
22 #include <filesystem>
23 #include <glob.h>
24 #include <memory>
25 #include <set>
26 #include <string>
27 #include <tuple>
28 #include <vector>
29
30 #include "b_anony/b_anony.h"
31 #include "b_error/b_error.h"
32 #include "b_filesystem/b_file_hash.h"
33 #include "b_resources/b_constants.h"
34 #include "directory_ex.h"
35 #include "errors.h"
36 #include "filemgmt_libhilog.h"
37
38 namespace OHOS::FileManagement::Backup {
39 using namespace std;
40 const int32_t PATH_MAX_LEN = 4096;
41 const size_t TOP_ELE = 0;
42 const std::string APP_DATA_DIR = BConstants::PATH_PUBLIC_HOME +
43 BConstants::PATH_APP_DATA + BConstants::FILE_SEPARATOR_CHAR;
44
IsEmptyDirectory(const string & path)45 static bool IsEmptyDirectory(const string &path)
46 {
47 DIR *dir = opendir(path.c_str());
48 if (dir == nullptr) {
49 HILOGE("Opendir failed, errno:%{public}d", errno);
50 return false;
51 }
52 bool isEmpty = true;
53 struct dirent *entry = nullptr;
54 while ((entry = readdir(dir)) != nullptr) {
55 if (entry->d_type != DT_DIR || (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0)) {
56 isEmpty = false;
57 break;
58 }
59 }
60 closedir(dir);
61 return isEmpty;
62 }
63
GetFile(const string & path,off_t size=-1)64 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetFile(const string &path, off_t size = -1)
65 {
66 map<string, struct stat> files;
67 map<string, size_t> smallFiles;
68 struct stat sta = {};
69 if (stat(path.data(), &sta) == -1) {
70 HILOGE("File not exist, errno:%{public}d, fileName:%{private}s.", errno, path.c_str());
71 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
72 }
73 if (path == "/") {
74 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
75 }
76 if (sta.st_size <= size) {
77 smallFiles.insert(make_pair(path, sta.st_size));
78 } else {
79 files.try_emplace(path, sta);
80 }
81 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
82 }
83
CheckOverLongPath(const string & path)84 static uint32_t CheckOverLongPath(const string &path)
85 {
86 uint32_t len = path.length();
87 if (len >= PATH_MAX_LEN) {
88 size_t found = path.find_last_of(BConstants::FILE_SEPARATOR_CHAR);
89 string sub = path.substr(found + 1);
90 HILOGE("Path over long, length:%{public}d, fileName:%{public}s.", len, sub.c_str());
91 }
92 return len;
93 }
94
GetDirFilesDetail(const string & path,bool recursion,off_t size=-1)95 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetDirFilesDetail(const string &path,
96 bool recursion,
97 off_t size = -1)
98 {
99 map<string, struct stat> files;
100 map<string, size_t> smallFiles;
101
102 if (IsEmptyDirectory(path)) {
103 string newPath = path;
104 if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
105 newPath += BConstants::FILE_SEPARATOR_CHAR;
106 }
107 smallFiles.insert(make_pair(newPath, 0));
108 return {ERR_OK, files, smallFiles};
109 }
110
111 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
112 if (!dir) {
113 HILOGE("Invalid directory path: %{private}s", path.c_str());
114 return GetFile(path, size);
115 }
116 struct dirent *ptr = nullptr;
117 while (!!(ptr = readdir(dir.get()))) {
118 // current dir OR parent dir
119 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
120 continue;
121 } else if (ptr->d_type == DT_REG) {
122 struct stat sta = {};
123 string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
124 if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
125 continue;
126 }
127 if (sta.st_size <= size) {
128 smallFiles.insert(make_pair(fileName, sta.st_size));
129 continue;
130 }
131
132 files.try_emplace(fileName, sta);
133 continue;
134 } else if (ptr->d_type != DT_DIR) {
135 HILOGE("Not support file type");
136 continue;
137 }
138 // DT_DIR type
139 if (!recursion) {
140 continue;
141 }
142 auto [errCode, subFiles, subSmallFiles] =
143 GetDirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), recursion, size);
144 if (errCode != 0) {
145 return {errCode, files, smallFiles};
146 }
147 files.merge(subFiles);
148 smallFiles.insert(subSmallFiles.begin(), subSmallFiles.end());
149 }
150 return {ERR_OK, files, smallFiles};
151 }
152
PreDealExcludes(std::vector<std::string> & excludes)153 static void PreDealExcludes(std::vector<std::string> &excludes)
154 {
155 size_t lenEx = excludes.size();
156 int j = 0;
157 for (size_t i = 0; i < lenEx; ++i) {
158 if (!excludes[i].empty()) {
159 if (excludes[i].at(excludes[i].size() - 1) == BConstants::FILE_SEPARATOR_CHAR) {
160 excludes[i] += "*";
161 }
162 if (excludes[i].find(BConstants::FILE_SEPARATOR_CHAR) != string::npos &&
163 excludes[i].at(TOP_ELE) != BConstants::FILE_SEPARATOR_CHAR) {
164 excludes[i] = BConstants::FILE_SEPARATOR_CHAR + excludes[i];
165 }
166 excludes[j++] = excludes[i];
167 }
168 }
169 excludes.resize(j);
170 }
171
GetDirFiles(const string & path)172 tuple<ErrCode, vector<string>> BDir::GetDirFiles(const string &path)
173 {
174 vector<string> files;
175 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
176 if (!dir) {
177 HILOGE("Invalid directory path: %{private}s", path.c_str());
178 return {BError(errno).GetCode(), files};
179 }
180
181 struct dirent *ptr = nullptr;
182 while (!!(ptr = readdir(dir.get()))) {
183 // current dir OR parent dir
184 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
185 continue;
186 } else if (ptr->d_type == DT_DIR) {
187 continue;
188 } else {
189 files.push_back(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
190 }
191 }
192
193 return {ERR_OK, files};
194 }
195
GetSubDir(const std::string & path)196 static std::set<std::string> GetSubDir(const std::string &path)
197 {
198 if (path.empty()) {
199 return {};
200 }
201 std::set<std::string> result;
202 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
203 if (!dir) {
204 HILOGE("Invalid directory path: %{private}s", path.c_str());
205 return {};
206 }
207
208 struct dirent *ptr = nullptr;
209 while (!!(ptr = readdir(dir.get()))) {
210 // current dir OR parent dir
211 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
212 continue;
213 } else if (ptr->d_type == DT_DIR) {
214 std::string tmpPath = IncludeTrailingPathDelimiter(path) +
215 string(ptr->d_name) + BConstants::FILE_SEPARATOR_CHAR;
216 if (tmpPath == APP_DATA_DIR) {
217 HILOGI("Filter appdata successfully");
218 continue;
219 }
220 result.emplace(tmpPath);
221 } else {
222 result.emplace(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
223 }
224 }
225 return result;
226 }
227
RmForceExcludePath(set<string> & expandPath)228 static void RmForceExcludePath(set<string> &expandPath)
229 {
230 set<string> addPaths;
231 for (auto it = expandPath.begin(); it != expandPath.end();) {
232 if (*it == BConstants::PATH_PUBLIC_HOME) {
233 addPaths = GetSubDir(*it);
234 }
235 if ((*it).find(APP_DATA_DIR) == 0) {
236 it = expandPath.erase(it);
237 continue;
238 }
239 ++it;
240 }
241 if (!addPaths.empty()) {
242 expandPath.erase(BConstants::PATH_PUBLIC_HOME);
243 expandPath.merge(addPaths);
244 }
245 }
246
ExpandPathWildcard(const vector<string> & vec,bool onlyPath)247 static set<string> ExpandPathWildcard(const vector<string> &vec, bool onlyPath)
248 {
249 unique_ptr<glob_t, function<void(glob_t *)>> gl {new glob_t, [](glob_t *ptr) { globfree(ptr); }};
250 *gl = {};
251
252 unsigned int flags = GLOB_DOOFFS | GLOB_MARK;
253 for (const string &pattern : vec) {
254 if (!pattern.empty()) {
255 glob(pattern.data(), static_cast<int>(flags), NULL, gl.get());
256 flags |= GLOB_APPEND;
257 }
258 }
259
260 set<string> expandPath, filteredPath;
261 for (size_t i = 0; i < gl->gl_pathc; ++i) {
262 std::string tmpPath = gl->gl_pathv[i];
263 auto pos = tmpPath.find(BConstants::FILE_SEPARATOR_CHAR);
264 if (pos != 0 && pos != std::string::npos) {
265 tmpPath = BConstants::FILE_SEPARATOR_CHAR + tmpPath;
266 }
267 expandPath.emplace(tmpPath);
268 }
269 RmForceExcludePath(expandPath);
270 for (auto it = expandPath.begin(); it != expandPath.end(); ++it) {
271 filteredPath.insert(*it);
272 if (onlyPath && *it->rbegin() != BConstants::FILE_SEPARATOR_CHAR) {
273 continue;
274 }
275 auto jt = it;
276 for (++jt; jt != expandPath.end() && (jt->find(*it) == 0); ++jt) {
277 }
278
279 it = --jt;
280 }
281
282 return filteredPath;
283 }
284
GetBigFiles(const vector<string> & includes,const vector<string> & excludes)285 tuple<ErrCode, map<string, struct stat>, map<string, size_t>> BDir::GetBigFiles(const vector<string> &includes,
286 const vector<string> &excludes)
287 {
288 set<string> inc = ExpandPathWildcard(includes, true);
289
290 map<string, struct stat> incFiles;
291 map<string, size_t> incSmallFiles;
292 for (const auto &item : inc) {
293 HILOGW("GetBigFiles, path = %{public}s", item.c_str());
294 auto [errCode, files, smallFiles] = GetDirFilesDetail(item, true, BConstants::BIG_FILE_BOUNDARY);
295 if (errCode == 0) {
296 incFiles.merge(move(files));
297 HILOGW("big files: %{public}zu; small files: %{public}zu", files.size(), smallFiles.size());
298 incSmallFiles.insert(smallFiles.begin(), smallFiles.end());
299 }
300 }
301 vector<string> endExcludes = excludes;
302 PreDealExcludes(endExcludes);
303 auto isMatch = [](const vector<string> &s, const string &str) -> bool {
304 if (str.empty()) {
305 return false;
306 }
307 for (const string &item : s) {
308 if (fnmatch(item.data(), str.data(), FNM_LEADING_DIR) == 0) {
309 return true;
310 }
311 }
312 return false;
313 };
314
315 map<string, size_t> resSmallFiles;
316 for (const auto &item : incSmallFiles) {
317 if (!isMatch(endExcludes, item.first)) {
318 resSmallFiles.insert(make_pair(item.first, item.second));
319 }
320 }
321
322 map<string, struct stat> bigFiles;
323 for (const auto &item : incFiles) {
324 if (!isMatch(endExcludes, item.first)) {
325 bigFiles[item.first] = item.second;
326 }
327 }
328 HILOGW("total number of big files is %{public}zu", bigFiles.size());
329 HILOGW("total number of small files is %{public}zu", resSmallFiles.size());
330 return {ERR_OK, move(bigFiles), move(resSmallFiles)};
331 }
332
GetUser0FileStat(vector<string> bigFile,vector<string> smallFile,vector<struct ReportFileInfo> & allFiles,vector<struct ReportFileInfo> & smallFiles,vector<struct ReportFileInfo> & bigFiles)333 void BDir::GetUser0FileStat(vector<string> bigFile,
334 vector<string> smallFile,
335 vector<struct ReportFileInfo> &allFiles,
336 vector<struct ReportFileInfo> &smallFiles,
337 vector<struct ReportFileInfo> &bigFiles)
338 {
339 for (const auto &item : smallFile) {
340 struct ReportFileInfo storageFiles;
341 storageFiles.filePath = item;
342 if (filesystem::is_directory(item)) {
343 storageFiles.isDir = 1;
344 storageFiles.userTar = 0;
345 } else {
346 storageFiles.isDir = 0;
347 auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
348 if (fileHash.empty()) {
349 continue;
350 }
351 storageFiles.hash = fileHash;
352 storageFiles.userTar = 1;
353 }
354 struct stat sta = {};
355 if (stat(item.c_str(), &sta) != 0) {
356 throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
357 }
358 storageFiles.size = sta.st_size;
359 storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
360 int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
361 storageFiles.mtime = lastUpdateTime;
362 allFiles.push_back(storageFiles);
363 smallFiles.push_back(storageFiles);
364 }
365 for (const auto &item : bigFile) {
366 struct ReportFileInfo storageFiles;
367 storageFiles.filePath = item;
368 auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
369 if (fileHash.empty()) {
370 continue;
371 }
372 storageFiles.hash = fileHash;
373 struct stat sta = {};
374 if (stat(item.c_str(), &sta) != 0) {
375 throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
376 }
377 storageFiles.size = sta.st_size;
378 storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
379 int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
380 storageFiles.mtime = lastUpdateTime;
381 storageFiles.userTar = 1;
382 allFiles.push_back(storageFiles);
383 bigFiles.push_back(storageFiles);
384 }
385 HILOGI("get FileStat end, bigfiles = %{public}zu, smallFiles = %{public}zu, allFiles = %{public}zu,",
386 bigFiles.size(), smallFiles.size(), allFiles.size());
387 }
388
IsNotPath(const string & path,vector<string> & bigFiles,vector<string> & smallFiles,off_t size)389 static tuple<vector<string>, vector<string>> IsNotPath(const string &path, vector<string> &bigFiles,
390 vector<string> &smallFiles, off_t size)
391 {
392 struct stat sta = {};
393 if (CheckOverLongPath(path) >= PATH_MAX_LEN || stat(path.data(), &sta) == -1) {
394 return {};
395 }
396 if (sta.st_size <= size) {
397 smallFiles.push_back(path);
398 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
399 return {bigFiles, smallFiles};
400 }
401 bigFiles.push_back(path);
402 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
403 return {bigFiles, smallFiles};
404 }
405
GetUser0DirFilesDetail(const string & path,off_t size=-1)406 static tuple<vector<string>, vector<string>> GetUser0DirFilesDetail(const string &path, off_t size = -1)
407 {
408 vector<string> bigFiles;
409 vector<string> smallFiles;
410 if (IsEmptyDirectory(path)) {
411 string newPath = path;
412 if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
413 newPath += BConstants::FILE_SEPARATOR_CHAR;
414 }
415 smallFiles.push_back(newPath);
416 return {bigFiles, smallFiles};
417 }
418 if (filesystem::is_regular_file(path)) {
419 return IsNotPath(path, bigFiles, smallFiles, size);
420 }
421 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
422 if (!dir) {
423 HILOGE("Invalid directory path: %{private}s", path.c_str());
424 return {};
425 }
426 struct dirent *ptr = nullptr;
427 while (!!(ptr = readdir(dir.get()))) {
428 // current dir OR parent dir
429 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
430 continue;
431 } else if (ptr->d_type == DT_REG) {
432 struct stat sta = {};
433 string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
434 if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
435 continue;
436 }
437 if (sta.st_size <= size) {
438 smallFiles.push_back(fileName);
439 continue;
440 }
441
442 bigFiles.push_back(fileName);
443 continue;
444 } else if (ptr->d_type != DT_DIR) {
445 HILOGE("Not support file type");
446 continue;
447 }
448 // DT_DIR type
449 auto [subBigFiles, subSmallFiles] =
450 GetUser0DirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), size);
451 bigFiles.insert(bigFiles.end(), subBigFiles.begin(), subBigFiles.end());
452 smallFiles.insert(smallFiles.end(), subSmallFiles.begin(), subSmallFiles.end());
453 }
454 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
455 return {bigFiles, smallFiles};
456 }
457
GetBackupList(const vector<string> & includes,const vector<string> & excludes)458 tuple<vector<string>, vector<string>> BDir::GetBackupList(const vector<string> &includes,
459 const vector<string> &excludes)
460 {
461 HILOGI("start get bigfiles and smallfiles");
462 set<string> inc = ExpandPathWildcard(includes, true);
463 vector<string> bigFiles;
464 vector<string> smallFiles;
465 for (const auto &item : inc) {
466 auto [bigFile, smallFile] = GetUser0DirFilesDetail(item, BConstants::BIG_FILE_BOUNDARY);
467 bigFiles.insert(bigFiles.end(), bigFile.begin(), bigFile.end());
468 smallFiles.insert(smallFiles.end(), smallFile.begin(), smallFile.end());
469 }
470 HILOGI("end bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
471 vector<string> endExcludes = excludes;
472 PreDealExcludes(endExcludes);
473 auto isMatch = [](const vector<string> &s, const string &str) -> bool {
474 if (str.empty()) {
475 return false;
476 }
477 for (const string &item : s) {
478 if (fnmatch(item.data(), str.data(), FNM_LEADING_DIR) == 0) {
479 return true;
480 }
481 }
482 return false;
483 };
484
485 for (auto item = bigFiles.begin(); item != bigFiles.end();) {
486 if (isMatch(endExcludes, *item)) {
487 item = bigFiles.erase(item);
488 } else {
489 ++item;
490 }
491 }
492 for (auto item = smallFiles.begin(); item != smallFiles.end();) {
493 if (isMatch(endExcludes, *item)) {
494 item = smallFiles.erase(item);
495 } else {
496 ++item;
497 }
498 }
499 HILOGI("End compare bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
500 return {bigFiles, smallFiles};
501 }
502
GetDirs(const vector<string_view> & paths)503 vector<string> BDir::GetDirs(const vector<string_view> &paths)
504 {
505 vector<string> wildcardPath(paths.begin(), paths.end());
506 set<string> inc = ExpandPathWildcard(wildcardPath, true);
507 vector<string> dirs(inc.begin(), inc.end());
508 return dirs;
509 }
510
CheckFilePathInvalid(const std::string & filePath)511 bool BDir::CheckFilePathInvalid(const std::string &filePath)
512 {
513 size_t pos = filePath.find(BConstants::PATH_ABSOLUTE);
514 while (pos != string::npos) {
515 if (pos == 0 || filePath[pos - 1] == BConstants::FILE_SEPARATOR_CHAR) {
516 HILOGE("Relative path is not allowed, path = %{public}s", GetAnonyPath(filePath).c_str());
517 return true;
518 }
519 pos = filePath.find(BConstants::PATH_ABSOLUTE, pos + BConstants::PATH_ABSOLUTE.size());
520 }
521 return false;
522 }
523 } // namespace OHOS::FileManagement::Backup