1#!/usr/bin/env python
2# coding: utf-8
3
4"""
5Copyright (c) 2023 Huawei Device Co., Ltd.
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10    http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18"""
19
20import argparse
21import os
22import re
23from check_common import read_file, traverse_file_in_each_type
24
25WHITELIST_FILE_NAME = "data_regex_whitelist.txt"
26
27
def check_regex_path(path):
    """Look for unescaped regex metacharacters in the second path component.

    The path is split on '/' and the element at index 2 is inspected; for a
    path of the form '/data/<name>/...' that element is '<name>'.

    Args:
        path: a path pattern string, e.g. '/data/app/el1(/.*)?'.

    Returns:
        The match object for the first unescaped metacharacter found in that
        component, or None when there is none or when the path has fewer
        than three '/'-separated elements.
    """
    # Drop escaped backslashes ('\\') first so that a literal backslash is
    # not mistaken for the start of another escape sequence, then turn an
    # escaped slash ('\/') into a plain separator before splitting.
    path = re.sub(r'\\\\', '', path)
    path = re.sub(r'\\/', '/', path)

    path_elements = path.split('/')
    if len(path_elements) < 3:
        # There is no element at index 2, so there is nothing to inspect.
        return None
    second_dir_name = path_elements[2]

    special_chars = r'[\$\(\)\*\+\.\[\]\?\\\^\{\}\|]'

    # An escaped metacharacter (e.g. '\.') matches literally; strip those so
    # that only unescaped ones are left behind.
    replace_str = re.sub(r'\\' + special_chars, '', second_dir_name)

    # Any metacharacter still present has not been escaped.
    return re.search(special_chars, replace_str)
41
42
def check_file_contexts(args, file_contexts, whitelist_set):
    """Report every non-whitelisted '/data/' entry flagged by check_regex_path.

    Args:
        args: object whose 'file_contexts' and 'policy_dir_list' attributes
            are interpolated into the failure message.
        file_contexts: iterable of line strings; the first
            whitespace-separated token of each line is taken as the path.
        whitelist_set: collection of paths (compared as written, before
            normalization) that are exempt from the check.

    Raises:
        Exception: with argument -1, after all lines have been examined, if
            at least one offending entry was reported.
    """
    found_violation = False
    for line_number, raw_line in enumerate(file_contexts, start=1):
        fields = raw_line.split(None, 1)
        if not fields:
            # Blank or whitespace-only line.
            continue

        path = fields[0]
        normalized = os.path.normpath(path)
        if not normalized.startswith("/data/"):
            continue
        # The whitelist lookup comes first so that exempt entries are never
        # handed to the regex check.
        if path in whitelist_set or not check_regex_path(normalized):
            continue

        headline = "Regex is not allowed in the secondary directory under data,"
        location = "check '{}' failed in file {}:{}\n".format(
            path, args.file_contexts, line_number)
        intro = "There are two solutions:\n"
        whitelist_hint = "1. Add '{}' to whitelist file \'{}\' under \'{}\';\n".format(
            path, WHITELIST_FILE_NAME, args.policy_dir_list)
        rewrite_hint = "2. Modify '{}' to remove the regular expression\n".format(path)
        print(headline, location, intro, whitelist_hint, rewrite_hint)
        found_violation = True

    if found_violation:
        raise Exception(-1)
67
68
def get_whitelist(args):
    """Merge the entries of all whitelist files into a single set.

    The file list comes from traverse_file_in_each_type(), called with
    args.policy_dir_list and WHITELIST_FILE_NAME; each returned path is read
    with read_file() and every item it yields becomes one set member.
    NOTE(review): both helpers live in check_common; presumably read_file
    yields one entry per line -- confirm there.

    Args:
        args: object providing the 'policy_dir_list' attribute.

    Returns:
        A set containing every entry read from the whitelist files.
    """
    whitelist_files = traverse_file_in_each_type(args.policy_dir_list, WHITELIST_FILE_NAME)
    return {
        entry
        for whitelist_file in whitelist_files
        for entry in read_file(whitelist_file)
    }
77
78
def parse_args():
    """Parse the command line and return the resulting namespace.

    Both options are mandatory:
        --file_contexts    stored as 'file_contexts'
        --policy-dir-list  stored as 'policy_dir_list'
    """
    required_options = (
        ('--file_contexts', 'the file_contexts file path'),
        ('--policy-dir-list', 'the whitelist path list'),
    )
    arg_parser = argparse.ArgumentParser()
    for flag, description in required_options:
        arg_parser.add_argument(flag, help=description, required=True)
    return arg_parser.parse_args()
86
87
if __name__ == "__main__":
    # Build the whitelist, read the file_contexts lines, then validate them.
    # check_file_contexts raises when it reports at least one offending
    # entry, so the script terminates with an uncaught exception in that case.
    input_args = parse_args()
    whitelist_data = get_whitelist(input_args)

    file_contexts_data = read_file(input_args.file_contexts)
    check_file_contexts(input_args, file_contexts_data, whitelist_data)
95