1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3
4#
5# Copyright (c) 2023 Huawei Device Co., Ltd.
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#     http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17#
18
19
20import os
21import sys
22import argparse
23import subprocess
24import stat
25import libc_static_analysis as gen_libc
26import audit_log_analysis as audit_policy
27import generate_code_from_policy as gen_policy
28
29
# Modify the objdump and readelf paths returned by the two functions below.
def get_obj_dump_path():
    """Return the path of the objdump executable used for disassembly.

    The value is an empty-string placeholder in this file.
    """
    return ''
34
35
def get_read_elf_path():
    """Return the path of the readelf executable used to inspect ELF files.

    The value is an empty-string placeholder in this file.
    """
    return ''
39
40
def create_needed_file(elf_path, locate_path, cmd, suffix):
    """Run ``cmd`` and store its standard output in a file named after the ELF.

    The output file name is the ELF file's base name up to its first dot,
    followed by ``suffix``; the file is created inside ``locate_path``.

    Args:
        elf_path: path of the ELF file; only its base name is used here.
        locate_path: directory receiving the output file. A trailing '/' is
            optional and an empty string means the current directory.
        cmd: command line to execute; it is split on single spaces and run
            without a shell.
        suffix: text appended to the base name (for example '.asm').

    Returns:
        The path of the file that received the command output.

    Raises:
        subprocess.TimeoutExpired: if the command runs longer than 3 seconds.
    """
    elf_file_name = elf_path.split('/')[-1].split('.')[0] + suffix
    # os.path.join copes with both a missing trailing separator and an empty
    # directory, where indexing locate_path[-1] used to raise IndexError.
    target_path = os.path.join(locate_path, elf_file_name)
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    modes = stat.S_IWUSR | stat.S_IRUSR | stat.S_IWGRP | stat.S_IRGRP
    with os.fdopen(os.open(target_path, flags, modes), 'w') as output_file:
        # subprocess.run kills and reaps the child when the timeout expires;
        # Popen.communicate(timeout=...) alone leaves it running.
        subprocess.run(cmd.split(' '), stdout=output_file, timeout=3, check=False)
    return target_path
52
53
def generate_libc_asm(target_cpu, elf_path, locate_path):
    """Disassemble ``elf_path`` with the objdump matching ``target_cpu``.

    Args:
        target_cpu: one of 'arm', 'arm64' or 'riscv64'.
        elf_path: ELF file to disassemble.
        locate_path: directory in which the '.asm' file is written.

    Returns:
        The path returned by create_needed_file for the '.asm' output.

    Raises:
        ValueError: if ``target_cpu`` is not one of the supported values.
    """
    objdump_by_cpu = (
        ('arm', 'arm-linux-musleabi-objdump'),
        ('arm64', 'aarch64-linux-musl-objdump'),
        ('riscv64', 'riscv64-linux-musl-objdump'),
    )
    for cpu, objdump in objdump_by_cpu:
        if target_cpu == cpu:
            dump_cmd = '{} -d {}'.format(objdump, elf_path)
            return create_needed_file(elf_path, locate_path, dump_cmd, '.asm')

    raise ValueError("target cpu error")
66
67
def get_lib_path(elf_path, elf_name, cmd_extra):
    """Locate ``elf_name`` below ``elf_path`` using a ``find | grep`` pipeline.

    For every name except 'libc++.so' only paths containing 'unstripped' and
    not containing '_x64' are kept, further filtered by ``cmd_extra``; for
    'libc++.so' only paths containing 'aarch64-linux' are kept.

    Args:
        elf_path: directory handed to ``find``.
        elf_name: file name handed to ``find -name``.
        cmd_extra: extra shell text appended to the default filter.

    Returns:
        The longest matching line (the first one among equally long lines),
        stripped of surrounding whitespace, or '' when nothing matches.
    """
    if elf_name == 'libc++.so':
        path_filter = '| grep aarch64-linux'
    else:
        path_filter = ' | grep unstripped | grep -v _x64 {}'.format(cmd_extra)

    find_cmd = 'find {} -name {}{}'.format(elf_path, elf_name, path_filter)
    candidates = [line.strip() for line in os.popen(find_cmd).read().split('\n')]
    # max() keeps the first of several equally long entries, like a scan that
    # only replaces the current best on a strictly longer candidate.
    return max(candidates, key=len)
81
82
def extract_elf_name(elf_path):
    """Return the shared libraries ``elf_path`` depends on, except libc.so.

    Runs readelf ``-d`` on the file, keeps the 'Shared library' lines and
    takes the text between '[' and ']' of each line.

    Args:
        elf_path: ELF file to inspect.

    Returns:
        A set of library names; 'libc.so' and empty names are left out.
    """
    read_cmd = "{} -d {} | grep 'Shared library'".format(get_read_elf_path(), elf_path)
    dynamic_lines = os.popen(read_cmd).read().strip().split('\n')
    bracketed = (line[line.find('[') + 1: line.find(']')] for line in dynamic_lines)
    return {lib for lib in bracketed if lib not in ('libc.so', '')}
94
95
def extract_undef_name(elf_path):
    """Return the names of the undefined symbols of ``elf_path``.

    Runs readelf ``-sW`` on the file, keeps the lines containing 'UND' and
    takes the stripped text that follows the first 'UND' of each line.

    Args:
        elf_path: ELF file to inspect.

    Returns:
        A set of non-empty symbol names.
    """
    read_cmd = '{} -sW {} | grep UND'.format(get_read_elf_path(), elf_path)
    symbol_lines = os.popen(read_cmd).read().strip().split('\n')
    trailing = (line[line.find('UND') + 3:].strip() for line in symbol_lines)
    return {symbol for symbol in trailing if symbol}
106
107
def collect_elf(elf_path, elf_name):
    """Collect the paths of ``elf_name`` and of all their transitive dependencies.

    Starting from the given names, each library is located with
    get_lib_path and its 'Shared library' entries (see extract_elf_name) are
    followed level by level.

    Args:
        elf_path: directory searched for the libraries.
        elf_name: iterable of library names to start from.

    Returns:
        A set with the path get_lib_path returned for every library reached.
    """
    visited_names = set()
    elf_path_list = set()
    pending_names = set(elf_name)
    while pending_names:
        visited_names |= pending_names
        level_paths = {get_lib_path(elf_path, lib_name, '') for lib_name in pending_names}
        elf_path_list |= level_paths

        discovered_names = set()
        for lib_path in level_paths:
            discovered_names |= extract_elf_name(lib_path)
        # Only follow names not seen before: this terminates on dependency
        # cycles and avoids re-expanding libraries shared by several parents.
        pending_names = discovered_names - visited_names

    return elf_path_list
125
126
def collect_undef_func_name(elf_path_list):
    """Return the union of the undefined symbol names of several ELF files.

    Args:
        elf_path_list: iterable of ELF file paths.

    Returns:
        A set with every name extract_undef_name reported for any of the files.
    """
    collected = set()
    collected.update(*(extract_undef_name(path) for path in elf_path_list))
    return collected
133
134
def collect_syscall(undef_func_name, libc_func_map):
    """Collect the syscall numbers of the libc functions that are referenced.

    Args:
        undef_func_name: iterable of function names used by the analyzed ELFs.
        libc_func_map: iterable of objects exposing ``func_name`` and ``nr``
            (a set of syscall numbers).

    Returns:
        The union of ``nr`` of every entry whose ``func_name`` is referenced.
    """
    # Build the lookup once so each libc entry costs a single membership test
    # instead of a scan over all referenced names.
    wanted_names = set(undef_func_name)
    syscall_nr = set()

    for libc_func in libc_func_map:
        if libc_func.func_name in wanted_names:
            syscall_nr |= libc_func.nr

    return syscall_nr
145
146
def get_item_content(arch, nr_set, name_nr_table):
    """Render the '@allowList' section for the given syscall numbers.

    Each syscall number is translated to its name through
    ``name_nr_table[arch]`` and written as a '<name>;<arch>' line, ordered by
    ascending syscall number.

    Args:
        arch: architecture name used both as table key and as line suffix.
        nr_set: iterable of syscall numbers.
        name_nr_table: mapping of architecture to a {number: name} mapping.

    Returns:
        The section text starting with '@allowList'. With an empty
        ``nr_set`` only the header line is returned.
    """
    func_name_list = []
    for nr in sorted(nr_set):
        func_name_list.append(name_nr_table.get(arch).get(nr))

    if not func_name_list:
        # Joining an empty list used to yield a stray ';<arch>' entry with an
        # empty syscall name.
        return '@allowList\n'

    content = '@allowList\n{};{}\n'.format(';{}\n'.format(arch).join(func_name_list), arch)

    return content
154
155
def get_und_func_except_libc(elf_path, libc_func_list):
    """Return the undefined symbols of ``elf_path`` that are not libc functions.

    Args:
        elf_path: ELF file to inspect.
        libc_func_list: collection of libc function names.

    Returns:
        A list of the undefined symbol names absent from ``libc_func_list``.
    """
    non_libc_symbols = []
    for symbol in extract_undef_name(elf_path):
        if symbol in libc_func_list:
            continue
        non_libc_symbols.append(symbol)
    return non_libc_symbols
160
161
def get_und_libc_func(elf_path, libc_func_list):
    """Return the undefined symbols of ``elf_path`` that are libc functions.

    Args:
        elf_path: ELF file to inspect.
        libc_func_list: collection of libc function names.

    Returns:
        A list of the undefined symbol names present in ``libc_func_list``.
    """
    libc_symbols = []
    for symbol in extract_undef_name(elf_path):
        if symbol in libc_func_list:
            libc_symbols.append(symbol)
    return libc_symbols
166
167
def create_disassemble_file(elf_path, locate_path, section):
    """Disassemble one section of ``elf_path`` into a '<section>.asm' file.

    Args:
        elf_path: ELF file to disassemble.
        locate_path: directory in which the output file is written.
        section: section name passed to objdump via ``--section``.

    Returns:
        The path returned by create_needed_file for the output file.
    """
    objdump = get_obj_dump_path()
    dump_cmd = '{} -d --section={} {}'.format(objdump, section, elf_path)
    asm_suffix = section + '.asm'
    return create_needed_file(elf_path, locate_path, dump_cmd, asm_suffix)
171
172
def remove_disassemble_file(path):
    """Delete every '*.asm' entry found directly inside ``path``.

    Removal is best effort: a failure to delete one entry is reported on
    stderr and the remaining entries are still processed.

    Args:
        path: directory to clean up.
    """
    for entry in os.listdir(path):
        if not entry.endswith('.asm'):
            continue
        # os.listdir yields bare names, so anchor them to ``path``; passing
        # them to ``rm`` as-is only worked when ``path`` was the current
        # working directory.
        asm_file = os.path.join(path, entry)
        try:
            os.remove(asm_file)
        except OSError as err:
            print('failed to remove {}: {}'.format(asm_file, err), file=sys.stderr)
176
177
class FuncCallee:
    """A function name together with the set of functions it references."""

    def __init__(self, func_name):
        # Names of the functions referenced from this function's body.
        self.func_callee = set()
        self.func_name = func_name

    def print_info(self):
        """Print the function name and the set of its callees."""
        summary = '{} call function {}'.format(self.func_name, self.func_callee)
        print(summary)
185
186
def parse_line(fp):
    """Build the per-function callee sets from objdump disassembly lines.

    A line containing '>:' starts a new function whose name is the text
    between '<' and '>:'. Any other line containing both '<' and '>'
    contributes the text between them as a callee of the current function:
    an '@plt' suffix is cut off and names containing '+0x' (offsets into a
    function) are ignored.

    Args:
        fp: iterable of text lines, e.g. an open disassembly file.

    Returns:
        A list of FuncCallee objects in order of appearance; empty when the
        input contains no function header.
    """
    func_list = []
    current_func = None
    for line in fp:
        if '>:' in line:
            if current_func is not None:
                func_list.append(current_func)
            current_func = FuncCallee(line[line.find('<') + 1: line.find('>:')])
            continue

        # A reference seen before the first function header has no owner;
        # skip it instead of dereferencing None.
        if current_func is None or '<' not in line or '>' not in line:
            continue

        func_name_callee = line[line.find('<') + 1: line.find('>')]
        if '@plt' in func_name_callee:
            current_func.func_callee.add(func_name_callee[:func_name_callee.find('@plt')])
        elif '+0x' not in func_name_callee:
            current_func.func_callee.add(func_name_callee)

    # Do not emit a None entry when no function header was found at all.
    if current_func is not None:
        func_list.append(current_func)
    return func_list
207
208
def parse_text_asm_file(elf_asm_path):
    """Parse the disassembly file at ``elf_asm_path``.

    Args:
        elf_asm_path: path of a disassembly text file.

    Returns:
        The list produced by parse_line for the file's lines.
    """
    with open(elf_asm_path) as asm_file:
        return parse_line(asm_file)
215
216
def generate_libc_dict(libc_func):
    """Map every libc function name to a fresh set containing only itself.

    Args:
        libc_func: iterable of libc function names.

    Returns:
        A dict of name to single-element set; each set is a distinct object.
    """
    return {name: {name} for name in libc_func}
223
224
def add_caller(callee, func_map):
    """Return the names of the functions that reference ``callee`` directly.

    Args:
        callee: function name to look for.
        func_map: iterable of objects exposing ``func_name`` and
            ``func_callee``.

    Returns:
        A set with the ``func_name`` of every entry whose ``func_callee``
        contains ``callee``.
    """
    return {entry.func_name for entry in func_map if callee in entry.func_callee}
231
232
def update_libc_func_caller(caller_list, func_map):
    """Extend ``caller_list`` with all direct and indirect callers.

    Starting from the names already in ``caller_list``, the callers of the
    most recently found names are looked up with add_caller and merged in,
    until a round no longer changes the size of ``caller_list``.

    Args:
        caller_list: set of function names; it is updated in place.
        func_map: call information handed to add_caller.

    Returns:
        The same ``caller_list`` object after the update.
    """
    frontier = caller_list
    size_before_round = -1

    while size_before_round != len(caller_list):
        size_before_round = len(caller_list)
        found_callers = set()
        for callee in frontier:
            found_callers |= add_caller(callee, func_map)
        caller_list |= found_callers
        # The next round only needs to look at what this round found.
        frontier = found_callers

    return caller_list
248
249
def generate_libc_func_map(libc_dict, func_map):
    """Expand every caller set in ``libc_dict`` in place.

    Args:
        libc_dict: dict whose values are sets of function names; each value
            is passed to update_libc_func_caller.
        func_map: call information handed to update_libc_func_caller.
    """
    for callers in libc_dict.values():
        update_libc_func_caller(callers, func_map)
253
254
def get_lib_func_to_other_func_maps(elf_path, libc_func_list):
    """Map each libc function used by ``elf_path`` to the functions reaching it.

    The '.text' section of the ELF is disassembled into the current
    directory and parsed; every libc function the ELF references is then
    associated with itself plus its direct and indirect callers.

    Args:
        elf_path: ELF file to analyze.
        libc_func_list: collection of libc function names.

    Returns:
        A dict of libc function name to the set of names that reach it.
    """
    libc_dict = generate_libc_dict(get_und_libc_func(elf_path, libc_func_list))
    text_asm_path = create_disassemble_file(elf_path, '.', '.text')
    call_info = parse_text_asm_file(text_asm_path)
    generate_libc_func_map(libc_dict, call_info)
    return libc_dict
262
263
def extract_libc_func(callee_und_func_name, libc_func_maps):
    """Return the libc functions reached through any of the given names.

    Args:
        callee_und_func_name: iterable of function names.
        libc_func_maps: dict of libc function name to a collection of
            function names.

    Returns:
        A set with every key of ``libc_func_maps`` whose value contains at
        least one of the given names.
    """
    return {
        libc_name
        for func_name in callee_und_func_name
        for libc_name, reaching_names in libc_func_maps.items()
        if func_name in reaching_names
    }
271
272
def get_function_name_nr_table(src_syscall_path):
    """Build the syscall name/number table from the 'libsyscall_to_nr_*' files.

    Args:
        src_syscall_path: iterable of file paths; only those whose base name
            starts with 'libsyscall_to_nr_' (case-insensitive) are used.

    Returns:
        The dict filled by gen_policy.gen_syscall_nr_table for each used file.
    """
    name_nr_table = {}
    for syscall_file in src_syscall_path:
        base_name = syscall_file.rpartition('/')[2]
        if base_name.lower().startswith('libsyscall_to_nr_'):
            gen_policy.gen_syscall_nr_table(syscall_file, name_nr_table)

    return name_nr_table
282
283
def collect_concrete_syscall(args):
    """Generate the allow-list of syscalls needed by the ELF files in ``args``.

    The libc for the target cpu is disassembled and mapped to syscall
    numbers; the libc functions used by the named ELF files, directly or
    through their shared-library dependencies, select the syscall numbers
    written to the output.

    Args:
        args: parsed command line providing ``target_cpu``, ``src_elf_path``,
            ``src_syscall_path``, ``elf_name`` and ``filter_name``.

    Raises:
        ValueError: if ``args.target_cpu`` is not 'arm', 'arm64' or 'riscv64'.
    """
    arch_by_cpu = {
        'arm64': 'aarch64-linux',
        'arm': 'arm-linux',
        'riscv64': 'riscv64-linux',
    }
    arch_str = arch_by_cpu.get(args.target_cpu)
    if arch_str is None:
        # Fail with a clear error instead of hitting an unbound arch_str.
        raise ValueError("target cpu error")
    libc_path = get_lib_path(args.src_elf_path, 'libc.so', ' | grep ' + arch_str)
    libc_asm_path = generate_libc_asm(args.target_cpu, libc_path, '.')

    # get the map of libc function to syscall nr used by the function
    libc_func_map = gen_libc.get_syscall_map(args.target_cpu, args.src_syscall_path, libc_asm_path)

    libc_func_used = set()
    # get libc function list
    libc_func_list = [item.func_name for item in libc_func_map]

    for elf_name in args.elf_name:
        elf_name_path = get_lib_path(args.src_elf_path, elf_name, '')
        # get libc function symbols used by the elf files
        libc_func_used |= set(get_und_libc_func(elf_name_path, libc_func_list))

    # Walk the dependency graph level by level. The next level gathers the
    # dependencies of EVERY elf of the current level (not only the last one
    # iterated), and names already expanded are skipped so that dependency
    # cycles terminate.
    visited_elf_names = set()
    current_elf_names = set(args.elf_name)
    while current_elf_names:
        visited_elf_names |= current_elf_names
        next_elf_names = set()
        for elf_name in current_elf_names:
            elf_name_path = get_lib_path(args.src_elf_path, elf_name, '')
            deps_elf_name_list = extract_elf_name(elf_name_path)
            callee_und_func_name = get_und_func_except_libc(elf_name_path, libc_func_list)

            for deps_elf_name in deps_elf_name_list:
                deps_elf_path = get_lib_path(args.src_elf_path, deps_elf_name, '')
                # get the direct caller and indirect caller of libc function
                libc_func_maps = get_lib_func_to_other_func_maps(deps_elf_path, libc_func_list)
                libc_func_used |= extract_libc_func(callee_und_func_name, libc_func_maps)
            next_elf_names |= deps_elf_name_list
        current_elf_names = next_elf_names - visited_elf_names
    syscall_nr_list = collect_syscall(libc_func_used, libc_func_map)

    nr_to_func_dict = dict()
    function_name_nr_table_dict = get_function_name_nr_table(args.src_syscall_path)
    # NOTE(review): presumably fills nr_to_func_dict with the number -> name
    # view of the table; confirm against audit_log_analysis.
    audit_policy.converse_fuction_name_nr(nr_to_func_dict, function_name_nr_table_dict)
    content = get_item_content(args.target_cpu, syscall_nr_list, nr_to_func_dict)

    audit_policy.gen_output_file(args.filter_name, content)
    # The disassembly files were written to the current directory above.
    remove_disassemble_file('.')
327
328
def main():
    """Parse the command line and generate the seccomp-bpf policy.

    Raises:
        ValueError: if ``--target-cpu`` is not listed in
            gen_policy.supported_architecture.
    """
    parser = argparse.ArgumentParser(
        description='Generates a seccomp-bpf policy')
    parser.add_argument('--src-elf-path', type=str,
                        help='the directory of the elf file')
    parser.add_argument('--elf-name', action='append',
                        help='name of an elf file to analyze; may be repeated')
    parser.add_argument('--src-syscall-path', type=str, action='append',
                        help=('path to syscall to nr files\n'))
    parser.add_argument('--target-cpu', type=str,
                        help='input arm or arm64 or riscv64')
    parser.add_argument('--filter-name', type=str,
                        help=('consist of output file name\n'))

    args = parser.parse_args()
    if args.target_cpu not in gen_policy.supported_architecture:
        raise ValueError("target_cpu must be in {}".format(gen_policy.supported_architecture))

    collect_concrete_syscall(args)


if __name__ == '__main__':
    # main() has no return value, so a successful run exits with status 0.
    sys.exit(main())
352