#!/usr/bin/env python3
# -*- coding: utf-8 -*-

#
# Copyright (c) 2023 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Generate a seccomp allow-list from the libc functions a set of ELF files use.

The script disassembles libc and the dependencies of the requested ELF files
with objdump/readelf, works out which libc functions can be reached, maps them
to syscall numbers and writes the resulting ``@allowList`` policy section.
"""

import os
import sys
import argparse
import subprocess
import stat
import libc_static_analysis as gen_libc
import audit_log_analysis as audit_policy
import generate_code_from_policy as gen_policy


# objdump executable used to disassemble libc for each supported target cpu.
_LIBC_OBJDUMP = {
    'arm': 'arm-linux-musleabi-objdump',
    'arm64': 'aarch64-linux-musl-objdump',
    'riscv64': 'riscv64-linux-musl-objdump',
}

# Path fragment used to pick the libc.so built for each supported target cpu.
_LIBC_ARCH_DIR = {
    'arm64': 'aarch64-linux',
    'arm': 'arm-linux',
    'riscv64': 'riscv64-linux',
}


# NOTE: the two functions below return empty placeholders. Edit them to return
# the paths of the objdump and readelf executables before running the script.
def get_obj_dump_path():
    """Return the objdump executable used to disassemble ELF sections."""
    obj_dump_path = ''
    return obj_dump_path


def get_read_elf_path():
    """Return the readelf executable used to inspect ELF files."""
    read_elf_path = ''
    return read_elf_path


def _read_command_output(cmd):
    """Run *cmd* through the shell and return everything it wrote to stdout."""
    # The context manager closes the pipe instead of leaving it to the GC.
    with os.popen(cmd) as pipe:
        return pipe.read()


def create_needed_file(elf_path, locate_path, cmd, suffix, timeout=3):
    """Run *cmd* and store its standard output in a file derived from *elf_path*.

    Args:
        elf_path: path of the ELF file; the part of its base name before the
            first '.' becomes the stem of the output file.
        locate_path: directory that receives the output file.
        cmd: command line, split on single spaces (no shell quoting).
        suffix: text appended to the stem to build the output file name.
        timeout: seconds to wait for the command before giving up.

    Returns:
        The path of the file that was written.

    Raises:
        subprocess.TimeoutExpired: if the command does not finish in time.
    """
    elf_file_name = elf_path.split('/')[-1].split('.')[0] + suffix
    # os.path.join copes with a trailing '/' and with an empty directory.
    target_path = os.path.join(locate_path, elf_file_name)
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    modes = stat.S_IWUSR | stat.S_IRUSR | stat.S_IWGRP | stat.S_IRGRP
    with os.fdopen(os.open(target_path, flags, modes), 'w') as output_file:
        # run() kills and reaps the child when the timeout expires, so no
        # process is left behind writing to a file we have already closed.
        # The exit status is deliberately not checked, as before.
        subprocess.run(cmd.split(' '), stdout=output_file, timeout=timeout, check=False)
    return target_path


def generate_libc_asm(target_cpu, elf_path, locate_path):
    """Disassemble libc for *target_cpu* and return the path of the .asm file.

    Raises:
        ValueError: if *target_cpu* is not 'arm', 'arm64' or 'riscv64'.
    """
    cmd_obj_dump = _LIBC_OBJDUMP.get(target_cpu)
    if cmd_obj_dump is None:
        raise ValueError("target cpu error")

    cmd = '{} -d {}'.format(cmd_obj_dump, elf_path)
    return create_needed_file(elf_path, locate_path, cmd, '.asm')


def get_lib_path(elf_path, elf_name, cmd_extra):
    """Find *elf_name* below *elf_path* and return the longest matching path.

    Candidates are produced by ``find`` and filtered to paths containing
    'unstripped' and not '_x64', followed by the extra shell filter
    *cmd_extra*. For 'libc++.so' that filter is replaced by a plain
    'grep aarch64-linux' and *cmd_extra* is ignored.

    Returns:
        The longest candidate (the first one on ties), or '' if none matched.
    """
    grep_unstrip = ' | grep unstripped | grep -v _x64 {}'.format(cmd_extra)

    if elf_name == 'libc++.so':
        grep_unstrip = '| grep aarch64-linux'
    cmd = 'find {} -name {}{}'.format(elf_path, elf_name, grep_unstrip)
    candidates = [item.strip() for item in _read_command_output(cmd).split('\n')]
    # split() always yields at least one (possibly empty) entry, and max()
    # keeps the first of several equally long candidates.
    return max(candidates, key=len)


def extract_elf_name(elf_path):
    """Return the shared libraries *elf_path* links against, except libc.so.

    The names are taken from the text between '[' and ']' on the
    'Shared library' lines of ``readelf -d``.
    """
    cmd = '{} -d {} | grep \'Shared library\''.format(get_read_elf_path(), elf_path)
    result = _read_command_output(cmd).strip()
    elf_name = set()
    for item in result.split('\n'):
        name = item[item.find('[') + 1: item.find(']')]
        if name in ('libc.so', ''):
            continue
        elf_name.add(name)

    return elf_name


def extract_undef_name(elf_path):
    """Return the names of the undefined (UND) symbols of *elf_path*.

    The name is whatever follows 'UND' on each matching line of
    ``readelf -sW``.
    """
    cmd = '{} -sW {} | grep UND'.format(get_read_elf_path(), elf_path)
    result = _read_command_output(cmd).strip()
    func_name = set()
    for item in result.split('\n'):
        name = item[item.find('UND') + 3:].strip()
        if name != '':
            func_name.add(name)

    return func_name


def collect_elf(elf_path, elf_name):
    """Return the paths of *elf_name* and of all their transitive dependencies.

    Args:
        elf_path: directory searched for the libraries.
        elf_name: iterable of ELF file names to start from.
    """
    visited = set()
    elf_path_list = set()
    pending = set(elf_name)
    while pending:
        visited |= pending
        level_paths = {get_lib_path(elf_path, lib_name, '') for lib_name in pending}
        elf_path_list |= level_paths

        pending = set()
        for lib_path in level_paths:
            pending |= extract_elf_name(lib_path)
        # Never revisit a library: without this a dependency cycle would keep
        # the loop running forever.
        pending -= visited

    return elf_path_list


def collect_undef_func_name(elf_path_list):
    """Return the union of the undefined symbol names of every given ELF."""
    func_name = set()
    for elf_path in elf_path_list:
        func_name |= extract_undef_name(elf_path)

    return func_name


def collect_syscall(undef_func_name, libc_func_map):
    """Return the syscall numbers used by the libc functions in *undef_func_name*.

    Args:
        undef_func_name: collection of function names.
        libc_func_map: iterable of objects exposing ``func_name`` and ``nr``
            (a set of syscall numbers).
    """
    syscall_nr = set()
    for libc_func in libc_func_map:
        if libc_func.func_name in undef_func_name:
            syscall_nr |= libc_func.nr

    return syscall_nr


def get_item_content(arch, nr_set, name_nr_table):
    """Build the '@allowList' section listing one '<name>;<arch>' per syscall.

    Args:
        arch: architecture name appended to every entry.
        nr_set: syscall numbers to list; they are emitted in ascending order.
        name_nr_table: mapping arch -> {syscall number -> syscall name}.

    Returns:
        The section text. With no syscall numbers only the '@allowList'
        header is returned rather than an entry with an empty name.
    """
    func_name_list = [name_nr_table.get(arch).get(nr) for nr in sorted(nr_set)]
    if not func_name_list:
        return '@allowList\n'
    content = '@allowList\n{};{}\n'.format(';{}\n'.format(arch).join(func_name_list), arch)

    return content


def get_und_func_except_libc(elf_path, libc_func_list):
    """Return the undefined symbols of *elf_path* that are not libc functions."""
    libc_funcs = set(libc_func_list)
    return [item for item in extract_undef_name(elf_path) if item not in libc_funcs]


def get_und_libc_func(elf_path, libc_func_list):
    """Return the undefined symbols of *elf_path* that are libc functions."""
    libc_funcs = set(libc_func_list)
    return [item for item in extract_undef_name(elf_path) if item in libc_funcs]


def create_disassemble_file(elf_path, locate_path, section):
    """Disassemble *section* of *elf_path* into *locate_path*; return the file path."""
    cmd = '{} -d --section={} {}'.format(get_obj_dump_path(), section, elf_path)
    return create_needed_file(elf_path, locate_path, cmd, section + '.asm')


def remove_disassemble_file(path):
    """Delete every '*.asm' entry directly inside *path*.

    Removal is best effort: an entry that cannot be deleted is reported on
    stderr and skipped.
    """
    for item in os.listdir(path):
        if not item.endswith('.asm'):
            continue
        # Join with the directory so this also works when *path* is not the
        # current working directory.
        try:
            os.remove(os.path.join(path, item))
        except OSError as err:
            print('cannot remove {}: {}'.format(os.path.join(path, item), err),
                  file=sys.stderr)


class FuncCallee:
    """A function found in a disassembly plus the names of the functions it references."""

    def __init__(self, func_name):
        self.func_name = func_name
        # Names of the functions referenced from the body of func_name.
        self.func_callee = set()

    def print_info(self):
        """Print the function name and the set of functions it references."""
        print('{} call function {}'.format(self.func_name, self.func_callee))


def parse_line(fp):
    """Parse objdump disassembly lines into a list of FuncCallee objects.

    A line containing '>:' starts a new function named by the text between
    '<' and '>:'. Any other line containing '<' and '>' contributes the text
    between them as a callee of the current function: an '@plt' suffix is
    dropped and '+0x' offsets into a symbol are ignored.

    Args:
        fp: iterable of text lines.

    Returns:
        One FuncCallee per function header, in file order; an empty list when
        the input holds no function header.
    """
    func_list = []
    current = None
    for line in fp:
        if '>:' in line:
            if current is not None:
                func_list.append(current)
            current = FuncCallee(line[line.find('<') + 1: line.find('>:')])
            continue

        # A reference that appears before any function header has no owner.
        if current is None or '<' not in line or '>' not in line:
            continue

        callee = line[line.find('<') + 1: line.find('>')]
        if '@plt' in callee:
            current.func_callee.add(callee[:callee.find('@plt')])
        elif '+0x' not in callee:
            current.func_callee.add(callee)

    if current is not None:
        func_list.append(current)
    return func_list


def parse_text_asm_file(elf_asm_path):
    """Parse the disassembly file *elf_asm_path* into FuncCallee objects."""
    with open(elf_asm_path) as fp:
        return parse_line(fp)


def generate_libc_dict(libc_func):
    """Map every name in *libc_func* to a fresh set holding only that name."""
    return {item: {item} for item in libc_func}


def add_caller(callee, func_map):
    """Return the names of the functions in *func_map* that reference *callee*."""
    return {item.func_name for item in func_map if callee in item.func_callee}


def update_libc_func_caller(caller_list, func_map):
    """Extend *caller_list* in place with all direct and indirect callers.

    Args:
        caller_list: set of function names; it is mutated and also returned.
        func_map: iterable of FuncCallee objects describing the call graph.
    """
    frontier = set(caller_list)
    while frontier:
        found = set()
        for callee in frontier:
            found |= add_caller(callee, func_map)
        # Only names seen for the first time need another round.
        frontier = found - caller_list
        caller_list |= frontier

    return caller_list


def generate_libc_func_map(libc_dict, func_map):
    """Grow every set in *libc_dict* with the callers of its members (in place)."""
    for caller_list in libc_dict.values():
        update_libc_func_caller(caller_list, func_map)


def get_lib_func_to_other_func_maps(elf_path, libc_func_list):
    """Map each libc function *elf_path* imports to the functions reaching it.

    The '.text' section of *elf_path* is disassembled into the current
    directory to build the call graph.

    Returns:
        dict: libc function name -> set holding that name plus every function
        of *elf_path* that calls it directly or indirectly.
    """
    libc_func = get_und_libc_func(elf_path, libc_func_list)
    elf_asm_path = create_disassemble_file(elf_path, '.', '.text')
    func_map = parse_text_asm_file(elf_asm_path)
    libc_dict = generate_libc_dict(libc_func)
    generate_libc_func_map(libc_dict, func_map)
    return libc_dict


def extract_libc_func(callee_und_func_name, libc_func_maps):
    """Return the libc functions reachable through any of the given names.

    Args:
        callee_und_func_name: iterable of function names.
        libc_func_maps: dict of libc function name -> set of function names
            that reach it.
    """
    return {
        key for key, callers in libc_func_maps.items()
        if not callers.isdisjoint(callee_und_func_name)
    }


def get_function_name_nr_table(src_syscall_path):
    """Load the syscall name/number tables from the 'libsyscall_to_nr_*' files.

    Args:
        src_syscall_path: iterable of file paths; files whose base name does
            not start with 'libsyscall_to_nr_' (case-insensitive) are skipped.
    """
    function_name_nr_table_dict = {}
    for file_name in src_syscall_path:
        file_name_tmp = file_name.split('/')[-1]
        if not file_name_tmp.lower().startswith('libsyscall_to_nr_'):
            continue
        gen_policy.gen_syscall_nr_table(file_name, function_name_nr_table_dict)

    return function_name_nr_table_dict


def _collect_libc_func_used(src_elf_path, elf_names, libc_func_list):
    """Return the libc functions used by *elf_names*, directly or through dependencies."""
    libc_func_used = set()
    for elf_name in elf_names:
        elf_name_path = get_lib_path(src_elf_path, elf_name, '')
        # libc function symbols used directly by the elf files
        libc_func_used |= set(get_und_libc_func(elf_name_path, libc_func_list))

    # Disassembling a dependency is expensive, so do it once per library.
    deps_maps_cache = {}
    visited = set(elf_names)
    current_elf_name_list = list(elf_names)
    while current_elf_name_list:
        next_elf_names = set()
        for elf_name in current_elf_name_list:
            elf_name_path = get_lib_path(src_elf_path, elf_name, '')
            deps_elf_name_list = extract_elf_name(elf_name_path)
            callee_und_func_name = get_und_func_except_libc(elf_name_path, libc_func_list)

            for deps_elf_name in deps_elf_name_list:
                if deps_elf_name not in deps_maps_cache:
                    deps_elf_path = get_lib_path(src_elf_path, deps_elf_name, '')
                    # direct and indirect callers of each libc function
                    deps_maps_cache[deps_elf_name] = get_lib_func_to_other_func_maps(
                        deps_elf_path, libc_func_list)
                libc_func_used |= extract_libc_func(
                    callee_und_func_name, deps_maps_cache[deps_elf_name])
            next_elf_names |= deps_elf_name_list

        # Descend into the dependencies of every elf of this level, skipping
        # libraries already handled so that dependency cycles terminate.
        # Sorting keeps the traversal order reproducible.
        current_elf_name_list = sorted(next_elf_names - visited)
        visited |= next_elf_names

    return libc_func_used


def collect_concrete_syscall(args):
    """Generate the seccomp allow-list for the ELF files named in *args*.

    Args:
        args: parsed command line with ``target_cpu``, ``src_elf_path``,
            ``src_syscall_path``, ``elf_name`` and ``filter_name``.

    Raises:
        ValueError: if ``args.target_cpu`` is not 'arm', 'arm64' or 'riscv64'.
    """
    arch_str = _LIBC_ARCH_DIR.get(args.target_cpu)
    if arch_str is None:
        raise ValueError("target cpu error")
    libc_path = get_lib_path(args.src_elf_path, 'libc.so', ' | grep ' + arch_str)
    libc_asm_path = generate_libc_asm(args.target_cpu, libc_path, '.')

    # get the map of libc function to syscall nr used by the function
    libc_func_map = gen_libc.get_syscall_map(args.target_cpu, args.src_syscall_path, libc_asm_path)

    # get libc function list
    libc_func_list = [item.func_name for item in libc_func_map]

    libc_func_used = _collect_libc_func_used(args.src_elf_path, args.elf_name, libc_func_list)
    syscall_nr_list = collect_syscall(libc_func_used, libc_func_map)

    nr_to_func_dict = dict()
    function_name_nr_table_dict = get_function_name_nr_table(args.src_syscall_path)
    audit_policy.converse_fuction_name_nr(nr_to_func_dict, function_name_nr_table_dict)
    content = get_item_content(args.target_cpu, syscall_nr_list, nr_to_func_dict)

    audit_policy.gen_output_file(args.filter_name, content)
    remove_disassemble_file('.')


def main():
    """Parse the command line and generate the seccomp policy.

    Raises:
        ValueError: if --target-cpu is not a supported architecture.
    """
    parser = argparse.ArgumentParser(
      description='Generates a seccomp-bpf policy')
    # Options the script cannot work without are required, so a missing one
    # is reported by argparse instead of failing later with a TypeError.
    parser.add_argument('--src-elf-path', type=str, required=True,
                        help='the drectory of the elf file')
    parser.add_argument('--elf-name', action='append', required=True,
                        help='name of an elf file to analyse, may be repeated')
    parser.add_argument('--src-syscall-path', type=str, action='append', required=True,
                        help=('path to syscall to nr files\n'))
    parser.add_argument('--target-cpu', type=str,
                        help='input arm or arm64 or riscv64')
    parser.add_argument('--filter-name', type=str,
                        help=('consist of output file name\n'))

    args = parser.parse_args()
    if args.target_cpu not in gen_policy.supported_architecture:
        raise ValueError("target_cpu must int {}".format(gen_policy.supported_architecture))

    collect_concrete_syscall(args)


if __name__ == '__main__':
    sys.exit(main())