1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
#include "native_leak_detector.h"

#include <cinttypes>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

#include "fault_common_base.h"
#include "fault_detector_base.h"
#include "fault_detector_util.h"
#include "fault_info_base.h"
#include "fault_state_base.h"
#include "ffrt.h"
#include "hiview_logger.h"
#include "native_leak_config.h"
#include "native_leak_info.h"
#include "native_leak_state.h"
#include "native_leak_state_context.h"
#include "native_leak_util.h"
36
37 namespace OHOS {
38 namespace HiviewDFX {
39 DEFINE_LOG_TAG("NativeLeakDetector");
40
41 using std::string;
42 using std::vector;
43 using std::shared_ptr;
44 using std::make_shared;
45 using std::static_pointer_cast;
46 using std::make_pair;
47
namespace {
// How long a process name stays in processedPids_ before UpdateProcessedPidsList() drops it.
// Compared against time(nullptr) values, so the unit is seconds.
constexpr time_t PROCESSED_UPLOAD_INTERVAL = 5 * 60 * 60; // 5 hours

// Default cadence. Both values are divided by TASK_LOOP_INTERVAL and compared with loop counters
// in MainProcess(); UPDATE_INTERVAL is presumably in the same unit as SAMPLE_INTERVAL -- confirm.
constexpr uint32_t UPDATE_INTERVAL = 400;
constexpr uint32_t SAMPLE_INTERVAL = 200; // 200s

// Cadence applied instead of the defaults when FaultDetectorUtil::IsMemTestEnable() returns true.
constexpr uint32_t TEST_UPDATE_INTERVAL = 50;
constexpr uint32_t TEST_SAMPLE_INTERVAL = 5;

// DoProcessNativeLeak() stops promoting gray-list entries once monitoredPidsInfo_ holds this many.
constexpr uint32_t NATIVE_MAX_MONITOR_NUMS = 4;
} // namespace
56
NativeLeakDetector()57 NativeLeakDetector::NativeLeakDetector() {};
58
~NativeLeakDetector()59 NativeLeakDetector::~NativeLeakDetector() {};
60
PrepareNativeLeakEnv()61 void NativeLeakDetector::PrepareNativeLeakEnv()
62 {
63 HIVIEW_LOGI("NativeLeakDetector PrepareNativeLeakEnv()");
64 NativeLeakConfigParse();
65 InitMonitorInfo();
66 }
67
NativeLeakConfigParse()68 void NativeLeakDetector::NativeLeakConfigParse()
69 {
70 NativeLeakConfig::GetThresholdList(thresholdLists_);
71 HIVIEW_LOGI("thresholdLists size %{public}zu", thresholdLists_.size());
72 sampleInterval_ = SAMPLE_INTERVAL;
73 updateInterval_ = UPDATE_INTERVAL;
74 // set default threshold
75 auto it = thresholdLists_.find("DEFAULT");
76 if (it != thresholdLists_.end()) {
77 defauleThreshold_ = it->second;
78 } else {
79 defauleThreshold_ = DEFAULT_THRESHOLD;
80 }
81 if (FaultDetectorUtil::IsMemTestEnable()) {
82 sampleInterval_ = TEST_SAMPLE_INTERVAL;
83 updateInterval_ = TEST_UPDATE_INTERVAL;
84 }
85 }
86
InitMonitorInfo()87 void NativeLeakDetector::InitMonitorInfo()
88 {
89 UpdateUserMonitorInfo();
90 RecordNativeInfo();
91 }
92
UpdateUserMonitorInfo()93 void NativeLeakDetector::UpdateUserMonitorInfo()
94 {
95 UpdateProcessedPidsList();
96 vector<int> pids = FaultDetectorUtil::GetAllPids();
97
98 for (auto pid : pids) {
99 if (FaultDetectorUtil::IsKernelProcess(pid)) {
100 continue;
101 }
102 string name = FaultDetectorUtil::GetProcessName(pid);
103 if (processedPids_.find(name) != processedPids_.end()) {
104 continue;
105 }
106 if (grayList_.find(pid) != grayList_.end()) {
107 continue;
108 }
109 if (monitoredPidsList_.find(pid) != monitoredPidsList_.end()) {
110 continue;
111 }
112 uint64_t threshold = 0;
113 bool isInThresholdList = false;
114 auto thresholdItem = thresholdLists_.find(name); // rename thresholdItem
115 if (thresholdItem != thresholdLists_.end()) {
116 threshold = thresholdItem->second;
117 isInThresholdList = true;
118 } else {
119 threshold = defauleThreshold_;
120 }
121 uint64_t rssThreshold = NativeLeakUtil::GetRSSMemoryThreshold(threshold);
122 uint64_t rssNum = FaultDetectorUtil::GetProcessRss(pid);
123 if (rssNum > rssThreshold) {
124 shared_ptr<FaultInfoBase> monitorInfo = make_shared<NativeLeakInfo>();
125
126 auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(monitorInfo);
127 userMonitorInfo->SetPid(pid);
128 userMonitorInfo->SetProcessName(name);
129 userMonitorInfo->SetPidStartTime(FaultDetectorUtil::GetProcessStartTime(pid));
130 userMonitorInfo->SetDebugStartTime(FaultDetectorUtil::GetRunningMonotonicTime());
131 userMonitorInfo->SetMemoryLimit(threshold);
132 userMonitorInfo->SetActualRssThreshold(rssNum);
133 userMonitorInfo->SetInThresholdList(isInThresholdList);
134 grayList_.insert(make_pair(pid, monitorInfo));
135 }
136 }
137 }
138
RemoveInvalidLeakedPid()139 void NativeLeakDetector::RemoveInvalidLeakedPid()
140 {
141 for (auto it = monitoredPidsInfo_.begin(); it != monitoredPidsInfo_.end(); it++) {
142 auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(it->second);
143 pid_t pid = userMonitorInfo->GetPid();
144 if (userMonitorInfo->GetIsProcessDied()) {
145 continue;
146 }
147 time_t startTime = FaultDetectorUtil::GetProcessStartTime(pid);
148 if (startTime == -1 || userMonitorInfo->GetPidStartTime() != startTime) {
149 NativeLeakUtil::RemoveInvalidFile(it->second);
150 userMonitorInfo->SetIsProcessDied(true);
151 HIVIEW_LOGW("process: %{public}s already died, set state.", userMonitorInfo->GetProcessName().c_str());
152 continue;
153 }
154 }
155 }
156
RemoveInvalidUserInfo()157 void NativeLeakDetector::RemoveInvalidUserInfo()
158 {
159 RemoveInvalidLeakedPid();
160 for (auto it = grayList_.begin(); it != grayList_.end();) {
161 if (it->second == nullptr) {
162 it = grayList_.erase(it);
163 continue;
164 }
165 pid_t pid = it->second->GetPid();
166 time_t startTime = FaultDetectorUtil::GetProcessStartTime(pid);
167 auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(it->second);
168 if (startTime == -1 || userMonitorInfo->GetPidStartTime() != startTime) {
169 NativeLeakUtil::RemoveInvalidFile(it->second);
170 HIVIEW_LOGI("id %{public}d, process %{public}s already died, remove it from grayList_.",
171 pid, it->second->GetProcessName().c_str());
172 time_t now = time(nullptr);
173 if (now == (time_t)(-1)) {
174 now = (time_t)(0);
175 }
176 processedPids_.insert(make_pair(it->second->GetProcessName(), now));
177 it = grayList_.erase(it);
178 continue;
179 }
180 it++;
181 }
182 }
183
RecordNativeInfo()184 void NativeLeakDetector::RecordNativeInfo()
185 {
186 FaultStateBase* baseStateObj = NativeLeakStateContext::GetInstance().GetStateObj(PROC_DUMP_STATE);
187 auto dumpStateObj = static_cast<NativeLeakDumpState*>(baseStateObj);
188 for (auto it = grayList_.begin(); it != grayList_.end(); it++) {
189 if (it->second == nullptr) {
190 continue;
191 }
192 auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(it->second);
193 dumpStateObj->DumpUserMemInfo(userMonitorInfo);
194 }
195 }
196
UpdateProcessedPidsList()197 void NativeLeakDetector::UpdateProcessedPidsList()
198 {
199 for (auto it = processedPids_.begin(); it != processedPids_.end();) {
200 int64_t now = time(nullptr);
201 if (now > it->second + PROCESSED_UPLOAD_INTERVAL) {
202 it = processedPids_.erase(it);
203 } else {
204 it++;
205 }
206 }
207 }
208
RemoveFinishedInfo(int64_t pid)209 void NativeLeakDetector::RemoveFinishedInfo(int64_t pid)
210 {
211 auto &info = monitoredPidsInfo_;
212 auto &list = monitoredPidsList_;
213 HIVIEW_LOGD("Before RemoveFinishedInfo, info size %{public}zu, list size %{public}zu", info.size(), list.size());
214 nativeDetectorMtx_.lock();
215 info.erase(pid);
216 list.erase(pid);
217 nativeDetectorMtx_.unlock();
218 HIVIEW_LOGD("After RemoveFinishedInfo, info size %{public}zu, list size %{public}zu", info.size(), list.size());
219 }
220
MainProcess()221 ErrCode NativeLeakDetector::MainProcess()
222 {
223 ++loopCnt_;
224 ++funcLoopCnt_;
225 if (loopCnt_ < sampleInterval_ / TASK_LOOP_INTERVAL) {
226 return SUCCESSED;
227 }
228 loopCnt_ = 0;
229 nativeDetectorMtx_.lock();
230 HIVIEW_LOGI("=== sample native memory status ===");
231 RemoveInvalidUserInfo();
232 // Sample and Judge
233 DoProcessNativeLeak();
234 if (funcLoopCnt_ > updateInterval_ / TASK_LOOP_INTERVAL) {
235 UpdateUserMonitorInfo();
236 RecordNativeInfo();
237 funcLoopCnt_ = 0;
238 }
239
240 // state change: Collect->Report->Control
241 for (auto &it : monitoredPidsInfo_) {
242 if (it.second == nullptr) {
243 continue;
244 }
245 // if is monitored or not leaked, ignore it
246 if (it.second->IsMonitoredStat() || it.second->GetState() != PROC_JUDGE_STATE) {
247 continue;
248 }
249 it.second->SetIsMonitoredStat(true);
250 // one pid info, one task
251 ffrt::submit([&] {
252 ExeNextStateProcess(it.second, PROC_DUMP_STATE);
253 RemoveFinishedInfo(it.second->GetPid());
254 }, {}, {});
255 }
256 nativeDetectorMtx_.unlock();
257 return SUCCESSED;
258 }
259
DoProcessNativeLeak()260 void NativeLeakDetector::DoProcessNativeLeak()
261 {
262 UpdateProcessedPidsList();
263
264 for (auto it = grayList_.begin(); it != grayList_.end();) {
265 if (monitoredPidsInfo_.size() >= NATIVE_MAX_MONITOR_NUMS) {
266 HIVIEW_LOGW("monitoring process is too many, drop it, now is %{public}zu", monitoredPidsInfo_.size());
267 break;
268 }
269 if (it->second == nullptr) {
270 it = grayList_.erase(it);
271 HIVIEW_LOGE("monitor info is nullptr in grayList_, please check move it");
272 continue;
273 }
274 auto it2 = processedPids_.find(it->second->GetProcessName());
275 if (it2 != processedPids_.end()) {
276 it++;
277 continue;
278 }
279 auto it3 = monitoredPidsList_.find(it->first);
280 if (it3 != monitoredPidsList_.end()) {
281 it++;
282 continue;
283 }
284 ErrCode ret = GetStateObj(PROC_SAMPLE_STATE)->StateProcess(it->second, *this);
285 if (JudgeNativeLeak(it->second)) {
286 AddMonitorToList(it->second);
287 it = grayList_.erase(it);
288 continue;
289 }
290 it++;
291 }
292 }
293
JudgeNativeLeak(shared_ptr<FaultInfoBase> & monitorInfo)294 bool NativeLeakDetector::JudgeNativeLeak(shared_ptr<FaultInfoBase> &monitorInfo)
295 {
296 ErrCode ret = GetStateObj(PROC_JUDGE_STATE)->StateProcess(monitorInfo, *this);
297 if (ret) {
298 return false;
299 }
300 return true;
301 }
302
AddMonitorToList(shared_ptr<FaultInfoBase> & monitorInfo)303 void NativeLeakDetector::AddMonitorToList(shared_ptr<FaultInfoBase> &monitorInfo)
304 {
305 int32_t pid = monitorInfo->GetPid();
306 HIVIEW_LOGI("monitor process: %{public}s memory leaked", monitorInfo->GetProcessName().c_str());
307 monitorInfo->RecordLeakedTime();
308 monitorInfo->SetHapVersion(FaultDetectorUtil::GetApplicationVersion(pid));
309 monitorInfo->SetState(PROC_JUDGE_STATE); // set PROC_JUDGE_STATE if leaked
310 monitoredPidsList_.insert(make_pair(pid, monitorInfo->GetLeakedTime()));
311 monitoredPidsInfo_.insert(make_pair(pid, monitorInfo));
312 }
313
ExeNextStateProcess(shared_ptr<FaultInfoBase> monitorInfo,FaultStateType stateType)314 ErrCode NativeLeakDetector::ExeNextStateProcess(shared_ptr<FaultInfoBase> monitorInfo, FaultStateType stateType)
315 {
316 auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(monitorInfo);
317 const string &name = userMonitorInfo->GetProcessName();
318 HIVIEW_LOGI("pid: %{public}d, process: %{public}s, current state: %{public}d, next state: %{public}d",
319 userMonitorInfo->GetPid(), name.c_str(), userMonitorInfo->GetState(), stateType);
320 if (userMonitorInfo->GetIsProcessDied()) {
321 HIVIEW_LOGE("pid: %{public}d already died", userMonitorInfo->GetPid());
322 return FAILURE;
323 }
324
325 nativeDetectorMtx_.lock();
326 if (userMonitorInfo->GetState() == PROC_REPORT_STATE) {
327 time_t now = time(nullptr);
328 if (now == static_cast<time_t>(-1)) {
329 now = static_cast<time_t>(0);
330 }
331 processedPids_.insert(make_pair(name, now));
332 }
333 nativeDetectorMtx_.unlock();
334
335 if (stateType == PROC_FINISHED_STATE) {
336 HIVIEW_LOGI("next stateType is PROC_FINISHED_STATE");
337 return SUCCESSED;
338 }
339 OnChangeState(monitorInfo, stateType);
340 ErrCode ret = GetStateObj(stateType)->StateProcess(monitorInfo, *this);
341 if (ret) {
342 HIVIEW_LOGE("exe %{public}s state process failed, ret is %{public}d",
343 FaultStateName[userMonitorInfo->GetState()].c_str(), ret);
344 return FAILURE;
345 }
346 return SUCCESSED;
347 }
348
GetStateObj(FaultStateType stateType)349 FaultStateBase* NativeLeakDetector::GetStateObj(FaultStateType stateType)
350 {
351 FaultStateBase* stateObj = NativeLeakStateContext::GetInstance().GetStateObj(stateType);
352 if (stateObj == nullptr) {
353 HIVIEW_LOGE("%{public}s state obj is null, stateType(%{public}d)",
354 FaultStateName[stateType].c_str(), stateType);
355 return nullptr;
356 }
357 return stateObj;
358 }
359
OnChangeState(shared_ptr<FaultInfoBase> & monitorInfo,FaultStateType stateType)360 void NativeLeakDetector::OnChangeState(shared_ptr<FaultInfoBase> &monitorInfo, FaultStateType stateType)
361 {
362 if (monitorInfo->GetState() == stateType) {
363 HIVIEW_LOGE("pid: %{public}d have been %{public}s", monitorInfo->GetPid(), FaultStateName[stateType].c_str());
364 return;
365 }
366 HIVIEW_LOGI("Change state from %{public}s to %{public}s",
367 FaultStateName[monitorInfo->GetState()].c_str(), FaultStateName[stateType].c_str());
368 monitorInfo->SetState(stateType);
369 }
370
ProcessUserEvent(const string & name,const string & msg,uint32_t pid)371 void NativeLeakDetector::ProcessUserEvent(const string &name, const string &msg, uint32_t pid)
372 {
373 string fullName = FaultDetectorUtil::GetProcessName(pid);
374 if (fullName.find(name) == string::npos) {
375 HIVIEW_LOGE("Invalid name: %{public}s, pid: %{public}d, Realname is %{public}s",
376 name.c_str(), pid, fullName.c_str());
377 return;
378 }
379 if (monitoredPidsList_.find(pid) != monitoredPidsList_.end()) {
380 HIVIEW_LOGE("%{public}s already leaked", name.c_str());
381 return;
382 }
383 auto it = processedPids_.find(fullName);
384 if (it != processedPids_.end()) {
385 HIVIEW_LOGE("%{public}s processed", it->first.c_str());
386 return;
387 }
388 // is in gray list
389 for (auto it2 = grayList_.begin(); it2 != grayList_.end();) {
390 if (it2->second == nullptr) {
391 it2 = grayList_.erase(it2);
392 HIVIEW_LOGE("monitor in grayList_ is null");
393 continue;
394 }
395 if (it2->second->GetProcessName() == name) {
396 auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(it2->second);
397 userMonitorInfo->SetEventMsg(msg);
398 nativeDetectorMtx_.lock();
399 AddMonitorToList(it2->second);
400 nativeDetectorMtx_.unlock();
401 grayList_.erase(it2);
402 return;
403 }
404 it2++;
405 }
406
407 // not in gray list
408 shared_ptr<FaultInfoBase> monitorInfo = make_shared<NativeLeakInfo>();
409 auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(monitorInfo);
410 if (userMonitorInfo == nullptr) {
411 HIVIEW_LOGE("failed to create %{public}d memory_leak_info", pid);
412 return;
413 }
414 HIVIEW_LOGI("start monitor pid: %{public}d, name: %{public}s", pid, fullName.c_str());
415 userMonitorInfo->SetPid(pid);
416 userMonitorInfo->SetProcessName(fullName);
417 userMonitorInfo->SetPidStartTime(FaultDetectorUtil::GetProcessStartTime(pid));
418 userMonitorInfo->SetEventMsg(msg);
419 nativeDetectorMtx_.lock();
420 AddMonitorToList(monitorInfo);
421 nativeDetectorMtx_.unlock();
422 }
423 } // namespace HiviewDFX
424 } // namespace OHOS
425