1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "native_leak_detector.h"
16 
#include <cinttypes>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>
23 
24 #include "fault_common_base.h"
25 #include "fault_detector_base.h"
26 #include "fault_detector_util.h"
27 #include "fault_info_base.h"
28 #include "fault_state_base.h"
29 #include "ffrt.h"
30 #include "hiview_logger.h"
31 #include "native_leak_config.h"
32 #include "native_leak_info.h"
33 #include "native_leak_state.h"
34 #include "native_leak_state_context.h"
35 #include "native_leak_util.h"
36 
37 namespace OHOS {
38 namespace HiviewDFX {
39 DEFINE_LOG_TAG("NativeLeakDetector");
40 
41 using std::string;
42 using std::vector;
43 using std::shared_ptr;
44 using std::make_shared;
45 using std::static_pointer_cast;
46 using std::make_pair;
47 
namespace {
// How long (seconds, compared against time()) a process name stays in
// processedPids_ before it may be picked up for monitoring again.
constexpr time_t PROCESSED_UPLOAD_INTERVAL = 18000;  // 5 hours
// Default for updateInterval_: how often the gray list is refreshed
// (presumably seconds, like SAMPLE_INTERVAL -- TODO confirm).
constexpr uint32_t UPDATE_INTERVAL = 400;
// Default for sampleInterval_: how often gray-list processes are sampled.
constexpr uint32_t SAMPLE_INTERVAL = 200;     // 200s
// Shorter intervals applied when FaultDetectorUtil::IsMemTestEnable() is true.
constexpr uint32_t TEST_UPDATE_INTERVAL = 50;
constexpr uint32_t TEST_SAMPLE_INTERVAL = 5;
// Upper bound on entries in monitoredPidsInfo_; sampling stops once it is reached.
constexpr uint32_t NATIVE_MAX_MONITOR_NUMS = 4;
}
56 
NativeLeakDetector()57 NativeLeakDetector::NativeLeakDetector() {};
58 
~NativeLeakDetector()59 NativeLeakDetector::~NativeLeakDetector() {};
60 
PrepareNativeLeakEnv()61 void NativeLeakDetector::PrepareNativeLeakEnv()
62 {
63     HIVIEW_LOGI("NativeLeakDetector PrepareNativeLeakEnv()");
64     NativeLeakConfigParse();
65     InitMonitorInfo();
66 }
67 
NativeLeakConfigParse()68 void NativeLeakDetector::NativeLeakConfigParse()
69 {
70     NativeLeakConfig::GetThresholdList(thresholdLists_);
71     HIVIEW_LOGI("thresholdLists size %{public}zu", thresholdLists_.size());
72     sampleInterval_ = SAMPLE_INTERVAL;
73     updateInterval_ = UPDATE_INTERVAL;
74     // set default threshold
75     auto it = thresholdLists_.find("DEFAULT");
76     if (it != thresholdLists_.end()) {
77         defauleThreshold_ = it->second;
78     } else {
79         defauleThreshold_ = DEFAULT_THRESHOLD;
80     }
81     if (FaultDetectorUtil::IsMemTestEnable()) {
82         sampleInterval_ = TEST_SAMPLE_INTERVAL;
83         updateInterval_ = TEST_UPDATE_INTERVAL;
84     }
85 }
86 
InitMonitorInfo()87 void NativeLeakDetector::InitMonitorInfo()
88 {
89     UpdateUserMonitorInfo();
90     RecordNativeInfo();
91 }
92 
UpdateUserMonitorInfo()93 void NativeLeakDetector::UpdateUserMonitorInfo()
94 {
95     UpdateProcessedPidsList();
96     vector<int> pids = FaultDetectorUtil::GetAllPids();
97 
98     for (auto pid : pids) {
99         if (FaultDetectorUtil::IsKernelProcess(pid)) {
100             continue;
101         }
102         string name = FaultDetectorUtil::GetProcessName(pid);
103         if (processedPids_.find(name) != processedPids_.end()) {
104             continue;
105         }
106         if (grayList_.find(pid) != grayList_.end()) {
107             continue;
108         }
109         if (monitoredPidsList_.find(pid) != monitoredPidsList_.end()) {
110             continue;
111         }
112         uint64_t threshold = 0;
113         bool isInThresholdList = false;
114         auto thresholdItem = thresholdLists_.find(name); // rename thresholdItem
115         if (thresholdItem != thresholdLists_.end()) {
116             threshold = thresholdItem->second;
117             isInThresholdList = true;
118         } else {
119             threshold = defauleThreshold_;
120         }
121         uint64_t rssThreshold = NativeLeakUtil::GetRSSMemoryThreshold(threshold);
122         uint64_t rssNum = FaultDetectorUtil::GetProcessRss(pid);
123         if (rssNum > rssThreshold) {
124             shared_ptr<FaultInfoBase> monitorInfo = make_shared<NativeLeakInfo>();
125 
126             auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(monitorInfo);
127             userMonitorInfo->SetPid(pid);
128             userMonitorInfo->SetProcessName(name);
129             userMonitorInfo->SetPidStartTime(FaultDetectorUtil::GetProcessStartTime(pid));
130             userMonitorInfo->SetDebugStartTime(FaultDetectorUtil::GetRunningMonotonicTime());
131             userMonitorInfo->SetMemoryLimit(threshold);
132             userMonitorInfo->SetActualRssThreshold(rssNum);
133             userMonitorInfo->SetInThresholdList(isInThresholdList);
134             grayList_.insert(make_pair(pid, monitorInfo));
135         }
136     }
137 }
138 
RemoveInvalidLeakedPid()139 void NativeLeakDetector::RemoveInvalidLeakedPid()
140 {
141     for (auto it = monitoredPidsInfo_.begin(); it != monitoredPidsInfo_.end(); it++) {
142         auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(it->second);
143         pid_t pid = userMonitorInfo->GetPid();
144         if (userMonitorInfo->GetIsProcessDied()) {
145             continue;
146         }
147         time_t startTime = FaultDetectorUtil::GetProcessStartTime(pid);
148         if (startTime == -1 || userMonitorInfo->GetPidStartTime() != startTime) {
149             NativeLeakUtil::RemoveInvalidFile(it->second);
150             userMonitorInfo->SetIsProcessDied(true);
151             HIVIEW_LOGW("process: %{public}s already died, set state.", userMonitorInfo->GetProcessName().c_str());
152             continue;
153         }
154     }
155 }
156 
RemoveInvalidUserInfo()157 void NativeLeakDetector::RemoveInvalidUserInfo()
158 {
159     RemoveInvalidLeakedPid();
160     for (auto it = grayList_.begin(); it != grayList_.end();) {
161         if (it->second == nullptr) {
162             it = grayList_.erase(it);
163             continue;
164         }
165         pid_t pid = it->second->GetPid();
166         time_t startTime = FaultDetectorUtil::GetProcessStartTime(pid);
167         auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(it->second);
168         if (startTime == -1 || userMonitorInfo->GetPidStartTime() != startTime) {
169             NativeLeakUtil::RemoveInvalidFile(it->second);
170             HIVIEW_LOGI("id %{public}d, process %{public}s already died, remove it from grayList_.",
171                 pid, it->second->GetProcessName().c_str());
172             time_t now = time(nullptr);
173             if (now == (time_t)(-1)) {
174                 now = (time_t)(0);
175             }
176             processedPids_.insert(make_pair(it->second->GetProcessName(), now));
177             it = grayList_.erase(it);
178             continue;
179         }
180         it++;
181     }
182 }
183 
RecordNativeInfo()184 void NativeLeakDetector::RecordNativeInfo()
185 {
186     FaultStateBase* baseStateObj = NativeLeakStateContext::GetInstance().GetStateObj(PROC_DUMP_STATE);
187     auto dumpStateObj = static_cast<NativeLeakDumpState*>(baseStateObj);
188     for (auto it = grayList_.begin(); it != grayList_.end(); it++) {
189         if (it->second == nullptr) {
190             continue;
191         }
192         auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(it->second);
193         dumpStateObj->DumpUserMemInfo(userMonitorInfo);
194     }
195 }
196 
UpdateProcessedPidsList()197 void NativeLeakDetector::UpdateProcessedPidsList()
198 {
199     for (auto it = processedPids_.begin(); it != processedPids_.end();) {
200         int64_t now = time(nullptr);
201         if (now > it->second + PROCESSED_UPLOAD_INTERVAL) {
202             it = processedPids_.erase(it);
203         } else {
204             it++;
205         }
206     }
207 }
208 
RemoveFinishedInfo(int64_t pid)209 void NativeLeakDetector::RemoveFinishedInfo(int64_t pid)
210 {
211     auto &info = monitoredPidsInfo_;
212     auto &list = monitoredPidsList_;
213     HIVIEW_LOGD("Before RemoveFinishedInfo, info size %{public}zu, list size %{public}zu", info.size(), list.size());
214     nativeDetectorMtx_.lock();
215     info.erase(pid);
216     list.erase(pid);
217     nativeDetectorMtx_.unlock();
218     HIVIEW_LOGD("After RemoveFinishedInfo, info size %{public}zu, list size %{public}zu", info.size(), list.size());
219 }
220 
MainProcess()221 ErrCode NativeLeakDetector::MainProcess()
222 {
223     ++loopCnt_;
224     ++funcLoopCnt_;
225     if (loopCnt_ < sampleInterval_ / TASK_LOOP_INTERVAL) {
226         return SUCCESSED;
227     }
228     loopCnt_ = 0;
229     nativeDetectorMtx_.lock();
230     HIVIEW_LOGI("=== sample native memory status ===");
231     RemoveInvalidUserInfo();
232     // Sample and Judge
233     DoProcessNativeLeak();
234     if (funcLoopCnt_ > updateInterval_ / TASK_LOOP_INTERVAL) {
235         UpdateUserMonitorInfo();
236         RecordNativeInfo();
237         funcLoopCnt_ = 0;
238     }
239 
240     // state change: Collect->Report->Control
241     for (auto &it : monitoredPidsInfo_) {
242         if (it.second == nullptr) {
243             continue;
244         }
245         // if is monitored or not leaked, ignore it
246         if (it.second->IsMonitoredStat() || it.second->GetState() != PROC_JUDGE_STATE) {
247             continue;
248         }
249         it.second->SetIsMonitoredStat(true);
250         // one pid info, one task
251         ffrt::submit([&] {
252             ExeNextStateProcess(it.second, PROC_DUMP_STATE);
253             RemoveFinishedInfo(it.second->GetPid());
254             }, {}, {});
255     }
256     nativeDetectorMtx_.unlock();
257     return SUCCESSED;
258 }
259 
DoProcessNativeLeak()260 void NativeLeakDetector::DoProcessNativeLeak()
261 {
262     UpdateProcessedPidsList();
263 
264     for (auto it = grayList_.begin(); it != grayList_.end();) {
265         if (monitoredPidsInfo_.size() >= NATIVE_MAX_MONITOR_NUMS) {
266             HIVIEW_LOGW("monitoring process is too many, drop it, now is %{public}zu", monitoredPidsInfo_.size());
267             break;
268         }
269         if (it->second == nullptr) {
270             it = grayList_.erase(it);
271             HIVIEW_LOGE("monitor info is nullptr in grayList_, please check move it");
272             continue;
273         }
274         auto it2 = processedPids_.find(it->second->GetProcessName());
275         if (it2 != processedPids_.end()) {
276             it++;
277             continue;
278         }
279         auto it3 = monitoredPidsList_.find(it->first);
280         if (it3 != monitoredPidsList_.end()) {
281             it++;
282             continue;
283         }
284         ErrCode ret = GetStateObj(PROC_SAMPLE_STATE)->StateProcess(it->second, *this);
285         if (JudgeNativeLeak(it->second)) {
286             AddMonitorToList(it->second);
287             it = grayList_.erase(it);
288             continue;
289         }
290         it++;
291     }
292 }
293 
JudgeNativeLeak(shared_ptr<FaultInfoBase> & monitorInfo)294 bool NativeLeakDetector::JudgeNativeLeak(shared_ptr<FaultInfoBase> &monitorInfo)
295 {
296     ErrCode ret = GetStateObj(PROC_JUDGE_STATE)->StateProcess(monitorInfo, *this);
297     if (ret) {
298         return false;
299     }
300     return true;
301 }
302 
AddMonitorToList(shared_ptr<FaultInfoBase> & monitorInfo)303 void NativeLeakDetector::AddMonitorToList(shared_ptr<FaultInfoBase> &monitorInfo)
304 {
305     int32_t pid = monitorInfo->GetPid();
306     HIVIEW_LOGI("monitor process: %{public}s memory leaked", monitorInfo->GetProcessName().c_str());
307     monitorInfo->RecordLeakedTime();
308     monitorInfo->SetHapVersion(FaultDetectorUtil::GetApplicationVersion(pid));
309     monitorInfo->SetState(PROC_JUDGE_STATE); // set PROC_JUDGE_STATE if leaked
310     monitoredPidsList_.insert(make_pair(pid, monitorInfo->GetLeakedTime()));
311     monitoredPidsInfo_.insert(make_pair(pid, monitorInfo));
312 }
313 
ExeNextStateProcess(shared_ptr<FaultInfoBase> monitorInfo,FaultStateType stateType)314 ErrCode NativeLeakDetector::ExeNextStateProcess(shared_ptr<FaultInfoBase> monitorInfo, FaultStateType stateType)
315 {
316     auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(monitorInfo);
317     const string &name = userMonitorInfo->GetProcessName();
318     HIVIEW_LOGI("pid: %{public}d, process: %{public}s, current state: %{public}d, next state: %{public}d",
319         userMonitorInfo->GetPid(), name.c_str(), userMonitorInfo->GetState(), stateType);
320     if (userMonitorInfo->GetIsProcessDied()) {
321         HIVIEW_LOGE("pid: %{public}d already died", userMonitorInfo->GetPid());
322         return FAILURE;
323     }
324 
325     nativeDetectorMtx_.lock();
326     if (userMonitorInfo->GetState() == PROC_REPORT_STATE) {
327         time_t now = time(nullptr);
328         if (now == static_cast<time_t>(-1)) {
329             now = static_cast<time_t>(0);
330         }
331         processedPids_.insert(make_pair(name, now));
332     }
333     nativeDetectorMtx_.unlock();
334 
335     if (stateType == PROC_FINISHED_STATE) {
336         HIVIEW_LOGI("next stateType is PROC_FINISHED_STATE");
337         return SUCCESSED;
338     }
339     OnChangeState(monitorInfo, stateType);
340     ErrCode ret = GetStateObj(stateType)->StateProcess(monitorInfo, *this);
341     if (ret) {
342         HIVIEW_LOGE("exe %{public}s state process failed, ret is %{public}d",
343             FaultStateName[userMonitorInfo->GetState()].c_str(), ret);
344         return FAILURE;
345     }
346     return SUCCESSED;
347 }
348 
GetStateObj(FaultStateType stateType)349 FaultStateBase* NativeLeakDetector::GetStateObj(FaultStateType stateType)
350 {
351     FaultStateBase* stateObj = NativeLeakStateContext::GetInstance().GetStateObj(stateType);
352     if (stateObj == nullptr) {
353         HIVIEW_LOGE("%{public}s state obj is null, stateType(%{public}d)",
354             FaultStateName[stateType].c_str(), stateType);
355         return nullptr;
356     }
357     return stateObj;
358 }
359 
OnChangeState(shared_ptr<FaultInfoBase> & monitorInfo,FaultStateType stateType)360 void NativeLeakDetector::OnChangeState(shared_ptr<FaultInfoBase> &monitorInfo, FaultStateType stateType)
361 {
362     if (monitorInfo->GetState() == stateType) {
363         HIVIEW_LOGE("pid: %{public}d have been %{public}s", monitorInfo->GetPid(), FaultStateName[stateType].c_str());
364         return;
365     }
366     HIVIEW_LOGI("Change state from %{public}s to %{public}s",
367         FaultStateName[monitorInfo->GetState()].c_str(), FaultStateName[stateType].c_str());
368     monitorInfo->SetState(stateType);
369 }
370 
ProcessUserEvent(const string & name,const string & msg,uint32_t pid)371 void NativeLeakDetector::ProcessUserEvent(const string &name, const string &msg, uint32_t pid)
372 {
373     string fullName = FaultDetectorUtil::GetProcessName(pid);
374     if (fullName.find(name) == string::npos) {
375         HIVIEW_LOGE("Invalid name: %{public}s, pid: %{public}d, Realname is %{public}s",
376             name.c_str(), pid, fullName.c_str());
377         return;
378     }
379     if (monitoredPidsList_.find(pid) != monitoredPidsList_.end()) {
380         HIVIEW_LOGE("%{public}s already leaked", name.c_str());
381         return;
382     }
383     auto it = processedPids_.find(fullName);
384     if (it != processedPids_.end()) {
385         HIVIEW_LOGE("%{public}s processed", it->first.c_str());
386         return;
387     }
388     // is in gray list
389     for (auto it2 = grayList_.begin(); it2 != grayList_.end();) {
390         if (it2->second == nullptr) {
391             it2 = grayList_.erase(it2);
392             HIVIEW_LOGE("monitor in grayList_ is null");
393             continue;
394         }
395         if (it2->second->GetProcessName() == name) {
396             auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(it2->second);
397             userMonitorInfo->SetEventMsg(msg);
398             nativeDetectorMtx_.lock();
399             AddMonitorToList(it2->second);
400             nativeDetectorMtx_.unlock();
401             grayList_.erase(it2);
402             return;
403         }
404         it2++;
405     }
406 
407     // not in gray list
408     shared_ptr<FaultInfoBase> monitorInfo = make_shared<NativeLeakInfo>();
409     auto userMonitorInfo = static_pointer_cast<NativeLeakInfo>(monitorInfo);
410     if (userMonitorInfo == nullptr) {
411         HIVIEW_LOGE("failed to create %{public}d memory_leak_info", pid);
412         return;
413     }
414     HIVIEW_LOGI("start monitor pid: %{public}d, name: %{public}s", pid, fullName.c_str());
415     userMonitorInfo->SetPid(pid);
416     userMonitorInfo->SetProcessName(fullName);
417     userMonitorInfo->SetPidStartTime(FaultDetectorUtil::GetProcessStartTime(pid));
418     userMonitorInfo->SetEventMsg(msg);
419     nativeDetectorMtx_.lock();
420     AddMonitorToList(monitorInfo);
421     nativeDetectorMtx_.unlock();
422 }
423 } // namespace HiviewDFX
424 } // namespace OHOS
425