1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "watchdog_task.h"
17 
18 #include <cinttypes>
19 #include <ctime>
20 #include <cstdio>
21 #include <securec.h>
22 #include <thread>
23 
24 #include <fcntl.h>
25 #include <dlfcn.h>
26 #include <unistd.h>
27 
28 #include "backtrace_local.h"
29 #include "hisysevent.h"
30 #include "watchdog_inner.h"
31 #include "xcollie_define.h"
32 #include "xcollie_utils.h"
33 
34 namespace OHOS {
35 namespace HiviewDFX {
namespace {
// Process names consulted by SendXCollieEvent: when the caller's uid is at or
// below uidTypeThreshold, a process whose name is listed here is reported as
// "SERVICE_TIMEOUT"; any other such process is reported as
// "SERVICE_TIMEOUT_WARNING".
constexpr const char* CORE_PROCS[] = {
    "anco_service_br", "aptouch_daemon", "foundation", "init",
    "multimodalinput", "com.ohos.sceneboard", "render_service"
};
}
// Source of task ids: each constructor in this file pre-increments it and
// stores the result in `id`.
int64_t WatchdogTask::curId = 0;
// Device node opened by SendXCollieEvent to issue the LOGGER_GET_STACK ioctl.
// NOTE(review): this is a mutable pointer with external linkage; presumably no
// other translation unit refers to it, in which case it could become
// `constexpr` and file-local — confirm before changing.
const char* BBOX_PATH = "/dev/bbox";
// Argument block handed to the LOGGER_GET_STACK ioctl in SendXCollieEvent.
struct HstackVal {
    uint32_t magic; // set to MAGIC_NUM before the ioctl is issued
    pid_t tid; // set to watchdogTid before the ioctl is issued
    char hstackLogBuff[BUFF_STACK_SIZE]; // read back as text after a successful ioctl
};
WatchdogTask(std::string name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)49 WatchdogTask::WatchdogTask(std::string name, std::shared_ptr<AppExecFwk::EventHandler> handler,
50     TimeOutCallback timeOutCallback, uint64_t interval)
51     : name(name), task(nullptr), timeOutCallback(timeOutCallback), timeout(0), func(nullptr),
52       arg(nullptr), flag(0), timeLimit(0), countLimit(0)
53 {
54     id = ++curId;
55     checker = std::make_shared<HandlerChecker>(name, handler);
56     checkInterval = interval;
57     nextTickTime = GetCurrentTickMillseconds();
58     isTaskScheduled = false;
59     isOneshotTask = false;
60 }
61 
WatchdogTask(std::string name,Task && task,uint64_t delay,uint64_t interval,bool isOneshot)62 WatchdogTask::WatchdogTask(std::string name, Task&& task, uint64_t delay, uint64_t interval,  bool isOneshot)
63     : name(name), task(std::move(task)), timeOutCallback(nullptr), checker(nullptr), timeout(0), func(nullptr),
64       arg(nullptr), flag(0), watchdogTid(0), timeLimit(0), countLimit(0)
65 {
66     id = ++curId;
67     checkInterval = interval;
68     nextTickTime = GetCurrentTickMillseconds() + delay;
69     isTaskScheduled = false;
70     isOneshotTask = isOneshot;
71 }
72 
WatchdogTask(std::string name,unsigned int timeout,XCollieCallback func,void * arg,unsigned int flag)73 WatchdogTask::WatchdogTask(std::string name, unsigned int timeout, XCollieCallback func, void *arg, unsigned int flag)
74     : name(name), task(nullptr), timeOutCallback(nullptr), checker(nullptr), timeout(timeout), func(std::move(func)),
75       arg(arg), flag(flag), timeLimit(0), countLimit(0)
76 {
77     id = ++curId;
78     checkInterval = 0;
79     nextTickTime = GetCurrentTickMillseconds() + timeout;
80     isTaskScheduled = false;
81     isOneshotTask = true;
82     watchdogTid = getproctid();
83 }
84 
WatchdogTask(std::string name,unsigned int timeLimit,int countLimit)85 WatchdogTask::WatchdogTask(std::string name, unsigned int timeLimit, int countLimit)
86     : name(name), task(nullptr), timeOutCallback(nullptr), timeout(0), func(nullptr), arg(nullptr), flag(0),
87       isTaskScheduled(false), isOneshotTask(false), watchdogTid(0), timeLimit(timeLimit), countLimit(countLimit)
88 {
89     id = ++curId;
90     checkInterval = timeLimit / timeLimitIntervalRatio;
91     nextTickTime = GetCurrentTickMillseconds();
92 }
93 
DoCallback()94 void WatchdogTask::DoCallback()
95 {
96     if (func) {
97         XCOLLIE_LOGE("XCollieInner::DoTimerCallback %{public}s callback", name.c_str());
98         func(arg);
99     }
100     if (WatchdogInner::GetInstance().IsCallbackLimit(flag)) {
101         XCOLLIE_LOGE("Too many callback triggered in a short time, %{public}s skip", name.c_str());
102         return;
103     }
104     if (flag & XCOLLIE_FLAG_LOG) {
105         /* send to freezedetector */
106         std::string msg = "timeout: " + name + " to check " + std::to_string(timeout) + "ms ago";
107         SendXCollieEvent(name, msg);
108     }
109     if (getuid() > uidTypeThreshold) {
110         XCOLLIE_LOGI("check uid is app, do not exit");
111         return;
112     }
113     if (flag & XCOLLIE_FLAG_RECOVERY) {
114         XCOLLIE_LOGE("%{public}s blocked, after timeout %{public}llu ,process will exit", name.c_str(),
115             static_cast<long long>(timeout));
116         std::thread exitFunc([]() {
117             std::string description = "timeout, exit...";
118             WatchdogInner::LeftTimeExitProcess(description);
119         });
120         if (exitFunc.joinable()) {
121             exitFunc.detach();
122         }
123     }
124 }
125 
Run(uint64_t now)126 void WatchdogTask::Run(uint64_t now)
127 {
128     if (countLimit > 0) {
129         TimerCountTask();
130         return;
131     }
132 
133     constexpr int resetRatio = 2;
134     if ((checkInterval != 0) && (now - nextTickTime > (resetRatio * checkInterval))) {
135         XCOLLIE_LOGI("checker thread may be blocked, reset next tick time."
136             "now:%{public}" PRIu64 " expect:%{public}" PRIu64 " interval:%{public}" PRIu64 "",
137             now, nextTickTime, checkInterval);
138         nextTickTime = now;
139         isTaskScheduled = false;
140         return;
141     }
142 
143     if (timeout != 0) {
144         DoCallback();
145     } else if (task != nullptr) {
146         task();
147     } else {
148         RunHandlerCheckerTask();
149     }
150 }
151 
TimerCountTask()152 void WatchdogTask::TimerCountTask()
153 {
154     int size = static_cast<int>(triggerTimes.size());
155     if (size < countLimit) {
156         return;
157     }
158     XCOLLIE_LOGD("timeLimit : %{public}" PRIu64 ", countLimit : %{public}d, triggerTimes size : %{public}d",
159         timeLimit, countLimit, size);
160 
161     while (size >= countLimit) {
162         uint64_t timeInterval = triggerTimes[size -1] - triggerTimes[size - countLimit];
163         if (timeInterval < timeLimit) {
164             std::string sendMsg = name + " occured " + std::to_string(countLimit) + " times in " +
165                 std::to_string(timeInterval) + " ms, " + message;
166             HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, name, HiSysEvent::EventType::FAULT,
167                 "PID", getprocpid(), "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg);
168             triggerTimes.clear();
169             return;
170         }
171         size--;
172     }
173 
174     if (triggerTimes.size() > static_cast<unsigned long>(countLimit * countLimitNumMaxRatio)) {
175         triggerTimes.erase(triggerTimes.begin(), triggerTimes.end() - countLimit);
176     }
177 }
178 
RunHandlerCheckerTask()179 void WatchdogTask::RunHandlerCheckerTask()
180 {
181     if (checker == nullptr) {
182         return;
183     }
184 
185     if (!isTaskScheduled) {
186         checker->ScheduleCheck();
187         isTaskScheduled = true;
188     } else {
189         if (EvaluateCheckerState() == CheckStatus::COMPLETED) {
190             // allow next check
191             isTaskScheduled = false;
192         }
193     }
194 }
195 
SendEvent(const std::string & msg,const std::string & eventName)196 void WatchdogTask::SendEvent(const std::string &msg, const std::string &eventName)
197 {
198     int32_t pid = getprocpid();
199     if (IsProcessDebug(pid)) {
200         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
201         return;
202     }
203     uint32_t gid = getgid();
204     uint32_t uid = getuid();
205     time_t curTime = time(nullptr);
206     std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
207         "\n" + msg + "\n";
208     sendMsg += checker->GetDumpInfo();
209 
210     watchdogTid = pid;
211     std::string tidFrontStr = "Thread ID = ";
212     std::string tidRearStr = ") is running";
213     std::size_t frontPos = sendMsg.find(tidFrontStr);
214     std::size_t rearPos = sendMsg.find(tidRearStr);
215     std::size_t startPos = frontPos + tidFrontStr.length();
216     if (frontPos != std::string::npos && rearPos != std::string::npos && rearPos > startPos) {
217         size_t tidLength = rearPos - startPos;
218         if (tidLength < std::to_string(INT32_MAX).length()) {
219             std::string tidStr = sendMsg.substr(startPos, tidLength);
220             if (std::all_of(std::begin(tidStr), std::end(tidStr), [] (const char &c) {
221                 return isdigit(c);
222             })) {
223                 watchdogTid = std::stoi(tidStr);
224             }
225         }
226     }
227 
228     int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
229         "PID", pid, "TID", watchdogTid, "TGID", gid, "UID", uid, "MODULE_NAME", name,
230         "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg, "STACK", GetProcessStacktrace());
231 
232     XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], msg=%{public}s",
233         ret, eventName.c_str(), msg.c_str());
234 }
235 
SendXCollieEvent(const std::string & timerName,const std::string & keyMsg) const236 void WatchdogTask::SendXCollieEvent(const std::string &timerName, const std::string &keyMsg) const
237 {
238     int32_t pid = getprocpid();
239     if (IsProcessDebug(pid)) {
240         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
241         return;
242     }
243     uint32_t gid = getgid();
244     uint32_t uid = getuid();
245     time_t curTime = time(nullptr);
246     std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) + "\n" +
247         "timeout timer: " + timerName + "\n" + keyMsg;
248 
249     struct HstackVal val;
250     if (memset_s(&val, sizeof(val), 0, sizeof(val)) != 0) {
251         XCOLLIE_LOGE("memset val failed\n");
252         return;
253     }
254     val.tid = watchdogTid;
255     val.magic = MAGIC_NUM;
256     int fd = open(BBOX_PATH, O_WRONLY | O_CLOEXEC);
257     if (fd < 0) {
258         XCOLLIE_LOGE("open %{public}s failed", BBOX_PATH);
259         return;
260     }
261     int ret = ioctl(fd, LOGGER_GET_STACK, &val);
262     close(fd);
263     if (ret != 0) {
264         XCOLLIE_LOGE("XCollieDumpKernel getStack failed");
265     } else {
266         XCOLLIE_LOGI("XCollieDumpKernel buff is %{public}s", val.hstackLogBuff);
267     }
268 
269     std::string eventName = "APP_HICOLLIE";
270     std::string processName = GetSelfProcName();
271     std::string stack = "";
272     if (uid <= uidTypeThreshold) {
273         eventName = std::find(std::begin(CORE_PROCS), std::end(CORE_PROCS), processName) != std::end(CORE_PROCS) ?
274             "SERVICE_TIMEOUT" : "SERVICE_TIMEOUT_WARNING";
275         stack = GetProcessStacktrace();
276     } else if (!GetBacktraceStringByTid(stack, watchdogTid, 0, true)) {
277         XCOLLIE_LOGE("get tid:%{public}d BacktraceString failed", watchdogTid);
278     }
279 
280     int result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT, "PID", pid,
281         "TID", watchdogTid, "TGID", gid, "UID", uid, "MODULE_NAME", timerName, "PROCESS_NAME", processName,
282         "MSG", sendMsg, "STACK", stack + "\n"+ (ret != 0 ? "" : val.hstackLogBuff));
283     XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
284         "msg=%{public}s", result, eventName.c_str(), keyMsg.c_str());
285 }
286 
EvaluateCheckerState()287 int WatchdogTask::EvaluateCheckerState()
288 {
289     int waitState = checker->GetCheckState();
290     if (waitState == CheckStatus::COMPLETED) {
291         return waitState;
292     } else if (waitState == CheckStatus::WAITED_HALF) {
293         XCOLLIE_LOGI("Watchdog half-block happened, send event");
294         std::string description = GetBlockDescription(checkInterval / 1000); // 1s = 1000ms
295         if (timeOutCallback != nullptr) {
296             timeOutCallback(name, waitState);
297         } else {
298             if (name.compare(IPC_FULL) != 0) {
299                 SendEvent(description, "SERVICE_WARNING");
300             }
301         }
302     } else {
303         XCOLLIE_LOGI("Watchdog happened, send event twice.");
304         std::string description = GetBlockDescription(checkInterval / 1000) +
305             ", report twice instead of exiting process."; // 1s = 1000ms
306         if (timeOutCallback != nullptr) {
307             timeOutCallback(name, waitState);
308         } else {
309             if (name.compare(IPC_FULL) == 0) {
310                 SendEvent(description, IPC_FULL);
311             } else {
312                 SendEvent(description, "SERVICE_BLOCK");
313             }
314             // peer binder log is collected in hiview asynchronously
315             // if blocked process exit early, binder blocked state will change
316             // thus delay exit and let hiview have time to collect log.
317             WatchdogInner::KillPeerBinderProcess(description);
318         }
319     }
320     return waitState;
321 }
322 
GetBlockDescription(uint64_t interval)323 std::string WatchdogTask::GetBlockDescription(uint64_t interval)
324 {
325     std::string desc = "Watchdog: thread(";
326     desc += name;
327     desc += ") blocked " + std::to_string(interval) + "s";
328     return desc;
329 }
330 } // end of namespace HiviewDFX
331 } // end of namespace OHOS
332