1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "core/common/watch_dog.h"
17 
18 #include <csignal>
19 
20 #include "base/log/event_report.h"
21 #include "bridge/common/utils/engine_helper.h"
22 #include "core/common/ace_engine.h"
23 #include "core/common/anr_thread.h"
24 
25 namespace OHOS::Ace {
26 namespace {
27 
// Delay handed to AnrThread::PostTaskToTaskRunner before the next Check(), per watcher state.
constexpr int32_t NORMAL_CHECK_PERIOD = 3;
constexpr int32_t WARNING_CHECK_PERIOD = 2;
constexpr int32_t FREEZE_CHECK_PERIOD = 1;
// Delay used for bomb tasks that are posted to the ANR task runner without waiting.
constexpr int32_t IMMEDIATELY_PERIOD = 0;

// Thread names that appear in the watchdog log output.
constexpr char JS_THREAD_NAME[] = "JS";
constexpr char UI_THREAD_NAME[] = "UI";
constexpr char UNKNOWN_THREAD_NAME[] = "unknown thread";

// Threshold for (GetMilliseconds() - bombId); an older pending bomb is detonated.
constexpr uint64_t ANR_INPUT_FREEZE_TIME = 5000;
// Number of FREEZE-state checks after which a suppressed ANR dialog may be shown again.
constexpr int32_t ANR_DIALOG_BLOCK_TIME = 20;

// Health of a watched thread as tracked by ThreadWatcher::Check().
enum class State { NORMAL, WARNING, FREEZE };
39 
40 #if defined(OHOS_PLATFORM) || defined(ANDROID_PLATFORM)
// Signal number treated as a request to trigger garbage collection.
constexpr int32_t SIGNAL_FOR_GC = 60;
// Delay passed to the ANR task runner between two polls for a pending GC signal.
constexpr int32_t GC_CHECK_PERIOD = 1;

// Thread that polls for the GC signal; assigned in InitializeGcTrigger() and used by
// OnSignalReceive() as the target of pthread_kill().
pthread_t g_signalThread;
44 
CheckGcSignal()45 void CheckGcSignal()
46 {
47     // Check if GC signal is in pending signal set
48     sigset_t sigSet;
49     sigemptyset(&sigSet);
50     sigaddset(&sigSet, SIGNAL_FOR_GC);
51     struct timespec interval = {
52         .tv_sec = 0,
53         .tv_nsec = 0,
54     };
55     int32_t result = sigtimedwait(&sigSet, nullptr, &interval);
56     if (result < 0) {
57         if (errno != EAGAIN && errno != EINTR) {
58             LOGE("Failed to wait signals, errno = %{public}d", errno);
59             return;
60         }
61     } else {
62         ACE_DCHECK(result == SIGNAL_FOR_GC);
63 
64         // Start GC
65         LOGE("Receive GC signal");
66         AceEngine::Get().TriggerGarbageCollection();
67     }
68 
69     // Check again
70     AnrThread::AnrThread::PostTaskToTaskRunner(CheckGcSignal, GC_CHECK_PERIOD, "ArkUIWatchDogCheckGcSignal");
71 }
72 
BlockGcSignal()73 inline int32_t BlockGcSignal()
74 {
75     // Block GC signal on current thread.
76     sigset_t sigSet;
77     sigemptyset(&sigSet);
78     sigaddset(&sigSet, SIGNAL_FOR_GC);
79     return pthread_sigmask(SIG_BLOCK, &sigSet, nullptr);
80 }
81 
OnSignalReceive(int32_t sigNum)82 void OnSignalReceive(int32_t sigNum)
83 {
84     // Forward GC signal to signal handling thread
85     pthread_kill(g_signalThread, sigNum);
86     BlockGcSignal();
87 }
88 
InitializeGcTrigger()89 void InitializeGcTrigger()
90 {
91     // Record watch dog thread as signal handling thread
92     g_signalThread = pthread_self();
93 
94     int32_t result = BlockGcSignal();
95     if (result != 0) {
96         LOGE("Failed to block GC signal, errno = %{public}d", result);
97         return;
98     }
99 
100     // Start to receive GC signal
101     signal(SIGNAL_FOR_GC, OnSignalReceive);
102     // Start check GC signal
103     CheckGcSignal();
104 }
105 #endif // #if defined(OHOS_PLATFORM) || defined(ANDROID_PLATFORM)
106 
107 } // namespace
108 
109 class ThreadWatcher final : public Referenced {
110 public:
111     ThreadWatcher(int32_t instanceId, TaskExecutor::TaskType type, bool useUIAsJSThread = false);
112     ~ThreadWatcher() override;
113 
114     void SetTaskExecutor(const RefPtr<TaskExecutor>& taskExecutor);
115 
116     void BuriedBomb(uint64_t bombId);
117     void DefusingBomb();
118 
119 private:
120     void InitThreadName();
121     void CheckAndResetIfNeeded();
122     bool IsThreadStuck();
123     void HiviewReport() const;
124     void RawReport(RawEventType type) const;
125     void PostCheckTask();
126     void TagIncrease();
127     void Check();
128     void ShowDialog() const;
129     void DefusingTopBomb();
130     void DetonatedBomb();
131 
132     mutable std::shared_mutex mutex_;
133     int32_t instanceId_ = 0;
134     TaskExecutor::TaskType type_;
135     std::string threadName_;
136     int32_t loopTime_ = 0;
137     int32_t threadTag_ = 0;
138     int32_t lastLoopTime_ = 0;
139     int32_t lastThreadTag_ = 0;
140     int32_t freezeCount_ = 0;
141     int64_t lastTaskId_ = -1;
142     State state_ = State::NORMAL;
143     WeakPtr<TaskExecutor> taskExecutor_;
144     std::queue<uint64_t> inputTaskIds_;
145     bool canShowDialog_ = true;
146     int32_t showDialogCount_ = 0;
147     bool useUIAsJSThread_ = false;
148 };
149 
ThreadWatcher(int32_t instanceId,TaskExecutor::TaskType type,bool useUIAsJSThread)150 ThreadWatcher::ThreadWatcher(int32_t instanceId, TaskExecutor::TaskType type, bool useUIAsJSThread)
151     : instanceId_(instanceId), type_(type), useUIAsJSThread_(useUIAsJSThread)
152 {
153     InitThreadName();
154     AnrThread::PostTaskToTaskRunner(
155         [weak = Referenced::WeakClaim(this)]() {
156             auto sp = weak.Upgrade();
157             CHECK_NULL_VOID(sp);
158             sp->Check();
159         },
160         NORMAL_CHECK_PERIOD, "ArkUIWatchDogCheck");
161 }
162 
~ThreadWatcher()163 ThreadWatcher::~ThreadWatcher() {}
164 
SetTaskExecutor(const RefPtr<TaskExecutor> & taskExecutor)165 void ThreadWatcher::SetTaskExecutor(const RefPtr<TaskExecutor>& taskExecutor)
166 {
167     taskExecutor_ = taskExecutor;
168 }
169 
BuriedBomb(uint64_t bombId)170 void ThreadWatcher::BuriedBomb(uint64_t bombId)
171 {
172     std::unique_lock<std::shared_mutex> lock(mutex_);
173     inputTaskIds_.emplace(bombId);
174 }
175 
DefusingBomb()176 void ThreadWatcher::DefusingBomb()
177 {
178     auto taskExecutor = taskExecutor_.Upgrade();
179     CHECK_NULL_VOID(taskExecutor);
180     taskExecutor->PostTask(
181         [weak = Referenced::WeakClaim(this)]() {
182             auto sp = weak.Upgrade();
183             if (sp) {
184                 sp->DefusingTopBomb();
185             }
186         },
187         type_, "ArkUIWatchDogDefusingTopBomb");
188 }
189 
DefusingTopBomb()190 void ThreadWatcher::DefusingTopBomb()
191 {
192     std::unique_lock<std::shared_mutex> lock(mutex_);
193     if (inputTaskIds_.empty()) {
194         return;
195     }
196 
197     inputTaskIds_.pop();
198 }
199 
InitThreadName()200 void ThreadWatcher::InitThreadName()
201 {
202     switch (type_) {
203         case TaskExecutor::TaskType::JS:
204             threadName_ = JS_THREAD_NAME;
205             break;
206         case TaskExecutor::TaskType::UI:
207             threadName_ = UI_THREAD_NAME;
208             break;
209         default:
210             threadName_ = UNKNOWN_THREAD_NAME;
211             break;
212     }
213 }
214 
DetonatedBomb()215 void ThreadWatcher::DetonatedBomb()
216 {
217     std::shared_lock<std::shared_mutex> lock(mutex_);
218     if (inputTaskIds_.empty()) {
219         return;
220     }
221 
222     uint64_t currentTime = GetMilliseconds();
223     uint64_t bombId = inputTaskIds_.front();
224     if (currentTime - bombId > ANR_INPUT_FREEZE_TIME) {
225         LOGE("Detonated the Bomb, which bombId is %{public}s and currentTime is %{public}s",
226             std::to_string(bombId).c_str(), std::to_string(currentTime).c_str());
227         if (canShowDialog_) {
228             ShowDialog();
229             canShowDialog_ = false;
230             showDialogCount_ = 0;
231         } else {
232             LOGE("Can not show dialog when detonated the Bomb.");
233         }
234 
235         std::queue<uint64_t> empty;
236         std::swap(empty, inputTaskIds_);
237     }
238 }
239 
Check()240 void ThreadWatcher::Check()
241 {
242     int32_t period = NORMAL_CHECK_PERIOD;
243     if (!IsThreadStuck()) {
244         if (state_ == State::FREEZE) {
245             RawReport(RawEventType::RECOVER);
246         }
247         freezeCount_ = 0;
248         state_ = State::NORMAL;
249         canShowDialog_ = true;
250         showDialogCount_ = 0;
251     } else {
252         if (state_ == State::NORMAL) {
253             HiviewReport();
254             RawReport(RawEventType::WARNING);
255             state_ = State::WARNING;
256             period = WARNING_CHECK_PERIOD;
257         } else if (state_ == State::WARNING) {
258             RawReport(RawEventType::FREEZE);
259             state_ = State::FREEZE;
260             period = FREEZE_CHECK_PERIOD;
261             DetonatedBomb();
262         } else {
263             if (!canShowDialog_) {
264                 showDialogCount_++;
265                 if (showDialogCount_ >= ANR_DIALOG_BLOCK_TIME) {
266                     canShowDialog_ = true;
267                     showDialogCount_ = 0;
268                 }
269             }
270 
271             if (++freezeCount_ >= 5) {
272                 RawReport(RawEventType::FREEZE);
273                 freezeCount_ = 0;
274             }
275             period = FREEZE_CHECK_PERIOD;
276             DetonatedBomb();
277         }
278     }
279 
280     AnrThread::PostTaskToTaskRunner(
281         [weak = Referenced::WeakClaim(this)]() {
282             auto sp = weak.Upgrade();
283             CHECK_NULL_VOID(sp);
284             sp->Check();
285         },
286         period, "ArkUIWatchDogThreadStateCheck");
287 }
288 
CheckAndResetIfNeeded()289 void ThreadWatcher::CheckAndResetIfNeeded()
290 {
291     {
292         std::shared_lock<std::shared_mutex> lock(mutex_);
293         if (loopTime_ < INT32_MAX) {
294             return;
295         }
296     }
297 
298     std::unique_lock<std::shared_mutex> lock(mutex_);
299     loopTime_ = 0;
300     threadTag_ = 0;
301 }
302 
IsThreadStuck()303 bool ThreadWatcher::IsThreadStuck()
304 {
305     bool res = false;
306     auto taskExecutor = taskExecutor_.Upgrade();
307     CHECK_NULL_RETURN(taskExecutor, false);
308     uint32_t taskId = taskExecutor->GetTotalTaskNum(type_);
309     if (useUIAsJSThread_) {
310         taskId += taskExecutor->GetTotalTaskNum(TaskExecutor::TaskType::JS);
311     }
312     {
313         std::shared_lock<std::shared_mutex> lock(mutex_);
314         if (((loopTime_ - threadTag_) > (lastLoopTime_ - lastThreadTag_)) && (lastTaskId_ == taskId)) {
315             std::string abilityName;
316             if (AceEngine::Get().GetContainer(instanceId_) != nullptr) {
317                 abilityName = AceEngine::Get().GetContainer(instanceId_)->GetHostClassName();
318             }
319             LOGE("thread stuck, ability: %{public}s, instanceId: %{public}d, thread: %{public}s, looptime: %{public}d, "
320                  "checktime: %{public}d",
321                 abilityName.c_str(), instanceId_, threadName_.c_str(), loopTime_, threadTag_);
322             res = true;
323         }
324         lastTaskId_ = taskId;
325         lastLoopTime_ = loopTime_;
326         lastThreadTag_ = threadTag_;
327     }
328     CheckAndResetIfNeeded();
329     PostCheckTask();
330     return res;
331 }
332 
HiviewReport() const333 void ThreadWatcher::HiviewReport() const
334 {
335     if (type_ == TaskExecutor::TaskType::JS) {
336         EventReport::SendJsException(JsExcepType::JS_THREAD_STUCK);
337     } else if (type_ == TaskExecutor::TaskType::UI) {
338         EventReport::SendRenderException(RenderExcepType::UI_THREAD_STUCK);
339     }
340 }
341 
RawReport(RawEventType type) const342 void ThreadWatcher::RawReport(RawEventType type) const
343 {
344     std::string message;
345     int32_t tid = 0;
346     auto taskExecutor = taskExecutor_.Upgrade();
347     if (taskExecutor) {
348         if (type == RawEventType::FREEZE &&
349             (type_ == TaskExecutor::TaskType::JS || (useUIAsJSThread_ && (type_ == TaskExecutor::TaskType::UI)))) {
350             auto m = std::make_shared<std::mutex>();
351             std::lock_guard lk(*m);
352             auto engine = EngineHelper::GetEngine(instanceId_);
353             message = engine ? engine->GetStacktraceMessage() : "";
354 
355             taskExecutor->PostTask(
356                 [engine, m]() mutable {
357                     std::lock_guard lk(*m);
358                     engine.Reset();
359                 },
360                 TaskExecutor::TaskType::JS, "ArkUIWatchDogEngineReset");
361         }
362         tid = taskExecutor->GetTid(type_);
363     }
364 
365     std::string threadInfo = "Blocked thread id = " + std::to_string(tid) + "\n";
366     threadInfo += "JSVM instance id = " + std::to_string(instanceId_) + "\n";
367     message = threadInfo + message;
368     EventReport::ANRRawReport(type, AceApplicationInfo::GetInstance().GetUid(),
369         AceApplicationInfo::GetInstance().GetPackageName(), AceApplicationInfo::GetInstance().GetProcessName(),
370         message);
371 }
372 
ShowDialog() const373 void ThreadWatcher::ShowDialog() const
374 {
375     EventReport::ANRShowDialog(AceApplicationInfo::GetInstance().GetUid(),
376         AceApplicationInfo::GetInstance().GetPackageName(), AceApplicationInfo::GetInstance().GetProcessName());
377 }
378 
PostCheckTask()379 void ThreadWatcher::PostCheckTask()
380 {
381     auto taskExecutor = taskExecutor_.Upgrade();
382     if (taskExecutor) {
383         // post task to specified thread to check it
384         taskExecutor->PostTask(
385             [weak = Referenced::WeakClaim(this)]() {
386                 auto sp = weak.Upgrade();
387                 CHECK_NULL_VOID(sp);
388                 sp->TagIncrease();
389             },
390             type_, "ArkUIWatchDogTagIncrease");
391         std::unique_lock<std::shared_mutex> lock(mutex_);
392         ++loopTime_;
393         if (state_ != State::NORMAL) {
394             LOGW("thread check, instanceId: %{public}d, thread: %{public}s, looptime: %{public}d, "
395                  "checktime: %{public}d",
396                 instanceId_, threadName_.c_str(), loopTime_, threadTag_);
397         }
398     } else {
399         LOGW("task executor with instanceId %{public}d invalid when check %{public}s thread whether stuck or not",
400             instanceId_, threadName_.c_str());
401     }
402 }
403 
TagIncrease()404 void ThreadWatcher::TagIncrease()
405 {
406     std::unique_lock<std::shared_mutex> lock(mutex_);
407     ++threadTag_;
408     if (state_ != State::NORMAL) {
409         LOGW("thread check, instanceId: %{public}d, thread: %{public}s, looptime: %{public}d, "
410              "checktime: %{public}d",
411             instanceId_, threadName_.c_str(), loopTime_, threadTag_);
412     }
413 }
414 
WatchDog()415 WatchDog::WatchDog()
416 {
417     AnrThread::Start();
418 #if defined(OHOS_PLATFORM) || defined(ANDROID_PLATFORM)
419     AnrThread::PostTaskToTaskRunner(InitializeGcTrigger, GC_CHECK_PERIOD, "ArkUIWatchDogInitGcTrigger");
420 #endif
421 }
422 
~WatchDog()423 WatchDog::~WatchDog()
424 {
425     AnrThread::Stop();
426 }
427 
Register(int32_t instanceId,const RefPtr<TaskExecutor> & taskExecutor,bool useUIAsJSThread)428 void WatchDog::Register(int32_t instanceId, const RefPtr<TaskExecutor>& taskExecutor, bool useUIAsJSThread)
429 {
430     Watchers watchers = {
431         .jsWatcher = AceType::MakeRefPtr<ThreadWatcher>(instanceId, TaskExecutor::TaskType::JS),
432         .uiWatcher = AceType::MakeRefPtr<ThreadWatcher>(instanceId, TaskExecutor::TaskType::UI, useUIAsJSThread),
433     };
434     watchers.uiWatcher->SetTaskExecutor(taskExecutor);
435     if (!useUIAsJSThread) {
436         watchers.jsWatcher->SetTaskExecutor(taskExecutor);
437     } else {
438         watchers.jsWatcher = nullptr;
439     }
440     const auto resExecutor = watchMap_.try_emplace(instanceId, watchers);
441     if (!resExecutor.second) {
442         LOGW("Duplicate instance id: %{public}d when register to watch dog", instanceId);
443     }
444 }
445 
Unregister(int32_t instanceId)446 void WatchDog::Unregister(int32_t instanceId)
447 {
448     auto num = watchMap_.erase(instanceId);
449     if (num == 0) {
450         LOGW("Unregister from watch dog failed with instanceID %{public}d", instanceId);
451     }
452 }
453 
BuriedBomb(int32_t instanceId,uint64_t bombId)454 void WatchDog::BuriedBomb(int32_t instanceId, uint64_t bombId)
455 {
456     auto iter = watchMap_.find(instanceId);
457     if (iter == watchMap_.end()) {
458         return;
459     }
460 
461     Watchers watchers = iter->second;
462     AnrThread::PostTaskToTaskRunner(
463         [watchers, bombId]() {
464             if (watchers.jsWatcher) {
465                 watchers.jsWatcher->BuriedBomb(bombId);
466             }
467 
468             if (watchers.uiWatcher) {
469                 watchers.uiWatcher->BuriedBomb(bombId);
470             }
471         },
472         IMMEDIATELY_PERIOD, "ArkUIWatchDogBuriedBomb");
473 }
474 
DefusingBomb(int32_t instanceId)475 void WatchDog::DefusingBomb(int32_t instanceId)
476 {
477     auto iter = watchMap_.find(instanceId);
478     if (iter == watchMap_.end()) {
479         return;
480     }
481 
482     Watchers watchers = iter->second;
483     AnrThread::PostTaskToTaskRunner(
484         [watchers]() {
485             if (watchers.jsWatcher) {
486                 watchers.jsWatcher->DefusingBomb();
487             }
488 
489             if (watchers.uiWatcher) {
490                 watchers.uiWatcher->DefusingBomb();
491             }
492         },
493         IMMEDIATELY_PERIOD, "ArkUIWatchDogDefusingBomb");
494 }
495 } // namespace OHOS::Ace
496