1 /*
2  * Copyright (c) 2022-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "platform_monitor.h"
16 
17 #include <algorithm>
18 #include <cinttypes>
19 #include <map>
20 #include <memory>
21 #include <mutex>
22 #include <vector>
23 
24 #include "hisysevent.h"
25 #include "hiview_global.h"
26 #include "hiview_logger.h"
27 #include "pipeline.h"
28 #include "sys_event_dao.h"
29 #include "sys_event.h"
30 #include "time_util.h"
31 #include "monitor_config.h"
32 
33 namespace OHOS {
34 namespace HiviewDFX {
35 DEFINE_LOG_TAG("HiView-Monitor");
namespace {
// value handed to TimeUtil::Sleep between two recovery checks in Breaking()
constexpr uint8_t SLEEP_TEN_SECONDS = 10;
} // namespace
39 
AccumulateTimeInterval(uint64_t costTime,std::map<int8_t,uint32_t> & stat)40 void PlatformMonitor::AccumulateTimeInterval(uint64_t costTime, std::map<int8_t, uint32_t> &stat)
41 {
42     std::lock_guard<std::mutex> lock(statMutex_);
43     auto it = std::lower_bound(intervals_, intervals_ + sizeof(intervals_) / sizeof(intervals_[0]), costTime);
44     int index = it - intervals_;
45     stat[index] += 1;
46 }
47 
CollectEvent(std::shared_ptr<PipelineEvent> event)48 void PlatformMonitor::CollectEvent(std::shared_ptr<PipelineEvent> event)
49 {
50     if (event == nullptr) {
51         return;
52     }
53     std::lock_guard<std::mutex> lock(topMutex_);
54     topDomains_[event->domain_]++;
55     topEvents_[event->eventName_]++;
56 }
57 
CollectCostTime(PipelineEvent * event)58 void PlatformMonitor::CollectCostTime(PipelineEvent *event)
59 {
60     // collect data after event destory
61     if (event == nullptr) {
62         return;
63     }
64     onceTotalCnt_++;
65     onceTotalRealTime_ += event->realtime_;
66     onceTotalProcTime_ += event->processTime_;
67     uint64_t waitTime = event->processTime_ > event->realtime_ ? (event->processTime_ - event->realtime_) : 0;
68     onceTotalWaitTime_ += waitTime;
69     AccumulateTimeInterval(event->realtime_, realStat_);
70     AccumulateTimeInterval(event->processTime_, processStat_);
71     AccumulateTimeInterval(waitTime, waitTimeStat_);
72     if (event->realtime_ > realTimeBenchMark_) {
73         overRealTotalCount_++;
74     }
75     if (event->processTime_ > processTimeBenchMark_) {
76         overProcessTotalCount_++;
77     }
78     finishedCount_++;
79     HIVIEW_LOGD("onceTotalCnt_=%{public}u, onceTotalRealTime_=%{public}u, onceTotalProcTime_=%{public}u, "
80         "onceTotalWaitTime_=%{public}u, overRealTotalCount_=%{public}u, overProcessTotalCount_=%{public}u, "
81         "finishedCount_=%{public}u",
82         onceTotalCnt_, onceTotalRealTime_, onceTotalProcTime_,
83         onceTotalWaitTime_, overRealTotalCount_, overProcessTotalCount_,
84         finishedCount_);
85 }
86 
CollectPerfProfiler()87 void PlatformMonitor::CollectPerfProfiler()
88 {
89     HIVIEW_LOGI("collect performance profiler");
90     // collect data every 5 minute
91     // collect event max size and max count
92     if (maxTotalCount_ < SysEvent::totalCount_) {
93         maxTotalCount_.store(SysEvent::totalCount_);
94     }
95     if (maxTotalSize_ < SysEvent::totalSize_) {
96         maxTotalSize_.store(SysEvent::totalSize_);
97     }
98     // total count, total size
99     totalCount_ = SysEvent::totalCount_;
100     totalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
101     // min speed, max speed
102     uint32_t onceTotalRealTime = onceTotalRealTime_;
103     uint32_t onceTotalProcTime = onceTotalProcTime_;
104     uint32_t onceTotalWaitTime = onceTotalWaitTime_;
105     uint32_t onceTotalCnt = onceTotalCnt_;
106     onceTotalRealTime_ = 0;
107     onceTotalProcTime_ = 0;
108     onceTotalWaitTime_ = 0;
109     onceTotalCnt_ = 0;
110     if (onceTotalRealTime > 0) {
111         curRealSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalRealTime;
112         if (minSpeed_ == 0 || (minSpeed_ > curRealSpeed_)) {
113             minSpeed_ = curRealSpeed_;
114         }
115         if (curRealSpeed_ > maxSpeed_) {
116             maxSpeed_ = curRealSpeed_;
117         }
118     } else {
119         minSpeed_ = 0;
120         maxSpeed_ = 0;
121         curRealSpeed_ = 0;
122     }
123     if (onceTotalProcTime > 0) {
124         curProcSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalProcTime;
125     } else {
126         curProcSpeed_ = 0;
127     }
128     if (onceTotalCnt > 0) {
129         avgRealTime_ = static_cast<double>(onceTotalRealTime) / onceTotalCnt;
130         avgProcessTime_ = static_cast<double>(onceTotalProcTime) / onceTotalCnt;
131         avgWaitTime_ = static_cast<double>(onceTotalWaitTime) / onceTotalCnt;
132     }
133     HIVIEW_LOGD("maxTotalCount_=%{public}u, maxTotalSize_=%{public}u, totalCount_=%{public}u, totalSize_=%{public}u, "
134         "onceTotalRealTime=%{public}u, onceTotalProcTime=%{public}u, onceTotalWaitTime=%{public}u, "
135         "onceTotalCnt=%{public}u, minSpeed_=%{public}u, maxSpeed_=%{public}u, "
136         "curRealSpeed_=%{public}u, curProcSpeed_=%{public}u, "
137         "avgRealTime_=%{public}f, avgProcessTime_=%{public}f, avgWaitTime_=%{public}f",
138         maxTotalCount_.load(), maxTotalSize_.load(), totalCount_, totalSize_,
139         onceTotalRealTime, onceTotalProcTime, onceTotalWaitTime,
140         onceTotalCnt, minSpeed_, maxSpeed_,
141         curRealSpeed_, curProcSpeed_,
142         avgRealTime_, avgProcessTime_, avgWaitTime_);
143 }
144 
GetDomainsStat(PerfMeasure & perfMeasure)145 void PlatformMonitor::GetDomainsStat(PerfMeasure &perfMeasure)
146 {
147     std::lock_guard<std::mutex> lock(topMutex_);
148     for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
149         perfMeasure.domains.emplace_back(it->first);
150         perfMeasure.domainCounts.emplace_back(it->second);
151     }
152     topDomains_.clear();
153     topEvents_.clear();
154 }
155 
GetCostTimeInterval(PerfMeasure & perfMeasure)156 void PlatformMonitor::GetCostTimeInterval(PerfMeasure &perfMeasure)
157 {
158     std::lock_guard<std::mutex> lock(statMutex_);
159     for (int index = 0; index <= static_cast<int>(sizeof(intervals_) / sizeof(intervals_[0])); index++) {
160         uint32_t realCount = realStat_[index];
161         perfMeasure.realCounts.emplace_back(realCount);
162         uint32_t processCount = processStat_[index];
163         perfMeasure.processCounts.emplace_back(processCount);
164         uint32_t waitCount = waitTimeStat_[index];
165         perfMeasure.waitCounts.emplace_back(waitCount);
166     }
167     realStat_.clear();
168     processStat_.clear();
169     waitTimeStat_.clear();
170 }
171 
CalcOverBenckMarkPct(PerfMeasure & perfMeasure)172 void PlatformMonitor::CalcOverBenckMarkPct(PerfMeasure &perfMeasure)
173 {
174     perfMeasure.finishedCount = finishedCount_;
175     perfMeasure.overRealTotalCount = overRealTotalCount_;
176     perfMeasure.overProcessTotalCount = overProcessTotalCount_;
177     finishedCount_ = 0;
178     overRealTotalCount_ = 0;
179     overProcessTotalCount_ = 0;
180 
181     if (perfMeasure.finishedCount > 0) {
182         perfMeasure.realPercent = (PCT * perfMeasure.overRealTotalCount) / perfMeasure.finishedCount;
183     } else if (perfMeasure.overRealTotalCount > 0) {
184         perfMeasure.realPercent = PCT;
185     }
186 
187     if (perfMeasure.finishedCount > 0) {
188         perfMeasure.processpercent = (PCT * perfMeasure.overProcessTotalCount) / perfMeasure.finishedCount;
189     } else if (perfMeasure.overProcessTotalCount > 0) {
190         perfMeasure.processpercent = PCT;
191     }
192 }
193 
GetMaxTotalMeasure(PerfMeasure & perfMeasure)194 void PlatformMonitor::GetMaxTotalMeasure(PerfMeasure &perfMeasure)
195 {
196     perfMeasure.maxTotalCount = maxTotalCount_.load();
197     maxTotalCount_.store(0);
198 
199     perfMeasure.maxTotalSize = maxTotalSize_.load();
200     maxTotalSize_.store(0);
201 }
202 
GetBreakStat(PerfMeasure & perfMeasure)203 void PlatformMonitor::GetBreakStat(PerfMeasure &perfMeasure)
204 {
205     perfMeasure.totalCount = totalCount_;
206     totalCount_ = 0;
207 
208     perfMeasure.totalSize = totalSize_;
209     totalSize_ = 0;
210 
211     perfMeasure.breakCount = breakCount_;
212     breakCount_ = 0;
213 
214     perfMeasure.breakDuration = breakDuration_;
215     breakDuration_ = 0;
216 }
217 
GetMaxSpeed(PerfMeasure & perfMeasure) const218 void PlatformMonitor::GetMaxSpeed(PerfMeasure &perfMeasure) const
219 {
220     perfMeasure.minSpeed = minSpeed_;
221     perfMeasure.maxSpeed = maxSpeed_;
222 }
223 
ReportProfile(const PerfMeasure & perfMeasure)224 void PlatformMonitor::ReportProfile(const PerfMeasure& perfMeasure)
225 {
226     int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "PROFILE_STAT", HiSysEvent::EventType::STATISTIC,
227         "MAX_TOTAL_COUNT", perfMeasure.maxTotalCount, "MAX_TOTAL_SIZE", perfMeasure.maxTotalSize,
228         "DOMAINS", perfMeasure.domains, "DOMAIN_DETAIL", perfMeasure.domainCounts,
229         "TOTAL_COUNT", perfMeasure.totalCount, "TOTAL_SIZE", perfMeasure.totalSize,
230         "BREAK_COUNT", perfMeasure.breakCount, "BREAK_DURATION", perfMeasure.breakDuration,
231         "MIN_SPEED", perfMeasure.minSpeed, "MAX_SPEED", perfMeasure.maxSpeed, "REAL_COUNT", perfMeasure.realCounts,
232         "PROCESS_COUNT", perfMeasure.processCounts, "WAIT_COUNT", perfMeasure.waitCounts,
233         "FINISHED_COUNT", perfMeasure.finishedCount, "OVER_REAL_COUNT", perfMeasure.overRealTotalCount,
234         "OVER_REAL_PCT", perfMeasure.realPercent, "OVER_PROC_COUNT", perfMeasure.overProcessTotalCount,
235         "OVER_PROC_PCT", perfMeasure.processpercent);
236     if (ret != SUCCESS) {
237         HIVIEW_LOGE("failed to write PROFILE_STAT event, ret is %{public}d", ret);
238     }
239 }
240 
ReportCycleProfile()241 void PlatformMonitor::ReportCycleProfile()
242 {
243     HIVIEW_LOGI("report performance profile");
244     PerfMeasure perfMeasure;
245     // report max event size and count
246     GetMaxTotalMeasure(perfMeasure);
247 
248     // report event number of each domain
249     GetDomainsStat(perfMeasure);
250 
251     // report total number of event, time of break, duration of break
252     GetBreakStat(perfMeasure);
253 
254     // report min speed, max speed
255     GetMaxSpeed(perfMeasure);
256 
257     // report real time, process time, wait time of cost time interval
258     GetCostTimeInterval(perfMeasure);
259 
260     // report percent and total number of over benchmark
261     CalcOverBenckMarkPct(perfMeasure);
262 
263     ReportProfile(perfMeasure);
264     HIVIEW_LOGI("report performance profile have done");
265 }
266 
GetTopDomains(std::vector<std::string> & domains,std::vector<uint32_t> & counts)267 void PlatformMonitor::GetTopDomains(std::vector<std::string> &domains, std::vector<uint32_t> &counts)
268 {
269     std::lock_guard<std::mutex> lock(topMutex_);
270     uint8_t topN = 3; // top n
271     if (topDomains_.size() <= topN) {
272         for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
273             domains.emplace_back(it->first);
274             counts.emplace_back(it->second);
275         }
276         return;
277     }
278 
279     for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
280         counts.emplace_back(it->second);
281     }
282     std::sort(counts.begin(), counts.end(), std::greater<int>());
283     counts.resize(topN);
284     for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
285         if (domains.size() >= topN) {
286             break;
287         }
288         if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
289             domains.emplace_back(it->first);
290         }
291     }
292     return;
293 }
294 
GetTopEvents(std::vector<std::string> & events,std::vector<uint32_t> & counts)295 void PlatformMonitor::GetTopEvents(std::vector<std::string> &events, std::vector<uint32_t> &counts)
296 {
297     std::lock_guard<std::mutex> lock(topMutex_);
298     uint8_t topN = 3; // top n
299     if (topEvents_.size() <= topN) {
300         for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
301             events.emplace_back(it->first);
302             counts.emplace_back(it->second);
303         }
304         return;
305     }
306 
307     for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
308         counts.emplace_back(it->second);
309     }
310     std::sort(counts.begin(), counts.end(), std::greater<int>());
311     counts.resize(topN);
312     for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
313         if (events.size() >= topN) {
314             break;
315         }
316         if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
317             events.emplace_back(it->first);
318         }
319     }
320     return;
321 }
322 
ReportBreakProfile()323 void PlatformMonitor::ReportBreakProfile()
324 {
325     // report current event size and count
326     uint32_t curTotalCount_ = SysEvent::totalCount_;
327     uint32_t curTotalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
328 
329     // report current speed
330     uint32_t curRealSpeed = curRealSpeed_;
331     uint32_t curProcessSpeed = curProcSpeed_;
332 
333     // report average real time, process time, wait time
334     double avgRealTime = avgRealTime_;
335     double avgProcessTime = avgProcessTime_;
336     double avgWaitTime = avgWaitTime_;
337 
338     // report topk cost time event
339     std::vector<std::string> events;
340     std::vector<uint32_t> eventCounts;
341     GetTopEvents(events, eventCounts);
342 
343     // report topk event and count
344     std::vector<std::string> domains;
345     std::vector<uint32_t> domainCounts;
346     GetTopDomains(domains, domainCounts);
347     int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "BREAK", HiSysEvent::EventType::BEHAVIOR,
348         "TOTAL_COUNT", curTotalCount_, "TOTAL_SIZE", curTotalSize_, "REAL_SPEED", curRealSpeed,
349         "PROC_SPEED", curProcessSpeed, "AVG_REAL_TIME", avgRealTime, "AVG_PROC_TIME", avgProcessTime,
350         "AVG_WAIT_TIME", avgWaitTime, "TOP_EVENT", events, "TOP_EVENT_COUNT", eventCounts, "TOP_DOMAIN", domains,
351         "TOP_DOMAIN_COUNT", domainCounts);
352     if (ret != SUCCESS) {
353         HIVIEW_LOGE("failed to write BREAK event, ret is %{public}d", ret);
354     }
355 }
356 
ReportRecoverProfile()357 void PlatformMonitor::ReportRecoverProfile()
358 {
359     // report break duration when recovery
360     int64_t duration = static_cast<int64_t>(recoverTimestamp_ - breakTimestamp_);
361     int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "RECOVER", HiSysEvent::EventType::BEHAVIOR,
362         "DURATION", duration);
363     if (ret != SUCCESS) {
364         HIVIEW_LOGE("failed to write RECOVER event, ret is %{public}d", ret);
365     }
366 }
367 
Breaking()368 void PlatformMonitor::Breaking()
369 {
370     // collect break count and duration every break
371     if (SysEvent::totalSize_ <= totalSizeBenchMark_) {
372         return;
373     }
374 
375     HIVIEW_LOGE("break as event reach critical size %{public}" PRId64, SysEvent::totalSize_.load());
376     breakTimestamp_ = TimeUtil::GenerateTimestamp();
377     ReportBreakProfile();
378     int64_t recoveryBenchMark = static_cast<int64_t>(totalSizeBenchMark_ * 0.8); // 0.8 of total size will recover
379     while (true) {
380         if (SysEvent::totalSize_ <= recoveryBenchMark) {
381             break;
382         }
383         TimeUtil::Sleep(SLEEP_TEN_SECONDS);
384     }
385     breakCount_++;
386     recoverTimestamp_ = TimeUtil::GenerateTimestamp();
387     breakDuration_ += recoverTimestamp_ - breakTimestamp_;
388     HIVIEW_LOGW("recover after break duration %{public}" PRIu64, breakDuration_);
389     ReportRecoverProfile();
390 }
391 
InitData()392 void PlatformMonitor::InitData()
393 {
394     MonitorConfig monitorConfig("/system/etc/hiview/monitor.cfg");
395     if (!monitorConfig.Parse()) {
396         return;
397     }
398 
399     monitorConfig.ReadParam("collectPeriod", collectPeriod_);
400     monitorConfig.ReadParam("reportPeriod", reportPeriod_);
401     monitorConfig.ReadParam("totalSizeBenchMark", totalSizeBenchMark_);
402     monitorConfig.ReadParam("realTimeBenchMark", realTimeBenchMark_);
403     monitorConfig.ReadParam("processTimeBenchMark", processTimeBenchMark_);
404 }
405 
StartMonitor(std::shared_ptr<EventLoop> looper)406 void PlatformMonitor::StartMonitor(std::shared_ptr<EventLoop> looper)
407 {
408     if (looper == nullptr) {
409         HIVIEW_LOGE("can not get share looper");
410         return;
411     }
412     InitData();
413 
414     looper_ = looper;
415 
416     auto collectTask = std::bind(&PlatformMonitor::CollectPerfProfiler, this);
417     looper_->AddTimerEvent(nullptr, nullptr, collectTask, collectPeriod_, true);
418     auto reportTask = std::bind(&PlatformMonitor::ReportCycleProfile, this);
419     looper_->AddTimerEvent(nullptr, nullptr, reportTask, reportPeriod_, true);
420 }
421 } // namespace HiviewDFX
422 } // namespace OHOS