1 /*
2 * Copyright (c) 2022-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "platform_monitor.h"
16
17 #include <algorithm>
18 #include <cinttypes>
19 #include <map>
20 #include <memory>
21 #include <mutex>
22 #include <vector>
23
24 #include "hisysevent.h"
25 #include "hiview_global.h"
26 #include "hiview_logger.h"
27 #include "pipeline.h"
28 #include "sys_event_dao.h"
29 #include "sys_event.h"
30 #include "time_util.h"
31 #include "monitor_config.h"
32
33 namespace OHOS {
34 namespace HiviewDFX {
35 DEFINE_LOG_TAG("HiView-Monitor");
namespace {
// Argument handed to TimeUtil::Sleep between two polls of the backlog size in Breaking().
// NOTE(review): the name suggests the unit is seconds - confirm against TimeUtil::Sleep.
constexpr uint8_t SLEEP_TEN_SECONDS = 10;
} // namespace
39
AccumulateTimeInterval(uint64_t costTime,std::map<int8_t,uint32_t> & stat)40 void PlatformMonitor::AccumulateTimeInterval(uint64_t costTime, std::map<int8_t, uint32_t> &stat)
41 {
42 std::lock_guard<std::mutex> lock(statMutex_);
43 auto it = std::lower_bound(intervals_, intervals_ + sizeof(intervals_) / sizeof(intervals_[0]), costTime);
44 int index = it - intervals_;
45 stat[index] += 1;
46 }
47
CollectEvent(std::shared_ptr<PipelineEvent> event)48 void PlatformMonitor::CollectEvent(std::shared_ptr<PipelineEvent> event)
49 {
50 if (event == nullptr) {
51 return;
52 }
53 std::lock_guard<std::mutex> lock(topMutex_);
54 topDomains_[event->domain_]++;
55 topEvents_[event->eventName_]++;
56 }
57
CollectCostTime(PipelineEvent * event)58 void PlatformMonitor::CollectCostTime(PipelineEvent *event)
59 {
60 // collect data after event destory
61 if (event == nullptr) {
62 return;
63 }
64 onceTotalCnt_++;
65 onceTotalRealTime_ += event->realtime_;
66 onceTotalProcTime_ += event->processTime_;
67 uint64_t waitTime = event->processTime_ > event->realtime_ ? (event->processTime_ - event->realtime_) : 0;
68 onceTotalWaitTime_ += waitTime;
69 AccumulateTimeInterval(event->realtime_, realStat_);
70 AccumulateTimeInterval(event->processTime_, processStat_);
71 AccumulateTimeInterval(waitTime, waitTimeStat_);
72 if (event->realtime_ > realTimeBenchMark_) {
73 overRealTotalCount_++;
74 }
75 if (event->processTime_ > processTimeBenchMark_) {
76 overProcessTotalCount_++;
77 }
78 finishedCount_++;
79 HIVIEW_LOGD("onceTotalCnt_=%{public}u, onceTotalRealTime_=%{public}u, onceTotalProcTime_=%{public}u, "
80 "onceTotalWaitTime_=%{public}u, overRealTotalCount_=%{public}u, overProcessTotalCount_=%{public}u, "
81 "finishedCount_=%{public}u",
82 onceTotalCnt_, onceTotalRealTime_, onceTotalProcTime_,
83 onceTotalWaitTime_, overRealTotalCount_, overProcessTotalCount_,
84 finishedCount_);
85 }
86
CollectPerfProfiler()87 void PlatformMonitor::CollectPerfProfiler()
88 {
89 HIVIEW_LOGI("collect performance profiler");
90 // collect data every 5 minute
91 // collect event max size and max count
92 if (maxTotalCount_ < SysEvent::totalCount_) {
93 maxTotalCount_.store(SysEvent::totalCount_);
94 }
95 if (maxTotalSize_ < SysEvent::totalSize_) {
96 maxTotalSize_.store(SysEvent::totalSize_);
97 }
98 // total count, total size
99 totalCount_ = SysEvent::totalCount_;
100 totalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
101 // min speed, max speed
102 uint32_t onceTotalRealTime = onceTotalRealTime_;
103 uint32_t onceTotalProcTime = onceTotalProcTime_;
104 uint32_t onceTotalWaitTime = onceTotalWaitTime_;
105 uint32_t onceTotalCnt = onceTotalCnt_;
106 onceTotalRealTime_ = 0;
107 onceTotalProcTime_ = 0;
108 onceTotalWaitTime_ = 0;
109 onceTotalCnt_ = 0;
110 if (onceTotalRealTime > 0) {
111 curRealSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalRealTime;
112 if (minSpeed_ == 0 || (minSpeed_ > curRealSpeed_)) {
113 minSpeed_ = curRealSpeed_;
114 }
115 if (curRealSpeed_ > maxSpeed_) {
116 maxSpeed_ = curRealSpeed_;
117 }
118 } else {
119 minSpeed_ = 0;
120 maxSpeed_ = 0;
121 curRealSpeed_ = 0;
122 }
123 if (onceTotalProcTime > 0) {
124 curProcSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalProcTime;
125 } else {
126 curProcSpeed_ = 0;
127 }
128 if (onceTotalCnt > 0) {
129 avgRealTime_ = static_cast<double>(onceTotalRealTime) / onceTotalCnt;
130 avgProcessTime_ = static_cast<double>(onceTotalProcTime) / onceTotalCnt;
131 avgWaitTime_ = static_cast<double>(onceTotalWaitTime) / onceTotalCnt;
132 }
133 HIVIEW_LOGD("maxTotalCount_=%{public}u, maxTotalSize_=%{public}u, totalCount_=%{public}u, totalSize_=%{public}u, "
134 "onceTotalRealTime=%{public}u, onceTotalProcTime=%{public}u, onceTotalWaitTime=%{public}u, "
135 "onceTotalCnt=%{public}u, minSpeed_=%{public}u, maxSpeed_=%{public}u, "
136 "curRealSpeed_=%{public}u, curProcSpeed_=%{public}u, "
137 "avgRealTime_=%{public}f, avgProcessTime_=%{public}f, avgWaitTime_=%{public}f",
138 maxTotalCount_.load(), maxTotalSize_.load(), totalCount_, totalSize_,
139 onceTotalRealTime, onceTotalProcTime, onceTotalWaitTime,
140 onceTotalCnt, minSpeed_, maxSpeed_,
141 curRealSpeed_, curProcSpeed_,
142 avgRealTime_, avgProcessTime_, avgWaitTime_);
143 }
144
GetDomainsStat(PerfMeasure & perfMeasure)145 void PlatformMonitor::GetDomainsStat(PerfMeasure &perfMeasure)
146 {
147 std::lock_guard<std::mutex> lock(topMutex_);
148 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
149 perfMeasure.domains.emplace_back(it->first);
150 perfMeasure.domainCounts.emplace_back(it->second);
151 }
152 topDomains_.clear();
153 topEvents_.clear();
154 }
155
GetCostTimeInterval(PerfMeasure & perfMeasure)156 void PlatformMonitor::GetCostTimeInterval(PerfMeasure &perfMeasure)
157 {
158 std::lock_guard<std::mutex> lock(statMutex_);
159 for (int index = 0; index <= static_cast<int>(sizeof(intervals_) / sizeof(intervals_[0])); index++) {
160 uint32_t realCount = realStat_[index];
161 perfMeasure.realCounts.emplace_back(realCount);
162 uint32_t processCount = processStat_[index];
163 perfMeasure.processCounts.emplace_back(processCount);
164 uint32_t waitCount = waitTimeStat_[index];
165 perfMeasure.waitCounts.emplace_back(waitCount);
166 }
167 realStat_.clear();
168 processStat_.clear();
169 waitTimeStat_.clear();
170 }
171
CalcOverBenckMarkPct(PerfMeasure & perfMeasure)172 void PlatformMonitor::CalcOverBenckMarkPct(PerfMeasure &perfMeasure)
173 {
174 perfMeasure.finishedCount = finishedCount_;
175 perfMeasure.overRealTotalCount = overRealTotalCount_;
176 perfMeasure.overProcessTotalCount = overProcessTotalCount_;
177 finishedCount_ = 0;
178 overRealTotalCount_ = 0;
179 overProcessTotalCount_ = 0;
180
181 if (perfMeasure.finishedCount > 0) {
182 perfMeasure.realPercent = (PCT * perfMeasure.overRealTotalCount) / perfMeasure.finishedCount;
183 } else if (perfMeasure.overRealTotalCount > 0) {
184 perfMeasure.realPercent = PCT;
185 }
186
187 if (perfMeasure.finishedCount > 0) {
188 perfMeasure.processpercent = (PCT * perfMeasure.overProcessTotalCount) / perfMeasure.finishedCount;
189 } else if (perfMeasure.overProcessTotalCount > 0) {
190 perfMeasure.processpercent = PCT;
191 }
192 }
193
GetMaxTotalMeasure(PerfMeasure & perfMeasure)194 void PlatformMonitor::GetMaxTotalMeasure(PerfMeasure &perfMeasure)
195 {
196 perfMeasure.maxTotalCount = maxTotalCount_.load();
197 maxTotalCount_.store(0);
198
199 perfMeasure.maxTotalSize = maxTotalSize_.load();
200 maxTotalSize_.store(0);
201 }
202
GetBreakStat(PerfMeasure & perfMeasure)203 void PlatformMonitor::GetBreakStat(PerfMeasure &perfMeasure)
204 {
205 perfMeasure.totalCount = totalCount_;
206 totalCount_ = 0;
207
208 perfMeasure.totalSize = totalSize_;
209 totalSize_ = 0;
210
211 perfMeasure.breakCount = breakCount_;
212 breakCount_ = 0;
213
214 perfMeasure.breakDuration = breakDuration_;
215 breakDuration_ = 0;
216 }
217
GetMaxSpeed(PerfMeasure & perfMeasure) const218 void PlatformMonitor::GetMaxSpeed(PerfMeasure &perfMeasure) const
219 {
220 perfMeasure.minSpeed = minSpeed_;
221 perfMeasure.maxSpeed = maxSpeed_;
222 }
223
ReportProfile(const PerfMeasure & perfMeasure)224 void PlatformMonitor::ReportProfile(const PerfMeasure& perfMeasure)
225 {
226 int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "PROFILE_STAT", HiSysEvent::EventType::STATISTIC,
227 "MAX_TOTAL_COUNT", perfMeasure.maxTotalCount, "MAX_TOTAL_SIZE", perfMeasure.maxTotalSize,
228 "DOMAINS", perfMeasure.domains, "DOMAIN_DETAIL", perfMeasure.domainCounts,
229 "TOTAL_COUNT", perfMeasure.totalCount, "TOTAL_SIZE", perfMeasure.totalSize,
230 "BREAK_COUNT", perfMeasure.breakCount, "BREAK_DURATION", perfMeasure.breakDuration,
231 "MIN_SPEED", perfMeasure.minSpeed, "MAX_SPEED", perfMeasure.maxSpeed, "REAL_COUNT", perfMeasure.realCounts,
232 "PROCESS_COUNT", perfMeasure.processCounts, "WAIT_COUNT", perfMeasure.waitCounts,
233 "FINISHED_COUNT", perfMeasure.finishedCount, "OVER_REAL_COUNT", perfMeasure.overRealTotalCount,
234 "OVER_REAL_PCT", perfMeasure.realPercent, "OVER_PROC_COUNT", perfMeasure.overProcessTotalCount,
235 "OVER_PROC_PCT", perfMeasure.processpercent);
236 if (ret != SUCCESS) {
237 HIVIEW_LOGE("failed to write PROFILE_STAT event, ret is %{public}d", ret);
238 }
239 }
240
ReportCycleProfile()241 void PlatformMonitor::ReportCycleProfile()
242 {
243 HIVIEW_LOGI("report performance profile");
244 PerfMeasure perfMeasure;
245 // report max event size and count
246 GetMaxTotalMeasure(perfMeasure);
247
248 // report event number of each domain
249 GetDomainsStat(perfMeasure);
250
251 // report total number of event, time of break, duration of break
252 GetBreakStat(perfMeasure);
253
254 // report min speed, max speed
255 GetMaxSpeed(perfMeasure);
256
257 // report real time, process time, wait time of cost time interval
258 GetCostTimeInterval(perfMeasure);
259
260 // report percent and total number of over benchmark
261 CalcOverBenckMarkPct(perfMeasure);
262
263 ReportProfile(perfMeasure);
264 HIVIEW_LOGI("report performance profile have done");
265 }
266
GetTopDomains(std::vector<std::string> & domains,std::vector<uint32_t> & counts)267 void PlatformMonitor::GetTopDomains(std::vector<std::string> &domains, std::vector<uint32_t> &counts)
268 {
269 std::lock_guard<std::mutex> lock(topMutex_);
270 uint8_t topN = 3; // top n
271 if (topDomains_.size() <= topN) {
272 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
273 domains.emplace_back(it->first);
274 counts.emplace_back(it->second);
275 }
276 return;
277 }
278
279 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
280 counts.emplace_back(it->second);
281 }
282 std::sort(counts.begin(), counts.end(), std::greater<int>());
283 counts.resize(topN);
284 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
285 if (domains.size() >= topN) {
286 break;
287 }
288 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
289 domains.emplace_back(it->first);
290 }
291 }
292 return;
293 }
294
GetTopEvents(std::vector<std::string> & events,std::vector<uint32_t> & counts)295 void PlatformMonitor::GetTopEvents(std::vector<std::string> &events, std::vector<uint32_t> &counts)
296 {
297 std::lock_guard<std::mutex> lock(topMutex_);
298 uint8_t topN = 3; // top n
299 if (topEvents_.size() <= topN) {
300 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
301 events.emplace_back(it->first);
302 counts.emplace_back(it->second);
303 }
304 return;
305 }
306
307 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
308 counts.emplace_back(it->second);
309 }
310 std::sort(counts.begin(), counts.end(), std::greater<int>());
311 counts.resize(topN);
312 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
313 if (events.size() >= topN) {
314 break;
315 }
316 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
317 events.emplace_back(it->first);
318 }
319 }
320 return;
321 }
322
ReportBreakProfile()323 void PlatformMonitor::ReportBreakProfile()
324 {
325 // report current event size and count
326 uint32_t curTotalCount_ = SysEvent::totalCount_;
327 uint32_t curTotalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
328
329 // report current speed
330 uint32_t curRealSpeed = curRealSpeed_;
331 uint32_t curProcessSpeed = curProcSpeed_;
332
333 // report average real time, process time, wait time
334 double avgRealTime = avgRealTime_;
335 double avgProcessTime = avgProcessTime_;
336 double avgWaitTime = avgWaitTime_;
337
338 // report topk cost time event
339 std::vector<std::string> events;
340 std::vector<uint32_t> eventCounts;
341 GetTopEvents(events, eventCounts);
342
343 // report topk event and count
344 std::vector<std::string> domains;
345 std::vector<uint32_t> domainCounts;
346 GetTopDomains(domains, domainCounts);
347 int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "BREAK", HiSysEvent::EventType::BEHAVIOR,
348 "TOTAL_COUNT", curTotalCount_, "TOTAL_SIZE", curTotalSize_, "REAL_SPEED", curRealSpeed,
349 "PROC_SPEED", curProcessSpeed, "AVG_REAL_TIME", avgRealTime, "AVG_PROC_TIME", avgProcessTime,
350 "AVG_WAIT_TIME", avgWaitTime, "TOP_EVENT", events, "TOP_EVENT_COUNT", eventCounts, "TOP_DOMAIN", domains,
351 "TOP_DOMAIN_COUNT", domainCounts);
352 if (ret != SUCCESS) {
353 HIVIEW_LOGE("failed to write BREAK event, ret is %{public}d", ret);
354 }
355 }
356
ReportRecoverProfile()357 void PlatformMonitor::ReportRecoverProfile()
358 {
359 // report break duration when recovery
360 int64_t duration = static_cast<int64_t>(recoverTimestamp_ - breakTimestamp_);
361 int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "RECOVER", HiSysEvent::EventType::BEHAVIOR,
362 "DURATION", duration);
363 if (ret != SUCCESS) {
364 HIVIEW_LOGE("failed to write RECOVER event, ret is %{public}d", ret);
365 }
366 }
367
Breaking()368 void PlatformMonitor::Breaking()
369 {
370 // collect break count and duration every break
371 if (SysEvent::totalSize_ <= totalSizeBenchMark_) {
372 return;
373 }
374
375 HIVIEW_LOGE("break as event reach critical size %{public}" PRId64, SysEvent::totalSize_.load());
376 breakTimestamp_ = TimeUtil::GenerateTimestamp();
377 ReportBreakProfile();
378 int64_t recoveryBenchMark = static_cast<int64_t>(totalSizeBenchMark_ * 0.8); // 0.8 of total size will recover
379 while (true) {
380 if (SysEvent::totalSize_ <= recoveryBenchMark) {
381 break;
382 }
383 TimeUtil::Sleep(SLEEP_TEN_SECONDS);
384 }
385 breakCount_++;
386 recoverTimestamp_ = TimeUtil::GenerateTimestamp();
387 breakDuration_ += recoverTimestamp_ - breakTimestamp_;
388 HIVIEW_LOGW("recover after break duration %{public}" PRIu64, breakDuration_);
389 ReportRecoverProfile();
390 }
391
InitData()392 void PlatformMonitor::InitData()
393 {
394 MonitorConfig monitorConfig("/system/etc/hiview/monitor.cfg");
395 if (!monitorConfig.Parse()) {
396 return;
397 }
398
399 monitorConfig.ReadParam("collectPeriod", collectPeriod_);
400 monitorConfig.ReadParam("reportPeriod", reportPeriod_);
401 monitorConfig.ReadParam("totalSizeBenchMark", totalSizeBenchMark_);
402 monitorConfig.ReadParam("realTimeBenchMark", realTimeBenchMark_);
403 monitorConfig.ReadParam("processTimeBenchMark", processTimeBenchMark_);
404 }
405
StartMonitor(std::shared_ptr<EventLoop> looper)406 void PlatformMonitor::StartMonitor(std::shared_ptr<EventLoop> looper)
407 {
408 if (looper == nullptr) {
409 HIVIEW_LOGE("can not get share looper");
410 return;
411 }
412 InitData();
413
414 looper_ = looper;
415
416 auto collectTask = std::bind(&PlatformMonitor::CollectPerfProfiler, this);
417 looper_->AddTimerEvent(nullptr, nullptr, collectTask, collectPeriod_, true);
418 auto reportTask = std::bind(&PlatformMonitor::ReportCycleProfile, this);
419 looper_->AddTimerEvent(nullptr, nullptr, reportTask, reportPeriod_, true);
420 }
421 } // namespace HiviewDFX
422 } // namespace OHOS