1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "thread_sampler.h"
17
18 #include <atomic>
19 #include <condition_variable>
20 #include <memory>
21 #include <queue>
22 #include <set>
23 #include <string>
24
25 #include <sys/mman.h>
26 #include <sys/prctl.h>
27 #include <syscall.h>
28 #include <csignal>
29
30 #include "unwinder.h"
31 #include "dfx_regs.h"
32 #include "dfx_elf.h"
33 #include "dfx_frame_formatter.h"
34 #include "sample_stack_printer.h"
35 #include "thread_sampler_utils.h"
36 #include "file_ex.h"
37
38 #define NO_SANITIZER __attribute__((no_sanitize("address"), no_sanitize("hwaddress")))
39
40 namespace OHOS {
41 namespace HiviewDFX {
ThreadSamplerSignalHandler(int sig,siginfo_t * si,void * context)42 void ThreadSampler::ThreadSamplerSignalHandler(int sig, siginfo_t* si, void* context)
43 {
44 #if defined(__aarch64__)
45 int preErrno = errno;
46 ThreadSampler::GetInstance().WriteContext(context);
47 errno = preErrno;
48 #endif
49 }
50
ThreadSampler()51 ThreadSampler::ThreadSampler()
52 {
53 XCOLLIE_LOGI("Create ThreadSampler.\n");
54 }
55
~ThreadSampler()56 ThreadSampler::~ThreadSampler()
57 {
58 XCOLLIE_LOGI("Destroy ThreadSampler.\n");
59 }
60
FindUnwindTable(uintptr_t pc,UnwindTableInfo & outTableInfo,void * arg)61 int ThreadSampler::FindUnwindTable(uintptr_t pc, UnwindTableInfo& outTableInfo, void *arg)
62 {
63 UnwindInfo* unwindInfo = static_cast<UnwindInfo *>(arg);
64 if (unwindInfo == nullptr) {
65 XCOLLIE_LOGE("invalid FindUnwindTable param\n");
66 return -1;
67 }
68
69 std::shared_ptr<DfxMap> map;
70 if (unwindInfo->maps->FindMapByAddr(pc, map)) {
71 if (map == nullptr) {
72 XCOLLIE_LOGE("FindUnwindTable: map is nullptr\n");
73 return -1;
74 }
75 auto elf = map->GetElf(getpid());
76 if (elf != nullptr) {
77 return elf->FindUnwindTableInfo(pc, map, outTableInfo);
78 }
79 }
80 return -1;
81 }
82
AccessMem(uintptr_t addr,uintptr_t * val,void * arg)83 int ThreadSampler::AccessMem(uintptr_t addr, uintptr_t *val, void *arg)
84 {
85 UnwindInfo* unwindInfo = static_cast<UnwindInfo *>(arg);
86 if (unwindInfo == nullptr || addr + sizeof(uintptr_t) < addr) {
87 XCOLLIE_LOGE("invalid AccessMem param\n");
88 return -1;
89 }
90
91 *val = 0;
92 if (addr < unwindInfo->context->sp ||
93 addr + sizeof(uintptr_t) >= unwindInfo->context->sp + STACK_BUFFER_SIZE) {
94 return ThreadSampler::GetInstance().AccessElfMem(addr, val);
95 } else {
96 size_t stackOffset = addr - unwindInfo->context->sp;
97 if (stackOffset >= STACK_BUFFER_SIZE) {
98 XCOLLIE_LOGE("limit stack\n");
99 return -1;
100 }
101 *val = *(reinterpret_cast<uintptr_t *>(&unwindInfo->context->buffer[stackOffset]));
102 }
103 return 0;
104 }
105
GetMapByPc(uintptr_t pc,std::shared_ptr<DfxMap> & map,void * arg)106 int ThreadSampler::GetMapByPc(uintptr_t pc, std::shared_ptr<DfxMap>& map, void *arg)
107 {
108 UnwindInfo* unwindInfo = static_cast<UnwindInfo *>(arg);
109 if (unwindInfo == nullptr) {
110 XCOLLIE_LOGE("invalid GetMapByPc param\n");
111 return -1;
112 }
113
114 return unwindInfo->maps->FindMapByAddr(pc, map) ? 0 : -1;
115 }
116
Init(int collectStackCount)117 bool ThreadSampler::Init(int collectStackCount)
118 {
119 if (init_) {
120 return true;
121 }
122
123 if (!InitRecordBuffer()) {
124 XCOLLIE_LOGE("Failed to InitRecordBuffer\n");
125 Deinit();
126 return false;
127 }
128
129 if (!InitUnwinder()) {
130 XCOLLIE_LOGE("Failed to InitUnwinder\n");
131 Deinit();
132 return false;
133 }
134
135 pid_ = getprocpid();
136 if (!InitUniqueStackTable()) {
137 XCOLLIE_LOGE("Failed to InitUniqueStackTable\n");
138 Deinit();
139 return false;
140 }
141
142 if (collectStackCount <= 0) {
143 XCOLLIE_LOGE("Invalid collectStackCount\n");
144 Deinit();
145 return false;
146 }
147 stackIdCount_.reserve(collectStackCount);
148
149 init_ = true;
150 return true;
151 }
152
InitRecordBuffer()153 bool ThreadSampler::InitRecordBuffer()
154 {
155 if (mmapStart_ != MAP_FAILED) {
156 return true;
157 }
158 // create buffer
159 bufferSize_ = SAMPLER_MAX_BUFFER_SZ * sizeof(struct ThreadUnwindContext);
160 mmapStart_ = mmap(nullptr, bufferSize_,
161 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
162 if (mmapStart_ == MAP_FAILED) {
163 XCOLLIE_LOGE("Failed to create buffer for thread sampler!(%{public}d)\n", errno);
164 return false;
165 }
166
167 prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, mmapStart_, bufferSize_, "sampler_buf");
168 return true;
169 }
170
ReleaseRecordBuffer()171 void ThreadSampler::ReleaseRecordBuffer()
172 {
173 if (mmapStart_ == MAP_FAILED) {
174 return;
175 }
176 // release buffer
177 if (munmap(mmapStart_, bufferSize_) != 0) {
178 XCOLLIE_LOGE("Failed to release buffer!(%{public}d)\n", errno);
179 return;
180 }
181 mmapStart_ = MAP_FAILED;
182 }
183
InitUnwinder()184 bool ThreadSampler::InitUnwinder()
185 {
186 accessors_ = std::make_shared<OHOS::HiviewDFX::UnwindAccessors>();
187 accessors_->AccessReg = nullptr;
188 accessors_->AccessMem = &ThreadSampler::AccessMem;
189 accessors_->GetMapByPc = &ThreadSampler::GetMapByPc;
190 accessors_->FindUnwindTable = &ThreadSampler::FindUnwindTable;
191 unwinder_ = std::make_shared<Unwinder>(accessors_, true);
192
193 maps_ = DfxMaps::Create();
194 if (maps_ == nullptr) {
195 XCOLLIE_LOGE("maps is nullptr\n");
196 return false;
197 }
198 if (!maps_->GetStackRange(stackBegin_, stackEnd_)) {
199 XCOLLIE_LOGE("Failed to get stack range\n");
200 return false;
201 }
202 return true;
203 }
204
InitUniqueStackTable()205 bool ThreadSampler::InitUniqueStackTable()
206 {
207 uniqueStackTable_ = std::make_unique<UniqueStackTable>(pid_, uniqueStackTableSize_);
208 if (!uniqueStackTable_->Init()) {
209 XCOLLIE_LOGE("Failed to init unique_table\n");
210 return false;
211 }
212 void* uniqueTableBufMMap = reinterpret_cast<void*>(uniqueStackTable_->GetHeadNode());
213 prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, uniqueTableBufMMap, uniqueStackTableSize_, uniTableMMapName_.c_str());
214 return true;
215 }
216
DeinitUniqueStackTable()217 void ThreadSampler::DeinitUniqueStackTable()
218 {
219 uniqueStackTable_.reset();
220 }
221
DestroyUnwinder()222 void ThreadSampler::DestroyUnwinder()
223 {
224 maps_.reset();
225 unwinder_.reset();
226 accessors_.reset();
227 }
228
AccessElfMem(uintptr_t addr,uintptr_t * val)229 int ThreadSampler::AccessElfMem(uintptr_t addr, uintptr_t *val)
230 {
231 std::shared_ptr<DfxMap> map;
232 if (maps_->FindMapByAddr(addr, map)) {
233 if (map == nullptr) {
234 XCOLLIE_LOGE("AccessElfMem: map is nullptr\n");
235 return -1;
236 }
237 auto elf = map->GetElf(getpid());
238 if (elf != nullptr) {
239 uint64_t foff = addr - map->begin + map->offset - elf->GetBaseOffset();
240 if (elf->Read(foff, val, sizeof(uintptr_t))) {
241 return 0;
242 }
243 }
244 }
245 return -1;
246 }
247
GetReadContext()248 ThreadUnwindContext* ThreadSampler::GetReadContext()
249 {
250 if (mmapStart_ == MAP_FAILED) {
251 return nullptr;
252 }
253 ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
254 int32_t index = readIndex_;
255 if (contextArray[index].requestTime == 0 || contextArray[index].snapshotTime == 0) {
256 return nullptr;
257 }
258
259 ThreadUnwindContext* ret = &contextArray[index];
260 readIndex_ = (index + 1) % SAMPLER_MAX_BUFFER_SZ;
261 return ret;
262 }
263
GetWriteContext()264 ThreadUnwindContext* ThreadSampler::GetWriteContext()
265 {
266 if (mmapStart_ == MAP_FAILED) {
267 return nullptr;
268 }
269 ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
270 int32_t index = writeIndex_;
271 if (contextArray[index].requestTime > 0 &&
272 (contextArray[index].snapshotTime == 0 || contextArray[index].processTime == 0)) {
273 return nullptr;
274 }
275 return &contextArray[index];
276 }
277
WriteContext(void * context)278 NO_SANITIZER void ThreadSampler::WriteContext(void* context)
279 {
280 #if defined(__aarch64__)
281 if (!init_) {
282 return;
283 }
284 #if defined(CONSUME_STATISTICS)
285 uint64_t begin = GetCurrentTimeNanoseconds();
286 #endif
287 if (mmapStart_ == MAP_FAILED) {
288 return;
289 }
290 ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
291 int32_t index = writeIndex_;
292 #if defined(CONSUME_STATISTICS)
293 signalTimeCost_ += begin - contextArray[index].requestTime;
294 #endif
295
296 // current buffer has not been processed, stop copy
297 if (contextArray[index].snapshotTime > 0 && contextArray[index].processTime == 0) {
298 return;
299 }
300
301 contextArray[index].fp = static_cast<ucontext_t*>(context)->uc_mcontext.regs[RegsEnumArm64::REG_FP];
302 contextArray[index].lr = static_cast<ucontext_t*>(context)->uc_mcontext.regs[RegsEnumArm64::REG_LR];
303 contextArray[index].sp = static_cast<ucontext_t*>(context)->uc_mcontext.sp;
304 contextArray[index].pc = static_cast<ucontext_t*>(context)->uc_mcontext.pc;
305 if (contextArray[index].sp < stackBegin_ ||
306 contextArray[index].sp >= stackEnd_) {
307 return;
308 }
309
310 uintptr_t curStackSz = stackEnd_ - contextArray[index].sp;
311 uintptr_t cpySz = curStackSz > STACK_BUFFER_SIZE ? STACK_BUFFER_SIZE : curStackSz;
312
313 for (uintptr_t pos = 0; pos < cpySz; pos++) {
314 reinterpret_cast<char*>(contextArray[index].buffer)[pos] =
315 reinterpret_cast<const char*>(contextArray[index].sp)[pos];
316 }
317
318 writeIndex_ = (index + 1) % SAMPLER_MAX_BUFFER_SZ;
319 uint64_t end = GetCurrentTimeNanoseconds();
320 contextArray[index].processTime.store(0, std::memory_order_relaxed);
321 contextArray[index].snapshotTime.store(end, std::memory_order_release);
322
323 #if defined(CONSUME_STATISTICS)
324 copyStackCount_++;
325 copyStackTimeCost_ += end - begin;
326 #endif
327 #endif // #if defined(__aarch64__)
328 }
329
SendSampleRequest()330 void ThreadSampler::SendSampleRequest()
331 {
332 ThreadUnwindContext* ptr = GetWriteContext();
333 if (ptr == nullptr) {
334 return;
335 }
336
337 uint64_t ts = GetCurrentTimeNanoseconds();
338
339 ptr->requestTime = ts;
340 siginfo_t si {0};
341 si.si_signo = MUSL_SIGNAL_SAMPLE_STACK;
342 si.si_errno = 0;
343 si.si_code = -1;
344 if (syscall(SYS_rt_tgsigqueueinfo, pid_, pid_, si.si_signo, &si) != 0) {
345 XCOLLIE_LOGE("Failed to queue signal(%{public}d) to %{public}d, errno(%{public}d).\n",
346 si.si_signo, pid_, errno);
347 return;
348 }
349 #if defined (CONSUME_STATISTICS)
350 requestCount_++;
351 #endif
352 }
353
ProcessStackBuffer()354 void ThreadSampler::ProcessStackBuffer()
355 {
356 #if defined(__aarch64__)
357 if (!init_) {
358 XCOLLIE_LOGE("sampler has not initialized.\n");
359 return;
360 }
361 while (true) {
362 ThreadUnwindContext* context = GetReadContext();
363 if (context == nullptr) {
364 break;
365 }
366
367 UnwindInfo unwindInfo = {
368 .context = context,
369 .maps = maps_.get(),
370 };
371
372 struct TimeAndFrames taf;
373 taf.requestTime = unwindInfo.context->requestTime;
374 taf.snapshotTime = unwindInfo.context->snapshotTime;
375
376 #if defined(CONSUME_STATISTICS)
377 uint64_t unwindStart = GetCurrentTimeNanoseconds();
378 #endif
379 DoUnwind(unwinder_, unwindInfo);
380 #if defined(CONSUME_STATISTICS)
381 uint64_t unwindEnd = GetCurrentTimeNanoseconds();
382 #endif
383 /* for print full stack */
384 auto frames = unwinder_->GetFrames();
385 taf.frameList = frames;
386 timeAndFrameList_.emplace_back(taf);
387 /* for print tree format stack */
388 auto pcs = unwinder_->GetPcs();
389 uint64_t stackId = 0;
390 auto stackIdPtr = reinterpret_cast<OHOS::HiviewDFX::StackId*>(&stackId);
391 uniqueStackTable_->PutPcsInTable(stackIdPtr, pcs.data(), pcs.size());
392 PutStackId(stackIdCount_, stackId);
393
394 uint64_t ts = GetCurrentTimeNanoseconds();
395
396 #if defined(CONSUME_STATISTICS)
397 processTimeCost_ += ts - unwindStart;
398 processCount_++;
399 unwindCount_++;
400 unwindTimeCost_ += unwindEnd - unwindStart;
401 #endif //#if defined(CONSUME_STATISTICS)
402 context->requestTime.store(0, std::memory_order_release);
403 context->snapshotTime.store(0, std::memory_order_release);
404 context->processTime.store(ts, std::memory_order_release);
405 }
406 #endif // #if defined(__aarch64__)
407 }
408
Sample()409 int32_t ThreadSampler::Sample()
410 {
411 if (!init_) {
412 XCOLLIE_LOGE("sampler has not initialized.\n");
413 return -1;
414 }
415 #if defined(CONSUME_STATISTICS)
416 sampleCount_++;
417 #endif
418 SendSampleRequest();
419 ProcessStackBuffer();
420 return 0;
421 }
422
ResetConsumeInfo()423 void ThreadSampler::ResetConsumeInfo()
424 {
425 #if defined(CONSUME_STATISTICS)
426 sampleCount_ = 0;
427 requestCount_ = 0;
428 copyStackCount_ = 0;
429 copyStackTimeCost_ = 0;
430 processTimeCost_ = 0;
431 processCount_ = 0;
432 unwindCount_ = 0;
433 unwindTimeCost_ = 0;
434 signalTimeCost_ = 0;
435 #endif // #if defined(CONSUME_STATISTICS)
436 }
437
CollectStack(std::string & stack,bool treeFormat)438 bool ThreadSampler::CollectStack(std::string& stack, bool treeFormat)
439 {
440 ProcessStackBuffer();
441
442 if (!init_) {
443 XCOLLIE_LOGE("sampler has not initialized.\n");
444 }
445
446 stack.clear();
447 if (timeAndFrameList_.empty() && stackIdCount_.empty()) {
448 if (!LoadStringFromFile("/proc/self/wchan", stack)) {
449 XCOLLIE_LOGE("read file failed.\n");
450 }
451 if (stack.empty()) {
452 stack += "empty";
453 }
454 stack += "\n";
455 #if defined(CONSUME_STATISTICS)
456 ResetConsumeInfo();
457 #endif
458 return false;
459 }
460
461 #if defined(CONSUME_STATISTICS)
462 uint64_t collectStart = GetCurrentTimeNanoseconds();
463 #endif
464 auto printer = std::make_unique<SampleStackPrinter>(unwinder_, maps_);
465 if (!treeFormat) {
466 stack = printer->GetFullStack(timeAndFrameList_);
467 } else {
468 stack = printer->GetTreeStack(stackIdCount_, uniqueStackTable_);
469 }
470 timeAndFrameList_.clear();
471 stackIdCount_.clear();
472
473 #if defined(CONSUME_STATISTICS)
474 uint64_t collectEnd = GetCurrentTimeNanoseconds();
475 uint64_t elapse = collectEnd - collectStart;
476 XCOLLIE_LOGI("Sample count:%{public}llu\nRequest count:%{public}llu\n\
477 Snapshot count:%{public}llu\nAverage copy stack time:%{public}llu ns\n",
478 (unsigned long long)sampleCount_, (unsigned long long)requestCount_,
479 (unsigned long long)copyStackCount_, (unsigned long long)copyStackTimeCost_ / copyStackCount_);
480 XCOLLIE_LOGI("Average process time:%{public}llu ns\n", (unsigned long long)processTimeCost_/processCount_);
481 XCOLLIE_LOGI("Average unwind time:%{public}llu ns\n", (unsigned long long)unwindTimeCost_/unwindCount_);
482 XCOLLIE_LOGI("FormatStack time:%{public}llu ns\n", (unsigned long long)elapse);
483 ResetConsumeInfo();
484 #endif
485 return true;
486 }
487
Deinit()488 bool ThreadSampler::Deinit()
489 {
490 if (!init_) {
491 return true;
492 }
493 DeinitUniqueStackTable();
494 DestroyUnwinder();
495 ReleaseRecordBuffer();
496 init_ = false;
497 return !init_;
498 }
499 } // end of namespace HiviewDFX
500 } // end of namespace OHOS
501