1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "thread_sampler.h"
17 
18 #include <atomic>
19 #include <condition_variable>
20 #include <memory>
21 #include <queue>
22 #include <set>
23 #include <string>
24 
25 #include <sys/mman.h>
26 #include <sys/prctl.h>
27 #include <syscall.h>
28 #include <csignal>
29 
30 #include "unwinder.h"
31 #include "dfx_regs.h"
32 #include "dfx_elf.h"
33 #include "dfx_frame_formatter.h"
34 #include "sample_stack_printer.h"
35 #include "thread_sampler_utils.h"
36 #include "file_ex.h"
37 
38 #define NO_SANITIZER __attribute__((no_sanitize("address"), no_sanitize("hwaddress")))
39 
40 namespace OHOS {
41 namespace HiviewDFX {
ThreadSamplerSignalHandler(int sig,siginfo_t * si,void * context)42 void ThreadSampler::ThreadSamplerSignalHandler(int sig, siginfo_t* si, void* context)
43 {
44 #if defined(__aarch64__)
45     int preErrno = errno;
46     ThreadSampler::GetInstance().WriteContext(context);
47     errno = preErrno;
48 #endif
49 }
50 
ThreadSampler()51 ThreadSampler::ThreadSampler()
52 {
53     XCOLLIE_LOGI("Create ThreadSampler.\n");
54 }
55 
~ThreadSampler()56 ThreadSampler::~ThreadSampler()
57 {
58     XCOLLIE_LOGI("Destroy ThreadSampler.\n");
59 }
60 
FindUnwindTable(uintptr_t pc,UnwindTableInfo & outTableInfo,void * arg)61 int ThreadSampler::FindUnwindTable(uintptr_t pc, UnwindTableInfo& outTableInfo, void *arg)
62 {
63     UnwindInfo* unwindInfo = static_cast<UnwindInfo *>(arg);
64     if (unwindInfo == nullptr) {
65         XCOLLIE_LOGE("invalid FindUnwindTable param\n");
66         return -1;
67     }
68 
69     std::shared_ptr<DfxMap> map;
70     if (unwindInfo->maps->FindMapByAddr(pc, map)) {
71         if (map == nullptr) {
72             XCOLLIE_LOGE("FindUnwindTable: map is nullptr\n");
73             return -1;
74         }
75         auto elf = map->GetElf(getpid());
76         if (elf != nullptr) {
77             return elf->FindUnwindTableInfo(pc, map, outTableInfo);
78         }
79     }
80     return -1;
81 }
82 
AccessMem(uintptr_t addr,uintptr_t * val,void * arg)83 int ThreadSampler::AccessMem(uintptr_t addr, uintptr_t *val, void *arg)
84 {
85     UnwindInfo* unwindInfo = static_cast<UnwindInfo *>(arg);
86     if (unwindInfo == nullptr || addr + sizeof(uintptr_t) < addr) {
87         XCOLLIE_LOGE("invalid AccessMem param\n");
88         return -1;
89     }
90 
91     *val = 0;
92     if (addr < unwindInfo->context->sp ||
93         addr + sizeof(uintptr_t) >= unwindInfo->context->sp + STACK_BUFFER_SIZE) {
94         return ThreadSampler::GetInstance().AccessElfMem(addr, val);
95     } else {
96         size_t stackOffset = addr - unwindInfo->context->sp;
97         if (stackOffset >= STACK_BUFFER_SIZE) {
98             XCOLLIE_LOGE("limit stack\n");
99             return -1;
100         }
101         *val = *(reinterpret_cast<uintptr_t *>(&unwindInfo->context->buffer[stackOffset]));
102     }
103     return 0;
104 }
105 
GetMapByPc(uintptr_t pc,std::shared_ptr<DfxMap> & map,void * arg)106 int ThreadSampler::GetMapByPc(uintptr_t pc, std::shared_ptr<DfxMap>& map, void *arg)
107 {
108     UnwindInfo* unwindInfo = static_cast<UnwindInfo *>(arg);
109     if (unwindInfo == nullptr) {
110         XCOLLIE_LOGE("invalid GetMapByPc param\n");
111         return -1;
112     }
113 
114     return unwindInfo->maps->FindMapByAddr(pc, map) ? 0 : -1;
115 }
116 
Init(int collectStackCount)117 bool ThreadSampler::Init(int collectStackCount)
118 {
119     if (init_) {
120         return true;
121     }
122 
123     if (!InitRecordBuffer()) {
124         XCOLLIE_LOGE("Failed to InitRecordBuffer\n");
125         Deinit();
126         return false;
127     }
128 
129     if (!InitUnwinder()) {
130         XCOLLIE_LOGE("Failed to InitUnwinder\n");
131         Deinit();
132         return false;
133     }
134 
135     pid_ = getprocpid();
136     if (!InitUniqueStackTable()) {
137         XCOLLIE_LOGE("Failed to InitUniqueStackTable\n");
138         Deinit();
139         return false;
140     }
141 
142     if (collectStackCount <= 0) {
143         XCOLLIE_LOGE("Invalid collectStackCount\n");
144         Deinit();
145         return false;
146     }
147     stackIdCount_.reserve(collectStackCount);
148 
149     init_ = true;
150     return true;
151 }
152 
InitRecordBuffer()153 bool ThreadSampler::InitRecordBuffer()
154 {
155     if (mmapStart_ != MAP_FAILED) {
156         return true;
157     }
158     // create buffer
159     bufferSize_ = SAMPLER_MAX_BUFFER_SZ * sizeof(struct ThreadUnwindContext);
160     mmapStart_ = mmap(nullptr, bufferSize_,
161         PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
162     if (mmapStart_ == MAP_FAILED) {
163         XCOLLIE_LOGE("Failed to create buffer for thread sampler!(%{public}d)\n", errno);
164         return false;
165     }
166 
167     prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, mmapStart_, bufferSize_, "sampler_buf");
168     return true;
169 }
170 
ReleaseRecordBuffer()171 void ThreadSampler::ReleaseRecordBuffer()
172 {
173     if (mmapStart_ == MAP_FAILED) {
174         return;
175     }
176     // release buffer
177     if (munmap(mmapStart_, bufferSize_) != 0) {
178         XCOLLIE_LOGE("Failed to release buffer!(%{public}d)\n", errno);
179         return;
180     }
181     mmapStart_ = MAP_FAILED;
182 }
183 
InitUnwinder()184 bool ThreadSampler::InitUnwinder()
185 {
186     accessors_ = std::make_shared<OHOS::HiviewDFX::UnwindAccessors>();
187     accessors_->AccessReg = nullptr;
188     accessors_->AccessMem = &ThreadSampler::AccessMem;
189     accessors_->GetMapByPc = &ThreadSampler::GetMapByPc;
190     accessors_->FindUnwindTable = &ThreadSampler::FindUnwindTable;
191     unwinder_ = std::make_shared<Unwinder>(accessors_, true);
192 
193     maps_ = DfxMaps::Create();
194     if (maps_ == nullptr) {
195         XCOLLIE_LOGE("maps is nullptr\n");
196         return false;
197     }
198     if (!maps_->GetStackRange(stackBegin_, stackEnd_)) {
199         XCOLLIE_LOGE("Failed to get stack range\n");
200         return false;
201     }
202     return true;
203 }
204 
InitUniqueStackTable()205 bool ThreadSampler::InitUniqueStackTable()
206 {
207     uniqueStackTable_ = std::make_unique<UniqueStackTable>(pid_, uniqueStackTableSize_);
208     if (!uniqueStackTable_->Init()) {
209         XCOLLIE_LOGE("Failed to init unique_table\n");
210         return false;
211     }
212     void* uniqueTableBufMMap = reinterpret_cast<void*>(uniqueStackTable_->GetHeadNode());
213     prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, uniqueTableBufMMap, uniqueStackTableSize_, uniTableMMapName_.c_str());
214     return true;
215 }
216 
DeinitUniqueStackTable()217 void ThreadSampler::DeinitUniqueStackTable()
218 {
219     uniqueStackTable_.reset();
220 }
221 
DestroyUnwinder()222 void ThreadSampler::DestroyUnwinder()
223 {
224     maps_.reset();
225     unwinder_.reset();
226     accessors_.reset();
227 }
228 
AccessElfMem(uintptr_t addr,uintptr_t * val)229 int ThreadSampler::AccessElfMem(uintptr_t addr, uintptr_t *val)
230 {
231     std::shared_ptr<DfxMap> map;
232     if (maps_->FindMapByAddr(addr, map)) {
233         if (map == nullptr) {
234             XCOLLIE_LOGE("AccessElfMem: map is nullptr\n");
235             return -1;
236         }
237         auto elf = map->GetElf(getpid());
238         if (elf != nullptr) {
239             uint64_t foff = addr - map->begin + map->offset - elf->GetBaseOffset();
240             if (elf->Read(foff, val, sizeof(uintptr_t))) {
241                 return 0;
242             }
243         }
244     }
245     return -1;
246 }
247 
GetReadContext()248 ThreadUnwindContext* ThreadSampler::GetReadContext()
249 {
250     if (mmapStart_ == MAP_FAILED) {
251         return nullptr;
252     }
253     ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
254     int32_t index = readIndex_;
255     if (contextArray[index].requestTime == 0 || contextArray[index].snapshotTime == 0) {
256         return nullptr;
257     }
258 
259     ThreadUnwindContext* ret = &contextArray[index];
260     readIndex_ = (index + 1) % SAMPLER_MAX_BUFFER_SZ;
261     return ret;
262 }
263 
GetWriteContext()264 ThreadUnwindContext* ThreadSampler::GetWriteContext()
265 {
266     if (mmapStart_ == MAP_FAILED) {
267         return nullptr;
268     }
269     ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
270     int32_t index = writeIndex_;
271     if (contextArray[index].requestTime > 0 &&
272         (contextArray[index].snapshotTime == 0 || contextArray[index].processTime == 0)) {
273         return nullptr;
274     }
275     return &contextArray[index];
276 }
277 
WriteContext(void * context)278 NO_SANITIZER void ThreadSampler::WriteContext(void* context)
279 {
280 #if defined(__aarch64__)
281     if (!init_) {
282         return;
283     }
284 #if defined(CONSUME_STATISTICS)
285     uint64_t begin = GetCurrentTimeNanoseconds();
286 #endif
287     if (mmapStart_ == MAP_FAILED) {
288         return;
289     }
290     ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
291     int32_t index = writeIndex_;
292 #if defined(CONSUME_STATISTICS)
293     signalTimeCost_ += begin - contextArray[index].requestTime;
294 #endif
295 
296     // current buffer has not been processed, stop copy
297     if (contextArray[index].snapshotTime > 0 && contextArray[index].processTime == 0) {
298         return;
299     }
300 
301     contextArray[index].fp = static_cast<ucontext_t*>(context)->uc_mcontext.regs[RegsEnumArm64::REG_FP];
302     contextArray[index].lr = static_cast<ucontext_t*>(context)->uc_mcontext.regs[RegsEnumArm64::REG_LR];
303     contextArray[index].sp = static_cast<ucontext_t*>(context)->uc_mcontext.sp;
304     contextArray[index].pc = static_cast<ucontext_t*>(context)->uc_mcontext.pc;
305     if (contextArray[index].sp < stackBegin_ ||
306         contextArray[index].sp >= stackEnd_) {
307         return;
308     }
309 
310     uintptr_t curStackSz = stackEnd_ - contextArray[index].sp;
311     uintptr_t cpySz = curStackSz  > STACK_BUFFER_SIZE ? STACK_BUFFER_SIZE : curStackSz;
312 
313     for (uintptr_t pos = 0; pos < cpySz; pos++) {
314         reinterpret_cast<char*>(contextArray[index].buffer)[pos] =
315             reinterpret_cast<const char*>(contextArray[index].sp)[pos];
316     }
317 
318     writeIndex_ = (index + 1) % SAMPLER_MAX_BUFFER_SZ;
319     uint64_t end = GetCurrentTimeNanoseconds();
320     contextArray[index].processTime.store(0, std::memory_order_relaxed);
321     contextArray[index].snapshotTime.store(end, std::memory_order_release);
322 
323 #if defined(CONSUME_STATISTICS)
324     copyStackCount_++;
325     copyStackTimeCost_ += end - begin;
326 #endif
327 #endif  // #if defined(__aarch64__)
328 }
329 
SendSampleRequest()330 void ThreadSampler::SendSampleRequest()
331 {
332     ThreadUnwindContext* ptr = GetWriteContext();
333     if (ptr == nullptr) {
334         return;
335     }
336 
337     uint64_t ts = GetCurrentTimeNanoseconds();
338 
339     ptr->requestTime = ts;
340     siginfo_t si {0};
341     si.si_signo = MUSL_SIGNAL_SAMPLE_STACK;
342     si.si_errno = 0;
343     si.si_code = -1;
344     if (syscall(SYS_rt_tgsigqueueinfo, pid_, pid_, si.si_signo, &si) != 0) {
345         XCOLLIE_LOGE("Failed to queue signal(%{public}d) to %{public}d, errno(%{public}d).\n",
346             si.si_signo, pid_, errno);
347         return;
348     }
349 #if defined (CONSUME_STATISTICS)
350     requestCount_++;
351 #endif
352 }
353 
ProcessStackBuffer()354 void ThreadSampler::ProcessStackBuffer()
355 {
356 #if defined(__aarch64__)
357     if (!init_) {
358         XCOLLIE_LOGE("sampler has not initialized.\n");
359         return;
360     }
361     while (true) {
362         ThreadUnwindContext* context = GetReadContext();
363         if (context == nullptr) {
364             break;
365         }
366 
367         UnwindInfo unwindInfo = {
368             .context = context,
369             .maps = maps_.get(),
370         };
371 
372         struct TimeAndFrames taf;
373         taf.requestTime = unwindInfo.context->requestTime;
374         taf.snapshotTime = unwindInfo.context->snapshotTime;
375 
376 #if defined(CONSUME_STATISTICS)
377         uint64_t unwindStart = GetCurrentTimeNanoseconds();
378 #endif
379         DoUnwind(unwinder_, unwindInfo);
380 #if defined(CONSUME_STATISTICS)
381         uint64_t unwindEnd = GetCurrentTimeNanoseconds();
382 #endif
383         /* for print full stack */
384         auto frames = unwinder_->GetFrames();
385         taf.frameList = frames;
386         timeAndFrameList_.emplace_back(taf);
387         /* for print tree format stack */
388         auto pcs = unwinder_->GetPcs();
389         uint64_t stackId = 0;
390         auto stackIdPtr = reinterpret_cast<OHOS::HiviewDFX::StackId*>(&stackId);
391         uniqueStackTable_->PutPcsInTable(stackIdPtr, pcs.data(), pcs.size());
392         PutStackId(stackIdCount_, stackId);
393 
394         uint64_t ts = GetCurrentTimeNanoseconds();
395 
396 #if defined(CONSUME_STATISTICS)
397         processTimeCost_ += ts - unwindStart;
398         processCount_++;
399         unwindCount_++;
400         unwindTimeCost_ += unwindEnd - unwindStart;
401 #endif  //#if defined(CONSUME_STATISTICS)
402         context->requestTime.store(0, std::memory_order_release);
403         context->snapshotTime.store(0, std::memory_order_release);
404         context->processTime.store(ts, std::memory_order_release);
405     }
406 #endif  // #if defined(__aarch64__)
407 }
408 
Sample()409 int32_t ThreadSampler::Sample()
410 {
411     if (!init_) {
412         XCOLLIE_LOGE("sampler has not initialized.\n");
413         return -1;
414     }
415 #if defined(CONSUME_STATISTICS)
416     sampleCount_++;
417 #endif
418     SendSampleRequest();
419     ProcessStackBuffer();
420     return 0;
421 }
422 
ResetConsumeInfo()423 void ThreadSampler::ResetConsumeInfo()
424 {
425 #if defined(CONSUME_STATISTICS)
426     sampleCount_ = 0;
427     requestCount_ = 0;
428     copyStackCount_ = 0;
429     copyStackTimeCost_ = 0;
430     processTimeCost_ = 0;
431     processCount_ = 0;
432     unwindCount_ = 0;
433     unwindTimeCost_ = 0;
434     signalTimeCost_ = 0;
435 #endif // #if defined(CONSUME_STATISTICS)
436 }
437 
CollectStack(std::string & stack,bool treeFormat)438 bool ThreadSampler::CollectStack(std::string& stack, bool treeFormat)
439 {
440     ProcessStackBuffer();
441 
442     if (!init_) {
443         XCOLLIE_LOGE("sampler has not initialized.\n");
444     }
445 
446     stack.clear();
447     if (timeAndFrameList_.empty() && stackIdCount_.empty()) {
448         if (!LoadStringFromFile("/proc/self/wchan", stack)) {
449             XCOLLIE_LOGE("read file failed.\n");
450         }
451         if (stack.empty()) {
452             stack += "empty";
453         }
454         stack += "\n";
455 #if defined(CONSUME_STATISTICS)
456         ResetConsumeInfo();
457 #endif
458         return false;
459     }
460 
461 #if defined(CONSUME_STATISTICS)
462     uint64_t collectStart = GetCurrentTimeNanoseconds();
463 #endif
464     auto printer = std::make_unique<SampleStackPrinter>(unwinder_, maps_);
465     if (!treeFormat) {
466         stack = printer->GetFullStack(timeAndFrameList_);
467     } else {
468         stack = printer->GetTreeStack(stackIdCount_, uniqueStackTable_);
469     }
470     timeAndFrameList_.clear();
471     stackIdCount_.clear();
472 
473 #if defined(CONSUME_STATISTICS)
474     uint64_t collectEnd = GetCurrentTimeNanoseconds();
475     uint64_t elapse = collectEnd - collectStart;
476     XCOLLIE_LOGI("Sample count:%{public}llu\nRequest count:%{public}llu\n\
477         Snapshot count:%{public}llu\nAverage copy stack time:%{public}llu ns\n",
478         (unsigned long long)sampleCount_, (unsigned long long)requestCount_,
479         (unsigned long long)copyStackCount_, (unsigned long long)copyStackTimeCost_ / copyStackCount_);
480     XCOLLIE_LOGI("Average process time:%{public}llu ns\n", (unsigned long long)processTimeCost_/processCount_);
481     XCOLLIE_LOGI("Average unwind time:%{public}llu ns\n", (unsigned long long)unwindTimeCost_/unwindCount_);
482     XCOLLIE_LOGI("FormatStack time:%{public}llu ns\n", (unsigned long long)elapse);
483     ResetConsumeInfo();
484 #endif
485     return true;
486 }
487 
Deinit()488 bool ThreadSampler::Deinit()
489 {
490     if (!init_) {
491         return true;
492     }
493     DeinitUniqueStackTable();
494     DestroyUnwinder();
495     ReleaseRecordBuffer();
496     init_ = false;
497     return !init_;
498 }
499 } // end of namespace HiviewDFX
500 } // end of namespace OHOS
501