1 /*
2  * Copyright 2016, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <unistd.h>
23 
24 #include <array>
25 #include <deque>
26 #include <string>
27 #include <unordered_map>
28 #include <utility>
29 
30 #include <event2/event.h>
31 #include <event2/listener.h>
32 #include <event2/thread.h>
33 
34 #include <android-base/cmsg.h>
35 #include <android-base/logging.h>
36 #include <android-base/properties.h>
37 #include <android-base/stringprintf.h>
38 #include <android-base/unique_fd.h>
39 #include <cutils/sockets.h>
40 
41 #include "debuggerd/handler.h"
42 #include "dump_type.h"
43 #include "protocol.h"
44 #include "util.h"
45 
46 #include "intercept_manager.h"
47 
48 using android::base::GetIntProperty;
49 using android::base::SendFileDescriptors;
50 using android::base::StringPrintf;
51 
52 using android::base::borrowed_fd;
53 using android::base::unique_fd;
54 
// Intercept manager shared by the crash callbacks. main() allocates it; perform_request() asks it
// (via GetIntercept) whether a dump's output has been claimed by an interceptor.
static InterceptManager* intercept_manager;
56 
// States of a crash request (running vs. queued), judging by the enumerator names.
// NOTE(review): nothing in the visible part of this file refers to this enum — confirm whether it
// is still needed.
enum CrashStatus {
  kCrashStatusRunning,
  kCrashStatusQueued,
};
61 
62 struct CrashArtifact {
63   unique_fd fd;
64   std::optional<std::string> temporary_path;
65 
devnullCrashArtifact66   static CrashArtifact devnull() {
67     CrashArtifact result;
68     result.fd.reset(open("/dev/null", O_WRONLY | O_CLOEXEC));
69     return result;
70   }
71 };
72 
// Final file names for one dump, as produced by CrashQueue::get_next_artifact_paths(). The names
// carry no directory component; they are resolved against the queue's directory fd.
struct CrashArtifactPaths {
  // Name of the text artifact (file name prefix followed by the rotating index).
  std::string text;
  // Name of the proto artifact (same name with a ".pb" suffix); only set by queues that support
  // proto output.
  std::optional<std::string> proto;
};
77 
// The set of output files associated with a single dump.
struct CrashOutput {
  // Text output. Its fd stays unset (-1) when the dump was intercepted.
  CrashArtifact text;
  // Proto output; only present for kDebuggerdTombstoneProto requests.
  std::optional<CrashArtifact> proto;
};
82 
83 // Ownership of Crash is a bit messy.
84 // It's either owned by an active event that must have a timeout, or owned by
85 // queued_requests, in the case that multiple crashes come in at the same time.
86 struct Crash {
~CrashCrash87   ~Crash() { event_free(crash_event); }
88 
89   CrashOutput output;
90   unique_fd crash_socket_fd;
91   pid_t crash_pid;
92   event* crash_event = nullptr;
93 
94   DebuggerdDumpType crash_type;
95 };
96 
97 class CrashQueue {
98  public:
CrashQueue(const std::string & dir_path,const std::string & file_name_prefix,size_t max_artifacts,size_t max_concurrent_dumps,bool supports_proto)99   CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
100              size_t max_concurrent_dumps, bool supports_proto)
101       : file_name_prefix_(file_name_prefix),
102         dir_path_(dir_path),
103         dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
104         max_artifacts_(max_artifacts),
105         next_artifact_(0),
106         max_concurrent_dumps_(max_concurrent_dumps),
107         num_concurrent_dumps_(0),
108         supports_proto_(supports_proto) {
109     if (dir_fd_ == -1) {
110       PLOG(FATAL) << "failed to open directory: " << dir_path;
111     }
112 
113     // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
114     // same filename could be handed out to multiple processes.
115     CHECK(max_artifacts_ > max_concurrent_dumps_);
116 
117     find_oldest_artifact();
118   }
119 
for_crash(const Crash * crash)120   static CrashQueue* for_crash(const Crash* crash) {
121     return (crash->crash_type == kDebuggerdJavaBacktrace) ? for_anrs() : for_tombstones();
122   }
123 
for_crash(const std::unique_ptr<Crash> & crash)124   static CrashQueue* for_crash(const std::unique_ptr<Crash>& crash) {
125     return for_crash(crash.get());
126   }
127 
for_tombstones()128   static CrashQueue* for_tombstones() {
129     static CrashQueue queue("/data/tombstones", "tombstone_" /* file_name_prefix */,
130                             GetIntProperty("tombstoned.max_tombstone_count", 32),
131                             1 /* max_concurrent_dumps */, true /* supports_proto */);
132     return &queue;
133   }
134 
for_anrs()135   static CrashQueue* for_anrs() {
136     static CrashQueue queue("/data/anr", "trace_" /* file_name_prefix */,
137                             GetIntProperty("tombstoned.max_anr_count", 64),
138                             4 /* max_concurrent_dumps */, false /* supports_proto */);
139     return &queue;
140   }
141 
create_temporary_file() const142   CrashArtifact create_temporary_file() const {
143     CrashArtifact result;
144 
145     std::optional<std::string> path;
146     result.fd.reset(openat(dir_fd_, ".", O_WRONLY | O_APPEND | O_TMPFILE | O_CLOEXEC, 0660));
147     if (result.fd == -1) {
148       // We might not have O_TMPFILE. Try creating with an arbitrary filename instead.
149       static size_t counter = 0;
150       std::string tmp_filename = StringPrintf(".temporary%zu", counter++);
151       result.fd.reset(openat(dir_fd_, tmp_filename.c_str(),
152                              O_WRONLY | O_APPEND | O_CREAT | O_TRUNC | O_CLOEXEC, 0660));
153       if (result.fd == -1) {
154         PLOG(FATAL) << "failed to create temporary tombstone in " << dir_path_;
155       }
156 
157       result.temporary_path = std::move(tmp_filename);
158     }
159 
160     return std::move(result);
161   }
162 
get_output(DebuggerdDumpType dump_type)163   std::optional<CrashOutput> get_output(DebuggerdDumpType dump_type) {
164     CrashOutput result;
165 
166     switch (dump_type) {
167       case kDebuggerdNativeBacktrace:
168         // Don't generate tombstones for native backtrace requests.
169         return {};
170 
171       case kDebuggerdTombstoneProto:
172         if (!supports_proto_) {
173           LOG(ERROR) << "received kDebuggerdTombstoneProto on a queue that doesn't support proto";
174           return {};
175         }
176         result.proto = create_temporary_file();
177         result.text = create_temporary_file();
178         break;
179 
180       case kDebuggerdJavaBacktrace:
181       case kDebuggerdTombstone:
182         result.text = create_temporary_file();
183         break;
184 
185       default:
186         LOG(ERROR) << "unexpected dump type: " << dump_type;
187         return {};
188     }
189 
190     return result;
191   }
192 
dir_fd()193   borrowed_fd dir_fd() { return dir_fd_; }
194 
get_next_artifact_paths()195   CrashArtifactPaths get_next_artifact_paths() {
196     CrashArtifactPaths result;
197     result.text = StringPrintf("%s%02d", file_name_prefix_.c_str(), next_artifact_);
198 
199     if (supports_proto_) {
200       result.proto = StringPrintf("%s%02d.pb", file_name_prefix_.c_str(), next_artifact_);
201     }
202 
203     next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
204     return result;
205   }
206 
207   // Consumes crash if it returns true, otherwise leaves it untouched.
maybe_enqueue_crash(std::unique_ptr<Crash> && crash)208   bool maybe_enqueue_crash(std::unique_ptr<Crash>&& crash) {
209     if (num_concurrent_dumps_ == max_concurrent_dumps_) {
210       queued_requests_.emplace_back(std::move(crash));
211       return true;
212     }
213 
214     return false;
215   }
216 
maybe_dequeue_crashes(void (* handler)(std::unique_ptr<Crash> crash))217   void maybe_dequeue_crashes(void (*handler)(std::unique_ptr<Crash> crash)) {
218     while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
219       std::unique_ptr<Crash> next_crash = std::move(queued_requests_.front());
220       queued_requests_.pop_front();
221       handler(std::move(next_crash));
222     }
223   }
224 
on_crash_started()225   void on_crash_started() { ++num_concurrent_dumps_; }
226 
on_crash_completed()227   void on_crash_completed() { --num_concurrent_dumps_; }
228 
229  private:
find_oldest_artifact()230   void find_oldest_artifact() {
231     size_t oldest_tombstone = 0;
232     time_t oldest_time = std::numeric_limits<time_t>::max();
233 
234     for (size_t i = 0; i < max_artifacts_; ++i) {
235       std::string path =
236           StringPrintf("%s/%s%02zu", dir_path_.c_str(), file_name_prefix_.c_str(), i);
237       struct stat st;
238       if (stat(path.c_str(), &st) != 0) {
239         if (errno == ENOENT) {
240           oldest_tombstone = i;
241           break;
242         } else {
243           PLOG(ERROR) << "failed to stat " << path;
244           continue;
245         }
246       }
247 
248       if (st.st_mtime < oldest_time) {
249         oldest_tombstone = i;
250         oldest_time = st.st_mtime;
251       }
252     }
253 
254     next_artifact_ = oldest_tombstone;
255   }
256 
257   const std::string file_name_prefix_;
258 
259   const std::string dir_path_;
260   const int dir_fd_;
261 
262   const size_t max_artifacts_;
263   int next_artifact_;
264 
265   const size_t max_concurrent_dumps_;
266   size_t num_concurrent_dumps_;
267 
268   bool supports_proto_;
269 
270   std::deque<std::unique_ptr<Crash>> queued_requests_;
271 
272   DISALLOW_COPY_AND_ASSIGN(CrashQueue);
273 };
274 
// Whether java trace dumps are produced via tombstoned.
// When true, main() also listens on the java trace control socket.
static constexpr bool kJavaTraceDumpsEnabled = true;

// Forward declare the callbacks so they can be placed in a sensible order.
// crash_accept_cb: a client connected to one of the crash sockets.
static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
                            void*);
// crash_request_cb: the client's dump request arrived, or the wait for it timed out.
static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
// crash_completed_cb: the client reported completion, or the dump timed out.
static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
283 
perform_request(std::unique_ptr<Crash> crash)284 static void perform_request(std::unique_ptr<Crash> crash) {
285   unique_fd output_fd;
286   bool intercepted =
287       intercept_manager->GetIntercept(crash->crash_pid, crash->crash_type, &output_fd);
288   if (intercepted) {
289     if (crash->crash_type == kDebuggerdTombstoneProto) {
290       crash->output.proto = CrashArtifact::devnull();
291     }
292   } else {
293     if (auto o = CrashQueue::for_crash(crash.get())->get_output(crash->crash_type); o) {
294       crash->output = std::move(*o);
295       output_fd.reset(dup(crash->output.text.fd));
296     } else {
297       LOG(ERROR) << "failed to get crash output for type " << crash->crash_type;
298       return;
299     }
300   }
301 
302   TombstonedCrashPacket response = {.packet_type = CrashPacketType::kPerformDump};
303 
304   ssize_t rc = -1;
305   if (crash->output.proto) {
306     rc = SendFileDescriptors(crash->crash_socket_fd, &response, sizeof(response), output_fd.get(),
307                              crash->output.proto->fd.get());
308   } else {
309     rc = SendFileDescriptors(crash->crash_socket_fd, &response, sizeof(response), output_fd.get());
310   }
311 
312   output_fd.reset();
313 
314   if (rc == -1) {
315     PLOG(WARNING) << "failed to send response to CrashRequest";
316     return;
317   } else if (rc != sizeof(response)) {
318     PLOG(WARNING) << "crash socket write returned short";
319     return;
320   }
321 
322   // TODO: Make this configurable by the interceptor?
323   struct timeval timeout = {10 * android::base::HwTimeoutMultiplier(), 0};
324 
325   event_base* base = event_get_base(crash->crash_event);
326 
327   event_assign(crash->crash_event, base, crash->crash_socket_fd, EV_TIMEOUT | EV_READ,
328                crash_completed_cb, crash.get());
329   event_add(crash->crash_event, &timeout);
330   CrashQueue::for_crash(crash)->on_crash_started();
331 
332   // The crash is now owned by the event loop.
333   crash.release();
334 }
335 
crash_accept_cb(evconnlistener * listener,evutil_socket_t sockfd,sockaddr *,int,void *)336 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
337                             void*) {
338   event_base* base = evconnlistener_get_base(listener);
339   Crash* crash = new Crash();
340 
341   // TODO: Make sure that only java crashes come in on the java socket
342   // and only native crashes on the native socket.
343   struct timeval timeout = {1 * android::base::HwTimeoutMultiplier(), 0};
344   event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
345   crash->crash_socket_fd.reset(sockfd);
346   crash->crash_event = crash_event;
347   event_add(crash_event, &timeout);
348 }
349 
crash_request_cb(evutil_socket_t sockfd,short ev,void * arg)350 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
351   std::unique_ptr<Crash> crash(static_cast<Crash*>(arg));
352   TombstonedCrashPacket request = {};
353 
354   if ((ev & EV_TIMEOUT) != 0) {
355     LOG(WARNING) << "crash request timed out";
356     return;
357   } else if ((ev & EV_READ) == 0) {
358     LOG(WARNING) << "tombstoned received unexpected event from crash socket";
359     return;
360   }
361 
362   ssize_t rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
363   if (rc == -1) {
364     PLOG(WARNING) << "failed to read from crash socket";
365     return;
366   } else if (rc != sizeof(request)) {
367     LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
368                  << sizeof(request) << ")";
369     return;
370   }
371 
372   if (request.packet_type != CrashPacketType::kDumpRequest) {
373     LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received  "
374                  << StringPrintf("%#2hhX", request.packet_type);
375     return;
376   }
377 
378   crash->crash_type = request.packet.dump_request.dump_type;
379   if (crash->crash_type < 0 || crash->crash_type > kDebuggerdTombstoneProto) {
380     LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type;
381     return;
382   }
383 
384   if (crash->crash_type != kDebuggerdJavaBacktrace) {
385     crash->crash_pid = request.packet.dump_request.pid;
386   } else {
387     // Requests for java traces are sent from untrusted processes, so we
388     // must not trust the PID sent down with the request. Instead, we ask the
389     // kernel.
390     ucred cr = {};
391     socklen_t len = sizeof(cr);
392     int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
393     if (ret != 0) {
394       PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
395       return;
396     }
397 
398     crash->crash_pid = cr.pid;
399   }
400 
401   pid_t crash_pid = crash->crash_pid;
402   LOG(INFO) << "received crash request for pid " << crash_pid;
403 
404   if (CrashQueue::for_crash(crash)->maybe_enqueue_crash(std::move(crash))) {
405     LOG(INFO) << "enqueueing crash request for pid " << crash_pid;
406   } else {
407     perform_request(std::move(crash));
408   }
409 }
410 
rename_tombstone_fd(borrowed_fd fd,borrowed_fd dirfd,const std::string & path)411 static bool rename_tombstone_fd(borrowed_fd fd, borrowed_fd dirfd, const std::string& path) {
412   // Always try to unlink the tombstone file.
413   // linkat doesn't let us replace a file, so we need to unlink before linking
414   // our results onto disk, and if we fail for some reason, we should delete
415   // stale tombstones to avoid confusing inconsistency.
416   int rc = unlinkat(dirfd.get(), path.c_str(), 0);
417   if (rc != 0 && errno != ENOENT) {
418     PLOG(ERROR) << "failed to unlink tombstone at " << path;
419     return false;
420   }
421 
422   std::string fd_path = StringPrintf("/proc/self/fd/%d", fd.get());
423   rc = linkat(AT_FDCWD, fd_path.c_str(), dirfd.get(), path.c_str(), AT_SYMLINK_FOLLOW);
424   if (rc != 0) {
425     PLOG(ERROR) << "failed to link tombstone at " << path;
426     return false;
427   }
428   return true;
429 }
430 
crash_completed(borrowed_fd sockfd,std::unique_ptr<Crash> crash)431 static void crash_completed(borrowed_fd sockfd, std::unique_ptr<Crash> crash) {
432   TombstonedCrashPacket request = {};
433   CrashQueue* queue = CrashQueue::for_crash(crash);
434 
435   ssize_t rc = TEMP_FAILURE_RETRY(read(sockfd.get(), &request, sizeof(request)));
436   if (rc == -1) {
437     PLOG(WARNING) << "failed to read from crash socket";
438     return;
439   } else if (rc != sizeof(request)) {
440     LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
441                  << sizeof(request) << ")";
442     return;
443   }
444 
445   if (request.packet_type != CrashPacketType::kCompletedDump) {
446     LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
447                  << uint32_t(request.packet_type);
448     return;
449   }
450 
451   if (crash->output.text.fd == -1) {
452     LOG(WARNING) << "skipping tombstone file creation due to intercept";
453     return;
454   }
455 
456   CrashArtifactPaths paths = queue->get_next_artifact_paths();
457 
458   if (rename_tombstone_fd(crash->output.text.fd, queue->dir_fd(), paths.text)) {
459     if (crash->crash_type == kDebuggerdJavaBacktrace) {
460       LOG(ERROR) << "Traces for pid " << crash->crash_pid << " written to: " << paths.text;
461     } else {
462       // NOTE: Several tools parse this log message to figure out where the
463       // tombstone associated with a given native crash was written. Any changes
464       // to this message must be carefully considered.
465       LOG(ERROR) << "Tombstone written to: " << paths.text;
466     }
467   }
468 
469   if (crash->output.proto && crash->output.proto->fd != -1) {
470     if (!paths.proto) {
471       LOG(ERROR) << "missing path for proto tombstone";
472     } else {
473       rename_tombstone_fd(crash->output.proto->fd, queue->dir_fd(), *paths.proto);
474     }
475   }
476 
477   // If we don't have O_TMPFILE, we need to clean up after ourselves.
478   if (crash->output.text.temporary_path) {
479     rc = unlinkat(queue->dir_fd().get(), crash->output.text.temporary_path->c_str(), 0);
480     if (rc != 0) {
481       PLOG(ERROR) << "failed to unlink temporary tombstone at " << paths.text;
482     }
483   }
484   if (crash->output.proto && crash->output.proto->temporary_path) {
485     rc = unlinkat(queue->dir_fd().get(), crash->output.proto->temporary_path->c_str(), 0);
486     if (rc != 0) {
487       PLOG(ERROR) << "failed to unlink temporary proto tombstone";
488     }
489   }
490 }
491 
crash_completed_cb(evutil_socket_t sockfd,short ev,void * arg)492 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
493   std::unique_ptr<Crash> crash(static_cast<Crash*>(arg));
494   CrashQueue* queue = CrashQueue::for_crash(crash);
495 
496   queue->on_crash_completed();
497 
498   if ((ev & EV_READ) == EV_READ) {
499     crash_completed(sockfd, std::move(crash));
500   }
501 
502   // If there's something queued up, let them proceed.
503   queue->maybe_dequeue_crashes(perform_request);
504 }
505 
main(int,char * [])506 int main(int, char* []) {
507   umask(0117);
508 
509   // Don't try to connect to ourselves if we crash.
510   struct sigaction action = {};
511   action.sa_handler = [](int signal) {
512     LOG(ERROR) << "received fatal signal " << signal;
513     _exit(1);
514   };
515   debuggerd_register_handlers(&action);
516 
517   int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
518   int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
519 
520   if (intercept_socket == -1 || crash_socket == -1) {
521     PLOG(FATAL) << "failed to get socket from init";
522   }
523 
524   evutil_make_socket_nonblocking(intercept_socket);
525   evutil_make_socket_nonblocking(crash_socket);
526 
527   event_base* base = event_base_new();
528   if (!base) {
529     LOG(FATAL) << "failed to create event_base";
530   }
531 
532   intercept_manager = new InterceptManager(base, intercept_socket);
533 
534   evconnlistener* tombstone_listener =
535       evconnlistener_new(base, crash_accept_cb, CrashQueue::for_tombstones(), LEV_OPT_CLOSE_ON_FREE,
536                          -1 /* backlog */, crash_socket);
537   if (!tombstone_listener) {
538     LOG(FATAL) << "failed to create evconnlistener for tombstones.";
539   }
540 
541   if (kJavaTraceDumpsEnabled) {
542     const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
543     if (java_trace_socket == -1) {
544       PLOG(FATAL) << "failed to get socket from init";
545     }
546 
547     evutil_make_socket_nonblocking(java_trace_socket);
548     evconnlistener* java_trace_listener =
549         evconnlistener_new(base, crash_accept_cb, CrashQueue::for_anrs(), LEV_OPT_CLOSE_ON_FREE,
550                            -1 /* backlog */, java_trace_socket);
551     if (!java_trace_listener) {
552       LOG(FATAL) << "failed to create evconnlistener for java traces.";
553     }
554   }
555 
556   LOG(INFO) << "tombstoned successfully initialized";
557   event_base_dispatch(base);
558 }
559