/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <arpa/inet.h>
#include <cutils/sockets.h>
#include <errno.h>
#include <netinet/in.h>
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

#include <android-base/cmsg.h>
#include <android-base/logging.h>
#include <android-base/properties.h>
#include <android-base/scopeguard.h>
#include <android-base/strings.h>
#include <fs_mgr/file_wait.h>
#include <snapuserd/snapuserd_client.h>
#include "snapuserd_server.h"

#define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
#include <sys/_system_properties.h>

namespace android {
namespace snapshot {

using namespace std::string_literals;

using android::base::borrowed_fd;
using android::base::unique_fd;

DaemonOps UserSnapshotServer::Resolveop(std::string& input) {
    if (input == "init") return DaemonOps::INIT;
    if (input == "start") return DaemonOps::START;
    if (input == "stop") return DaemonOps::STOP;
    if (input == "query") return DaemonOps::QUERY;
    if (input == "delete") return DaemonOps::DELETE;
    if (input == "detach") return DaemonOps::DETACH;
    if (input == "supports") return DaemonOps::SUPPORTS;
    if (input == "initiate_merge") return DaemonOps::INITIATE;
    if (input == "merge_percent") return DaemonOps::PERCENTAGE;
    if (input == "getstatus") return DaemonOps::GETSTATUS;
    if (input == "update-verify") return DaemonOps::UPDATE_VERIFY;

    return DaemonOps::INVALID;
}

UserSnapshotServer::UserSnapshotServer() {
    monitor_merge_event_fd_.reset(eventfd(0, EFD_CLOEXEC));
    if (monitor_merge_event_fd_ == -1) {
        PLOG(FATAL) << "monitor_merge_event_fd_: failed to create eventfd";
    }
    terminating_ = false;
}

UserSnapshotServer::~UserSnapshotServer() {
    // Close any client sockets that were added via AcceptClient().
    for (size_t i = 1; i < watched_fds_.size(); i++) {
        close(watched_fds_[i].fd);
    }
}

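// Returns "active" for a running daemon and "passive" once it is terminating.
// The "query" op uses this so clients can distinguish the first-stage daemon
// from its second-stage replacement during the handoff.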
std::string UserSnapshotServer::GetDaemonStatus() {
    std::string msg = "";

    if (IsTerminating())
        msg = "passive";
    else
        msg = "active";

    return msg;
}

void UserSnapshotServer::Parsemsg(std::string const& msg, const char delim,
                                  std::vector<std::string>& out) {
    std::stringstream ss(msg);
    std::string s;

    while (std::getline(ss, s, delim)) {
        out.push_back(s);
    }
}

void UserSnapshotServer::ShutdownThreads() {
    terminating_ = true;
    JoinAllThreads();
}

HandlerThread::HandlerThread(std::shared_ptr<SnapshotHandler> snapuserd)
    : snapuserd_(snapuserd), misc_name_(snapuserd_->GetMiscName()) {}

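// Sends a reply to the client. Partial sends are treated as failures.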
bool UserSnapshotServer::Sendmsg(android::base::borrowed_fd fd, const std::string& msg) {
    ssize_t ret = TEMP_FAILURE_RETRY(send(fd.get(), msg.data(), msg.size(), MSG_NOSIGNAL));
    if (ret < 0) {
        PLOG(ERROR) << "Snapuserd:server: send() failed";
        return false;
    }

    if (ret < msg.size()) {
        LOG(ERROR) << "Partial send; expected " << msg.size() << " bytes, sent " << ret;
        return false;
    }
    return true;
}

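// Receives a single client message of at most kMaxPacketSize bytes into *data.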
bool UserSnapshotServer::Recv(android::base::borrowed_fd fd, std::string* data) {
    char msg[kMaxPacketSize];
    ssize_t rv = TEMP_FAILURE_RETRY(recv(fd.get(), msg, sizeof(msg), 0));
    if (rv < 0) {
        PLOG(ERROR) << "recv failed";
        return false;
    }
    *data = std::string(msg, rv);
    return true;
}

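// Parses and dispatches one control message from a client. Messages are
// comma-separated with the operation name first (see Resolveop()); most ops
// reply with "success" or "fail", while merge_percent and getstatus reply
// with their respective values.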
bool UserSnapshotServer::Receivemsg(android::base::borrowed_fd fd, const std::string& str) {
    const char delim = ',';

    std::vector<std::string> out;
    Parsemsg(str, delim, out);
    if (out.empty()) {
        LOG(ERROR) << "Received empty message from client";
        return Sendmsg(fd, "fail");
    }
    DaemonOps op = Resolveop(out[0]);

    switch (op) {
        case DaemonOps::INIT: {
            // Message format:
            // init,<misc_name>,<cow_device_path>,<backing_device>,<base_path_merge>
            //
            // Reads the metadata and sends back the number of sectors.
            if (out.size() != 5) {
                LOG(ERROR) << "Malformed init message, " << out.size() << " parts";
                return Sendmsg(fd, "fail");
            }

            auto handler = AddHandler(out[1], out[2], out[3], out[4]);
            if (!handler) {
                return Sendmsg(fd, "fail");
            }

            auto retval = "success," + std::to_string(handler->snapuserd()->GetNumSectors());
            return Sendmsg(fd, retval);
        }
        case DaemonOps::START: {
            // Message format:
            // start,<misc_name>
            //
            // Starts a new thread which binds to the dm-user misc device.
            if (out.size() != 2) {
                LOG(ERROR) << "Malformed start message, " << out.size() << " parts";
                return Sendmsg(fd, "fail");
            }

            std::lock_guard<std::mutex> lock(lock_);
            auto iter = FindHandler(&lock, out[1]);
            if (iter == dm_users_.end()) {
                LOG(ERROR) << "Could not find handler: " << out[1];
                return Sendmsg(fd, "fail");
            }
            if (!(*iter)->snapuserd() || (*iter)->snapuserd()->IsAttached()) {
                LOG(ERROR) << "Tried to re-attach control device: " << out[1];
                return Sendmsg(fd, "fail");
            }
            if (!StartHandler(*iter)) {
                return Sendmsg(fd, "fail");
            }
            return Sendmsg(fd, "success");
        }
        case DaemonOps::STOP: {
            // Message format: stop
            //
            // Stops all the threads gracefully and then shuts down the
            // main thread.
            SetTerminating();
            ShutdownThreads();
            return true;
        }
        case DaemonOps::QUERY: {
            // Message format: query
            //
            // As part of the transition, the second-stage daemon is created
            // before the first-stage daemon is terminated. Hence, for a brief
            // period the client may have to distinguish between the
            // first-stage and second-stage daemons.
            //
            // The second-stage daemon is marked as active and hence is ready
            // to receive control messages.
            return Sendmsg(fd, GetDaemonStatus());
        }
        case DaemonOps::DELETE: {
            // Message format:
            // delete,<misc_name>
            if (out.size() != 2) {
                LOG(ERROR) << "Malformed delete message, " << out.size() << " parts";
                return Sendmsg(fd, "fail");
            }
            {
                std::lock_guard<std::mutex> lock(lock_);
                auto iter = FindHandler(&lock, out[1]);
                if (iter == dm_users_.end()) {
                    // After the merge is completed, we swap the dm-user table
                    // with the underlying dm-linear base device. Hence, the
                    // worker threads would have terminated and the handler
                    // would already have been removed from the list.
                    LOG(DEBUG) << "Could not find handler: " << out[1];
                    return Sendmsg(fd, "success");
                }

                if (!(*iter)->ThreadTerminated()) {
                    (*iter)->snapuserd()->NotifyIOTerminated();
                }
            }
            if (!RemoveAndJoinHandler(out[1])) {
                return Sendmsg(fd, "fail");
            }
            return Sendmsg(fd, "success");
        }
        case DaemonOps::DETACH: {
            std::lock_guard<std::mutex> lock(lock_);
            TerminateMergeThreads(&lock);
            terminating_ = true;
            return true;
        }
        case DaemonOps::SUPPORTS: {
            if (out.size() != 2) {
                LOG(ERROR) << "Malformed supports message, " << out.size() << " parts";
                return Sendmsg(fd, "fail");
            }
            if (out[1] == "second_stage_socket_handoff") {
                return Sendmsg(fd, "success");
            }
            return Sendmsg(fd, "fail");
        }
        case DaemonOps::INITIATE: {
            if (out.size() != 2) {
                LOG(ERROR) << "Malformed initiate-merge message, " << out.size() << " parts";
                return Sendmsg(fd, "fail");
            }
            if (out[0] == "initiate_merge") {
                std::lock_guard<std::mutex> lock(lock_);
                auto iter = FindHandler(&lock, out[1]);
                if (iter == dm_users_.end()) {
                    LOG(ERROR) << "Could not find handler: " << out[1];
                    return Sendmsg(fd, "fail");
                }

                if (!StartMerge(&lock, *iter)) {
                    return Sendmsg(fd, "fail");
                }

                return Sendmsg(fd, "success");
            }
            return Sendmsg(fd, "fail");
        }
        case DaemonOps::PERCENTAGE: {
            std::lock_guard<std::mutex> lock(lock_);
            double percentage = GetMergePercentage(&lock);

            return Sendmsg(fd, std::to_string(percentage));
        }
        case DaemonOps::GETSTATUS: {
            // Message format:
            // getstatus,<misc_name>
            if (out.size() != 2) {
                LOG(ERROR) << "Malformed getstatus message, " << out.size() << " parts";
                return Sendmsg(fd, "snapshot-merge-failed");
            }
            {
                std::lock_guard<std::mutex> lock(lock_);
                auto iter = FindHandler(&lock, out[1]);
                if (iter == dm_users_.end()) {
                    LOG(ERROR) << "Could not find handler: " << out[1];
                    return Sendmsg(fd, "snapshot-merge-failed");
                }

                std::string merge_status = GetMergeStatus(*iter);
                return Sendmsg(fd, merge_status);
            }
        }
        case DaemonOps::UPDATE_VERIFY: {
            std::lock_guard<std::mutex> lock(lock_);
            if (!UpdateVerification(&lock)) {
                return Sendmsg(fd, "fail");
            }

            return Sendmsg(fd, "success");
        }
        default: {
            LOG(ERROR) << "Received unknown message type from client";
            Sendmsg(fd, "fail");
            return false;
        }
    }
}

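// Entry point for a handler thread: starts the snapshot handler, waits for it
// to finish, records whether the merge completed, and then removes the handler
// from dm_users_ unless RemoveAndJoinHandler() has already done so.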
void UserSnapshotServer::RunThread(std::shared_ptr<HandlerThread> handler) {
    LOG(INFO) << "Entering thread for handler: " << handler->misc_name();

    if (!handler->snapuserd()->Start()) {
        LOG(ERROR) << "Failed to launch all worker threads";
    }

    handler->snapuserd()->CloseFds();
    bool merge_completed = handler->snapuserd()->CheckMergeCompletionStatus();
    handler->snapuserd()->UnmapBufferRegion();

    auto misc_name = handler->misc_name();
    LOG(INFO) << "Handler thread about to exit: " << misc_name;

    {
        std::lock_guard<std::mutex> lock(lock_);
        if (merge_completed) {
            num_partitions_merge_complete_ += 1;
            active_merge_threads_ -= 1;
            WakeupMonitorMergeThread();
        }
        handler->SetThreadTerminated();
        auto iter = FindHandler(&lock, handler->misc_name());
        if (iter == dm_users_.end()) {
            // RemoveAndJoinHandler() already removed us from the list, and is
            // now waiting on a join(), so just return. Additionally, release
            // all the resources held by the snapuserd object, which are shared
            // by the worker threads. This would normally happen when the last
            // reference to "handler" is released, but we release explicitly
            // here to make sure the snapuserd object is freed, as it is the
            // biggest consumer of memory in the daemon.
            handler->FreeResources();
            LOG(INFO) << "Exiting handler thread to allow for join: " << misc_name;
            return;
        }

        LOG(INFO) << "Exiting handler thread and freeing resources: " << misc_name;

        if (handler->snapuserd()->IsAttached()) {
            handler->thread().detach();
        }

        // Important: free resources within the lock. This ensures that if
        // WaitForDelete() is called, the handler is either in the list, or
        // it's not and its resources are guaranteed to be freed.
        handler->FreeResources();
        dm_users_.erase(iter);
    }
}

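// Acquires the server socket, preferring the socket created by init via
// android_get_control_socket(). Otherwise it falls back to creating the
// socket directly with socket_local_server(), which already listens, so
// start_listening is cleared and StartWithSocket() skips listen() in that case.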
bool UserSnapshotServer::Start(const std::string& socketname) {
    bool start_listening = true;

    sockfd_.reset(android_get_control_socket(socketname.c_str()));
    if (sockfd_ < 0) {
        sockfd_.reset(socket_local_server(socketname.c_str(), ANDROID_SOCKET_NAMESPACE_RESERVED,
                                          SOCK_STREAM));
        if (sockfd_ < 0) {
            PLOG(ERROR) << "Failed to create server socket " << socketname;
            return false;
        }
        start_listening = false;
    }
    return StartWithSocket(start_listening);
}

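// Starts accepting connections on sockfd_ (calling listen() only when
// requested) and, if the property service is up, advertises readiness via the
// snapuserd.ready property.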
bool UserSnapshotServer::StartWithSocket(bool start_listening) {
    if (start_listening && listen(sockfd_.get(), 4) < 0) {
        PLOG(ERROR) << "listen socket failed";
        return false;
    }

    AddWatchedFd(sockfd_, POLLIN);
    is_socket_present_ = true;

    // If started in first-stage init, the property service won't be online.
    if (access("/dev/socket/property_service", F_OK) == 0) {
        if (!android::base::SetProperty("snapuserd.ready", "true")) {
            LOG(ERROR) << "Failed to set snapuserd.ready property";
            return false;
        }
    }

    LOG(DEBUG) << "Snapuserd server now accepting connections";
    return true;
}

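// Main server loop: polls the listening socket and all connected clients,
// accepting new connections and dispatching client messages until the daemon
// is terminating.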
bool UserSnapshotServer::Run() {
    LOG(INFO) << "Now listening on snapuserd socket";

    while (!IsTerminating()) {
        int rv = TEMP_FAILURE_RETRY(poll(watched_fds_.data(), watched_fds_.size(), -1));
        if (rv < 0) {
            PLOG(ERROR) << "poll failed";
            return false;
        }
        if (!rv) {
            continue;
        }

        if (watched_fds_[0].revents) {
            AcceptClient();
        }

        auto iter = watched_fds_.begin() + 1;
        while (iter != watched_fds_.end()) {
            if (iter->revents && !HandleClient(iter->fd, iter->revents)) {
                close(iter->fd);
                iter = watched_fds_.erase(iter);
            } else {
                iter++;
            }
        }
    }

    JoinAllThreads();
    return true;
}

void UserSnapshotServer::JoinAllThreads() {
    // Acquire the thread list within the lock.
    std::vector<std::shared_ptr<HandlerThread>> dm_users;
    {
        std::lock_guard<std::mutex> guard(lock_);
        dm_users = std::move(dm_users_);
    }

    for (auto& client : dm_users) {
        auto& th = client->thread();

        if (th.joinable()) th.join();
    }

    stop_monitor_merge_thread_ = true;
    WakeupMonitorMergeThread();
}

void UserSnapshotServer::AddWatchedFd(android::base::borrowed_fd fd, int events) {
    struct pollfd p = {};
    p.fd = fd.get();
    p.events = events;
    watched_fds_.emplace_back(std::move(p));
}

void UserSnapshotServer::AcceptClient() {
    int fd = TEMP_FAILURE_RETRY(accept4(sockfd_.get(), nullptr, nullptr, SOCK_CLOEXEC));
    if (fd < 0) {
        PLOG(ERROR) << "accept4 failed";
        return;
    }

    AddWatchedFd(fd, POLLIN);
}

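// Handles one poll() event for a connected client. Returning false makes the
// caller close and drop the connection; an empty read with POLLHUP set is a
// normal client disconnect.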
bool UserSnapshotServer::HandleClient(android::base::borrowed_fd fd, int revents) {
    std::string str;
    if (!Recv(fd, &str)) {
        return false;
    }
    if (str.empty() && (revents & POLLHUP)) {
        LOG(DEBUG) << "Snapuserd client disconnected";
        return false;
    }
    if (!Receivemsg(fd, str)) {
        LOG(ERROR) << "Encountered error handling client message, revents: " << revents;
        return false;
    }
    return true;
}

void UserSnapshotServer::Interrupt() {
    // Force close the socket so poll() fails.
    sockfd_ = {};
    SetTerminating();
}

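// Creates and registers a SnapshotHandler for the given snapshot. The handler
// is added to dm_users_ but is not attached to dm-user until StartHandler().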
std::shared_ptr<HandlerThread> UserSnapshotServer::AddHandler(const std::string& misc_name,
                                                              const std::string& cow_device_path,
                                                              const std::string& backing_device,
                                                              const std::string& base_path_merge) {
    // We need multiple worker threads only during device boot after an OTA.
    // For all other purposes, one thread is sufficient. We don't want to
    // consume unnecessary memory, especially during the OTA install phase,
    // when the daemon stays up for the entire post-install phase.
    //
    // During boot up, we need multiple threads primarily for
    // update-verification.
    int num_worker_threads = kNumWorkerThreads;
    if (is_socket_present_) {
        num_worker_threads = 1;
    }

    bool perform_verification = true;
    if (android::base::EndsWith(misc_name, "-init") || is_socket_present_) {
        perform_verification = false;
    }

    auto snapuserd = std::make_shared<SnapshotHandler>(misc_name, cow_device_path, backing_device,
                                                       base_path_merge, num_worker_threads,
                                                       io_uring_enabled_, perform_verification);
    if (!snapuserd->InitCowDevice()) {
        LOG(ERROR) << "Failed to initialize Snapuserd";
        return nullptr;
    }

    if (!snapuserd->InitializeWorkers()) {
        LOG(ERROR) << "Failed to initialize workers";
        return nullptr;
    }

    auto handler = std::make_shared<HandlerThread>(snapuserd);
    {
        std::lock_guard<std::mutex> lock(lock_);
        if (FindHandler(&lock, misc_name) != dm_users_.end()) {
            LOG(ERROR) << "Handler already exists: " << misc_name;
            return nullptr;
        }
        dm_users_.push_back(handler);
    }
    return handler;
}

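// Attaches the handler's dm-user control device and launches RunThread() on a
// new thread.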
bool UserSnapshotServer::StartHandler(const std::shared_ptr<HandlerThread>& handler) {
    if (handler->snapuserd()->IsAttached()) {
        LOG(ERROR) << "Handler already attached";
        return false;
    }

    handler->snapuserd()->AttachControlDevice();

    handler->thread() = std::thread(std::bind(&UserSnapshotServer::RunThread, this, handler));
    return true;
}

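// Queues the handler for merging and wakes MonitorMerge(), spawning the
// monitor thread on first use. Requires the handler to be attached to dm-user.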
bool UserSnapshotServer::StartMerge(std::lock_guard<std::mutex>* proof_of_lock,
                                    const std::shared_ptr<HandlerThread>& handler) {
    CHECK(proof_of_lock);

    if (!handler->snapuserd()->IsAttached()) {
        LOG(ERROR) << "Handler not attached to dm-user - Merge thread cannot be started";
        return false;
    }

    handler->snapuserd()->MonitorMerge();

    if (!is_merge_monitor_started_.has_value()) {
        std::thread(&UserSnapshotServer::MonitorMerge, this).detach();
        is_merge_monitor_started_ = true;
    }

    merge_handlers_.push(handler);
    WakeupMonitorMergeThread();
    return true;
}

auto UserSnapshotServer::FindHandler(std::lock_guard<std::mutex>* proof_of_lock,
                                     const std::string& misc_name) -> HandlerList::iterator {
    CHECK(proof_of_lock);

    for (auto iter = dm_users_.begin(); iter != dm_users_.end(); iter++) {
        if ((*iter)->misc_name() == misc_name) {
            return iter;
        }
    }
    return dm_users_.end();
}

void UserSnapshotServer::TerminateMergeThreads(std::lock_guard<std::mutex>* proof_of_lock) {
    CHECK(proof_of_lock);

    for (auto iter = dm_users_.begin(); iter != dm_users_.end(); iter++) {
        if (!(*iter)->ThreadTerminated()) {
            (*iter)->snapuserd()->NotifyIOTerminated();
        }
    }
}

std::string UserSnapshotServer::GetMergeStatus(const std::shared_ptr<HandlerThread>& handler) {
    return handler->snapuserd()->GetMergeStatus();
}

double UserSnapshotServer::GetMergePercentage(std::lock_guard<std::mutex>* proof_of_lock) {
    CHECK(proof_of_lock);
    double percentage = 0.0;
    int n = 0;

    for (auto iter = dm_users_.begin(); iter != dm_users_.end(); iter++) {
        auto& th = (*iter)->thread();
        if (th.joinable()) {
            // Merge percentage of the individual partitions whose merge is
            // still in progress.
            percentage += (*iter)->snapuserd()->GetMergePercentage();
            n += 1;
        }
    }

    // Calculate the final merge percentage, including the partitions whose
    // merge has already completed - num_partitions_merge_complete_ tracks
    // them as each thread exits RunThread().
    int total_partitions = n + num_partitions_merge_complete_;

    if (total_partitions) {
        percentage = ((num_partitions_merge_complete_ * 100.0) + percentage) / total_partitions;
    }

    LOG(DEBUG) << "Merge %: " << percentage
               << " num_partitions_merge_complete_: " << num_partitions_merge_complete_
               << " total_partitions: " << total_partitions << " n: " << n;
    return percentage;
}

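// Removes the named handler from dm_users_, if present, and joins its thread.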
bool UserSnapshotServer::RemoveAndJoinHandler(const std::string& misc_name) {
    std::shared_ptr<HandlerThread> handler;
    {
        std::lock_guard<std::mutex> lock(lock_);

        auto iter = FindHandler(&lock, misc_name);
        if (iter == dm_users_.end()) {
            // Client already deleted.
            return true;
        }
        handler = std::move(*iter);
        dm_users_.erase(iter);
    }

    auto& th = handler->thread();
    if (th.joinable()) {
        th.join();
    }
    return true;
}

void UserSnapshotServer::WakeupMonitorMergeThread() {
    uint64_t notify = 1;
    ssize_t rc = TEMP_FAILURE_RETRY(write(monitor_merge_event_fd_.get(), &notify, sizeof(notify)));
    if (rc < 0) {
        PLOG(FATAL) << "failed to notify monitor merge thread";
    }
}

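// Merge monitor thread: blocks on monitor_merge_event_fd_ and initiates
// queued merges, keeping at most kMaxMergeThreads merges in flight.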
void UserSnapshotServer::MonitorMerge() {
    while (!stop_monitor_merge_thread_) {
        uint64_t testVal;
        ssize_t ret =
                TEMP_FAILURE_RETRY(read(monitor_merge_event_fd_.get(), &testVal, sizeof(testVal)));
        if (ret == -1) {
            PLOG(FATAL) << "Failed to read from eventfd";
        } else if (ret == 0) {
            LOG(FATAL) << "Hit EOF on eventfd";
        }

        LOG(INFO) << "MonitorMerge: active-merge-threads: " << active_merge_threads_;
        {
            std::lock_guard<std::mutex> lock(lock_);
            while (active_merge_threads_ < kMaxMergeThreads && merge_handlers_.size() > 0) {
                auto handler = merge_handlers_.front();
                merge_handlers_.pop();

                if (!handler->snapuserd()) {
                    LOG(INFO) << "MonitorMerge: skipping deleted handler: " << handler->misc_name();
                    continue;
                }

                LOG(INFO) << "Starting merge for partition: "
                          << handler->snapuserd()->GetMiscName();
                handler->snapuserd()->InitiateMerge();
                active_merge_threads_ += 1;
            }
        }
    }

    LOG(INFO) << "Exiting MonitorMerge: size: " << merge_handlers_.size();
}

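// Second-stage side of the socket handoff: waits for the proxy daemon to come
// up, receives the server socket from it, sends an ACK, and then runs the
// normal server loop on the received socket.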
bool UserSnapshotServer::WaitForSocket() {
    auto scope_guard = android::base::make_scope_guard([this]() -> void { JoinAllThreads(); });

    auto socket_path = ANDROID_SOCKET_DIR "/"s + kSnapuserdSocketProxy;

    if (!android::fs_mgr::WaitForFile(socket_path, std::chrono::milliseconds::max())) {
        LOG(ERROR)
                << "Failed to wait for proxy socket, second-stage snapuserd will fail to connect";
        return false;
    }

    // This initialization of the system property area is important. When the
    // daemon is launched after the selinux transition (before second-stage
    // init), bionic libc initializes system properties as part of
    // __libc_init_common(); however, that initialization fails silently
    // because /dev/__properties__, which is created by second-stage init, is
    // not set up yet.
    //
    // At this point the handlers are set up and it is safe to initialize the
    // property area.
    __system_properties_init();

    if (!android::base::WaitForProperty("snapuserd.proxy_ready", "true")) {
        LOG(ERROR)
                << "Failed to wait for proxy property, second-stage snapuserd will fail to connect";
        return false;
    }

    unique_fd fd(socket_local_client(kSnapuserdSocketProxy, ANDROID_SOCKET_NAMESPACE_RESERVED,
                                     SOCK_SEQPACKET));
    if (fd < 0) {
        PLOG(ERROR) << "Failed to connect to socket proxy";
        return false;
    }

    char code[1];
    std::vector<unique_fd> fds;
    ssize_t rv = android::base::ReceiveFileDescriptorVector(fd, code, sizeof(code), 1, &fds);
    if (rv < 0) {
        PLOG(ERROR) << "Failed to receive server socket over proxy";
        return false;
    }
    if (fds.empty()) {
        LOG(ERROR) << "Expected at least one file descriptor from proxy";
        return false;
    }

    // We don't care whether the proxy actually receives the ACK.
    code[0] = 'a';
    if (TEMP_FAILURE_RETRY(send(fd, code, sizeof(code), MSG_NOSIGNAL)) < 0) {
        PLOG(ERROR) << "Failed to send ACK to proxy";
        return false;
    }

    sockfd_ = std::move(fds[0]);
    if (!StartWithSocket(true)) {
        return false;
    }

    return Run();
}

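// Proxy side of the socket handoff: hands the snapuserd server socket to the
// second-stage daemon and waits for its ACK so the proxy socket is not closed
// too early.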
bool UserSnapshotServer::RunForSocketHandoff() {
    unique_fd proxy_fd(android_get_control_socket(kSnapuserdSocketProxy));
    if (proxy_fd < 0) {
        PLOG(FATAL) << "Proxy could not get android control socket " << kSnapuserdSocketProxy;
    }
    borrowed_fd server_fd(android_get_control_socket(kSnapuserdSocket));
    if (server_fd < 0) {
        PLOG(FATAL) << "Proxy could not get android control socket " << kSnapuserdSocket;
    }

    if (listen(proxy_fd.get(), 4) < 0) {
        PLOG(FATAL) << "Proxy listen socket failed";
    }

    if (!android::base::SetProperty("snapuserd.proxy_ready", "true")) {
        LOG(FATAL) << "Proxy failed to set ready property";
    }

    unique_fd client_fd(
            TEMP_FAILURE_RETRY(accept4(proxy_fd.get(), nullptr, nullptr, SOCK_CLOEXEC)));
    if (client_fd < 0) {
        PLOG(FATAL) << "Proxy accept failed";
    }

    char code[1] = {'a'};
    std::vector<int> fds = {server_fd.get()};
    ssize_t rv = android::base::SendFileDescriptorVector(client_fd, code, sizeof(code), fds);
    if (rv < 0) {
        PLOG(FATAL) << "Proxy could not send file descriptor to snapuserd";
    }
    // Wait for an ACK - results don't matter, we just don't want to risk closing
    // the proxy socket too early.
    if (recv(client_fd, code, sizeof(code), 0) < 0) {
        PLOG(FATAL) << "Proxy could not receive terminating code from snapuserd";
    }
    return true;
}

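// Runs CheckPartitionVerification() on every handler whose thread is still
// running; bails out with false if a handler thread has already exited or a
// previous check failed.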
bool UserSnapshotServer::UpdateVerification(std::lock_guard<std::mutex>* proof_of_lock) {
    CHECK(proof_of_lock);

    bool status = true;
    for (auto iter = dm_users_.begin(); iter != dm_users_.end(); iter++) {
        auto& th = (*iter)->thread();
        if (th.joinable() && status) {
            status = (*iter)->snapuserd()->CheckPartitionVerification() && status;
        } else {
            // Return immediately if there is a failure.
            return false;
        }
    }

    return status;
}

}  // namespace snapshot
}  // namespace android