/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "snapuserd_core.h"

namespace android {
namespace snapshot {

using namespace android;
using namespace android::dm;
using android::base::unique_fd;

Worker::Worker(const std::string& cow_device, const std::string& backing_device,
               const std::string& control_device, const std::string& misc_name,
               const std::string& base_path_merge, std::shared_ptr<SnapshotHandler> snapuserd) {
    cow_device_ = cow_device;
    backing_store_device_ = backing_device;
    control_device_ = control_device;
    misc_name_ = misc_name;
    base_path_merge_ = base_path_merge;
    snapuserd_ = snapuserd;
}

bool Worker::InitializeFds() {
    backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY));
    if (backing_store_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << backing_store_device_;
        return false;
    }

    cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
    if (cow_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_;
        return false;
    }

    ctrl_fd_.reset(open(control_device_.c_str(), O_RDWR));
    if (ctrl_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Unable to open " << control_device_;
        return false;
    }

    // Base device used by the merge thread
    base_path_merge_fd_.reset(open(base_path_merge_.c_str(), O_RDWR));
    if (base_path_merge_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << base_path_merge_;
        return false;
    }

    return true;
}

bool Worker::InitReader() {
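    // Each worker operates on its own clone of the COW reader, so COW
    // parsing state is not shared across worker threads.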
    reader_ = snapuserd_->CloneReaderForWorker();

    if (!reader_->InitForMerge(std::move(cow_fd_))) {
        return false;
    }
    return true;
}

// Start the replace operation. This reads the block data from the
// internal COW format; if the block is compressed, it is decompressed
// into the buffer sink.
bool Worker::ProcessReplaceOp(const CowOperation* cow_op) {
    if (!reader_->ReadData(*cow_op, &bufsink_)) {
        SNAP_LOG(ERROR) << "ProcessReplaceOp failed for block " << cow_op->new_block;
        return false;
    }

    return true;
}

bool Worker::ReadFromSourceDevice(const CowOperation* cow_op) {
    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ReadFromSourceDevice: Failed to get payload buffer";
        return false;
    }
    SNAP_LOG(DEBUG) << "ReadFromSourceDevice: new-block: " << cow_op->new_block
                    << " Source: " << cow_op->source;
    uint64_t offset = cow_op->source;
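    // For copy ops, |source| holds a block number and must be scaled to a
    // byte offset; for XOR ops it is already a byte offset into the
    // backing device.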
    if (cow_op->type == kCowCopyOp) {
        offset *= BLOCK_SZ;
    }
    if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SZ, offset)) {
        std::string op;
        if (cow_op->type == kCowCopyOp) {
            op = "Copy-op";
        } else {
            op = "Xor-op";
        }
        SNAP_PLOG(ERROR) << op << " failed. Read from backing store: " << backing_store_device_
                         << " at block: " << offset / BLOCK_SZ << " offset: " << offset % BLOCK_SZ;
        return false;
    }

    return true;
}

// Start the copy operation. This will read the backing
// block device which is represented by cow_op->source.
bool Worker::ProcessCopyOp(const CowOperation* cow_op) {
    if (!ReadFromSourceDevice(cow_op)) {
        return false;
    }

    return true;
}

bool Worker::ProcessXorOp(const CowOperation* cow_op) {
    if (!ReadFromSourceDevice(cow_op)) {
        return false;
    }
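    // The base data is now in the payload buffer. Reading the COW data
    // through the XOR sink XORs it with that base data in place,
    // reconstructing the snapshot block.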
    xorsink_.Reset();
    if (!reader_->ReadData(*cow_op, &xorsink_)) {
        SNAP_LOG(ERROR) << "ProcessXorOp failed for block " << cow_op->new_block;
        return false;
    }

    return true;
}

bool Worker::ProcessZeroOp() {
    // Zero out the entire block
    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ProcessZeroOp: Failed to get payload buffer";
        return false;
    }

    memset(buffer, 0, BLOCK_SZ);
    return true;
}

bool Worker::ProcessOrderedOp(const CowOperation* cow_op) {
    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ProcessOrderedOp: Failed to get payload buffer";
        return false;
    }

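    // Query the merge state of the group this block belongs to. If the
    // read-ahead thread has already fetched the data, ProcessMergingBlock
    // copies it into |buffer| and returns GROUP_MERGE_RA_READY.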
    MERGE_GROUP_STATE state = snapuserd_->ProcessMergingBlock(cow_op->new_block, buffer);

    switch (state) {
        case MERGE_GROUP_STATE::GROUP_MERGE_COMPLETED: {
            // Merge is completed for this COW op; just read directly from
            // the base device
            SNAP_LOG(DEBUG) << "Merge-completed: Reading from base device sector: "
                            << (cow_op->new_block >> SECTOR_SHIFT)
                            << " Block-number: " << cow_op->new_block;
            if (!ReadDataFromBaseDevice(ChunkToSector(cow_op->new_block), BLOCK_SZ)) {
                SNAP_LOG(ERROR) << "ReadDataFromBaseDevice failed at sector: "
                                << (cow_op->new_block >> SECTOR_SHIFT) << " after merge-complete.";
                return false;
            }
            return true;
        }
        case MERGE_GROUP_STATE::GROUP_MERGE_PENDING: {
            bool ret;
            if (cow_op->type == kCowCopyOp) {
                ret = ProcessCopyOp(cow_op);
            } else {
                ret = ProcessXorOp(cow_op);
            }

            // I/O is complete - decrement the refcount irrespective of the
            // return status
            snapuserd_->NotifyIOCompletion(cow_op->new_block);
            return ret;
        }
        // We already have the data in the buffer retrieved from the RA
        // thread. Nothing further to process.
        case MERGE_GROUP_STATE::GROUP_MERGE_RA_READY: {
            [[fallthrough]];
        }
        case MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS: {
            return true;
        }
        default: {
            // All other states (GROUP_MERGE_FAILED and GROUP_INVALID) fail
            // the I/O.
            return false;
        }
    }

    return false;
}

bool Worker::ProcessCowOp(const CowOperation* cow_op) {
    if (cow_op == nullptr) {
        SNAP_LOG(ERROR) << "ProcessCowOp: Invalid cow_op";
        return false;
    }

    switch (cow_op->type) {
        case kCowReplaceOp: {
            return ProcessReplaceOp(cow_op);
        }

        case kCowZeroOp: {
            return ProcessZeroOp();
        }

        case kCowCopyOp:
            [[fallthrough]];
        case kCowXorOp: {
            return ProcessOrderedOp(cow_op);
        }

        default: {
            SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
        }
    }
    return false;
}

void Worker::InitializeBufsink() {
    // Allocate the buffer used to communicate between the daemon and
    // dm-user. The buffer comprises a header and a fixed-size payload.
    // If dm-user requests a bigger I/O, the I/O is broken into chunks
    // of PAYLOAD_BUFFER_SZ.
    size_t buf_size = sizeof(struct dm_user_header) + PAYLOAD_BUFFER_SZ;
    bufsink_.Initialize(buf_size);
}

bool Worker::Init() {
    InitializeBufsink();
    xorsink_.Initialize(&bufsink_, BLOCK_SZ);

    if (!InitializeFds()) {
        return false;
    }

    if (!InitReader()) {
        return false;
    }

    return true;
}

bool Worker::RunThread() {
    SNAP_LOG(INFO) << "Processing snapshot I/O requests....";

    if (setpriority(PRIO_PROCESS, gettid(), kNiceValueForMergeThreads)) {
        SNAP_PLOG(ERROR) << "Failed to set priority for TID: " << gettid();
    }

    // Start serving I/O
    while (true) {
        if (!ProcessIORequest()) {
            break;
        }
    }

    CloseFds();
    reader_->CloseCowFd();

    return true;
}

// Read the header from the dm-user misc device. This gives us the
// sector number for which the I/O is issued by the dm-snapshot device.
bool Worker::ReadDmUserHeader() {
    if (!android::base::ReadFully(ctrl_fd_, bufsink_.GetBufPtr(), sizeof(struct dm_user_header))) {
        if (errno != ENOTBLK) {
            SNAP_PLOG(ERROR) << "Control-read failed";
        }

        SNAP_PLOG(DEBUG) << "ReadDmUserHeader failed....";
        return false;
    }

    return true;
}

// Send the payload/data back to the dm-user misc device.
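// The first response for a request must carry the dm_user_header
// (header_response == true); subsequent chunks of the same request are
// sent as payload only.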
bool Worker::WriteDmUserPayload(size_t size, bool header_response) {
    size_t payload_size = size;
    void* buf = bufsink_.GetPayloadBufPtr();
    if (header_response) {
        payload_size += sizeof(struct dm_user_header);
        buf = bufsink_.GetBufPtr();
    }

    if (!android::base::WriteFully(ctrl_fd_, buf, payload_size)) {
        SNAP_PLOG(ERROR) << "Write to dm-user failed size: " << payload_size;
        return false;
    }

    return true;
}

bool Worker::ReadDataFromBaseDevice(sector_t sector, size_t read_size) {
    CHECK(read_size <= BLOCK_SZ);

    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ReadDataFromBaseDevice: Failed to get payload buffer";
        return false;
    }

    loff_t offset = sector << SECTOR_SHIFT;
    if (!android::base::ReadFullyAtOffset(base_path_merge_fd_, buffer, read_size, offset)) {
        SNAP_PLOG(ERROR) << "ReadDataFromBaseDevice failed. fd: " << base_path_merge_fd_
                         << " at sector: " << sector << " size: " << read_size;
        return false;
    }

    return true;
}

bool Worker::ReadAlignedSector(sector_t sector, size_t sz, bool header_response) {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();
    size_t remaining_size = sz;
    std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();
    bool io_error = false;
    int ret = 0;

    do {
        // Process 1MB payload at a time
        size_t read_size = std::min(PAYLOAD_BUFFER_SZ, remaining_size);

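        // Assume success up front; any failure below flips this to
        // DM_USER_RESP_ERROR before the response is sent.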
        header->type = DM_USER_RESP_SUCCESS;
        size_t total_bytes_read = 0;
        io_error = false;
        bufsink_.ResetBufferOffset();

        while (read_size) {
            // We need to check every 4k block to verify if it is
            // present in the mapping.
            size_t size = std::min(BLOCK_SZ, read_size);

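            // chunk_vec is sorted by sector, so lower_bound finds the
            // first mapping at or beyond the requested sector.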
            auto it = std::lower_bound(chunk_vec.begin(), chunk_vec.end(),
                                       std::make_pair(sector, nullptr), SnapshotHandler::compare);
            bool not_found = (it == chunk_vec.end() || it->first != sector);

            if (not_found) {
                // Block not found in the map - which means this block was
                // not changed by the OTA. Just route the I/O to the base
                // device.
                if (!ReadDataFromBaseDevice(sector, size)) {
                    SNAP_LOG(ERROR) << "ReadDataFromBaseDevice failed";
                    header->type = DM_USER_RESP_ERROR;
                }

                ret = size;
            } else {
                // We found the sector in the mapping. Check the type of the
                // COW op and process it.
                if (!ProcessCowOp(it->second)) {
                    SNAP_LOG(ERROR) << "ProcessCowOp failed";
                    header->type = DM_USER_RESP_ERROR;
                }

                ret = BLOCK_SZ;
            }

            // Just return the header if it is an error
            if (header->type == DM_USER_RESP_ERROR) {
                if (!RespondIOError(header_response)) {
                    return false;
                }

                io_error = true;
                break;
            }

            read_size -= ret;
            total_bytes_read += ret;
            sector += (ret >> SECTOR_SHIFT);
            bufsink_.UpdateBufferOffset(ret);
        }

        if (!io_error) {
            if (!WriteDmUserPayload(total_bytes_read, header_response)) {
                return false;
            }

            SNAP_LOG(DEBUG) << "WriteDmUserPayload success total_bytes_read: " << total_bytes_read
                            << " header-response: " << header_response
                            << " remaining_size: " << remaining_size;
            header_response = false;
            remaining_size -= total_bytes_read;
        }
    } while (remaining_size > 0 && !io_error);

    return true;
}

int Worker::ReadUnalignedSector(
        sector_t sector, size_t size,
        std::vector<std::pair<sector_t, const CowOperation*>>::iterator& it) {
    size_t skip_sector_size = 0;

    SNAP_LOG(DEBUG) << "ReadUnalignedSector: sector " << sector << " size: " << size
                    << " Aligned sector: " << it->first;

    if (!ProcessCowOp(it->second)) {
        SNAP_LOG(ERROR) << "ReadUnalignedSector: " << sector << " failed of size: " << size
                        << " Aligned sector: " << it->first;
        return -1;
    }

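    // The COW op covers the full 4k block starting at sector it->first. If
    // the request begins part-way into that block, shift the payload left
    // so it starts at the requested sector.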
    int num_sectors_skip = sector - it->first;

    if (num_sectors_skip > 0) {
        skip_sector_size = num_sectors_skip << SECTOR_SHIFT;
        char* buffer = reinterpret_cast<char*>(bufsink_.GetBufPtr());
        struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0]));

        if (skip_sector_size == BLOCK_SZ) {
            SNAP_LOG(ERROR) << "Invalid un-aligned IO request at sector: " << sector
                            << " Base-sector: " << it->first;
            return -1;
        }

        memmove(msg->payload.buf, (char*)msg->payload.buf + skip_sector_size,
                (BLOCK_SZ - skip_sector_size));
    }

    bufsink_.ResetBufferOffset();
    return std::min(size, (BLOCK_SZ - skip_sector_size));
}

bool Worker::ReadUnalignedSector(sector_t sector, size_t size) {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();
    header->type = DM_USER_RESP_SUCCESS;
    bufsink_.ResetBufferOffset();
    std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();

    auto it = std::lower_bound(chunk_vec.begin(), chunk_vec.end(), std::make_pair(sector, nullptr),
                               SnapshotHandler::compare);

    // |-------|-------|-------|
    // 0       1       2       3
    //
    // Block 0 - op 1
    // Block 1 - op 2
    // Block 2 - op 3
    //
    // chunk_vec will have blocks 0, 1 and 2, which map to the relevant
    // COW ops.
    //
    // Each block is 4k bytes. Thus, the last block spans 8 sectors,
    // ranging up to block 3. (However, block 3 won't be in chunk_vec, as
    // it doesn't map to any COW op.) Now, if we get an I/O request for a
    // sector spanning between block 2 and block 3, we need to step back
    // and get hold of the last element.
    //
    // Additionally, we need to make sure that the requested sector is
    // indeed within the range of the final sector. It is perfectly valid
    // to get an I/O request for block 3 and beyond, which are not mapped
    // to any COW ops. In that case, we just need to read from the base
    // device.
    bool merge_complete = false;
    bool header_response = true;
    if (it == chunk_vec.end()) {
        if (chunk_vec.size() > 0) {
            // I/O request beyond the last mapped sector
            it = std::prev(chunk_vec.end());
        } else {
            // This can happen when a partition merge is complete but the
            // snapshot state in /metadata is not yet deleted; if the device
            // is rebooted during this window, the subsequent boot will
            // mount the snapshot again. However, since the merge was
            // completed, we won't have any mapping to COW ops and thus
            // chunk_vec will be empty. In that case, mark this as
            // merge_complete and route the I/O to the base device.
            merge_complete = true;
        }
    } else if (it->first != sector) {
        if (it != chunk_vec.begin()) {
            --it;
        }
    } else {
        return ReadAlignedSector(sector, size, header_response);
    }

    loff_t requested_offset = sector << SECTOR_SHIFT;

    loff_t final_offset = 0;
    if (!merge_complete) {
        final_offset = it->first << SECTOR_SHIFT;
    }

    // Since a COW op spans a 4k block, we need to make sure that the
    // requested offset is within that 4k region. Consider the following
    // case:
    //
    // |-------|-------|-------|
    // 0       1       2       3
    //
    // Block 0 - op 1
    // Block 1 - op 2
    //
    // We have an I/O request for a sector between block 2 and block 3.
    // However, we have mappings to COW ops only for block 0 and block 1.
    // Thus, the requested offset in this case is beyond the last mapped
    // COW op (which is block 1 in this case).

    size_t total_bytes_read = 0;
    size_t remaining_size = size;
    int ret = 0;
    if (!merge_complete && (requested_offset >= final_offset) &&
        (requested_offset - final_offset) < BLOCK_SZ) {
        // Read the partial un-aligned data
        ret = ReadUnalignedSector(sector, remaining_size, it);
        if (ret < 0) {
            SNAP_LOG(ERROR) << "ReadUnalignedSector failed for sector: " << sector
                            << " size: " << size << " it->sector: " << it->first;
            return RespondIOError(header_response);
        }

        remaining_size -= ret;
        total_bytes_read += ret;
        sector += (ret >> SECTOR_SHIFT);

        // Send the data back
        if (!WriteDmUserPayload(total_bytes_read, header_response)) {
            return false;
        }

        header_response = false;
        // If we still have pending data to be processed, this will be
        // aligned I/O
        if (remaining_size) {
            return ReadAlignedSector(sector, remaining_size, header_response);
        }
    } else {
        // This is all about handling an I/O request that has to be routed
        // to the base device, as the I/O is not mapped to any of the COW
        // ops.
        loff_t aligned_offset = requested_offset;
        // Round up to the next 4k boundary
        aligned_offset += BLOCK_SZ - 1;
        aligned_offset &= ~(BLOCK_SZ - 1);
        // Find the gap between the aligned and the requested offset
        size_t diff_size = aligned_offset - requested_offset;
        CHECK(diff_size <= BLOCK_SZ);
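        // If the request ends before the next block boundary, serve it
        // entirely from the base device; otherwise read up to the boundary
        // and hand the now block-aligned remainder to ReadAlignedSector().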
        if (remaining_size < diff_size) {
            if (!ReadDataFromBaseDevice(sector, remaining_size)) {
                return RespondIOError(header_response);
            }
            total_bytes_read += remaining_size;

            if (!WriteDmUserPayload(total_bytes_read, header_response)) {
                return false;
            }
        } else {
            if (!ReadDataFromBaseDevice(sector, diff_size)) {
                return RespondIOError(header_response);
            }

            total_bytes_read += diff_size;

            if (!WriteDmUserPayload(total_bytes_read, header_response)) {
                return false;
            }

            remaining_size -= diff_size;
            size_t num_sectors_read = (diff_size >> SECTOR_SHIFT);
            sector += num_sectors_read;
            CHECK(IsBlockAligned(sector << SECTOR_SHIFT));
            header_response = false;

            // If we still have pending data to be processed, this will be
            // aligned I/O
            return ReadAlignedSector(sector, remaining_size, header_response);
        }
    }

    return true;
}

bool Worker::RespondIOError(bool header_response) {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();
    header->type = DM_USER_RESP_ERROR;
    // This is an issue with the dm-user interface. There
    // is no way to propagate an I/O error back to dm-user
    // once we have already communicated the header back. The header
    // is sent only once, at the beginning; however, the I/O can
    // be processed in chunks. If we encounter an I/O error
    // somewhere in the middle of the processing, we can't communicate
    // it back to dm-user.
    //
    // TODO: Fix the interface
    CHECK(header_response);

    if (!WriteDmUserPayload(0, header_response)) {
        return false;
    }

    // There is no need to process further as we have already seen
    // an I/O error
    return true;
}

bool Worker::DmuserReadRequest() {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();

    // Unaligned I/O request
    if (!IsBlockAligned(header->sector << SECTOR_SHIFT)) {
        return ReadUnalignedSector(header->sector, header->len);
    }

    return ReadAlignedSector(header->sector, header->len, true);
}

bool Worker::ProcessIORequest() {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();

    if (!ReadDmUserHeader()) {
        return false;
    }

    SNAP_LOG(DEBUG) << "Daemon: msg->seq: " << std::dec << header->seq;
    SNAP_LOG(DEBUG) << "Daemon: msg->len: " << std::dec << header->len;
    SNAP_LOG(DEBUG) << "Daemon: msg->sector: " << std::dec << header->sector;
    SNAP_LOG(DEBUG) << "Daemon: msg->type: " << std::dec << header->type;
    SNAP_LOG(DEBUG) << "Daemon: msg->flags: " << std::dec << header->flags;

    switch (header->type) {
        case DM_USER_REQ_MAP_READ: {
            if (!DmuserReadRequest()) {
                return false;
            }
            break;
        }

        case DM_USER_REQ_MAP_WRITE: {
            // TODO: We should not get any write requests to dm-user, as we
            // mount all partitions read-only. Need to verify how TRIM
            // commands are handled during mount.
            return false;
        }
    }

    return true;
}

}  // namespace snapshot
}  // namespace android