1 // Copyright (C) 2020 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #pragma once
16 
17 #include <linux/types.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 
22 #include <bitset>
23 #include <condition_variable>
24 #include <csignal>
25 #include <cstring>
26 #include <future>
27 #include <iostream>
28 #include <limits>
29 #include <map>
30 #include <mutex>
31 #include <string>
32 #include <thread>
33 #include <unordered_map>
34 #include <unordered_set>
35 #include <vector>
36 
37 #include <android-base/file.h>
38 #include <android-base/logging.h>
39 #include <android-base/stringprintf.h>
40 #include <android-base/unique_fd.h>
41 #include <ext4_utils/ext4_utils.h>
42 #include <libdm/dm.h>
43 #include <libsnapshot/cow_reader.h>
44 #include <libsnapshot/cow_writer.h>
45 #include <snapuserd/snapuserd_buffer.h>
46 #include <snapuserd/snapuserd_kernel.h>
47 
48 namespace android {
49 namespace snapshot {
50 
51 using android::base::unique_fd;
52 using namespace std::chrono_literals;
53 
54 static constexpr size_t PAYLOAD_SIZE = (1UL << 20);
55 static_assert(PAYLOAD_SIZE >= BLOCK_SZ);
56 
/*
 * Number of threads per partition. Four threads give the best
 * performance when update_verifier reads the partition during boot.
 */
static constexpr int NUM_THREADS_PER_PARTITION{4};
63 
/*
 * States shared between the worker threads and the read-ahead thread.
 *
 * The transitions between these states are described in
 * snapuserd_readahead.cpp.
 */
enum class READ_AHEAD_IO_TRANSITION {
    // Worker threads initiate the read-ahead thread so that it begins
    // reading the copy operations for each bounded region.
    READ_AHEAD_BEGIN = 0,
    // The read-ahead thread is in flight and reading the copy operations.
    READ_AHEAD_IN_PROGRESS = 1,
    // The merge operation is in progress on the worker threads.
    IO_IN_PROGRESS = 2,
    // All worker threads are done; the read-ahead thread is asked to
    // terminate.
    IO_TERMINATED = 3,
    // An IO failure occurred while the read-ahead thread was reading from
    // the COW device.
    READ_AHEAD_FAILURE = 4,
};
92 
93 class Snapuserd;
94 
95 class ReadAheadThread {
96   public:
97     ReadAheadThread(const std::string& cow_device, const std::string& backing_device,
98                     const std::string& misc_name, std::shared_ptr<Snapuserd> snapuserd);
99     bool RunThread();
100 
101   private:
102     void InitializeRAIter();
103     bool RAIterDone();
104     void RAIterNext();
105     const CowOperation* GetRAOpIter();
106     void InitializeBuffer();
107 
108     bool InitializeFds();
CloseFds()109     void CloseFds() {
110         cow_fd_ = {};
111         backing_store_fd_ = {};
112     }
113 
114     bool ReadAheadIOStart();
115     void PrepareReadAhead(uint64_t* source_offset, int* pending_ops, std::vector<uint64_t>& blocks);
116     bool ReconstructDataFromCow();
117     void CheckOverlap(const CowOperation* cow_op);
118 
119     void* read_ahead_buffer_;
120     void* metadata_buffer_;
121     std::vector<const CowOperation*>::reverse_iterator read_ahead_iter_;
122     std::string cow_device_;
123     std::string backing_store_device_;
124     std::string misc_name_;
125 
126     unique_fd cow_fd_;
127     unique_fd backing_store_fd_;
128 
129     std::shared_ptr<Snapuserd> snapuserd_;
130 
131     std::unordered_set<uint64_t> dest_blocks_;
132     std::unordered_set<uint64_t> source_blocks_;
133     bool overlap_;
134 };
135 
136 class WorkerThread {
137   public:
138     WorkerThread(const std::string& cow_device, const std::string& backing_device,
139                  const std::string& control_device, const std::string& misc_name,
140                  std::shared_ptr<Snapuserd> snapuserd);
141     bool RunThread();
142 
143   private:
144     // Initialization
145     void InitializeBufsink();
146     bool InitializeFds();
147     bool InitReader();
CloseFds()148     void CloseFds() {
149         ctrl_fd_ = {};
150         backing_store_fd_ = {};
151     }
152 
153     // Functions interacting with dm-user
154     bool ReadDmUserHeader();
155     bool DmuserReadRequest();
156     bool DmuserWriteRequest();
157     bool ReadDmUserPayload(void* buffer, size_t size);
158     bool WriteDmUserPayload(size_t size, bool header_response);
159 
160     bool ReadDiskExceptions(chunk_t chunk, size_t size);
161     bool ZerofillDiskExceptions(size_t read_size);
162     void ConstructKernelCowHeader();
163 
164     // IO Path
165     bool ProcessIORequest();
166     int ReadData(sector_t sector, size_t size);
167     int ReadUnalignedSector(sector_t sector, size_t size,
168                             std::vector<std::pair<sector_t, const CowOperation*>>::iterator& it);
169 
170     // Processing COW operations
171     bool ProcessCowOp(const CowOperation* cow_op);
172     bool ProcessReplaceOp(const CowOperation* cow_op);
173     // Handles Copy
174     bool ProcessCopyOp(const CowOperation* cow_op);
175     bool ProcessZeroOp();
176 
177     bool ReadFromBaseDevice(const CowOperation* cow_op);
178     bool GetReadAheadPopulatedBuffer(const CowOperation* cow_op);
179 
180     // Merge related functions
181     bool ProcessMergeComplete(chunk_t chunk, void* buffer);
182     loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
183                                int* unmerged_exceptions);
184 
185     int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
186                              int unmerged_exceptions, bool* copy_op, bool* commit);
187 
ChunkToSector(chunk_t chunk)188     sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
SectorToChunk(sector_t sector)189     chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
190 
191     std::unique_ptr<CowReader> reader_;
192     BufferSink bufsink_;
193 
194     std::string cow_device_;
195     std::string backing_store_device_;
196     std::string control_device_;
197     std::string misc_name_;
198 
199     unique_fd cow_fd_;
200     unique_fd backing_store_fd_;
201     unique_fd ctrl_fd_;
202 
203     std::shared_ptr<Snapuserd> snapuserd_;
204     uint32_t exceptions_per_area_;
205 };
206 
207 class Snapuserd : public std::enable_shared_from_this<Snapuserd> {
208   public:
209     Snapuserd(const std::string& misc_name, const std::string& cow_device,
210               const std::string& backing_device);
211     bool InitCowDevice();
212     bool Start();
GetControlDevicePath()213     const std::string& GetControlDevicePath() { return control_device_; }
GetMiscName()214     const std::string& GetMiscName() { return misc_name_; }
GetNumSectors()215     uint64_t GetNumSectors() { return num_sectors_; }
IsAttached()216     bool IsAttached() const { return attached_; }
AttachControlDevice()217     void AttachControlDevice() { attached_ = true; }
218 
219     void CheckMergeCompletionStatus();
220     bool CommitMerge(int num_merge_ops);
221 
CloseFds()222     void CloseFds() { cow_fd_ = {}; }
FreeResources()223     void FreeResources() {
224         worker_threads_.clear();
225         read_ahead_thread_ = nullptr;
226     }
GetMetadataAreaSize()227     size_t GetMetadataAreaSize() { return vec_.size(); }
GetExceptionBuffer(size_t i)228     void* GetExceptionBuffer(size_t i) { return vec_[i].get(); }
229 
230     bool InitializeWorkers();
231     std::unique_ptr<CowReader> CloneReaderForWorker();
GetSharedPtr()232     std::shared_ptr<Snapuserd> GetSharedPtr() { return shared_from_this(); }
233 
GetChunkVec()234     std::vector<std::pair<sector_t, const CowOperation*>>& GetChunkVec() { return chunk_vec_; }
GetMetadataVec()235     const std::vector<std::unique_ptr<uint8_t[]>>& GetMetadataVec() const { return vec_; }
236 
compare(std::pair<sector_t,const CowOperation * > p1,std::pair<sector_t,const CowOperation * > p2)237     static bool compare(std::pair<sector_t, const CowOperation*> p1,
238                         std::pair<sector_t, const CowOperation*> p2) {
239         return p1.first < p2.first;
240     }
241 
242     void UnmapBufferRegion();
243     bool MmapMetadata();
244 
245     // Read-ahead related functions
GetReadAheadOpsVec()246     std::vector<const CowOperation*>& GetReadAheadOpsVec() { return read_ahead_ops_; }
GetReadAheadMap()247     std::unordered_map<uint64_t, void*>& GetReadAheadMap() { return read_ahead_buffer_map_; }
GetMappedAddr()248     void* GetMappedAddr() { return mapped_addr_; }
IsReadAheadFeaturePresent()249     bool IsReadAheadFeaturePresent() { return read_ahead_feature_; }
250     void PrepareReadAhead();
251     void StartReadAhead();
252     void MergeCompleted();
253     bool ReadAheadIOCompleted(bool sync);
254     void ReadAheadIOFailed();
255     bool WaitForMergeToComplete();
256     bool GetReadAheadPopulatedBuffer(uint64_t block, void* buffer);
ReconstructDataFromCow()257     bool ReconstructDataFromCow() { return populate_data_from_cow_; }
ReconstructDataFromCowFinish()258     void ReconstructDataFromCowFinish() { populate_data_from_cow_ = false; }
259     bool WaitForReadAheadToStart();
260 
261     uint64_t GetBufferMetadataOffset();
262     size_t GetBufferMetadataSize();
263     size_t GetBufferDataOffset();
264     size_t GetBufferDataSize();
265 
266     // Final block to be merged in a given read-ahead buffer region
SetFinalBlockMerged(uint64_t x)267     void SetFinalBlockMerged(uint64_t x) { final_block_merged_ = x; }
GetFinalBlockMerged()268     uint64_t GetFinalBlockMerged() { return final_block_merged_; }
269     // Total number of blocks to be merged in a given read-ahead buffer region
SetTotalRaBlocksMerged(int x)270     void SetTotalRaBlocksMerged(int x) { total_ra_blocks_merged_ = x; }
GetTotalRaBlocksMerged()271     int GetTotalRaBlocksMerged() { return total_ra_blocks_merged_; }
SetSocketPresent(bool socket)272     void SetSocketPresent(bool socket) { is_socket_present_ = socket; }
273 
274   private:
275     bool IsChunkIdMetadata(chunk_t chunk);
276     chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
277 
278     bool GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer);
279     bool ReadMetadata();
ChunkToSector(chunk_t chunk)280     sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
SectorToChunk(sector_t sector)281     chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
IsBlockAligned(int read_size)282     bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
283     struct BufferState* GetBufferState();
284 
285     void ReadBlocks(const std::string& partition_name, const std::string& dm_block_device);
286     void ReadBlocksToCache(const std::string& dm_block_device, const std::string& partition_name,
287                            off_t offset, size_t size);
288 
289     std::string cow_device_;
290     std::string backing_store_device_;
291     std::string control_device_;
292     std::string misc_name_;
293 
294     unique_fd cow_fd_;
295 
296     uint32_t exceptions_per_area_;
297     uint64_t num_sectors_;
298 
299     std::unique_ptr<CowReader> reader_;
300 
301     // Vector of disk exception which is a
302     // mapping of old-chunk to new-chunk
303     std::vector<std::unique_ptr<uint8_t[]>> vec_;
304 
305     // chunk_vec stores the pseudo mapping of sector
306     // to COW operations.
307     std::vector<std::pair<sector_t, const CowOperation*>> chunk_vec_;
308 
309     std::mutex lock_;
310     std::condition_variable cv;
311 
312     void* mapped_addr_;
313     size_t total_mapped_addr_length_;
314 
315     std::vector<std::unique_ptr<WorkerThread>> worker_threads_;
316     // Read-ahead related
317     std::unordered_map<uint64_t, void*> read_ahead_buffer_map_;
318     std::vector<const CowOperation*> read_ahead_ops_;
319     bool populate_data_from_cow_ = false;
320     bool read_ahead_feature_;
321     uint64_t final_block_merged_;
322     int total_ra_blocks_merged_ = 0;
323     READ_AHEAD_IO_TRANSITION io_state_;
324     std::unique_ptr<ReadAheadThread> read_ahead_thread_;
325 
326     bool merge_initiated_ = false;
327     bool attached_ = false;
328     bool is_socket_present_;
329 };
330 
331 }  // namespace snapshot
332 }  // namespace android
333