// Copyright (C) 2020 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <linux/types.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>

#include <bitset>
#include <condition_variable>
#include <csignal>
#include <cstring>
#include <future>
#include <iostream>
#include <limits>
#include <map>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/stringprintf.h>
#include <android-base/unique_fd.h>
#include <ext4_utils/ext4_utils.h>
#include <libdm/dm.h>
#include <libsnapshot/cow_reader.h>
#include <libsnapshot/cow_writer.h>
#include <snapuserd/snapuserd_buffer.h>
#include <snapuserd/snapuserd_kernel.h>

namespace android {
namespace snapshot {

using android::base::unique_fd;
using namespace std::chrono_literals;

static constexpr size_t PAYLOAD_SIZE = (1UL << 20);
static_assert(PAYLOAD_SIZE >= BLOCK_SZ);

/*
 * With 4 threads, we get optimal performance when
 * update_verifier reads the partition during boot.
 */
static constexpr int NUM_THREADS_PER_PARTITION = 4;

/*
 * State transitions between worker threads and the read-ahead
 * thread.
 *
 * READ_AHEAD_BEGIN: Worker threads request the read-ahead thread
 *                   to begin reading the copy operations for each
 *                   bounded region.
 *
 * READ_AHEAD_IN_PROGRESS: The read-ahead thread is in flight and
 *                         reading the copy operations.
 *
 * IO_IN_PROGRESS: A merge operation is in progress on the worker threads.
 *
 * IO_TERMINATED: All worker threads are done; the read-ahead thread
 *                is requested to terminate.
 *
 * READ_AHEAD_FAILURE: An IO failure occurred while the read-ahead
 *                     thread was reading from the COW device.
 *
 * The transitions between these states are described in snapuserd_readahead.cpp.
 */
enum class READ_AHEAD_IO_TRANSITION {
    READ_AHEAD_BEGIN,
    READ_AHEAD_IN_PROGRESS,
    IO_IN_PROGRESS,
    IO_TERMINATED,
    READ_AHEAD_FAILURE,
};
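/*
 * Illustrative sketch only (not part of the API): based on the descriptions
 * above, a successful read-ahead/merge cycle for one bounded region moves
 * through the states roughly as
 *
 *     READ_AHEAD_BEGIN -> READ_AHEAD_IN_PROGRESS -> IO_IN_PROGRESS -> ...
 *
 * repeating per region until all worker threads finish and the state moves
 * to IO_TERMINATED, while an IO error on the COW device at any point moves
 * the state to READ_AHEAD_FAILURE. See snapuserd_readahead.cpp for the
 * authoritative transitions.
 */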

class Snapuserd;

class ReadAheadThread {
  public:
    ReadAheadThread(const std::string& cow_device, const std::string& backing_device,
                    const std::string& misc_name, std::shared_ptr<Snapuserd> snapuserd);
    bool RunThread();

  private:
    void InitializeRAIter();
    bool RAIterDone();
    void RAIterNext();
    const CowOperation* GetRAOpIter();
    void InitializeBuffer();

    bool InitializeFds();
    void CloseFds() {
        cow_fd_ = {};
        backing_store_fd_ = {};
    }

    bool ReadAheadIOStart();
    void PrepareReadAhead(uint64_t* source_offset, int* pending_ops, std::vector<uint64_t>& blocks);
    bool ReconstructDataFromCow();
    void CheckOverlap(const CowOperation* cow_op);

    void* read_ahead_buffer_;
    void* metadata_buffer_;
    std::vector<const CowOperation*>::reverse_iterator read_ahead_iter_;
    std::string cow_device_;
    std::string backing_store_device_;
    std::string misc_name_;

    unique_fd cow_fd_;
    unique_fd backing_store_fd_;

    std::shared_ptr<Snapuserd> snapuserd_;

    std::unordered_set<uint64_t> dest_blocks_;
    std::unordered_set<uint64_t> source_blocks_;
    bool overlap_;
};

class WorkerThread {
  public:
    WorkerThread(const std::string& cow_device, const std::string& backing_device,
                 const std::string& control_device, const std::string& misc_name,
                 std::shared_ptr<Snapuserd> snapuserd);
    bool RunThread();

  private:
    // Initialization
    void InitializeBufsink();
    bool InitializeFds();
    bool InitReader();
    void CloseFds() {
        ctrl_fd_ = {};
        backing_store_fd_ = {};
    }

    // Functions interacting with dm-user
    bool ReadDmUserHeader();
    bool DmuserReadRequest();
    bool DmuserWriteRequest();
    bool ReadDmUserPayload(void* buffer, size_t size);
    bool WriteDmUserPayload(size_t size, bool header_response);

    bool ReadDiskExceptions(chunk_t chunk, size_t size);
    bool ZerofillDiskExceptions(size_t read_size);
    void ConstructKernelCowHeader();

    // IO Path
    bool ProcessIORequest();
    int ReadData(sector_t sector, size_t size);
    int ReadUnalignedSector(sector_t sector, size_t size,
                            std::vector<std::pair<sector_t, const CowOperation*>>::iterator& it);

    // Processing COW operations
    bool ProcessCowOp(const CowOperation* cow_op);
    bool ProcessReplaceOp(const CowOperation* cow_op);
    // Handles Copy
    bool ProcessCopyOp(const CowOperation* cow_op);
    bool ProcessZeroOp();

    bool ReadFromBaseDevice(const CowOperation* cow_op);
    bool GetReadAheadPopulatedBuffer(const CowOperation* cow_op);

    // Merge related functions
    bool ProcessMergeComplete(chunk_t chunk, void* buffer);
    loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
                               int* unmerged_exceptions);

    int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
                             int unmerged_exceptions, bool* copy_op, bool* commit);

    sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
    chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }

    std::unique_ptr<CowReader> reader_;
    BufferSink bufsink_;
    std::string cow_device_;
    std::string backing_store_device_;
    std::string control_device_;
    std::string misc_name_;

    unique_fd cow_fd_;
    unique_fd backing_store_fd_;
    unique_fd ctrl_fd_;

    std::shared_ptr<Snapuserd> snapuserd_;
    uint32_t exceptions_per_area_;
};

class Snapuserd : public std::enable_shared_from_this<Snapuserd> {
  public:
    Snapuserd(const std::string& misc_name, const std::string& cow_device,
              const std::string& backing_device);
    bool InitCowDevice();
    bool Start();
    const std::string& GetControlDevicePath() { return control_device_; }
    const std::string& GetMiscName() { return misc_name_; }
    uint64_t GetNumSectors() { return num_sectors_; }
    bool IsAttached() const { return attached_; }
    void AttachControlDevice() { attached_ = true; }

    void CheckMergeCompletionStatus();
    bool CommitMerge(int num_merge_ops);

    void CloseFds() { cow_fd_ = {}; }
    void FreeResources() {
        worker_threads_.clear();
        read_ahead_thread_ = nullptr;
    }
    size_t GetMetadataAreaSize() { return vec_.size(); }
    void* GetExceptionBuffer(size_t i) { return vec_[i].get(); }

    bool InitializeWorkers();
    std::unique_ptr<CowReader> CloneReaderForWorker();
    std::shared_ptr<Snapuserd> GetSharedPtr() { return shared_from_this(); }

    std::vector<std::pair<sector_t, const CowOperation*>>& GetChunkVec() { return chunk_vec_; }
    const std::vector<std::unique_ptr<uint8_t[]>>& GetMetadataVec() const { return vec_; }

    static bool compare(std::pair<sector_t, const CowOperation*> p1,
                        std::pair<sector_t, const CowOperation*> p2) {
        return p1.first < p2.first;
    }

    void UnmapBufferRegion();
    bool MmapMetadata();

    // Read-ahead related functions
    std::vector<const CowOperation*>& GetReadAheadOpsVec() { return read_ahead_ops_; }
    std::unordered_map<uint64_t, void*>& GetReadAheadMap() { return read_ahead_buffer_map_; }
    void* GetMappedAddr() { return mapped_addr_; }
    bool IsReadAheadFeaturePresent() { return read_ahead_feature_; }
    void PrepareReadAhead();
    void StartReadAhead();
    void MergeCompleted();
    bool ReadAheadIOCompleted(bool sync);
    void ReadAheadIOFailed();
    bool WaitForMergeToComplete();
    bool GetReadAheadPopulatedBuffer(uint64_t block, void* buffer);
    bool ReconstructDataFromCow() { return populate_data_from_cow_; }
    void ReconstructDataFromCowFinish() { populate_data_from_cow_ = false; }
    bool WaitForReadAheadToStart();

    uint64_t GetBufferMetadataOffset();
    size_t GetBufferMetadataSize();
    size_t GetBufferDataOffset();
    size_t GetBufferDataSize();

    // Final block to be merged in a given read-ahead buffer region
    void SetFinalBlockMerged(uint64_t x) { final_block_merged_ = x; }
    uint64_t GetFinalBlockMerged() { return final_block_merged_; }
    // Total number of blocks to be merged in a given read-ahead buffer region
    void SetTotalRaBlocksMerged(int x) { total_ra_blocks_merged_ = x; }
    int GetTotalRaBlocksMerged() { return total_ra_blocks_merged_; }
    void SetSocketPresent(bool socket) { is_socket_present_ = socket; }

  private:
    bool IsChunkIdMetadata(chunk_t chunk);
    chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);

    bool GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer);
    bool ReadMetadata();
    sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
    chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
    bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
    struct BufferState* GetBufferState();

    void ReadBlocks(const std::string& partition_name, const std::string& dm_block_device);
    void ReadBlocksToCache(const std::string& dm_block_device, const std::string& partition_name,
                           off_t offset, size_t size);

    std::string cow_device_;
    std::string backing_store_device_;
    std::string control_device_;
    std::string misc_name_;

    unique_fd cow_fd_;

    uint32_t exceptions_per_area_;
    uint64_t num_sectors_;

    std::unique_ptr<CowReader> reader_;

    // Vector of disk exceptions, which map old chunks to new chunks
    std::vector<std::unique_ptr<uint8_t[]>> vec_;

    // chunk_vec stores the pseudo mapping of sectors to COW operations.
    std::vector<std::pair<sector_t, const CowOperation*>> chunk_vec_;

    std::mutex lock_;
    std::condition_variable cv;

    void* mapped_addr_;
    size_t total_mapped_addr_length_;

    std::vector<std::unique_ptr<WorkerThread>> worker_threads_;
    // Read-ahead related
    std::unordered_map<uint64_t, void*> read_ahead_buffer_map_;
    std::vector<const CowOperation*> read_ahead_ops_;
    bool populate_data_from_cow_ = false;
    bool read_ahead_feature_;
    uint64_t final_block_merged_;
    int total_ra_blocks_merged_ = 0;
    READ_AHEAD_IO_TRANSITION io_state_;
    std::unique_ptr<ReadAheadThread> read_ahead_thread_;

    bool merge_initiated_ = false;
    bool attached_ = false;
    bool is_socket_present_;
};

}  // namespace snapshot
}  // namespace android