1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server.devicepolicy; 18 19 import static java.util.concurrent.TimeUnit.MILLISECONDS; 20 import static java.util.concurrent.TimeUnit.NANOSECONDS; 21 22 import android.app.admin.DeviceAdminReceiver; 23 import android.app.admin.SecurityLog; 24 import android.app.admin.SecurityLog.SecurityEvent; 25 import android.os.Process; 26 import android.os.SystemClock; 27 import android.util.Log; 28 import android.util.Slog; 29 30 import com.android.internal.annotations.GuardedBy; 31 import com.android.internal.annotations.VisibleForTesting; 32 33 import java.io.IOException; 34 import java.util.ArrayList; 35 import java.util.List; 36 import java.util.concurrent.Semaphore; 37 import java.util.concurrent.TimeUnit; 38 import java.util.concurrent.locks.Lock; 39 import java.util.concurrent.locks.ReentrantLock; 40 41 /** 42 * A class managing access to the security logs. It maintains an internal buffer of pending 43 * logs to be retrieved by the device owner. The logs are retrieved from the logd daemon via 44 * JNI binding, and kept until device owner has retrieved to prevent loss of logs. Access to 45 * the logs from the device owner is rate-limited, and device owner is notified when the logs 46 * are ready to be retrieved. This happens every two hours, or when our internal buffer is 47 * larger than a certain threshold. 48 */ 49 class SecurityLogMonitor implements Runnable { 50 private final DevicePolicyManagerService mService; 51 52 private final Lock mLock = new ReentrantLock(); 53 54 private int mEnabledUser; 55 SecurityLogMonitor(DevicePolicyManagerService service)56 SecurityLogMonitor(DevicePolicyManagerService service) { 57 this(service, 0 /* id */); 58 } 59 60 @VisibleForTesting SecurityLogMonitor(DevicePolicyManagerService service, long id)61 SecurityLogMonitor(DevicePolicyManagerService service, long id) { 62 mService = service; 63 mId = id; 64 mLastForceNanos = System.nanoTime(); 65 } 66 67 private static final boolean DEBUG = false; // STOPSHIP if true. 68 private static final String TAG = "SecurityLogMonitor"; 69 /** 70 * Each log entry can hold up to 4K bytes (but as of {@link android.os.Build.VERSION_CODES#N} 71 * it should be less than 100 bytes), setting 1024 entries as the threshold to notify Device 72 * Owner. 73 */ 74 @VisibleForTesting static final int BUFFER_ENTRIES_NOTIFICATION_LEVEL = 1024; 75 /** 76 * The maximum number of entries we should store before dropping earlier logs, to limit the 77 * memory usage. 78 */ 79 private static final int BUFFER_ENTRIES_MAXIMUM_LEVEL = BUFFER_ENTRIES_NOTIFICATION_LEVEL * 10; 80 /** 81 * Critical log buffer level, 90% of capacity. 82 */ 83 private static final int BUFFER_ENTRIES_CRITICAL_LEVEL = BUFFER_ENTRIES_MAXIMUM_LEVEL * 9 / 10; 84 /** 85 * How often should Device Owner be notified under normal circumstances. 86 */ 87 private static final long RATE_LIMIT_INTERVAL_MS = TimeUnit.HOURS.toMillis(2); 88 /** 89 * How often to retry the notification about available logs if it is ignored or missed by DO. 90 */ 91 private static final long BROADCAST_RETRY_INTERVAL_MS = TimeUnit.MINUTES.toMillis(30); 92 /** 93 * Internally how often should the monitor poll the security logs from logd. 94 */ 95 private static final long POLLING_INTERVAL_MS = TimeUnit.MINUTES.toMillis(1); 96 /** 97 * Overlap between two subsequent log requests, required to avoid losing out of order events. 98 */ 99 private static final long OVERLAP_NS = TimeUnit.SECONDS.toNanos(3); 100 101 /** Minimum time between forced fetch attempts. */ 102 private static final long FORCE_FETCH_THROTTLE_NS = TimeUnit.SECONDS.toNanos(10); 103 104 @GuardedBy("mLock") 105 private Thread mMonitorThread = null; 106 @GuardedBy("mLock") 107 private ArrayList<SecurityEvent> mPendingLogs = new ArrayList<>(); 108 @GuardedBy("mLock") 109 private long mId; 110 @GuardedBy("mLock") 111 private boolean mAllowedToRetrieve = false; 112 113 // Whether we have already logged the fact that log buffer reached 90%, to avoid dupes. 114 @GuardedBy("mLock") 115 private boolean mCriticalLevelLogged = false; 116 117 /** 118 * Last events fetched from log to check for overlap between batches. We can leave it empty if 119 * we are sure there will be no overlap anymore, e.g. when we get empty batch. 120 */ 121 private final ArrayList<SecurityEvent> mLastEvents = new ArrayList<>(); 122 /** Timestamp of the very last event, -1 means request from the beginning of time. */ 123 private long mLastEventNanos = -1; 124 125 /** 126 * When DO will be allowed to retrieve the log, in milliseconds since boot (as per 127 * {@link SystemClock#elapsedRealtime()}). After that it will mark the time to retry broadcast. 128 */ 129 @GuardedBy("mLock") 130 private long mNextAllowedRetrievalTimeMillis = -1; 131 @GuardedBy("mLock") 132 private boolean mPaused = false; 133 134 /** Semaphore used to force log fetching on request from adb. */ 135 private final Semaphore mForceSemaphore = new Semaphore(0 /* permits */); 136 137 /** The last timestamp when force fetch was used, to prevent abuse. */ 138 @GuardedBy("mForceSemaphore") 139 private long mLastForceNanos = 0; 140 141 /** 142 * Start security logging. 143 * 144 * @param enabledUser which user logging is enabled on, or USER_ALL to enable logging for all 145 * users on the device. 146 */ start(int enabledUser)147 void start(int enabledUser) { 148 Slog.i(TAG, "Starting security logging for user " + enabledUser); 149 mEnabledUser = enabledUser; 150 SecurityLog.writeEvent(SecurityLog.TAG_LOGGING_STARTED); 151 mLock.lock(); 152 try { 153 if (mMonitorThread == null) { 154 mPendingLogs = new ArrayList<>(); 155 mCriticalLevelLogged = false; 156 mId = 0; 157 mAllowedToRetrieve = false; 158 mNextAllowedRetrievalTimeMillis = -1; 159 mPaused = false; 160 161 mMonitorThread = new Thread(this); 162 mMonitorThread.start(); 163 } 164 } finally { 165 mLock.unlock(); 166 } 167 } 168 stop()169 void stop() { 170 Slog.i(TAG, "Stopping security logging."); 171 SecurityLog.writeEvent(SecurityLog.TAG_LOGGING_STOPPED); 172 mLock.lock(); 173 try { 174 if (mMonitorThread != null) { 175 mMonitorThread.interrupt(); 176 try { 177 mMonitorThread.join(TimeUnit.SECONDS.toMillis(5)); 178 } catch (InterruptedException e) { 179 Log.e(TAG, "Interrupted while waiting for thread to stop", e); 180 } 181 // Reset state and clear buffer 182 mPendingLogs = new ArrayList<>(); 183 mId = 0; 184 mAllowedToRetrieve = false; 185 mNextAllowedRetrievalTimeMillis = -1; 186 mPaused = false; 187 mMonitorThread = null; 188 } 189 } finally { 190 mLock.unlock(); 191 } 192 } 193 194 /** 195 * If logs are being collected, keep collecting them but stop notifying the device owner that 196 * new logs are available (since they cannot be retrieved). 197 */ pause()198 void pause() { 199 Slog.i(TAG, "Paused."); 200 201 mLock.lock(); 202 mPaused = true; 203 mAllowedToRetrieve = false; 204 mLock.unlock(); 205 } 206 207 /** 208 * If logs are being collected, start notifying the device owner when logs are ready to be 209 * retrieved again (if it was paused). 210 * <p>If logging is enabled and there are logs ready to be retrieved, this method will attempt 211 * to notify the device owner. Therefore calling identity should be cleared before calling it 212 * (in case the method is called from a user other than the DO's user). 213 */ resume()214 void resume() { 215 mLock.lock(); 216 try { 217 if (!mPaused) { 218 Log.d(TAG, "Attempted to resume, but logging is not paused."); 219 return; 220 } 221 mPaused = false; 222 mAllowedToRetrieve = false; 223 } finally { 224 mLock.unlock(); 225 } 226 227 Slog.i(TAG, "Resumed."); 228 try { 229 notifyDeviceOwnerOrProfileOwnerIfNeeded(false /* force */); 230 } catch (InterruptedException e) { 231 Log.w(TAG, "Thread interrupted.", e); 232 } 233 } 234 235 /** 236 * Discard all collected logs. 237 */ discardLogs()238 void discardLogs() { 239 mLock.lock(); 240 mAllowedToRetrieve = false; 241 mPendingLogs = new ArrayList<>(); 242 mCriticalLevelLogged = false; 243 mLock.unlock(); 244 Slog.i(TAG, "Discarded all logs."); 245 } 246 247 /** 248 * Returns the new batch of logs since the last call to this method. Returns null if 249 * rate limit is exceeded. 250 */ retrieveLogs()251 List<SecurityEvent> retrieveLogs() { 252 mLock.lock(); 253 try { 254 if (mAllowedToRetrieve) { 255 mAllowedToRetrieve = false; 256 mNextAllowedRetrievalTimeMillis = SystemClock.elapsedRealtime() 257 + RATE_LIMIT_INTERVAL_MS; 258 List<SecurityEvent> result = mPendingLogs; 259 mPendingLogs = new ArrayList<>(); 260 mCriticalLevelLogged = false; 261 return result; 262 } else { 263 return null; 264 } 265 } finally { 266 mLock.unlock(); 267 } 268 } 269 270 /** 271 * Requests the next (or the first) batch of events from the log with appropriate timestamp. 272 */ getNextBatch(ArrayList<SecurityEvent> newLogs)273 private void getNextBatch(ArrayList<SecurityEvent> newLogs) throws IOException { 274 if (mLastEventNanos < 0) { 275 // Non-blocking read that returns all logs immediately. 276 if (DEBUG) Slog.d(TAG, "SecurityLog.readEvents"); 277 SecurityLog.readEvents(newLogs); 278 } else { 279 // If we have last events from the previous batch, request log events with time overlap 280 // with previously retrieved messages to avoid losing events due to reordering in logd. 281 final long startNanos = mLastEvents.isEmpty() 282 ? mLastEventNanos : Math.max(0, mLastEventNanos - OVERLAP_NS); 283 if (DEBUG) Slog.d(TAG, "SecurityLog.readEventsSince: " + startNanos); 284 // Non-blocking read that returns all logs with timestamps >= startNanos immediately. 285 SecurityLog.readEventsSince(startNanos, newLogs); 286 } 287 288 // Sometimes events may be reordered in logd due to simultaneous readers and writers. In 289 // this case, we have to sort it to make overlap checking work. This is very unlikely. 290 for (int i = 0; i < newLogs.size() - 1; i++) { 291 if (newLogs.get(i).getTimeNanos() > newLogs.get(i+1).getTimeNanos()) { 292 if (DEBUG) Slog.d(TAG, "Got out of order events, sorting."); 293 // Sort using comparator that compares timestamps. 294 newLogs.sort((e1, e2) -> Long.signum(e1.getTimeNanos() - e2.getTimeNanos())); 295 break; 296 } 297 } 298 SecurityLog.redactEvents(newLogs, mEnabledUser); 299 if (DEBUG) Slog.d(TAG, "Got " + newLogs.size() + " new events."); 300 } 301 302 /** 303 * Save the last events for overlap checking with the next batch. 304 */ saveLastEvents(ArrayList<SecurityEvent> newLogs)305 private void saveLastEvents(ArrayList<SecurityEvent> newLogs) { 306 mLastEvents.clear(); 307 if (newLogs.isEmpty()) { 308 // This can happen if no events were logged yet or the buffer got cleared. In this case 309 // we aren't going to have any overlap next time, leave mLastEvents events empty. 310 return; 311 } 312 313 // Save the last timestamp. 314 mLastEventNanos = newLogs.get(newLogs.size() - 1).getTimeNanos(); 315 // Position of the earliest event that has to be saved. Start from the penultimate event, 316 // going backward. 317 int pos = newLogs.size() - 2; 318 while (pos >= 0 && mLastEventNanos - newLogs.get(pos).getTimeNanos() < OVERLAP_NS) { 319 pos--; 320 } 321 // We either run past the start of the list or encountered an event that is too old to keep. 322 pos++; 323 mLastEvents.addAll(newLogs.subList(pos, newLogs.size())); 324 if (DEBUG) Slog.d(TAG, mLastEvents.size() + " events saved for overlap check"); 325 } 326 327 /** 328 * Merges a new batch into already fetched logs and deals with overlapping and out of order 329 * events. 330 */ 331 @GuardedBy("mLock") mergeBatchLocked(final ArrayList<SecurityEvent> newLogs)332 private void mergeBatchLocked(final ArrayList<SecurityEvent> newLogs) { 333 // Reserve capacity so that copying doesn't occur. 334 mPendingLogs.ensureCapacity(mPendingLogs.size() + newLogs.size()); 335 // Run through the first events of the batch to check if there is an overlap with previous 336 // batch and if so, skip overlapping events. Events are sorted by timestamp, so we can 337 // compare it in linear time by advancing two pointers, one for each batch. 338 int curPos = 0; 339 int lastPos = 0; 340 // For the first batch mLastEvents will be empty, so no iterations will happen. 341 while (lastPos < mLastEvents.size() && curPos < newLogs.size()) { 342 final SecurityEvent curEvent = newLogs.get(curPos); 343 final long currentNanos = curEvent.getTimeNanos(); 344 if (currentNanos > mLastEventNanos) { 345 // We got past the last event of the last batch, no overlap possible anymore. 346 break; 347 } 348 final SecurityEvent lastEvent = mLastEvents.get(lastPos); 349 final long lastNanos = lastEvent.getTimeNanos(); 350 if (lastNanos > currentNanos) { 351 // New event older than the last we've seen so far, must be due to reordering. 352 if (DEBUG) Slog.d(TAG, "New event in the overlap: " + currentNanos); 353 assignLogId(curEvent); 354 mPendingLogs.add(curEvent); 355 curPos++; 356 } else if (lastNanos < currentNanos) { 357 if (DEBUG) Slog.d(TAG, "Event disappeared from the overlap: " + lastNanos); 358 lastPos++; 359 } else { 360 // Two events have the same timestamp, check if they are the same. 361 if (lastEvent.eventEquals(curEvent)) { 362 // Actual overlap, just skip the event. 363 if (DEBUG) Slog.d(TAG, "Skipped dup event with timestamp: " + lastNanos); 364 } else { 365 // Wow, what a coincidence, or probably the clock is too coarse. 366 assignLogId(curEvent); 367 mPendingLogs.add(curEvent); 368 if (DEBUG) Slog.d(TAG, "Event timestamp collision: " + lastNanos); 369 } 370 lastPos++; 371 curPos++; 372 } 373 } 374 // Assign an id to the new logs, after the overlap with mLastEvents. 375 List<SecurityEvent> idLogs = newLogs.subList(curPos, newLogs.size()); 376 for (SecurityEvent event : idLogs) { 377 assignLogId(event); 378 } 379 // Save the rest of the new batch. 380 mPendingLogs.addAll(idLogs); 381 382 checkCriticalLevel(); 383 384 if (mPendingLogs.size() > BUFFER_ENTRIES_MAXIMUM_LEVEL) { 385 // Truncate buffer down to half of BUFFER_ENTRIES_MAXIMUM_LEVEL. 386 mPendingLogs = new ArrayList<>(mPendingLogs.subList( 387 mPendingLogs.size() - (BUFFER_ENTRIES_MAXIMUM_LEVEL / 2), 388 mPendingLogs.size())); 389 mCriticalLevelLogged = false; 390 Slog.i(TAG, "Pending logs buffer full. Discarding old logs."); 391 } 392 if (DEBUG) { 393 if (mPendingLogs.size() > 0) { 394 Slog.d(TAG, mPendingLogs.size() + " pending events in the buffer after merging," 395 + " with ids " + mPendingLogs.get(0).getId() 396 + " to " + mPendingLogs.get(mPendingLogs.size() - 1).getId()); 397 } else { 398 Slog.d(TAG, "0 pending events in the buffer after merging"); 399 } 400 } 401 } 402 403 @GuardedBy("mLock") checkCriticalLevel()404 private void checkCriticalLevel() { 405 if (!SecurityLog.isLoggingEnabled()) { 406 return; 407 } 408 409 if (mPendingLogs.size() >= BUFFER_ENTRIES_CRITICAL_LEVEL) { 410 if (!mCriticalLevelLogged) { 411 mCriticalLevelLogged = true; 412 SecurityLog.writeEvent(SecurityLog.TAG_LOG_BUFFER_SIZE_CRITICAL); 413 } 414 } 415 } 416 417 @GuardedBy("mLock") assignLogId(SecurityEvent event)418 private void assignLogId(SecurityEvent event) { 419 event.setId(mId); 420 if (mId == Long.MAX_VALUE) { 421 Slog.i(TAG, "Reached maximum id value; wrapping around."); 422 mId = 0; 423 } else { 424 mId++; 425 } 426 } 427 428 @Override run()429 public void run() { 430 Process.setThreadPriority(Process.THREAD_PRIORITY_BACKGROUND); 431 432 final ArrayList<SecurityEvent> newLogs = new ArrayList<>(); 433 while (!Thread.currentThread().isInterrupted()) { 434 try { 435 final boolean force = mForceSemaphore.tryAcquire(POLLING_INTERVAL_MS, MILLISECONDS); 436 437 getNextBatch(newLogs); 438 439 mLock.lockInterruptibly(); 440 try { 441 mergeBatchLocked(newLogs); 442 } finally { 443 mLock.unlock(); 444 } 445 446 saveLastEvents(newLogs); 447 newLogs.clear(); 448 notifyDeviceOwnerOrProfileOwnerIfNeeded(force); 449 } catch (IOException e) { 450 Log.e(TAG, "Failed to read security log", e); 451 } catch (InterruptedException e) { 452 Log.i(TAG, "Thread interrupted, exiting.", e); 453 // We are asked to stop. 454 break; 455 } 456 } 457 458 // Discard previous batch info. 459 mLastEvents.clear(); 460 if (mLastEventNanos != -1) { 461 // Make sure we don't read old events if logging is re-enabled. Since mLastEvents is 462 // empty, the next request will be done without overlap, so it is enough to add 1 ns. 463 mLastEventNanos += 1; 464 } 465 466 Slog.i(TAG, "MonitorThread exit."); 467 } 468 notifyDeviceOwnerOrProfileOwnerIfNeeded(boolean force)469 private void notifyDeviceOwnerOrProfileOwnerIfNeeded(boolean force) 470 throws InterruptedException { 471 boolean allowRetrievalAndNotifyDOOrPO = false; 472 mLock.lockInterruptibly(); 473 try { 474 if (mPaused) { 475 return; 476 } 477 final int logSize = mPendingLogs.size(); 478 if (logSize >= BUFFER_ENTRIES_NOTIFICATION_LEVEL || (force && logSize > 0)) { 479 // Allow DO to retrieve logs if too many pending logs or if forced. 480 if (!mAllowedToRetrieve) { 481 allowRetrievalAndNotifyDOOrPO = true; 482 } 483 if (DEBUG) Slog.d(TAG, "Number of log entries over threshold: " + logSize); 484 } 485 if (logSize > 0 && SystemClock.elapsedRealtime() >= mNextAllowedRetrievalTimeMillis) { 486 // Rate limit reset 487 allowRetrievalAndNotifyDOOrPO = true; 488 if (DEBUG) Slog.d(TAG, "Timeout reached"); 489 } 490 if (allowRetrievalAndNotifyDOOrPO) { 491 mAllowedToRetrieve = true; 492 // Set the timeout to retry the notification if the DO misses it. 493 mNextAllowedRetrievalTimeMillis = SystemClock.elapsedRealtime() 494 + BROADCAST_RETRY_INTERVAL_MS; 495 } 496 } finally { 497 mLock.unlock(); 498 } 499 if (allowRetrievalAndNotifyDOOrPO) { 500 Slog.i(TAG, "notify DO or PO"); 501 mService.sendDeviceOwnerOrProfileOwnerCommand( 502 DeviceAdminReceiver.ACTION_SECURITY_LOGS_AVAILABLE, null, mEnabledUser); 503 } 504 } 505 506 /** 507 * Forces the logs to be fetched and made available. Returns 0 on success or timeout to wait 508 * before attempting in milliseconds. 509 */ forceLogs()510 public long forceLogs() { 511 final long nowNanos = System.nanoTime(); 512 // We only synchronize with another calls to this function, not with the fetching thread. 513 synchronized (mForceSemaphore) { 514 final long toWaitNanos = mLastForceNanos + FORCE_FETCH_THROTTLE_NS - nowNanos; 515 if (toWaitNanos > 0) { 516 return NANOSECONDS.toMillis(toWaitNanos) + 1; // Round up. 517 } 518 mLastForceNanos = nowNanos; 519 // There is a race condition with the fetching thread below, but if the last permit is 520 // acquired just after we do the check, logs are forced anyway and that's what we need. 521 if (mForceSemaphore.availablePermits() == 0) { 522 mForceSemaphore.release(); 523 } 524 return 0; 525 } 526 } 527 } 528