1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server.devicepolicy;
18 
19 import static java.util.concurrent.TimeUnit.MILLISECONDS;
20 import static java.util.concurrent.TimeUnit.NANOSECONDS;
21 
22 import android.app.admin.DeviceAdminReceiver;
23 import android.app.admin.SecurityLog;
24 import android.app.admin.SecurityLog.SecurityEvent;
25 import android.os.Process;
26 import android.os.SystemClock;
27 import android.util.Log;
28 import android.util.Slog;
29 
30 import com.android.internal.annotations.GuardedBy;
31 import com.android.internal.annotations.VisibleForTesting;
32 
33 import java.io.IOException;
34 import java.util.ArrayList;
35 import java.util.List;
36 import java.util.concurrent.Semaphore;
37 import java.util.concurrent.TimeUnit;
38 import java.util.concurrent.locks.Lock;
39 import java.util.concurrent.locks.ReentrantLock;
40 
41 /**
42  * A class managing access to the security logs. It maintains an internal buffer of pending
43  * logs to be retrieved by the device owner. The logs are retrieved from the logd daemon via
44  * JNI binding, and kept until device owner has retrieved to prevent loss of logs. Access to
45  * the logs from the device owner is rate-limited, and device owner is notified when the logs
46  * are ready to be retrieved. This happens every two hours, or when our internal buffer is
47  * larger than a certain threshold.
48  */
49 class SecurityLogMonitor implements Runnable {
50     private final DevicePolicyManagerService mService;
51 
52     private final Lock mLock = new ReentrantLock();
53 
54     private int mEnabledUser;
55 
SecurityLogMonitor(DevicePolicyManagerService service)56     SecurityLogMonitor(DevicePolicyManagerService service) {
57         this(service, 0 /* id */);
58     }
59 
60     @VisibleForTesting
SecurityLogMonitor(DevicePolicyManagerService service, long id)61     SecurityLogMonitor(DevicePolicyManagerService service, long id) {
62         mService = service;
63         mId = id;
64         mLastForceNanos = System.nanoTime();
65     }
66 
67     private static final boolean DEBUG = false;  // STOPSHIP if true.
68     private static final String TAG = "SecurityLogMonitor";
69     /**
70      * Each log entry can hold up to 4K bytes (but as of {@link android.os.Build.VERSION_CODES#N}
71      * it should be less than 100 bytes), setting 1024 entries as the threshold to notify Device
72      * Owner.
73      */
74     @VisibleForTesting static final int BUFFER_ENTRIES_NOTIFICATION_LEVEL = 1024;
75     /**
76      * The maximum number of entries we should store before dropping earlier logs, to limit the
77      * memory usage.
78      */
79     private static final int BUFFER_ENTRIES_MAXIMUM_LEVEL = BUFFER_ENTRIES_NOTIFICATION_LEVEL * 10;
80     /**
81      * Critical log buffer level, 90% of capacity.
82      */
83     private static final int BUFFER_ENTRIES_CRITICAL_LEVEL = BUFFER_ENTRIES_MAXIMUM_LEVEL * 9 / 10;
84     /**
85      * How often should Device Owner be notified under normal circumstances.
86      */
87     private static final long RATE_LIMIT_INTERVAL_MS = TimeUnit.HOURS.toMillis(2);
88     /**
89      * How often to retry the notification about available logs if it is ignored or missed by DO.
90      */
91     private static final long BROADCAST_RETRY_INTERVAL_MS = TimeUnit.MINUTES.toMillis(30);
92     /**
93      * Internally how often should the monitor poll the security logs from logd.
94      */
95     private static final long POLLING_INTERVAL_MS = TimeUnit.MINUTES.toMillis(1);
96     /**
97      * Overlap between two subsequent log requests, required to avoid losing out of order events.
98      */
99     private static final long OVERLAP_NS = TimeUnit.SECONDS.toNanos(3);
100 
101     /** Minimum time between forced fetch attempts. */
102     private static final long FORCE_FETCH_THROTTLE_NS = TimeUnit.SECONDS.toNanos(10);
103 
104     @GuardedBy("mLock")
105     private Thread mMonitorThread = null;
106     @GuardedBy("mLock")
107     private ArrayList<SecurityEvent> mPendingLogs = new ArrayList<>();
108     @GuardedBy("mLock")
109     private long mId;
110     @GuardedBy("mLock")
111     private boolean mAllowedToRetrieve = false;
112 
113     // Whether we have already logged the fact that log buffer reached 90%, to avoid dupes.
114     @GuardedBy("mLock")
115     private boolean mCriticalLevelLogged = false;
116 
117     /**
118      * Last events fetched from log to check for overlap between batches. We can leave it empty if
119      * we are sure there will be no overlap anymore, e.g. when we get empty batch.
120      */
121     private final ArrayList<SecurityEvent> mLastEvents = new ArrayList<>();
122     /** Timestamp of the very last event, -1 means request from the beginning of time. */
123     private long mLastEventNanos = -1;
124 
125     /**
126      * When DO will be allowed to retrieve the log, in milliseconds since boot (as per
127      * {@link SystemClock#elapsedRealtime()}). After that it will mark the time to retry broadcast.
128      */
129     @GuardedBy("mLock")
130     private long mNextAllowedRetrievalTimeMillis = -1;
131     @GuardedBy("mLock")
132     private boolean mPaused = false;
133 
134     /** Semaphore used to force log fetching on request from adb. */
135     private final Semaphore mForceSemaphore = new Semaphore(0 /* permits */);
136 
137     /** The last timestamp when force fetch was used, to prevent abuse. */
138     @GuardedBy("mForceSemaphore")
139     private long mLastForceNanos = 0;
140 
141     /**
142      * Start security logging.
143      *
144      * @param enabledUser which user logging is enabled on, or USER_ALL to enable logging for all
145      *     users on the device.
146      */
start(int enabledUser)147     void start(int enabledUser) {
148         Slog.i(TAG, "Starting security logging for user " + enabledUser);
149         mEnabledUser = enabledUser;
150         SecurityLog.writeEvent(SecurityLog.TAG_LOGGING_STARTED);
151         mLock.lock();
152         try {
153             if (mMonitorThread == null) {
154                 mPendingLogs = new ArrayList<>();
155                 mCriticalLevelLogged = false;
156                 mId = 0;
157                 mAllowedToRetrieve = false;
158                 mNextAllowedRetrievalTimeMillis = -1;
159                 mPaused = false;
160 
161                 mMonitorThread = new Thread(this);
162                 mMonitorThread.start();
163             }
164         } finally {
165             mLock.unlock();
166         }
167     }
168 
stop()169     void stop() {
170         Slog.i(TAG, "Stopping security logging.");
171         SecurityLog.writeEvent(SecurityLog.TAG_LOGGING_STOPPED);
172         mLock.lock();
173         try {
174             if (mMonitorThread != null) {
175                 mMonitorThread.interrupt();
176                 try {
177                     mMonitorThread.join(TimeUnit.SECONDS.toMillis(5));
178                 } catch (InterruptedException e) {
179                     Log.e(TAG, "Interrupted while waiting for thread to stop", e);
180                 }
181                 // Reset state and clear buffer
182                 mPendingLogs = new ArrayList<>();
183                 mId = 0;
184                 mAllowedToRetrieve = false;
185                 mNextAllowedRetrievalTimeMillis = -1;
186                 mPaused = false;
187                 mMonitorThread = null;
188             }
189         } finally {
190             mLock.unlock();
191         }
192     }
193 
194     /**
195      * If logs are being collected, keep collecting them but stop notifying the device owner that
196      * new logs are available (since they cannot be retrieved).
197      */
pause()198     void pause() {
199         Slog.i(TAG, "Paused.");
200 
201         mLock.lock();
202         mPaused = true;
203         mAllowedToRetrieve = false;
204         mLock.unlock();
205     }
206 
207     /**
208      * If logs are being collected, start notifying the device owner when logs are ready to be
209      * retrieved again (if it was paused).
210      * <p>If logging is enabled and there are logs ready to be retrieved, this method will attempt
211      * to notify the device owner. Therefore calling identity should be cleared before calling it
212      * (in case the method is called from a user other than the DO's user).
213      */
resume()214     void resume() {
215         mLock.lock();
216         try {
217             if (!mPaused) {
218                 Log.d(TAG, "Attempted to resume, but logging is not paused.");
219                 return;
220             }
221             mPaused = false;
222             mAllowedToRetrieve = false;
223         } finally {
224             mLock.unlock();
225         }
226 
227         Slog.i(TAG, "Resumed.");
228         try {
229             notifyDeviceOwnerOrProfileOwnerIfNeeded(false /* force */);
230         } catch (InterruptedException e) {
231             Log.w(TAG, "Thread interrupted.", e);
232         }
233     }
234 
235     /**
236      * Discard all collected logs.
237      */
discardLogs()238     void discardLogs() {
239         mLock.lock();
240         mAllowedToRetrieve = false;
241         mPendingLogs = new ArrayList<>();
242         mCriticalLevelLogged = false;
243         mLock.unlock();
244         Slog.i(TAG, "Discarded all logs.");
245     }
246 
247     /**
248      * Returns the new batch of logs since the last call to this method. Returns null if
249      * rate limit is exceeded.
250      */
retrieveLogs()251     List<SecurityEvent> retrieveLogs() {
252         mLock.lock();
253         try {
254             if (mAllowedToRetrieve) {
255                 mAllowedToRetrieve = false;
256                 mNextAllowedRetrievalTimeMillis = SystemClock.elapsedRealtime()
257                         + RATE_LIMIT_INTERVAL_MS;
258                 List<SecurityEvent> result = mPendingLogs;
259                 mPendingLogs = new ArrayList<>();
260                 mCriticalLevelLogged = false;
261                 return result;
262             } else {
263                 return null;
264             }
265         } finally {
266             mLock.unlock();
267         }
268     }
269 
270     /**
271      * Requests the next (or the first) batch of events from the log with appropriate timestamp.
272      */
getNextBatch(ArrayList<SecurityEvent> newLogs)273     private void getNextBatch(ArrayList<SecurityEvent> newLogs) throws IOException {
274         if (mLastEventNanos < 0) {
275             // Non-blocking read that returns all logs immediately.
276             if (DEBUG) Slog.d(TAG, "SecurityLog.readEvents");
277             SecurityLog.readEvents(newLogs);
278         } else {
279             // If we have last events from the previous batch, request log events with time overlap
280             // with previously retrieved messages to avoid losing events due to reordering in logd.
281             final long startNanos = mLastEvents.isEmpty()
282                     ? mLastEventNanos : Math.max(0, mLastEventNanos - OVERLAP_NS);
283             if (DEBUG) Slog.d(TAG, "SecurityLog.readEventsSince: " + startNanos);
284             // Non-blocking read that returns all logs with timestamps >= startNanos immediately.
285             SecurityLog.readEventsSince(startNanos, newLogs);
286         }
287 
288         // Sometimes events may be reordered in logd due to simultaneous readers and writers. In
289         // this case, we have to sort it to make overlap checking work. This is very unlikely.
290         for (int i = 0; i < newLogs.size() - 1; i++) {
291             if (newLogs.get(i).getTimeNanos() > newLogs.get(i+1).getTimeNanos()) {
292                 if (DEBUG) Slog.d(TAG, "Got out of order events, sorting.");
293                 // Sort using comparator that compares timestamps.
294                 newLogs.sort((e1, e2) -> Long.signum(e1.getTimeNanos() - e2.getTimeNanos()));
295                 break;
296             }
297         }
298         SecurityLog.redactEvents(newLogs, mEnabledUser);
299         if (DEBUG) Slog.d(TAG, "Got " + newLogs.size() + " new events.");
300     }
301 
302     /**
303      * Save the last events for overlap checking with the next batch.
304      */
saveLastEvents(ArrayList<SecurityEvent> newLogs)305     private void saveLastEvents(ArrayList<SecurityEvent> newLogs) {
306         mLastEvents.clear();
307         if (newLogs.isEmpty()) {
308             // This can happen if no events were logged yet or the buffer got cleared. In this case
309             // we aren't going to have any overlap next time, leave mLastEvents events empty.
310             return;
311         }
312 
313         // Save the last timestamp.
314         mLastEventNanos = newLogs.get(newLogs.size() - 1).getTimeNanos();
315         // Position of the earliest event that has to be saved. Start from the penultimate event,
316         // going backward.
317         int pos = newLogs.size() - 2;
318         while (pos >= 0 && mLastEventNanos - newLogs.get(pos).getTimeNanos() < OVERLAP_NS) {
319             pos--;
320         }
321         // We either run past the start of the list or encountered an event that is too old to keep.
322         pos++;
323         mLastEvents.addAll(newLogs.subList(pos, newLogs.size()));
324         if (DEBUG) Slog.d(TAG, mLastEvents.size() + " events saved for overlap check");
325     }
326 
327     /**
328      * Merges a new batch into already fetched logs and deals with overlapping and out of order
329      * events.
330      */
331     @GuardedBy("mLock")
mergeBatchLocked(final ArrayList<SecurityEvent> newLogs)332     private void mergeBatchLocked(final ArrayList<SecurityEvent> newLogs) {
333         // Reserve capacity so that copying doesn't occur.
334         mPendingLogs.ensureCapacity(mPendingLogs.size() + newLogs.size());
335         // Run through the first events of the batch to check if there is an overlap with previous
336         // batch and if so, skip overlapping events. Events are sorted by timestamp, so we can
337         // compare it in linear time by advancing two pointers, one for each batch.
338         int curPos = 0;
339         int lastPos = 0;
340         // For the first batch mLastEvents will be empty, so no iterations will happen.
341         while (lastPos < mLastEvents.size() && curPos < newLogs.size()) {
342             final SecurityEvent curEvent = newLogs.get(curPos);
343             final long currentNanos = curEvent.getTimeNanos();
344             if (currentNanos > mLastEventNanos) {
345                 // We got past the last event of the last batch, no overlap possible anymore.
346                 break;
347             }
348             final SecurityEvent lastEvent = mLastEvents.get(lastPos);
349             final long lastNanos = lastEvent.getTimeNanos();
350             if (lastNanos > currentNanos) {
351                 // New event older than the last we've seen so far, must be due to reordering.
352                 if (DEBUG) Slog.d(TAG, "New event in the overlap: " + currentNanos);
353                 assignLogId(curEvent);
354                 mPendingLogs.add(curEvent);
355                 curPos++;
356             } else if (lastNanos < currentNanos) {
357                 if (DEBUG) Slog.d(TAG, "Event disappeared from the overlap: " + lastNanos);
358                 lastPos++;
359             } else {
360                 // Two events have the same timestamp, check if they are the same.
361                 if (lastEvent.eventEquals(curEvent)) {
362                     // Actual overlap, just skip the event.
363                     if (DEBUG) Slog.d(TAG, "Skipped dup event with timestamp: " + lastNanos);
364                 } else {
365                     // Wow, what a coincidence, or probably the clock is too coarse.
366                     assignLogId(curEvent);
367                     mPendingLogs.add(curEvent);
368                     if (DEBUG) Slog.d(TAG, "Event timestamp collision: " + lastNanos);
369                 }
370                 lastPos++;
371                 curPos++;
372             }
373         }
374         // Assign an id to the new logs, after the overlap with mLastEvents.
375         List<SecurityEvent> idLogs = newLogs.subList(curPos, newLogs.size());
376         for (SecurityEvent event : idLogs) {
377             assignLogId(event);
378         }
379         // Save the rest of the new batch.
380         mPendingLogs.addAll(idLogs);
381 
382         checkCriticalLevel();
383 
384         if (mPendingLogs.size() > BUFFER_ENTRIES_MAXIMUM_LEVEL) {
385             // Truncate buffer down to half of BUFFER_ENTRIES_MAXIMUM_LEVEL.
386             mPendingLogs = new ArrayList<>(mPendingLogs.subList(
387                     mPendingLogs.size() - (BUFFER_ENTRIES_MAXIMUM_LEVEL / 2),
388                     mPendingLogs.size()));
389             mCriticalLevelLogged = false;
390             Slog.i(TAG, "Pending logs buffer full. Discarding old logs.");
391         }
392         if (DEBUG) {
393             if (mPendingLogs.size() > 0) {
394                 Slog.d(TAG, mPendingLogs.size() + " pending events in the buffer after merging,"
395                         + " with ids " + mPendingLogs.get(0).getId()
396                         + " to " + mPendingLogs.get(mPendingLogs.size() - 1).getId());
397             } else {
398                 Slog.d(TAG, "0 pending events in the buffer after merging");
399             }
400         }
401     }
402 
403     @GuardedBy("mLock")
checkCriticalLevel()404     private void checkCriticalLevel() {
405         if (!SecurityLog.isLoggingEnabled()) {
406             return;
407         }
408 
409         if (mPendingLogs.size() >= BUFFER_ENTRIES_CRITICAL_LEVEL) {
410             if (!mCriticalLevelLogged) {
411                 mCriticalLevelLogged = true;
412                 SecurityLog.writeEvent(SecurityLog.TAG_LOG_BUFFER_SIZE_CRITICAL);
413             }
414         }
415     }
416 
417     @GuardedBy("mLock")
assignLogId(SecurityEvent event)418     private void assignLogId(SecurityEvent event) {
419         event.setId(mId);
420         if (mId == Long.MAX_VALUE) {
421             Slog.i(TAG, "Reached maximum id value; wrapping around.");
422             mId = 0;
423         } else {
424             mId++;
425         }
426     }
427 
428     @Override
run()429     public void run() {
430         Process.setThreadPriority(Process.THREAD_PRIORITY_BACKGROUND);
431 
432         final ArrayList<SecurityEvent> newLogs = new ArrayList<>();
433         while (!Thread.currentThread().isInterrupted()) {
434             try {
435                 final boolean force = mForceSemaphore.tryAcquire(POLLING_INTERVAL_MS, MILLISECONDS);
436 
437                 getNextBatch(newLogs);
438 
439                 mLock.lockInterruptibly();
440                 try {
441                     mergeBatchLocked(newLogs);
442                 } finally {
443                     mLock.unlock();
444                 }
445 
446                 saveLastEvents(newLogs);
447                 newLogs.clear();
448                 notifyDeviceOwnerOrProfileOwnerIfNeeded(force);
449             } catch (IOException e) {
450                 Log.e(TAG, "Failed to read security log", e);
451             } catch (InterruptedException e) {
452                 Log.i(TAG, "Thread interrupted, exiting.", e);
453                 // We are asked to stop.
454                 break;
455             }
456         }
457 
458         // Discard previous batch info.
459         mLastEvents.clear();
460         if (mLastEventNanos != -1) {
461             // Make sure we don't read old events if logging is re-enabled. Since mLastEvents is
462             // empty, the next request will be done without overlap, so it is enough to add 1 ns.
463             mLastEventNanos += 1;
464         }
465 
466         Slog.i(TAG, "MonitorThread exit.");
467     }
468 
notifyDeviceOwnerOrProfileOwnerIfNeeded(boolean force)469     private void notifyDeviceOwnerOrProfileOwnerIfNeeded(boolean force)
470             throws InterruptedException {
471         boolean allowRetrievalAndNotifyDOOrPO = false;
472         mLock.lockInterruptibly();
473         try {
474             if (mPaused) {
475                 return;
476             }
477             final int logSize = mPendingLogs.size();
478             if (logSize >= BUFFER_ENTRIES_NOTIFICATION_LEVEL || (force && logSize > 0)) {
479                 // Allow DO to retrieve logs if too many pending logs or if forced.
480                 if (!mAllowedToRetrieve) {
481                     allowRetrievalAndNotifyDOOrPO = true;
482                 }
483                 if (DEBUG) Slog.d(TAG, "Number of log entries over threshold: " + logSize);
484             }
485             if (logSize > 0 && SystemClock.elapsedRealtime() >= mNextAllowedRetrievalTimeMillis) {
486                 // Rate limit reset
487                 allowRetrievalAndNotifyDOOrPO = true;
488                 if (DEBUG) Slog.d(TAG, "Timeout reached");
489             }
490             if (allowRetrievalAndNotifyDOOrPO) {
491                 mAllowedToRetrieve = true;
492                 // Set the timeout to retry the notification if the DO misses it.
493                 mNextAllowedRetrievalTimeMillis = SystemClock.elapsedRealtime()
494                         + BROADCAST_RETRY_INTERVAL_MS;
495             }
496         } finally {
497             mLock.unlock();
498         }
499         if (allowRetrievalAndNotifyDOOrPO) {
500             Slog.i(TAG, "notify DO or PO");
501             mService.sendDeviceOwnerOrProfileOwnerCommand(
502                     DeviceAdminReceiver.ACTION_SECURITY_LOGS_AVAILABLE, null, mEnabledUser);
503         }
504     }
505 
506     /**
507      * Forces the logs to be fetched and made available. Returns 0 on success or timeout to wait
508      * before attempting in milliseconds.
509      */
forceLogs()510     public long forceLogs() {
511         final long nowNanos = System.nanoTime();
512         // We only synchronize with another calls to this function, not with the fetching thread.
513         synchronized (mForceSemaphore) {
514             final long toWaitNanos = mLastForceNanos + FORCE_FETCH_THROTTLE_NS - nowNanos;
515             if (toWaitNanos > 0) {
516                 return NANOSECONDS.toMillis(toWaitNanos) + 1; // Round up.
517             }
518             mLastForceNanos = nowNanos;
519             // There is a race condition with the fetching thread below, but if the last permit is
520             // acquired just after we do the check, logs are forced anyway and that's what we need.
521             if (mForceSemaphore.availablePermits() == 0) {
522                 mForceSemaphore.release();
523             }
524             return 0;
525         }
526     }
527 }
528