1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.os;
18 
19 import android.util.Log;
20 import android.util.proto.ProtoOutputStream;
21 
22 import java.util.Arrays;
23 
24 /**
25  * A simple pattern matcher, which is safe to use on untrusted data: it does
26  * not provide full reg-exp support, only simple globbing that can not be
27  * used maliciously.
28  */
29 public class PatternMatcher implements Parcelable {
30     /**
31      * Pattern type: the given pattern must exactly match the string it is
32      * tested against.
33      */
34     public static final int PATTERN_LITERAL = 0;
35 
36     /**
37      * Pattern type: the given pattern must match the
38      * beginning of the string it is tested against.
39      */
40     public static final int PATTERN_PREFIX = 1;
41 
42     /**
43      * Pattern type: the given pattern is interpreted with a
44      * simple glob syntax for matching against the string it is tested against.
45      * In this syntax, you can use the '*' character to match against zero or
46      * more occurrences of the character immediately before.  If the
47      * character before it is '.' it will match any character.  The character
48      * '\' can be used as an escape.  This essentially provides only the '*'
49      * wildcard part of a normal regexp.
50      */
51     public static final int PATTERN_SIMPLE_GLOB = 2;
52 
53     /**
54      * Pattern type: the given pattern is interpreted with a regular
55      * expression-like syntax for matching against the string it is tested
56      * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
57      * with full support for character ranges and the not ({@code ^}) modifier.
58      * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
59      * for one-or-more and full range ({@code {...}}) support. This is a simple
60      * evaluation implementation in which matching is done against the pattern in
61      * real time with no backtracking support.
62      */
63     public static final int PATTERN_ADVANCED_GLOB = 3;
64 
65     /**
66      * Pattern type: the given pattern must match the
67      * end of the string it is tested against.
68      */
69     public static final int PATTERN_SUFFIX = 4;
70 
71     // token types for advanced matching
72     private static final int TOKEN_TYPE_LITERAL = 0;
73     private static final int TOKEN_TYPE_ANY = 1;
74     private static final int TOKEN_TYPE_SET = 2;
75     private static final int TOKEN_TYPE_INVERSE_SET = 3;
76 
77     // Return for no match
78     private static final int NO_MATCH = -1;
79 
80     private static final String TAG = "PatternMatcher";
81 
82     // Parsed placeholders for advanced patterns
83     private static final int PARSED_TOKEN_CHAR_SET_START = -1;
84     private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
85     private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
86     private static final int PARSED_TOKEN_CHAR_ANY = -4;
87     private static final int PARSED_MODIFIER_RANGE_START = -5;
88     private static final int PARSED_MODIFIER_RANGE_STOP = -6;
89     private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
90     private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
91 
92     private final String mPattern;
93     private final int mType;
94     private final int[] mParsedPattern;
95 
96 
97     private static final int MAX_PATTERN_STORAGE = 2048;
98     // workspace to use for building a parsed advanced pattern;
99     private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
100 
PatternMatcher(String pattern, int type)101     public PatternMatcher(String pattern, int type) {
102         mPattern = pattern;
103         mType = type;
104         if (mType == PATTERN_ADVANCED_GLOB) {
105             mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
106         } else {
107             mParsedPattern = null;
108         }
109     }
110 
getPath()111     public final String getPath() {
112         return mPattern;
113     }
114 
getType()115     public final int getType() {
116         return mType;
117     }
118 
match(String str)119     public boolean match(String str) {
120         return matchPattern(str, mPattern, mParsedPattern, mType);
121     }
122 
toString()123     public String toString() {
124         String type = "? ";
125         switch (mType) {
126             case PATTERN_LITERAL:
127                 type = "LITERAL: ";
128                 break;
129             case PATTERN_PREFIX:
130                 type = "PREFIX: ";
131                 break;
132             case PATTERN_SIMPLE_GLOB:
133                 type = "GLOB: ";
134                 break;
135             case PATTERN_ADVANCED_GLOB:
136                 type = "ADVANCED: ";
137                 break;
138             case PATTERN_SUFFIX:
139                 type = "SUFFIX: ";
140                 break;
141         }
142         return "PatternMatcher{" + type + mPattern + "}";
143     }
144 
145     /** @hide */
dumpDebug(ProtoOutputStream proto, long fieldId)146     public void dumpDebug(ProtoOutputStream proto, long fieldId) {
147         long token = proto.start(fieldId);
148         proto.write(PatternMatcherProto.PATTERN, mPattern);
149         proto.write(PatternMatcherProto.TYPE, mType);
150         // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to
151         // match the current data structure.
152         proto.end(token);
153     }
154 
155     /**
156      * Perform a check on the matcher for the pattern type of {@link #PATTERN_ADVANCED_GLOB}.
157      * Return true if it passed.
158      * @hide
159      */
check()160     public boolean check() {
161         try {
162             if (mType == PATTERN_ADVANCED_GLOB) {
163                 return Arrays.equals(mParsedPattern, parseAndVerifyAdvancedPattern(mPattern));
164             }
165         } catch (IllegalArgumentException e) {
166             Log.w(TAG, "Failed to verify advanced pattern: " + e.getMessage());
167             return false;
168         }
169         return true;
170     }
171 
describeContents()172     public int describeContents() {
173         return 0;
174     }
175 
writeToParcel(Parcel dest, int flags)176     public void writeToParcel(Parcel dest, int flags) {
177         dest.writeString(mPattern);
178         dest.writeInt(mType);
179         dest.writeIntArray(mParsedPattern);
180     }
181 
PatternMatcher(Parcel src)182     public PatternMatcher(Parcel src) {
183         mPattern = src.readString();
184         mType = src.readInt();
185         mParsedPattern = src.createIntArray();
186     }
187 
188     public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR
189             = new Parcelable.Creator<PatternMatcher>() {
190         public PatternMatcher createFromParcel(Parcel source) {
191             return new PatternMatcher(source);
192         }
193 
194         public PatternMatcher[] newArray(int size) {
195             return new PatternMatcher[size];
196         }
197     };
198 
matchPattern(String match, String pattern, int[] parsedPattern, int type)199     static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
200         if (match == null) return false;
201         if (type == PATTERN_LITERAL) {
202             return pattern.equals(match);
203         } if (type == PATTERN_PREFIX) {
204             return match.startsWith(pattern);
205         } else if (type == PATTERN_SIMPLE_GLOB) {
206             return matchGlobPattern(pattern, match);
207         } else if (type == PATTERN_ADVANCED_GLOB) {
208             return matchAdvancedPattern(parsedPattern, match);
209         } else if (type == PATTERN_SUFFIX) {
210             return match.endsWith(pattern);
211         }
212         return false;
213     }
214 
matchGlobPattern(String pattern, String match)215     static boolean matchGlobPattern(String pattern, String match) {
216         final int NP = pattern.length();
217         if (NP <= 0) {
218             return match.length() <= 0;
219         }
220         final int NM = match.length();
221         int ip = 0, im = 0;
222         char nextChar = pattern.charAt(0);
223         while ((ip<NP) && (im<NM)) {
224             char c = nextChar;
225             ip++;
226             nextChar = ip < NP ? pattern.charAt(ip) : 0;
227             final boolean escaped = (c == '\\');
228             if (escaped) {
229                 c = nextChar;
230                 ip++;
231                 nextChar = ip < NP ? pattern.charAt(ip) : 0;
232             }
233             if (nextChar == '*') {
234                 if (!escaped && c == '.') {
235                     if (ip >= (NP-1)) {
236                         // at the end with a pattern match, so
237                         // all is good without checking!
238                         return true;
239                     }
240                     ip++;
241                     nextChar = pattern.charAt(ip);
242                     // Consume everything until the next character in the
243                     // pattern is found.
244                     if (nextChar == '\\') {
245                         ip++;
246                         nextChar = ip < NP ? pattern.charAt(ip) : 0;
247                     }
248                     do {
249                         if (match.charAt(im) == nextChar) {
250                             break;
251                         }
252                         im++;
253                     } while (im < NM);
254                     if (im == NM) {
255                         // Whoops, the next character in the pattern didn't
256                         // exist in the match.
257                         return false;
258                     }
259                     ip++;
260                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
261                     im++;
262                 } else {
263                     // Consume only characters matching the one before '*'.
264                     do {
265                         if (match.charAt(im) != c) {
266                             break;
267                         }
268                         im++;
269                     } while (im < NM);
270                     ip++;
271                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
272                 }
273             } else {
274                 if (c != '.' && match.charAt(im) != c) return false;
275                 im++;
276             }
277         }
278 
279         if (ip >= NP && im >= NM) {
280             // Reached the end of both strings, all is good!
281             return true;
282         }
283 
284         // One last check: we may have finished the match string, but still
285         // have a '.*' at the end of the pattern, which should still count
286         // as a match.
287         if (ip == NP-2 && pattern.charAt(ip) == '.'
288             && pattern.charAt(ip+1) == '*') {
289             return true;
290         }
291 
292         return false;
293     }
294 
295     /**
296      * Parses the advanced pattern and returns an integer array representation of it. The integer
297      * array treats each field as a character if positive and a unique token placeholder if
298      * negative. This method will throw on any pattern structure violations.
299      */
300     synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
301         int ip = 0;
302         final int LP = pattern.length();
303 
304         int it = 0;
305 
306         boolean inSet = false;
307         boolean inRange = false;
308         boolean inCharClass = false;
309 
310         boolean addToParsedPattern;
311 
312         while (ip < LP) {
313             if (it > MAX_PATTERN_STORAGE - 3) {
314                 throw new IllegalArgumentException("Pattern is too large!");
315             }
316 
317             char c = pattern.charAt(ip);
318             addToParsedPattern = false;
319 
320             switch (c) {
321                 case '[':
322                     if (inSet) {
323                         addToParsedPattern = true; // treat as literal or char class in set
324                     } else {
325                         if (pattern.charAt(ip + 1) == '^') {
326                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
327                             ip++; // skip over the '^'
328                         } else {
329                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
330                         }
331                         ip++; // move to the next pattern char
332                         inSet = true;
333                         continue;
334                     }
335                     break;
336                 case ']':
337                     if (!inSet) {
338                         addToParsedPattern = true; // treat as literal outside of set
339                     } else {
340                         int parsedToken = sParsedPatternScratch[it - 1];
341                         if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
342                             parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
343                             throw new IllegalArgumentException(
344                                     "You must define characters in a set.");
345                         }
346                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
347                         inSet = false;
348                         inCharClass = false;
349                     }
350                     break;
351                 case '{':
352                     if (!inSet) {
353                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
354                             throw new IllegalArgumentException("Modifier must follow a token.");
355                         }
356                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
357                         ip++;
358                         inRange = true;
359                     }
360                     break;
361                 case '}':
362                     if (inRange) { // only terminate the range if we're currently in one
363                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
364                         inRange = false;
365                     }
366                     break;
367                 case '*':
368                     if (!inSet) {
369                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
370                             throw new IllegalArgumentException("Modifier must follow a token.");
371                         }
372                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
373                     }
374                     break;
375                 case '+':
376                     if (!inSet) {
377                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
378                             throw new IllegalArgumentException("Modifier must follow a token.");
379                         }
380                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
381                     }
382                     break;
383                 case '.':
384                     if (!inSet) {
385                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
386                     }
387                     break;
388                 case '\\': // escape
389                     if (ip + 1 >= LP) {
390                         throw new IllegalArgumentException("Escape found at end of pattern!");
391                     }
392                     c = pattern.charAt(++ip);
393                     addToParsedPattern = true;
394                     break;
395                 default:
396                     addToParsedPattern = true;
397                     break;
398             }
399             if (inSet) {
400                 if (inCharClass) {
401                     sParsedPatternScratch[it++] = c;
402                     inCharClass = false;
403                 } else {
404                     // look forward for character class
405                     if (ip + 2 < LP
406                             && pattern.charAt(ip + 1) == '-'
407                             && pattern.charAt(ip + 2) != ']') {
408                         inCharClass = true;
409                         sParsedPatternScratch[it++] = c; // set first token as lower end of range
410                         ip++; // advance past dash
411                     } else { // literal
412                         sParsedPatternScratch[it++] = c; // set first token as literal
413                         sParsedPatternScratch[it++] = c; // set second set as literal
414                     }
415                 }
416             } else if (inRange) {
417                 int endOfSet = pattern.indexOf('}', ip);
418                 if (endOfSet < 0) {
419                     throw new IllegalArgumentException("Range not ended with '}'");
420                 }
421                 String rangeString = pattern.substring(ip, endOfSet);
422                 int commaIndex = rangeString.indexOf(',');
423                 try {
424                     final int rangeMin;
425                     final int rangeMax;
426                     if (commaIndex < 0) {
427                         int parsedRange = Integer.parseInt(rangeString);
428                         rangeMin = rangeMax = parsedRange;
429                     } else {
430                         rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
431                         if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
432                             rangeMax = Integer.MAX_VALUE;
433                         } else {
434                             rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
435                         }
436                     }
437                     if (rangeMin > rangeMax) {
438                         throw new IllegalArgumentException(
439                             "Range quantifier minimum is greater than maximum");
440                     }
441                     sParsedPatternScratch[it++] = rangeMin;
442                     sParsedPatternScratch[it++] = rangeMax;
443                 } catch (NumberFormatException e) {
444                     throw new IllegalArgumentException("Range number format incorrect", e);
445                 }
446                 ip = endOfSet;
447                 continue; // don't increment ip
448             } else if (addToParsedPattern) {
449                 sParsedPatternScratch[it++] = c;
450             }
451             ip++;
452         }
453         if (inSet) {
454             throw new IllegalArgumentException("Set was not terminated!");
455         }
456         return Arrays.copyOf(sParsedPatternScratch, it);
457     }
458 
459     private static boolean isParsedModifier(int parsedChar) {
460         return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
461                 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
462                 parsedChar == PARSED_MODIFIER_RANGE_STOP ||
463                 parsedChar == PARSED_MODIFIER_RANGE_START;
464     }
465 
466     static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
467 
468         // create indexes
469         int ip = 0, im = 0;
470 
471         // one-time length check
472         final int LP = parsedPattern.length, LM = match.length();
473 
474         // The current character being analyzed in the pattern
475         int patternChar;
476 
477         int tokenType;
478 
479         int charSetStart = 0, charSetEnd = 0;
480 
481         while (ip < LP) { // we still have content in the pattern
482 
483             patternChar = parsedPattern[ip];
484             // get the match type of the next verb
485 
486             switch (patternChar) {
487                 case PARSED_TOKEN_CHAR_ANY:
488                     tokenType = TOKEN_TYPE_ANY;
489                     ip++;
490                     break;
491                 case PARSED_TOKEN_CHAR_SET_START:
492                 case PARSED_TOKEN_CHAR_SET_INVERSE_START:
493                     tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
494                             ? TOKEN_TYPE_SET
495                             : TOKEN_TYPE_INVERSE_SET;
496                     charSetStart = ip + 1; // start from the char after the set start
497                     while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
498                     charSetEnd = ip - 1; // we're on the set stop, end is the previous
499                     ip++; // move the pointer to the next pattern entry
500                     break;
501                 default:
502                     charSetStart = ip;
503                     tokenType = TOKEN_TYPE_LITERAL;
504                     ip++;
505                     break;
506             }
507 
508             final int minRepetition;
509             final int maxRepetition;
510 
511             // look for a match length modifier
512             if (ip >= LP) {
513                 minRepetition = maxRepetition = 1;
514             } else {
515                 patternChar = parsedPattern[ip];
516                 switch (patternChar) {
517                     case PARSED_MODIFIER_ZERO_OR_MORE:
518                         minRepetition = 0;
519                         maxRepetition = Integer.MAX_VALUE;
520                         ip++;
521                         break;
522                     case PARSED_MODIFIER_ONE_OR_MORE:
523                         minRepetition = 1;
524                         maxRepetition = Integer.MAX_VALUE;
525                         ip++;
526                         break;
527                     case PARSED_MODIFIER_RANGE_START:
528                         minRepetition = parsedPattern[++ip];
529                         maxRepetition = parsedPattern[++ip];
530                         ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
531                         break;
532                     default:
533                         minRepetition = maxRepetition = 1; // implied literal
534                         break;
535                 }
536             }
537             if (minRepetition > maxRepetition) {
538                 return false;
539             }
540 
541             // attempt to match as many characters as possible
542             int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
543                     parsedPattern, charSetStart, charSetEnd);
544 
545             // if we found a conflict, return false immediately
546             if (matched == NO_MATCH) {
547                 return false;
548             }
549 
550             // move the match pointer the number of characters matched
551             im += matched;
552         }
553         return ip >= LP && im >= LM; // have parsed entire string and regex
554     }
555 
556     private static int matchChars(String match, int im, final int lm, int tokenType,
557             int minRepetition, int maxRepetition, int[] parsedPattern,
558             int tokenStart, int tokenEnd) {
559         int matched = 0;
560 
561         while(matched < maxRepetition
562                 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
563                     tokenEnd)) {
564             matched++;
565         }
566 
567         return matched < minRepetition ? NO_MATCH : matched;
568     }
569 
570     private static boolean matchChar(String match, int im, final int lm, int tokenType,
571             int[] parsedPattern, int tokenStart, int tokenEnd) {
572         if (im >= lm) { // we've overrun the string, no match
573             return false;
574         }
575         switch (tokenType) {
576             case TOKEN_TYPE_ANY:
577                 return true;
578             case TOKEN_TYPE_SET:
579                 for (int i = tokenStart; i < tokenEnd; i += 2) {
580                     char matchChar = match.charAt(im);
581                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
582                         return true;
583                     }
584                 }
585                 return false;
586             case TOKEN_TYPE_INVERSE_SET:
587                 for (int i = tokenStart; i < tokenEnd; i += 2) {
588                     char matchChar = match.charAt(im);
589                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
590                         return false;
591                     }
592                 }
593                 return true;
594             case TOKEN_TYPE_LITERAL:
595                 return match.charAt(im) == parsedPattern[tokenStart];
596             default:
597                 return false;
598         }
599     }
600 }