1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.os; 18 19 import android.util.Log; 20 import android.util.proto.ProtoOutputStream; 21 22 import java.util.Arrays; 23 24 /** 25 * A simple pattern matcher, which is safe to use on untrusted data: it does 26 * not provide full reg-exp support, only simple globbing that can not be 27 * used maliciously. 28 */ 29 public class PatternMatcher implements Parcelable { 30 /** 31 * Pattern type: the given pattern must exactly match the string it is 32 * tested against. 33 */ 34 public static final int PATTERN_LITERAL = 0; 35 36 /** 37 * Pattern type: the given pattern must match the 38 * beginning of the string it is tested against. 39 */ 40 public static final int PATTERN_PREFIX = 1; 41 42 /** 43 * Pattern type: the given pattern is interpreted with a 44 * simple glob syntax for matching against the string it is tested against. 45 * In this syntax, you can use the '*' character to match against zero or 46 * more occurrences of the character immediately before. If the 47 * character before it is '.' it will match any character. The character 48 * '\' can be used as an escape. This essentially provides only the '*' 49 * wildcard part of a normal regexp. 50 */ 51 public static final int PATTERN_SIMPLE_GLOB = 2; 52 53 /** 54 * Pattern type: the given pattern is interpreted with a regular 55 * expression-like syntax for matching against the string it is tested 56 * against. Supported tokens include dot ({@code .}) and sets ({@code [...]}) 57 * with full support for character ranges and the not ({@code ^}) modifier. 58 * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +}) 59 * for one-or-more and full range ({@code {...}}) support. This is a simple 60 * evaluation implementation in which matching is done against the pattern in 61 * real time with no backtracking support. 62 */ 63 public static final int PATTERN_ADVANCED_GLOB = 3; 64 65 /** 66 * Pattern type: the given pattern must match the 67 * end of the string it is tested against. 68 */ 69 public static final int PATTERN_SUFFIX = 4; 70 71 // token types for advanced matching 72 private static final int TOKEN_TYPE_LITERAL = 0; 73 private static final int TOKEN_TYPE_ANY = 1; 74 private static final int TOKEN_TYPE_SET = 2; 75 private static final int TOKEN_TYPE_INVERSE_SET = 3; 76 77 // Return for no match 78 private static final int NO_MATCH = -1; 79 80 private static final String TAG = "PatternMatcher"; 81 82 // Parsed placeholders for advanced patterns 83 private static final int PARSED_TOKEN_CHAR_SET_START = -1; 84 private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2; 85 private static final int PARSED_TOKEN_CHAR_SET_STOP = -3; 86 private static final int PARSED_TOKEN_CHAR_ANY = -4; 87 private static final int PARSED_MODIFIER_RANGE_START = -5; 88 private static final int PARSED_MODIFIER_RANGE_STOP = -6; 89 private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7; 90 private static final int PARSED_MODIFIER_ONE_OR_MORE = -8; 91 92 private final String mPattern; 93 private final int mType; 94 private final int[] mParsedPattern; 95 96 97 private static final int MAX_PATTERN_STORAGE = 2048; 98 // workspace to use for building a parsed advanced pattern; 99 private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE]; 100 PatternMatcher(String pattern, int type)101 public PatternMatcher(String pattern, int type) { 102 mPattern = pattern; 103 mType = type; 104 if (mType == PATTERN_ADVANCED_GLOB) { 105 mParsedPattern = parseAndVerifyAdvancedPattern(pattern); 106 } else { 107 mParsedPattern = null; 108 } 109 } 110 getPath()111 public final String getPath() { 112 return mPattern; 113 } 114 getType()115 public final int getType() { 116 return mType; 117 } 118 match(String str)119 public boolean match(String str) { 120 return matchPattern(str, mPattern, mParsedPattern, mType); 121 } 122 toString()123 public String toString() { 124 String type = "? "; 125 switch (mType) { 126 case PATTERN_LITERAL: 127 type = "LITERAL: "; 128 break; 129 case PATTERN_PREFIX: 130 type = "PREFIX: "; 131 break; 132 case PATTERN_SIMPLE_GLOB: 133 type = "GLOB: "; 134 break; 135 case PATTERN_ADVANCED_GLOB: 136 type = "ADVANCED: "; 137 break; 138 case PATTERN_SUFFIX: 139 type = "SUFFIX: "; 140 break; 141 } 142 return "PatternMatcher{" + type + mPattern + "}"; 143 } 144 145 /** @hide */ dumpDebug(ProtoOutputStream proto, long fieldId)146 public void dumpDebug(ProtoOutputStream proto, long fieldId) { 147 long token = proto.start(fieldId); 148 proto.write(PatternMatcherProto.PATTERN, mPattern); 149 proto.write(PatternMatcherProto.TYPE, mType); 150 // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to 151 // match the current data structure. 152 proto.end(token); 153 } 154 155 /** 156 * Perform a check on the matcher for the pattern type of {@link #PATTERN_ADVANCED_GLOB}. 157 * Return true if it passed. 158 * @hide 159 */ check()160 public boolean check() { 161 try { 162 if (mType == PATTERN_ADVANCED_GLOB) { 163 return Arrays.equals(mParsedPattern, parseAndVerifyAdvancedPattern(mPattern)); 164 } 165 } catch (IllegalArgumentException e) { 166 Log.w(TAG, "Failed to verify advanced pattern: " + e.getMessage()); 167 return false; 168 } 169 return true; 170 } 171 describeContents()172 public int describeContents() { 173 return 0; 174 } 175 writeToParcel(Parcel dest, int flags)176 public void writeToParcel(Parcel dest, int flags) { 177 dest.writeString(mPattern); 178 dest.writeInt(mType); 179 dest.writeIntArray(mParsedPattern); 180 } 181 PatternMatcher(Parcel src)182 public PatternMatcher(Parcel src) { 183 mPattern = src.readString(); 184 mType = src.readInt(); 185 mParsedPattern = src.createIntArray(); 186 } 187 188 public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR 189 = new Parcelable.Creator<PatternMatcher>() { 190 public PatternMatcher createFromParcel(Parcel source) { 191 return new PatternMatcher(source); 192 } 193 194 public PatternMatcher[] newArray(int size) { 195 return new PatternMatcher[size]; 196 } 197 }; 198 matchPattern(String match, String pattern, int[] parsedPattern, int type)199 static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) { 200 if (match == null) return false; 201 if (type == PATTERN_LITERAL) { 202 return pattern.equals(match); 203 } if (type == PATTERN_PREFIX) { 204 return match.startsWith(pattern); 205 } else if (type == PATTERN_SIMPLE_GLOB) { 206 return matchGlobPattern(pattern, match); 207 } else if (type == PATTERN_ADVANCED_GLOB) { 208 return matchAdvancedPattern(parsedPattern, match); 209 } else if (type == PATTERN_SUFFIX) { 210 return match.endsWith(pattern); 211 } 212 return false; 213 } 214 matchGlobPattern(String pattern, String match)215 static boolean matchGlobPattern(String pattern, String match) { 216 final int NP = pattern.length(); 217 if (NP <= 0) { 218 return match.length() <= 0; 219 } 220 final int NM = match.length(); 221 int ip = 0, im = 0; 222 char nextChar = pattern.charAt(0); 223 while ((ip<NP) && (im<NM)) { 224 char c = nextChar; 225 ip++; 226 nextChar = ip < NP ? pattern.charAt(ip) : 0; 227 final boolean escaped = (c == '\\'); 228 if (escaped) { 229 c = nextChar; 230 ip++; 231 nextChar = ip < NP ? pattern.charAt(ip) : 0; 232 } 233 if (nextChar == '*') { 234 if (!escaped && c == '.') { 235 if (ip >= (NP-1)) { 236 // at the end with a pattern match, so 237 // all is good without checking! 238 return true; 239 } 240 ip++; 241 nextChar = pattern.charAt(ip); 242 // Consume everything until the next character in the 243 // pattern is found. 244 if (nextChar == '\\') { 245 ip++; 246 nextChar = ip < NP ? pattern.charAt(ip) : 0; 247 } 248 do { 249 if (match.charAt(im) == nextChar) { 250 break; 251 } 252 im++; 253 } while (im < NM); 254 if (im == NM) { 255 // Whoops, the next character in the pattern didn't 256 // exist in the match. 257 return false; 258 } 259 ip++; 260 nextChar = ip < NP ? pattern.charAt(ip) : 0; 261 im++; 262 } else { 263 // Consume only characters matching the one before '*'. 264 do { 265 if (match.charAt(im) != c) { 266 break; 267 } 268 im++; 269 } while (im < NM); 270 ip++; 271 nextChar = ip < NP ? pattern.charAt(ip) : 0; 272 } 273 } else { 274 if (c != '.' && match.charAt(im) != c) return false; 275 im++; 276 } 277 } 278 279 if (ip >= NP && im >= NM) { 280 // Reached the end of both strings, all is good! 281 return true; 282 } 283 284 // One last check: we may have finished the match string, but still 285 // have a '.*' at the end of the pattern, which should still count 286 // as a match. 287 if (ip == NP-2 && pattern.charAt(ip) == '.' 288 && pattern.charAt(ip+1) == '*') { 289 return true; 290 } 291 292 return false; 293 } 294 295 /** 296 * Parses the advanced pattern and returns an integer array representation of it. The integer 297 * array treats each field as a character if positive and a unique token placeholder if 298 * negative. This method will throw on any pattern structure violations. 299 */ 300 synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) { 301 int ip = 0; 302 final int LP = pattern.length(); 303 304 int it = 0; 305 306 boolean inSet = false; 307 boolean inRange = false; 308 boolean inCharClass = false; 309 310 boolean addToParsedPattern; 311 312 while (ip < LP) { 313 if (it > MAX_PATTERN_STORAGE - 3) { 314 throw new IllegalArgumentException("Pattern is too large!"); 315 } 316 317 char c = pattern.charAt(ip); 318 addToParsedPattern = false; 319 320 switch (c) { 321 case '[': 322 if (inSet) { 323 addToParsedPattern = true; // treat as literal or char class in set 324 } else { 325 if (pattern.charAt(ip + 1) == '^') { 326 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START; 327 ip++; // skip over the '^' 328 } else { 329 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START; 330 } 331 ip++; // move to the next pattern char 332 inSet = true; 333 continue; 334 } 335 break; 336 case ']': 337 if (!inSet) { 338 addToParsedPattern = true; // treat as literal outside of set 339 } else { 340 int parsedToken = sParsedPatternScratch[it - 1]; 341 if (parsedToken == PARSED_TOKEN_CHAR_SET_START || 342 parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) { 343 throw new IllegalArgumentException( 344 "You must define characters in a set."); 345 } 346 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP; 347 inSet = false; 348 inCharClass = false; 349 } 350 break; 351 case '{': 352 if (!inSet) { 353 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 354 throw new IllegalArgumentException("Modifier must follow a token."); 355 } 356 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START; 357 ip++; 358 inRange = true; 359 } 360 break; 361 case '}': 362 if (inRange) { // only terminate the range if we're currently in one 363 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP; 364 inRange = false; 365 } 366 break; 367 case '*': 368 if (!inSet) { 369 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 370 throw new IllegalArgumentException("Modifier must follow a token."); 371 } 372 sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE; 373 } 374 break; 375 case '+': 376 if (!inSet) { 377 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 378 throw new IllegalArgumentException("Modifier must follow a token."); 379 } 380 sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE; 381 } 382 break; 383 case '.': 384 if (!inSet) { 385 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY; 386 } 387 break; 388 case '\\': // escape 389 if (ip + 1 >= LP) { 390 throw new IllegalArgumentException("Escape found at end of pattern!"); 391 } 392 c = pattern.charAt(++ip); 393 addToParsedPattern = true; 394 break; 395 default: 396 addToParsedPattern = true; 397 break; 398 } 399 if (inSet) { 400 if (inCharClass) { 401 sParsedPatternScratch[it++] = c; 402 inCharClass = false; 403 } else { 404 // look forward for character class 405 if (ip + 2 < LP 406 && pattern.charAt(ip + 1) == '-' 407 && pattern.charAt(ip + 2) != ']') { 408 inCharClass = true; 409 sParsedPatternScratch[it++] = c; // set first token as lower end of range 410 ip++; // advance past dash 411 } else { // literal 412 sParsedPatternScratch[it++] = c; // set first token as literal 413 sParsedPatternScratch[it++] = c; // set second set as literal 414 } 415 } 416 } else if (inRange) { 417 int endOfSet = pattern.indexOf('}', ip); 418 if (endOfSet < 0) { 419 throw new IllegalArgumentException("Range not ended with '}'"); 420 } 421 String rangeString = pattern.substring(ip, endOfSet); 422 int commaIndex = rangeString.indexOf(','); 423 try { 424 final int rangeMin; 425 final int rangeMax; 426 if (commaIndex < 0) { 427 int parsedRange = Integer.parseInt(rangeString); 428 rangeMin = rangeMax = parsedRange; 429 } else { 430 rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex)); 431 if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more) 432 rangeMax = Integer.MAX_VALUE; 433 } else { 434 rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1)); 435 } 436 } 437 if (rangeMin > rangeMax) { 438 throw new IllegalArgumentException( 439 "Range quantifier minimum is greater than maximum"); 440 } 441 sParsedPatternScratch[it++] = rangeMin; 442 sParsedPatternScratch[it++] = rangeMax; 443 } catch (NumberFormatException e) { 444 throw new IllegalArgumentException("Range number format incorrect", e); 445 } 446 ip = endOfSet; 447 continue; // don't increment ip 448 } else if (addToParsedPattern) { 449 sParsedPatternScratch[it++] = c; 450 } 451 ip++; 452 } 453 if (inSet) { 454 throw new IllegalArgumentException("Set was not terminated!"); 455 } 456 return Arrays.copyOf(sParsedPatternScratch, it); 457 } 458 459 private static boolean isParsedModifier(int parsedChar) { 460 return parsedChar == PARSED_MODIFIER_ONE_OR_MORE || 461 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE || 462 parsedChar == PARSED_MODIFIER_RANGE_STOP || 463 parsedChar == PARSED_MODIFIER_RANGE_START; 464 } 465 466 static boolean matchAdvancedPattern(int[] parsedPattern, String match) { 467 468 // create indexes 469 int ip = 0, im = 0; 470 471 // one-time length check 472 final int LP = parsedPattern.length, LM = match.length(); 473 474 // The current character being analyzed in the pattern 475 int patternChar; 476 477 int tokenType; 478 479 int charSetStart = 0, charSetEnd = 0; 480 481 while (ip < LP) { // we still have content in the pattern 482 483 patternChar = parsedPattern[ip]; 484 // get the match type of the next verb 485 486 switch (patternChar) { 487 case PARSED_TOKEN_CHAR_ANY: 488 tokenType = TOKEN_TYPE_ANY; 489 ip++; 490 break; 491 case PARSED_TOKEN_CHAR_SET_START: 492 case PARSED_TOKEN_CHAR_SET_INVERSE_START: 493 tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START 494 ? TOKEN_TYPE_SET 495 : TOKEN_TYPE_INVERSE_SET; 496 charSetStart = ip + 1; // start from the char after the set start 497 while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP); 498 charSetEnd = ip - 1; // we're on the set stop, end is the previous 499 ip++; // move the pointer to the next pattern entry 500 break; 501 default: 502 charSetStart = ip; 503 tokenType = TOKEN_TYPE_LITERAL; 504 ip++; 505 break; 506 } 507 508 final int minRepetition; 509 final int maxRepetition; 510 511 // look for a match length modifier 512 if (ip >= LP) { 513 minRepetition = maxRepetition = 1; 514 } else { 515 patternChar = parsedPattern[ip]; 516 switch (patternChar) { 517 case PARSED_MODIFIER_ZERO_OR_MORE: 518 minRepetition = 0; 519 maxRepetition = Integer.MAX_VALUE; 520 ip++; 521 break; 522 case PARSED_MODIFIER_ONE_OR_MORE: 523 minRepetition = 1; 524 maxRepetition = Integer.MAX_VALUE; 525 ip++; 526 break; 527 case PARSED_MODIFIER_RANGE_START: 528 minRepetition = parsedPattern[++ip]; 529 maxRepetition = parsedPattern[++ip]; 530 ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token 531 break; 532 default: 533 minRepetition = maxRepetition = 1; // implied literal 534 break; 535 } 536 } 537 if (minRepetition > maxRepetition) { 538 return false; 539 } 540 541 // attempt to match as many characters as possible 542 int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition, 543 parsedPattern, charSetStart, charSetEnd); 544 545 // if we found a conflict, return false immediately 546 if (matched == NO_MATCH) { 547 return false; 548 } 549 550 // move the match pointer the number of characters matched 551 im += matched; 552 } 553 return ip >= LP && im >= LM; // have parsed entire string and regex 554 } 555 556 private static int matchChars(String match, int im, final int lm, int tokenType, 557 int minRepetition, int maxRepetition, int[] parsedPattern, 558 int tokenStart, int tokenEnd) { 559 int matched = 0; 560 561 while(matched < maxRepetition 562 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart, 563 tokenEnd)) { 564 matched++; 565 } 566 567 return matched < minRepetition ? NO_MATCH : matched; 568 } 569 570 private static boolean matchChar(String match, int im, final int lm, int tokenType, 571 int[] parsedPattern, int tokenStart, int tokenEnd) { 572 if (im >= lm) { // we've overrun the string, no match 573 return false; 574 } 575 switch (tokenType) { 576 case TOKEN_TYPE_ANY: 577 return true; 578 case TOKEN_TYPE_SET: 579 for (int i = tokenStart; i < tokenEnd; i += 2) { 580 char matchChar = match.charAt(im); 581 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 582 return true; 583 } 584 } 585 return false; 586 case TOKEN_TYPE_INVERSE_SET: 587 for (int i = tokenStart; i < tokenEnd; i += 2) { 588 char matchChar = match.charAt(im); 589 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 590 return false; 591 } 592 } 593 return true; 594 case TOKEN_TYPE_LITERAL: 595 return match.charAt(im) == parsedPattern[tokenStart]; 596 default: 597 return false; 598 } 599 } 600 }