// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// The separator placeholder '^' symbol is used in subpatterns to match any
// separator character, which is any ASCII symbol except letters, digits, and
// the following: '_', '-', '.', '%'. Note that the separator placeholder
// character '^' is itself a separator, as well as '\0'.
//
// In addition, a separator placeholder at the end of the pattern can be matched
// by the end of |text|. This should be handled by the clients using the
// following utility functions.
//
// We define a fuzzy occurrence as an occurrence of a |subpattern| in |text|
// such that all its non-placeholder characters are equal to the corresponding
// characters of the |text|, whereas each '^' placeholder can correspond to any
// type of separator in |text|.
| 18 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 19 | #ifndef COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_ |
| 20 | #define COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_ |
pkalinnikov | ea35060 | 2016-06-24 11:22:15 | [diff] [blame] | 21 | |
| 22 | #include <stddef.h> |
| 23 | |
pkalinnikov | ea35060 | 2016-06-24 11:22:15 | [diff] [blame] | 24 | #include "base/strings/string_piece.h" |
pkalinnikov | ea35060 | 2016-06-24 11:22:15 | [diff] [blame] | 25 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 26 | namespace url_pattern_index { |
pkalinnikov | ea35060 | 2016-06-24 11:22:15 | [diff] [blame] | 27 | |
// Placeholder character used in subpatterns to stand for any separator
// character (see IsSeparator()). The placeholder is itself a separator.
constexpr char kSeparatorPlaceholder = '^';
| 29 | |
// Returns whether |c| is a 7-bit ASCII character, i.e. whether its value lies
// in [0x00, 0x7F]. The result does not depend on whether plain char is signed
// or unsigned. Being constexpr, it can also be used in constant expressions.
constexpr bool IsAscii(char c) {
  // A character is ASCII exactly when no bit above the low seven is set.
  return (c & ~0x7F) == 0;
}
| 33 | |
// Returns whether |c| is an ASCII letter ('a'-'z', 'A'-'Z') or an ASCII digit
// ('0'-'9'). No locale is consulted, and bytes outside the ASCII range are
// never reported as alphanumeric. Being constexpr, it can also be used in
// constant expressions.
constexpr bool IsAlphaNumericAscii(char c) {
  // OR-ing in 0x20 folds 'A'-'Z' onto 'a'-'z' and moves no other character
  // into that range, so one range check covers both letter cases.
  return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z');
}
| 40 | |
| 41 | // Returns whether |c| is a separator. |
| 42 | inline bool IsSeparator(char c) { |
| 43 | switch (c) { |
| 44 | case '_': |
| 45 | case '-': |
| 46 | case '.': |
| 47 | case '%': |
| 48 | return false; |
| 49 | case kSeparatorPlaceholder: |
| 50 | return true; |
| 51 | default: |
| 52 | return !IsAlphaNumericAscii(c) && IsAscii(c); |
| 53 | } |
| 54 | } |
| 55 | |
pkalinnikov | ea35060 | 2016-06-24 11:22:15 | [diff] [blame] | 56 | // Returns whether |text| starts with a fuzzy occurrence of |subpattern|. |
| 57 | bool StartsWithFuzzy(base::StringPiece text, base::StringPiece subpattern); |
| 58 | |
| 59 | // Returns whether |text| ends with a fuzzy occurrence of |subpattern|. |
| 60 | bool EndsWithFuzzy(base::StringPiece text, base::StringPiece subpattern); |
| 61 | |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 62 | // Returns the position of the leftmost fuzzy occurrence of a |subpattern| in |
| 63 | // the |text| starting no earlier than |from| the specified position. |
| 64 | size_t FindFuzzy(base::StringPiece text, |
| 65 | base::StringPiece subpattern, |
| 66 | size_t from = 0); |
pkalinnikov | ea35060 | 2016-06-24 11:22:15 | [diff] [blame] | 67 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 68 | } // namespace url_pattern_index |
pkalinnikov | ea35060 | 2016-06-24 11:22:15 | [diff] [blame] | 69 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 70 | #endif // COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_ |