blob: 3feb7701706c7468ea05e60760690772798bc45c [file] [log] [blame]
pkalinnikovea350602016-06-24 11:22:151// Copyright 2016 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// The separator placeholder '^' symbol is used in subpatterns to match any
6// separator character, which is any ASCII symbol except letters, digits, and
7// the following: '_', '-', '.', '%'. Note that the separator placeholder
pkalinnikov854818d62016-07-22 11:55:108// character '^' is itself a separator, as well as '\0'.
Karan Bhatia1d1eaed22019-02-20 21:07:179//
10// In addition, a separator placeholder at the end of the pattern can be matched
11// by the end of |text|. This should be handled by the clients using the
12// following utility functions.
pkalinnikovea350602016-06-24 11:22:1513//
14// We define a fuzzy occurrence as an occurrence of a |subpattern| in |text|
15// such that all its non-placeholder characters are equal to the corresponding
16// characters of the |text|, whereas each '^' placeholder can correspond to any
17// type of separator in |text|.
18
Pavel Kalinnikovd7970632017-06-20 09:07:3419#ifndef COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_
20#define COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_
pkalinnikovea350602016-06-24 11:22:1521
22#include <stddef.h>
23
pkalinnikovea350602016-06-24 11:22:1524#include "base/strings/string_piece.h"
pkalinnikovea350602016-06-24 11:22:1525
Pavel Kalinnikovd7970632017-06-20 09:07:3426namespace url_pattern_index {
pkalinnikovea350602016-06-24 11:22:1527
// The character used inside subpatterns as a placeholder that stands for any
// separator character. It is itself classified as a separator by IsSeparator().
constexpr char kSeparatorPlaceholder = '^';
29
// Returns whether |c| lies in the 7-bit ASCII range [0x00, 0x7F].
inline bool IsAscii(char c) {
  // Going through unsigned char makes the check independent of whether plain
  // char is signed on the target platform.
  const unsigned char byte = static_cast<unsigned char>(c);
  return byte <= 0x7F;
}
33
// Returns whether |c| is an ASCII digit ('0'-'9') or an ASCII letter
// ('a'-'z', 'A'-'Z').
inline bool IsAlphaNumericAscii(char c) {
  // Everything ordered before '0' is neither a digit nor a letter.
  if (c < '0')
    return false;
  if (c <= '9')
    return true;
  // Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z' and leaves 'a'-'z' unchanged;
  // no other character ends up inside the 'a'-'z' range.
  const char folded = static_cast<char>(c | 0x20);
  return folded >= 'a' && folded <= 'z';
}
40
41// Returns whether |c| is a separator.
42inline bool IsSeparator(char c) {
43 switch (c) {
44 case '_':
45 case '-':
46 case '.':
47 case '%':
48 return false;
49 case kSeparatorPlaceholder:
50 return true;
51 default:
52 return !IsAlphaNumericAscii(c) && IsAscii(c);
53 }
54}
55
pkalinnikovea350602016-06-24 11:22:1556// Returns whether |text| starts with a fuzzy occurrence of |subpattern|.
57bool StartsWithFuzzy(base::StringPiece text, base::StringPiece subpattern);
58
59// Returns whether |text| ends with a fuzzy occurrence of |subpattern|.
60bool EndsWithFuzzy(base::StringPiece text, base::StringPiece subpattern);
61
pkalinnikov35d18812017-04-05 17:28:1862// Returns the position of the leftmost fuzzy occurrence of a |subpattern| in
63// the |text| starting no earlier than |from| the specified position.
64size_t FindFuzzy(base::StringPiece text,
65 base::StringPiece subpattern,
66 size_t from = 0);
pkalinnikovea350602016-06-24 11:22:1567
Pavel Kalinnikovd7970632017-06-20 09:07:3468} // namespace url_pattern_index
pkalinnikovea350602016-06-24 11:22:1569
Pavel Kalinnikovd7970632017-06-20 09:07:3470#endif // COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_