pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 1 | // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 5 | #ifndef COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_H_ |
| 6 | #define COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_H_ |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 7 | |
#include <iosfwd>
#include <string>

#include "base/macros.h"
#include "base/strings/string_piece.h"
#include "components/url_pattern_index/proto/rules.pb.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/third_party/mozilla/url_parse.h"
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 15 | |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 16 | class GURL; |
| 17 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 18 | namespace url_pattern_index { |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 19 | |
| 20 | namespace flat { |
| 21 | struct UrlRule; // The FlatBuffers version of UrlRule. |
| 22 | } |
| 23 | |
| 24 | // The structure used to mirror a URL pattern regardless of the representation |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 25 | // of the UrlRule that owns it, and to match it against URLs. |
| 26 | class UrlPattern { |
| 27 | public: |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 28 | enum class MatchCase { |
| 29 | kTrue, |
| 30 | kFalse, |
| 31 | }; |
| 32 | |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 33 | // A wrapper over a GURL to reduce redundant computation. |
| 34 | class UrlInfo { |
| 35 | public: |
| 36 | // The |url| must outlive this instance. |
| 37 | UrlInfo(const GURL& url); |
| 38 | ~UrlInfo(); |
| 39 | |
| 40 | base::StringPiece spec() const { return spec_; } |
Karan Bhatia | e177fb6 | 2018-09-14 00:57:30 | [diff] [blame] | 41 | base::StringPiece GetLowerCaseSpec() const; |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 42 | url::Component host() const { return host_; } |
| 43 | |
| 44 | private: |
| 45 | // The url spec. |
| 46 | const base::StringPiece spec_; |
Karan Bhatia | e177fb6 | 2018-09-14 00:57:30 | [diff] [blame] | 47 | // String to hold the lazily computed lower cased spec. |
| 48 | mutable std::string lower_case_spec_owner_; |
| 49 | // Reference to the lower case spec. Computed lazily. |
Anton Bikineev | 1156b5f | 2021-05-15 22:35:36 | [diff] [blame] | 50 | mutable absl::optional<base::StringPiece> lower_case_spec_cached_; |
Karan Bhatia | e177fb6 | 2018-09-14 00:57:30 | [diff] [blame] | 51 | |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 52 | // The url host component. |
| 53 | const url::Component host_; |
| 54 | |
| 55 | DISALLOW_COPY_AND_ASSIGN(UrlInfo); |
| 56 | }; |
| 57 | |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 58 | UrlPattern(); |
| 59 | |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 60 | // Creates a |url_pattern| of a certain |type| and case-sensitivity. |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 61 | UrlPattern(base::StringPiece url_pattern, |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 62 | proto::UrlPatternType type = proto::URL_PATTERN_TYPE_WILDCARDED, |
| 63 | MatchCase match_case = MatchCase::kFalse); |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 64 | |
| 65 | // Creates a WILDCARDED |url_pattern| with the specified anchors. |
| 66 | UrlPattern(base::StringPiece url_pattern, |
| 67 | proto::AnchorType anchor_left, |
| 68 | proto::AnchorType anchor_right); |
| 69 | |
Karan Bhatia | 01d5917 | 2018-09-05 16:57:40 | [diff] [blame] | 70 | // The passed in |rule| must outlive the created instance. |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 71 | explicit UrlPattern(const flat::UrlRule& rule); |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 72 | |
| 73 | ~UrlPattern(); |
| 74 | |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 75 | proto::UrlPatternType type() const { return type_; } |
| 76 | base::StringPiece url_pattern() const { return url_pattern_; } |
| 77 | proto::AnchorType anchor_left() const { return anchor_left_; } |
| 78 | proto::AnchorType anchor_right() const { return anchor_right_; } |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 79 | bool match_case() const { return match_case_ == MatchCase::kTrue; } |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 80 | |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 81 | // Returns whether the |url| matches the URL |pattern|. Requires the type of |
| 82 | // |this| pattern to be either SUBSTRING or WILDCARDED. |
| 83 | // |
| 84 | // Splits the pattern into subpatterns separated by '*' wildcards, and |
| 85 | // greedily finds each of them in the spec of the |url|. Respects anchors at |
| 86 | // either end of the pattern, and '^' separator placeholders when comparing a |
| 87 | // subpattern to a subtring of the spec. |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 88 | bool MatchesUrl(const UrlInfo& url) const; |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 89 | |
| 90 | private: |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 91 | // TODO(pkalinnikov): Store flat:: types instead of proto::, in order to avoid |
| 92 | // conversions in IndexedRuleset. |
| 93 | proto::UrlPatternType type_ = proto::URL_PATTERN_TYPE_UNSPECIFIED; |
| 94 | base::StringPiece url_pattern_; |
| 95 | |
| 96 | proto::AnchorType anchor_left_ = proto::ANCHOR_TYPE_NONE; |
| 97 | proto::AnchorType anchor_right_ = proto::ANCHOR_TYPE_NONE; |
| 98 | |
Charlie Harrison | 8d71f6f | 2018-09-14 14:43:26 | [diff] [blame] | 99 | MatchCase match_case_ = MatchCase::kTrue; |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 100 | |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 101 | DISALLOW_COPY_AND_ASSIGN(UrlPattern); |
| 102 | }; |
| 103 | |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 104 | // Allow pretty-printing URLPatterns when they are used in GTest assertions. |
| 105 | std::ostream& operator<<(std::ostream& out, const UrlPattern& pattern); |
| 106 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 107 | } // namespace url_pattern_index |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 108 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 109 | #endif // COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_H_ |