pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 1 | // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 5 | // The matching logic distinguishes between the terms URL pattern and |
| 6 | // subpattern. A URL pattern usually stands for the full thing, e.g. |
| 7 | // "example.com^*path*par=val^", whereas subpattern denotes a maximal substring |
| 8 | // of a pattern not containing the wildcard '*' character. For the example above |
| 9 | // the subpatterns are: "example.com^", "path" and "par=val^". |
| 10 | // |
| 11 | // The separator placeholder '^' symbol is used in subpatterns to match any |
| 12 | // separator character, which is any ASCII symbol except letters, digits, and |
| 13 | // the following: '_', '-', '.', '%'. Note that the separator placeholder |
| 14 | // character '^' is itself a separator, as well as '\0'. |
| 15 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 16 | #include "components/url_pattern_index/url_pattern.h" |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 17 | |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 18 | #include <stddef.h> |
| 19 | |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 20 | #include <algorithm> |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 21 | #include <ostream> |
| 22 | |
Hans Wennborg | df87046c | 2020-04-28 11:06:24 | [diff] [blame] | 23 | #include "base/check_op.h" |
| 24 | #include "base/notreached.h" |
Charles Harrison | 78ff41bf | 2018-02-08 20:21:11 | [diff] [blame] | 25 | #include "base/numerics/checked_math.h" |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 26 | #include "base/strings/string_util.h" |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 27 | #include "components/url_pattern_index/flat/url_pattern_index_generated.h" |
| 28 | #include "components/url_pattern_index/fuzzy_pattern_matching.h" |
| 29 | #include "components/url_pattern_index/string_splitter.h" |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 30 | #include "url/gurl.h" |
| 31 | #include "url/third_party/mozilla/url_parse.h" |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 32 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 33 | namespace url_pattern_index { |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 34 | |
| 35 | namespace { |
| 36 | |
Karandeep Bhatia | cf2b1a0 | 2019-02-25 23:09:31 | [diff] [blame] | 37 | constexpr char kWildcard = '*'; |
| 38 | |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 39 | class IsWildcard { |
| 40 | public: |
Karandeep Bhatia | cf2b1a0 | 2019-02-25 23:09:31 | [diff] [blame] | 41 | bool operator()(char c) const { return c == kWildcard; } |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 42 | }; |
| 43 | |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 44 | proto::UrlPatternType ConvertUrlPatternType(flat::UrlPatternType type) { |
| 45 | switch (type) { |
| 46 | case flat::UrlPatternType_SUBSTRING: |
| 47 | return proto::URL_PATTERN_TYPE_SUBSTRING; |
| 48 | case flat::UrlPatternType_WILDCARDED: |
| 49 | return proto::URL_PATTERN_TYPE_WILDCARDED; |
| 50 | case flat::UrlPatternType_REGEXP: |
| 51 | return proto::URL_PATTERN_TYPE_REGEXP; |
| 52 | default: |
| 53 | return proto::URL_PATTERN_TYPE_UNSPECIFIED; |
| 54 | } |
| 55 | } |
| 56 | |
| 57 | proto::AnchorType ConvertAnchorType(flat::AnchorType type) { |
| 58 | switch (type) { |
| 59 | case flat::AnchorType_NONE: |
| 60 | return proto::ANCHOR_TYPE_NONE; |
| 61 | case flat::AnchorType_BOUNDARY: |
| 62 | return proto::ANCHOR_TYPE_BOUNDARY; |
| 63 | case flat::AnchorType_SUBDOMAIN: |
| 64 | return proto::ANCHOR_TYPE_SUBDOMAIN; |
| 65 | default: |
| 66 | return proto::ANCHOR_TYPE_UNSPECIFIED; |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | base::StringPiece ConvertString(const flatbuffers::String* string) { |
| 71 | return string ? base::StringPiece(string->data(), string->size()) |
| 72 | : base::StringPiece(); |
| 73 | } |
| 74 | |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 75 | bool HasAnyUpperAscii(base::StringPiece string) { |
| 76 | return std::any_of(string.begin(), string.end(), base::IsAsciiUpper<char>); |
| 77 | } |
| 78 | |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 79 | // Returns whether |position| within the |url| belongs to its |host| component |
| 80 | // and corresponds to the beginning of a (sub-)domain. |
| 81 | inline bool IsSubdomainAnchored(base::StringPiece url, |
| 82 | url::Component host, |
| 83 | size_t position) { |
| 84 | DCHECK_LE(position, url.size()); |
| 85 | const size_t host_begin = static_cast<size_t>(host.begin); |
| 86 | const size_t host_end = static_cast<size_t>(host.end()); |
| 87 | DCHECK_LE(host_end, url.size()); |
| 88 | |
| 89 | return position == host_begin || |
| 90 | (position > host_begin && position <= host_end && |
| 91 | url[position - 1] == '.'); |
| 92 | } |
| 93 | |
| 94 | // Returns the position of the leftmost occurrence of a |subpattern| in the |
| 95 | // |text| starting no earlier than |from| the specified position. If the |
| 96 | // |subpattern| has separator placeholders, searches for a fuzzy occurrence. |
| 97 | size_t FindSubpattern(base::StringPiece text, |
| 98 | base::StringPiece subpattern, |
| 99 | size_t from = 0) { |
| 100 | const bool is_fuzzy = |
| 101 | (subpattern.find(kSeparatorPlaceholder) != base::StringPiece::npos); |
| 102 | return is_fuzzy ? FindFuzzy(text, subpattern, from) |
| 103 | : text.find(subpattern, from); |
| 104 | } |
| 105 | |
| 106 | // Same as FindSubpattern(url, subpattern), but searches for an occurrence that |
| 107 | // starts at the beginning of a (sub-)domain within the url's |host| component. |
| 108 | size_t FindSubdomainAnchoredSubpattern(base::StringPiece url, |
| 109 | url::Component host, |
| 110 | base::StringPiece subpattern) { |
| 111 | const bool is_fuzzy = |
| 112 | (subpattern.find(kSeparatorPlaceholder) != base::StringPiece::npos); |
| 113 | |
Charles Harrison | 78ff41bf | 2018-02-08 20:21:11 | [diff] [blame] | 114 | // Any match found after the end of the host will be discarded, so just |
| 115 | // avoid searching there for the subpattern to begin with. |
| 116 | // |
| 117 | // Check for overflow. |
| 118 | size_t max_match_end = 0; |
| 119 | if (!base::CheckAdd(host.end(), subpattern.length()) |
| 120 | .AssignIfValid(&max_match_end)) { |
| 121 | return base::StringPiece::npos; |
| 122 | } |
| 123 | const base::StringPiece url_match_candidate = url.substr(0, max_match_end); |
| 124 | const base::StringPiece url_host = url.substr(0, host.end()); |
| 125 | |
| 126 | for (size_t position = static_cast<size_t>(host.begin); |
| 127 | position <= static_cast<size_t>(host.end()); ++position) { |
| 128 | // Enforce as a loop precondition that we are always anchored at a |
| 129 | // sub-domain before calling find. This is to reduce the number of potential |
| 130 | // searches for |subpattern|. |
| 131 | DCHECK(IsSubdomainAnchored(url, host, position)); |
| 132 | |
| 133 | position = is_fuzzy ? FindFuzzy(url_match_candidate, subpattern, position) |
| 134 | : url_match_candidate.find(subpattern, position); |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 135 | if (position == base::StringPiece::npos || |
| 136 | IsSubdomainAnchored(url, host, position)) { |
| 137 | return position; |
| 138 | } |
Charles Harrison | 78ff41bf | 2018-02-08 20:21:11 | [diff] [blame] | 139 | |
| 140 | // Enforce the loop precondition. This skips |position| to the next '.', |
| 141 | // within the host, which the loop itself increments to the anchored |
| 142 | // sub-domain. |
| 143 | position = url_host.find('.', position); |
| 144 | if (position == base::StringPiece::npos) |
| 145 | break; |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 146 | } |
| 147 | return base::StringPiece::npos; |
| 148 | } |
| 149 | |
Karan Bhatia | 1d1eaed2 | 2019-02-20 21:07:17 | [diff] [blame] | 150 | // Helper for DoesTextMatchLastSubpattern. Treats kSeparatorPlaceholder as not |
| 151 | // matching the end of the text. |
| 152 | bool DoesTextMatchLastSubpatternInternal(proto::AnchorType anchor_left, |
| 153 | proto::AnchorType anchor_right, |
| 154 | base::StringPiece text, |
| 155 | url::Component url_host, |
| 156 | base::StringPiece subpattern) { |
| 157 | // Enumerate all possible combinations of |anchor_left| and |anchor_right|. |
| 158 | if (anchor_left == proto::ANCHOR_TYPE_NONE && |
| 159 | anchor_right == proto::ANCHOR_TYPE_NONE) { |
| 160 | return FindSubpattern(text, subpattern) != base::StringPiece::npos; |
| 161 | } |
| 162 | |
| 163 | if (anchor_left == proto::ANCHOR_TYPE_NONE && |
| 164 | anchor_right == proto::ANCHOR_TYPE_BOUNDARY) { |
| 165 | return EndsWithFuzzy(text, subpattern); |
| 166 | } |
| 167 | |
| 168 | if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY && |
| 169 | anchor_right == proto::ANCHOR_TYPE_NONE) { |
| 170 | return StartsWithFuzzy(text, subpattern); |
| 171 | } |
| 172 | |
| 173 | if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY && |
| 174 | anchor_right == proto::ANCHOR_TYPE_BOUNDARY) { |
| 175 | return text.size() == subpattern.size() && |
| 176 | StartsWithFuzzy(text, subpattern); |
| 177 | } |
| 178 | |
| 179 | if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN && |
| 180 | anchor_right == proto::ANCHOR_TYPE_NONE) { |
| 181 | return url_host.is_nonempty() && |
| 182 | FindSubdomainAnchoredSubpattern(text, url_host, subpattern) != |
| 183 | base::StringPiece::npos; |
| 184 | } |
| 185 | |
| 186 | if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN && |
| 187 | anchor_right == proto::ANCHOR_TYPE_BOUNDARY) { |
| 188 | return url_host.is_nonempty() && text.size() >= subpattern.size() && |
| 189 | IsSubdomainAnchored(text, url_host, |
| 190 | text.size() - subpattern.size()) && |
| 191 | EndsWithFuzzy(text, subpattern); |
| 192 | } |
| 193 | |
| 194 | NOTREACHED(); |
| 195 | return false; |
| 196 | } |
| 197 | |
| 198 | // Matches the last |subpattern| against |text|. Special treatment is required |
| 199 | // for the last subpattern since |kSeparatorPlaceholder| can also match the end |
| 200 | // of the text. |
| 201 | bool DoesTextMatchLastSubpattern(proto::AnchorType anchor_left, |
| 202 | proto::AnchorType anchor_right, |
| 203 | base::StringPiece text, |
| 204 | url::Component url_host, |
| 205 | base::StringPiece subpattern) { |
| 206 | DCHECK(!subpattern.empty()); |
| 207 | |
| 208 | if (DoesTextMatchLastSubpatternInternal(anchor_left, anchor_right, text, |
| 209 | url_host, subpattern)) { |
| 210 | return true; |
| 211 | } |
| 212 | |
| 213 | // If the last |subpattern| ends with kSeparatorPlaceholder, then it can also |
| 214 | // match the end of text. |
| 215 | if (subpattern.back() == kSeparatorPlaceholder) { |
| 216 | subpattern.remove_suffix(1); |
| 217 | return DoesTextMatchLastSubpatternInternal( |
| 218 | anchor_left, proto::ANCHOR_TYPE_BOUNDARY, text, url_host, subpattern); |
| 219 | } |
| 220 | |
| 221 | return false; |
| 222 | } |
| 223 | |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 224 | // Returns whether the given |url_pattern| matches the given |url_spec|. |
| 225 | // Compares the pattern the the url in a case-sensitive manner. |
| 226 | bool IsCaseSensitiveMatch(base::StringPiece url_pattern, |
| 227 | proto::AnchorType anchor_left, |
| 228 | proto::AnchorType anchor_right, |
| 229 | base::StringPiece url_spec, |
| 230 | url::Component url_host) { |
| 231 | DCHECK(!url_spec.empty()); |
| 232 | |
| 233 | StringSplitter<IsWildcard> subpatterns(url_pattern); |
| 234 | auto subpattern_it = subpatterns.begin(); |
| 235 | auto subpattern_end = subpatterns.end(); |
| 236 | |
Karan Bhatia | 1d1eaed2 | 2019-02-20 21:07:17 | [diff] [blame] | 237 | // No subpatterns. |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 238 | if (subpattern_it == subpattern_end) { |
| 239 | return anchor_left == proto::ANCHOR_TYPE_NONE || |
| 240 | anchor_right == proto::ANCHOR_TYPE_NONE; |
| 241 | } |
| 242 | |
| 243 | base::StringPiece subpattern = *subpattern_it; |
| 244 | ++subpattern_it; |
| 245 | |
Karan Bhatia | 1d1eaed2 | 2019-02-20 21:07:17 | [diff] [blame] | 246 | // There is only one |subpattern|. |
| 247 | if (subpattern_it == subpattern_end) { |
| 248 | return DoesTextMatchLastSubpattern(anchor_left, anchor_right, url_spec, |
| 249 | url_host, subpattern); |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 250 | } |
| 251 | |
| 252 | // Otherwise, the first |subpattern| does not have to be a suffix. But it |
| 253 | // still can have a left anchor. Check and handle that. |
| 254 | base::StringPiece text = url_spec; |
| 255 | if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY) { |
| 256 | if (!StartsWithFuzzy(url_spec, subpattern)) |
| 257 | return false; |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 258 | text.remove_prefix(subpattern.size()); |
| 259 | } else if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN) { |
| 260 | if (!url_host.is_nonempty()) |
| 261 | return false; |
| 262 | const size_t match_begin = |
| 263 | FindSubdomainAnchoredSubpattern(url_spec, url_host, subpattern); |
| 264 | if (match_begin == base::StringPiece::npos) |
| 265 | return false; |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 266 | text.remove_prefix(match_begin + subpattern.size()); |
| 267 | } else { |
| 268 | DCHECK_EQ(anchor_left, proto::ANCHOR_TYPE_NONE); |
| 269 | // Get back to the initial |subpattern|, process it in the loop below. |
| 270 | subpattern_it = subpatterns.begin(); |
| 271 | } |
| 272 | |
Karan Bhatia | 1d1eaed2 | 2019-02-20 21:07:17 | [diff] [blame] | 273 | DCHECK(subpattern_it != subpattern_end); |
| 274 | subpattern = *subpattern_it; |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 275 | |
Karan Bhatia | 1d1eaed2 | 2019-02-20 21:07:17 | [diff] [blame] | 276 | // Consecutively find all the remaining subpatterns in the |text|. Handle the |
| 277 | // last subpattern outside the loop. |
| 278 | while (++subpattern_it != subpattern_end) { |
| 279 | DCHECK(!subpattern.empty()); |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 280 | |
| 281 | const size_t match_position = FindSubpattern(text, subpattern); |
| 282 | if (match_position == base::StringPiece::npos) |
| 283 | return false; |
| 284 | text.remove_prefix(match_position + subpattern.size()); |
Karan Bhatia | 1d1eaed2 | 2019-02-20 21:07:17 | [diff] [blame] | 285 | |
| 286 | subpattern = *subpattern_it; |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 287 | } |
| 288 | |
Karan Bhatia | 1d1eaed2 | 2019-02-20 21:07:17 | [diff] [blame] | 289 | return DoesTextMatchLastSubpattern(proto::ANCHOR_TYPE_NONE, anchor_right, |
| 290 | text, url::Component(), subpattern); |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 291 | } |
| 292 | |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 293 | } // namespace |
| 294 | |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 295 | UrlPattern::UrlInfo::UrlInfo(const GURL& url) |
| 296 | : spec_(url.possibly_invalid_spec()), |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 297 | host_(url.parsed_for_possibly_invalid_spec().host) { |
| 298 | DCHECK(url.is_valid()); |
| 299 | } |
| 300 | |
Karan Bhatia | e177fb6 | 2018-09-14 00:57:30 | [diff] [blame] | 301 | base::StringPiece UrlPattern::UrlInfo::GetLowerCaseSpec() const { |
| 302 | if (lower_case_spec_cached_) |
| 303 | return *lower_case_spec_cached_; |
| 304 | |
| 305 | if (!HasAnyUpperAscii(spec_)) { |
| 306 | lower_case_spec_cached_ = spec_; |
| 307 | } else { |
| 308 | lower_case_spec_owner_ = base::ToLowerASCII(spec_); |
| 309 | lower_case_spec_cached_ = lower_case_spec_owner_; |
| 310 | } |
| 311 | return *lower_case_spec_cached_; |
| 312 | } |
| 313 | |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 314 | UrlPattern::UrlInfo::~UrlInfo() = default; |
| 315 | |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 316 | UrlPattern::UrlPattern() = default; |
| 317 | |
| 318 | UrlPattern::UrlPattern(base::StringPiece url_pattern, |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 319 | proto::UrlPatternType type, |
| 320 | MatchCase match_case) |
| 321 | : type_(type), url_pattern_(url_pattern), match_case_(match_case) {} |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 322 | |
| 323 | UrlPattern::UrlPattern(base::StringPiece url_pattern, |
| 324 | proto::AnchorType anchor_left, |
| 325 | proto::AnchorType anchor_right) |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 326 | : type_(proto::URL_PATTERN_TYPE_WILDCARDED), |
| 327 | url_pattern_(url_pattern), |
| 328 | anchor_left_(anchor_left), |
| 329 | anchor_right_(anchor_right) {} |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 330 | |
| 331 | UrlPattern::UrlPattern(const flat::UrlRule& rule) |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 332 | : type_(ConvertUrlPatternType(rule.url_pattern_type())), |
| 333 | url_pattern_(ConvertString(rule.url_pattern())), |
| 334 | anchor_left_(ConvertAnchorType(rule.anchor_left())), |
| 335 | anchor_right_(ConvertAnchorType(rule.anchor_right())), |
Charlie Harrison | 8d71f6f | 2018-09-14 14:43:26 | [diff] [blame] | 336 | match_case_(rule.options() & flat::OptionFlag_IS_CASE_INSENSITIVE |
| 337 | ? MatchCase::kFalse |
| 338 | : MatchCase::kTrue) {} |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 339 | |
pkalinnikov | 15cf724 | 2016-07-13 08:57:34 | [diff] [blame] | 340 | UrlPattern::~UrlPattern() = default; |
| 341 | |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 342 | bool UrlPattern::MatchesUrl(const UrlInfo& url) const { |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 343 | DCHECK(type_ == proto::URL_PATTERN_TYPE_SUBSTRING || |
| 344 | type_ == proto::URL_PATTERN_TYPE_WILDCARDED); |
Karan Bhatia | a9c4e1d | 2018-09-10 23:37:47 | [diff] [blame] | 345 | DCHECK(base::IsStringASCII(url_pattern_)); |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 346 | DCHECK(base::IsStringASCII(url.spec())); |
Karan Bhatia | e177fb6 | 2018-09-14 00:57:30 | [diff] [blame] | 347 | DCHECK(base::IsStringASCII(url.GetLowerCaseSpec())); |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 348 | |
Karandeep Bhatia | cf2b1a0 | 2019-02-25 23:09:31 | [diff] [blame] | 349 | // Pre-process patterns to ensure left anchored and right anchored patterns |
| 350 | // don't begin and end with a wildcard respectively i.e. change "|*xyz" to |
| 351 | // "*xyz" and "xyz*|" to "xyz*". |
| 352 | proto::AnchorType anchor_left = anchor_left_; |
| 353 | proto::AnchorType anchor_right = anchor_right_; |
| 354 | if (!url_pattern_.empty()) { |
| 355 | if (url_pattern_.front() == kWildcard) { |
| 356 | // Note: We don't handle "||*" and expect clients to disallow it. |
| 357 | DCHECK_NE(proto::ANCHOR_TYPE_SUBDOMAIN, anchor_left_); |
| 358 | anchor_left = proto::ANCHOR_TYPE_NONE; |
| 359 | } |
| 360 | if (url_pattern_.back() == kWildcard) |
| 361 | anchor_right = proto::ANCHOR_TYPE_NONE; |
| 362 | } |
| 363 | |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 364 | if (match_case()) { |
Karandeep Bhatia | cf2b1a0 | 2019-02-25 23:09:31 | [diff] [blame] | 365 | return IsCaseSensitiveMatch(url_pattern_, anchor_left, anchor_right, |
Karan Bhatia | e0aeb0e | 2018-09-12 18:57:21 | [diff] [blame] | 366 | url.spec(), url.host()); |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 367 | } |
| 368 | |
Karan Bhatia | a06f682 | 2018-09-18 00:05:49 | [diff] [blame] | 369 | // Use the lower-cased url for case-insensitive comparison. Case-insensitive |
| 370 | // patterns should already be lower-cased. |
| 371 | DCHECK(!HasAnyUpperAscii(url_pattern_)); |
Karandeep Bhatia | cf2b1a0 | 2019-02-25 23:09:31 | [diff] [blame] | 372 | return IsCaseSensitiveMatch(url_pattern_, anchor_left, anchor_right, |
Karan Bhatia | a06f682 | 2018-09-18 00:05:49 | [diff] [blame] | 373 | url.GetLowerCaseSpec(), url.host()); |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 374 | } |
| 375 | |
| 376 | std::ostream& operator<<(std::ostream& out, const UrlPattern& pattern) { |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 377 | switch (pattern.anchor_left()) { |
| 378 | case proto::ANCHOR_TYPE_SUBDOMAIN: |
| 379 | out << '|'; |
Nico Weber | b1cea5c | 2018-01-29 22:26:07 | [diff] [blame] | 380 | FALLTHROUGH; |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 381 | case proto::ANCHOR_TYPE_BOUNDARY: |
| 382 | out << '|'; |
Nico Weber | b1cea5c | 2018-01-29 22:26:07 | [diff] [blame] | 383 | FALLTHROUGH; |
pkalinnikov | 35d1881 | 2017-04-05 17:28:18 | [diff] [blame] | 384 | default: |
| 385 | break; |
| 386 | } |
| 387 | out << pattern.url_pattern(); |
| 388 | if (pattern.anchor_right() == proto::ANCHOR_TYPE_BOUNDARY) |
| 389 | out << '|'; |
| 390 | if (pattern.match_case()) |
| 391 | out << "$match-case"; |
| 392 | |
| 393 | return out; |
| 394 | } |
| 395 | |
Pavel Kalinnikov | d797063 | 2017-06-20 09:07:34 | [diff] [blame] | 396 | } // namespace url_pattern_index |