blob: cf0d86ae22d70f37f9a49872b5f609ab39e67f51 [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Pavel Kalinnikovd7970632017-06-20 09:07:345#ifndef COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_H_
6#define COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_H_
pkalinnikov15cf7242016-07-13 08:57:347
#include <iosfwd>
#include <string>

#include "base/macros.h"
#include "base/strings/string_piece.h"
#include "components/url_pattern_index/proto/rules.pb.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/third_party/mozilla/url_parse.h"
pkalinnikov15cf7242016-07-13 08:57:3415
// Forward declaration: this header only refers to GURL by const reference, so
// the full definition is not required here.
class GURL;
17
Pavel Kalinnikovd7970632017-06-20 09:07:3418namespace url_pattern_index {
pkalinnikov15cf7242016-07-13 08:57:3419
20namespace flat {
21struct UrlRule; // The FlatBuffers version of UrlRule.
22}
23
24// The structure used to mirror a URL pattern regardless of the representation
pkalinnikov35d18812017-04-05 17:28:1825// of the UrlRule that owns it, and to match it against URLs.
26class UrlPattern {
27 public:
Karan Bhatiaa9c4e1d2018-09-10 23:37:4728 enum class MatchCase {
29 kTrue,
30 kFalse,
31 };
32
Karan Bhatiae0aeb0e2018-09-12 18:57:2133 // A wrapper over a GURL to reduce redundant computation.
34 class UrlInfo {
35 public:
36 // The |url| must outlive this instance.
37 UrlInfo(const GURL& url);
38 ~UrlInfo();
39
40 base::StringPiece spec() const { return spec_; }
Karan Bhatiae177fb62018-09-14 00:57:3041 base::StringPiece GetLowerCaseSpec() const;
Karan Bhatiae0aeb0e2018-09-12 18:57:2142 url::Component host() const { return host_; }
43
44 private:
45 // The url spec.
46 const base::StringPiece spec_;
Karan Bhatiae177fb62018-09-14 00:57:3047 // String to hold the lazily computed lower cased spec.
48 mutable std::string lower_case_spec_owner_;
49 // Reference to the lower case spec. Computed lazily.
Anton Bikineev1156b5f2021-05-15 22:35:3650 mutable absl::optional<base::StringPiece> lower_case_spec_cached_;
Karan Bhatiae177fb62018-09-14 00:57:3051
Karan Bhatiae0aeb0e2018-09-12 18:57:2152 // The url host component.
53 const url::Component host_;
54
55 DISALLOW_COPY_AND_ASSIGN(UrlInfo);
56 };
57
pkalinnikov15cf7242016-07-13 08:57:3458 UrlPattern();
59
Karan Bhatiaa9c4e1d2018-09-10 23:37:4760 // Creates a |url_pattern| of a certain |type| and case-sensitivity.
pkalinnikov15cf7242016-07-13 08:57:3461 UrlPattern(base::StringPiece url_pattern,
Karan Bhatiaa9c4e1d2018-09-10 23:37:4762 proto::UrlPatternType type = proto::URL_PATTERN_TYPE_WILDCARDED,
63 MatchCase match_case = MatchCase::kFalse);
pkalinnikov15cf7242016-07-13 08:57:3464
65 // Creates a WILDCARDED |url_pattern| with the specified anchors.
66 UrlPattern(base::StringPiece url_pattern,
67 proto::AnchorType anchor_left,
68 proto::AnchorType anchor_right);
69
Karan Bhatia01d59172018-09-05 16:57:4070 // The passed in |rule| must outlive the created instance.
pkalinnikov15cf7242016-07-13 08:57:3471 explicit UrlPattern(const flat::UrlRule& rule);
pkalinnikov15cf7242016-07-13 08:57:3472
73 ~UrlPattern();
74
pkalinnikov35d18812017-04-05 17:28:1875 proto::UrlPatternType type() const { return type_; }
76 base::StringPiece url_pattern() const { return url_pattern_; }
77 proto::AnchorType anchor_left() const { return anchor_left_; }
78 proto::AnchorType anchor_right() const { return anchor_right_; }
Karan Bhatiaa9c4e1d2018-09-10 23:37:4779 bool match_case() const { return match_case_ == MatchCase::kTrue; }
pkalinnikov15cf7242016-07-13 08:57:3480
pkalinnikov35d18812017-04-05 17:28:1881 // Returns whether the |url| matches the URL |pattern|. Requires the type of
82 // |this| pattern to be either SUBSTRING or WILDCARDED.
83 //
84 // Splits the pattern into subpatterns separated by '*' wildcards, and
85 // greedily finds each of them in the spec of the |url|. Respects anchors at
86 // either end of the pattern, and '^' separator placeholders when comparing a
87 // subpattern to a subtring of the spec.
Karan Bhatiae0aeb0e2018-09-12 18:57:2188 bool MatchesUrl(const UrlInfo& url) const;
pkalinnikov15cf7242016-07-13 08:57:3489
90 private:
pkalinnikov35d18812017-04-05 17:28:1891 // TODO(pkalinnikov): Store flat:: types instead of proto::, in order to avoid
92 // conversions in IndexedRuleset.
93 proto::UrlPatternType type_ = proto::URL_PATTERN_TYPE_UNSPECIFIED;
94 base::StringPiece url_pattern_;
95
96 proto::AnchorType anchor_left_ = proto::ANCHOR_TYPE_NONE;
97 proto::AnchorType anchor_right_ = proto::ANCHOR_TYPE_NONE;
98
Charlie Harrison8d71f6f2018-09-14 14:43:2699 MatchCase match_case_ = MatchCase::kTrue;
pkalinnikov35d18812017-04-05 17:28:18100
pkalinnikov15cf7242016-07-13 08:57:34101 DISALLOW_COPY_AND_ASSIGN(UrlPattern);
102};
103
pkalinnikov35d18812017-04-05 17:28:18104// Allow pretty-printing URLPatterns when they are used in GTest assertions.
105std::ostream& operator<<(std::ostream& out, const UrlPattern& pattern);
106
Pavel Kalinnikovd7970632017-06-20 09:07:34107} // namespace url_pattern_index
pkalinnikov15cf7242016-07-13 08:57:34108
Pavel Kalinnikovd7970632017-06-20 09:07:34109#endif // COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_H_