Event matching by regular expression matching on URLs.
Uses FilteredRE2 implementation to more efficiently match lots of regexes at once by prefiltering.
BUG=135609,112155
Review URL: https://codereview.chromium.org/10910179
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@156693 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/chrome/chrome_common.gypi b/chrome/chrome_common.gypi
index b0bb3796..bb189a6 100644
--- a/chrome/chrome_common.gypi
+++ b/chrome/chrome_common.gypi
@@ -50,6 +50,7 @@
'<(DEPTH)/third_party/icu/icu.gyp:icuuc',
'<(DEPTH)/third_party/libxml/libxml.gyp:libxml',
'<(DEPTH)/third_party/mt19937ar/mt19937ar.gyp:mt19937ar',
+ '<(DEPTH)/third_party/re2/re2.gyp:re2',
'<(DEPTH)/third_party/sqlite/sqlite.gyp:sqlite',
'<(DEPTH)/third_party/zlib/zlib.gyp:zlib',
'<(DEPTH)/ui/ui.gyp:ui_resources',
@@ -164,6 +165,10 @@
'common/extensions/file_browser_handler.h',
'common/extensions/manifest.cc',
'common/extensions/manifest.h',
+ 'common/extensions/matcher/regex_set_matcher.cc',
+ 'common/extensions/matcher/regex_set_matcher.h',
+ 'common/extensions/matcher/string_pattern.cc',
+ 'common/extensions/matcher/string_pattern.h',
'common/extensions/matcher/substring_set_matcher.cc',
'common/extensions/matcher/substring_set_matcher.h',
'common/extensions/matcher/url_matcher.cc',
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi
index 72c8446..1a80eeef 100644
--- a/chrome/chrome_tests.gypi
+++ b/chrome/chrome_tests.gypi
@@ -1997,6 +1997,8 @@
'common/extensions/manifest_tests/extension_manifests_update_unittest.cc',
'common/extensions/manifest_tests/extension_manifests_validapp_unittest.cc',
'common/extensions/manifest_tests/extension_manifests_web_unittest.cc',
+ 'common/extensions/matcher/regex_set_matcher_unittest.cc',
+ 'common/extensions/matcher/string_pattern_unittest.cc',
'common/extensions/matcher/substring_set_matcher_unittest.cc',
'common/extensions/matcher/url_matcher_unittest.cc',
'common/extensions/matcher/url_matcher_factory_unittest.cc',
diff --git a/chrome/common/DEPS b/chrome/common/DEPS
index 7c61927..75b7eea 100644
--- a/chrome/common/DEPS
+++ b/chrome/common/DEPS
@@ -26,6 +26,7 @@
"+third_party/bzip2",
"+third_party/mt19937ar",
"+third_party/npapi",
+ "+third_party/re2",
"+third_party/sqlite",
"+third_party/zlib",
diff --git a/chrome/common/extensions/api/events.json b/chrome/common/extensions/api/events.json
index aa07faf..a5f824c 100644
--- a/chrome/common/extensions/api/events.json
+++ b/chrome/common/extensions/api/events.json
@@ -264,6 +264,11 @@
"description": "Matches if the URL (without fragment identifier) is equal to a specified string. Port numbers are stripped from the URL if they match the default port number.",
"optional": true
},
+ "urlMatches": {
+ "type": "string",
+ "description": "Matches if the URL (without fragment identifier) matches a specified regular expression. Port numbers are stripped from the URL if they match the default port number. The regular expressions use the <a href=\"http://code.google.com/p/re2/wiki/Syntax\">RE2 syntax</a>.",
+ "optional": true
+ },
"urlPrefix": {
"type": "string",
"description": "Matches if the URL (without fragment identifier) starts with a specified string. Port numbers are stripped from the URL if they match the default port number.",
diff --git a/chrome/common/extensions/matcher/regex_set_matcher.cc b/chrome/common/extensions/matcher/regex_set_matcher.cc
new file mode 100644
index 0000000..455bece
--- /dev/null
+++ b/chrome/common/extensions/matcher/regex_set_matcher.cc
@@ -0,0 +1,109 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/common/extensions/matcher/regex_set_matcher.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "base/stl_util.h"
+#include "chrome/common/extensions/matcher/substring_set_matcher.h"
+#include "third_party/re2/re2/filtered_re2.h"
+#include "third_party/re2/re2/re2.h"
+
+namespace extensions {
+
+RegexSetMatcher::RegexSetMatcher() {}
+
+RegexSetMatcher::~RegexSetMatcher() {
+ DeleteSubstringPatterns();
+}
+
+void RegexSetMatcher::AddPatterns(
+ const std::vector<const StringPattern*>& regex_list) {
+ if (regex_list.empty())
+ return;
+ for (size_t i = 0; i < regex_list.size(); ++i) {
+ regexes_[regex_list[i]->id()] = regex_list[i];
+ }
+
+ RebuildMatcher();
+}
+
+void RegexSetMatcher::ClearPatterns() {
+ regexes_.clear();
+ RebuildMatcher();
+}
+
+bool RegexSetMatcher::Match(const std::string& text,
+ std::set<StringPattern::ID>* matches) const {
+ size_t old_number_of_matches = matches->size();
+ if (regexes_.empty())
+ return false;
+ if (!filtered_re2_.get()) {
+ LOG(ERROR) << "RegexSetMatcher was not initialized";
+ return false;
+ }
+
+ // FilteredRE2 expects lowercase for prefiltering, but we still
+ // match case-sensitively.
+ std::vector<RE2ID> atoms(FindSubstringMatches(
+ StringToLowerASCII(text)));
+
+ std::vector<RE2ID> re2_ids;
+ filtered_re2_->AllMatches(text, atoms, &re2_ids);
+
+ std::set<StringPattern::ID> matched_ids;
+ for (size_t i = 0; i < re2_ids.size(); ++i) {
+ StringPattern::ID id = re2_id_map_[re2_ids[i]];
+ matches->insert(id);
+ }
+ return old_number_of_matches != matches->size();
+}
+
+std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches(
+ const std::string& text) const {
+ std::set<int> atoms_set;
+ substring_matcher_->Match(text, &atoms_set);
+ return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end());
+}
+
+void RegexSetMatcher::RebuildMatcher() {
+ re2_id_map_.clear();
+ filtered_re2_.reset(new re2::FilteredRE2());
+ if (regexes_.empty())
+ return;
+
+ for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) {
+ RE2ID re2_id;
+ RE2::ErrorCode error = filtered_re2_->Add(
+ it->second->pattern(), RE2::DefaultOptions, &re2_id);
+ if (error == RE2::NoError) {
+ DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id);
+ re2_id_map_.push_back(it->first);
+ } else {
+ // TODO(yoz): Return an unparseable regex error as soon as possible.
+ LOG(ERROR) << "Could not parse regex (id=" << it->first << ", "
+ << it->second->pattern() << ")";
+ }
+ }
+
+ std::vector<std::string> strings_to_match;
+ filtered_re2_->Compile(&strings_to_match);
+
+ substring_matcher_.reset(new SubstringSetMatcher);
+ DeleteSubstringPatterns();
+ // Build SubstringSetMatcher from |strings_to_match|.
+ // SubstringSetMatcher doesn't own its strings.
+ for (size_t i = 0; i < strings_to_match.size(); ++i) {
+ substring_patterns_.push_back(
+ new StringPattern(strings_to_match[i], i));
+ }
+ substring_matcher_->RegisterPatterns(substring_patterns_);
+}
+
+void RegexSetMatcher::DeleteSubstringPatterns() {
+ STLDeleteElements(&substring_patterns_);
+}
+
+} // namespace extensions
diff --git a/chrome/common/extensions/matcher/regex_set_matcher.h b/chrome/common/extensions/matcher/regex_set_matcher.h
new file mode 100644
index 0000000..79c6641
--- /dev/null
+++ b/chrome/common/extensions/matcher/regex_set_matcher.h
@@ -0,0 +1,80 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_COMMON_EXTENSIONS_MATCHER_REGEX_SET_MATCHER_H_
+#define CHROME_COMMON_EXTENSIONS_MATCHER_REGEX_SET_MATCHER_H_
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/memory/scoped_ptr.h"
+#include "chrome/common/extensions/matcher/string_pattern.h"
+#include "chrome/common/extensions/matcher/substring_set_matcher.h"
+
+namespace re2 {
+class FilteredRE2;
+}
+
+namespace extensions {
+
+// Efficiently matches URLs against a collection of regular expressions,
+// using FilteredRE2 to reduce the number of regexes that must be matched
+// by pre-filtering with substring matching. See:
+// http://swtch.com/~rsc/regexp/regexp3.html#analysis
+class RegexSetMatcher {
+ public:
+ RegexSetMatcher();
+ virtual ~RegexSetMatcher();
+
+ // Adds the regex patterns in |regex_list| to the matcher. Also rebuilds
+ // the FilteredRE2 matcher; thus, for efficiency, prefer adding multiple
+ // patterns at once.
+ // Ownership of the patterns remains with the caller.
+ void AddPatterns(const std::vector<const StringPattern*>& regex_list);
+
+ // Removes all regex patterns.
+ void ClearPatterns();
+
+ // Appends the IDs of regular expressions in our set that match the |text|
+ // to |matches|.
+ bool Match(const std::string& text,
+ std::set<StringPattern::ID>* matches) const;
+
+ private:
+ typedef int RE2ID;
+ typedef std::map<StringPattern::ID, const StringPattern*> RegexMap;
+ typedef std::vector<StringPattern::ID> RE2IDMap;
+
+ // Use Aho-Corasick SubstringSetMatcher to find which literal patterns
+ // match the |text|.
+ std::vector<RE2ID> FindSubstringMatches(const std::string& text) const;
+
+ // Rebuild FilteredRE2 from scratch. Needs to be called whenever
+ // our set of regexes changes.
+ // TODO(yoz): investigate if it could be done incrementally;
+ // apparently not supported by FilteredRE2.
+ void RebuildMatcher();
+
+ // Clean up StringPatterns in |substring_patterns_|.
+ void DeleteSubstringPatterns();
+
+ // Mapping of regex StringPattern::IDs to regexes.
+ RegexMap regexes_;
+ // Mapping of RE2IDs from FilteredRE2 (which are assigned in order)
+ // to regex StringPattern::IDs.
+ RE2IDMap re2_id_map_;
+
+ scoped_ptr<re2::FilteredRE2> filtered_re2_;
+ scoped_ptr<SubstringSetMatcher> substring_matcher_;
+
+ // The substring patterns from FilteredRE2, which are used in
+ // |substring_matcher_| but whose lifetime is managed here.
+ std::vector<const StringPattern*> substring_patterns_;
+};
+
+} // namespace extensions
+
+#endif // CHROME_COMMON_EXTENSIONS_MATCHER_REGEX_SET_MATCHER_H_
diff --git a/chrome/common/extensions/matcher/regex_set_matcher_unittest.cc b/chrome/common/extensions/matcher/regex_set_matcher_unittest.cc
new file mode 100644
index 0000000..63cf1c2d
--- /dev/null
+++ b/chrome/common/extensions/matcher/regex_set_matcher_unittest.cc
@@ -0,0 +1,61 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <set>
+
+#include "base/stl_util.h"
+#include "chrome/common/extensions/matcher/regex_set_matcher.h"
+#include "googleurl/src/gurl.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+using extensions::StringPattern;
+using extensions::RegexSetMatcher;
+
+TEST(RegexSetMatcherTest, MatchRegexes) {
+ StringPattern pattern_1("ab.*c", 42);
+ StringPattern pattern_2("f*f", 17);
+ StringPattern pattern_3("c(ar|ra)b|brac", 239);
+ std::vector<const StringPattern*> regexes;
+ regexes.push_back(&pattern_1);
+ regexes.push_back(&pattern_2);
+ regexes.push_back(&pattern_3);
+ RegexSetMatcher matcher;
+ matcher.AddPatterns(regexes);
+
+ std::set<StringPattern::ID> result1;
+ matcher.Match("http://abracadabra.com", &result1);
+ EXPECT_EQ(2U, result1.size());
+ EXPECT_TRUE(ContainsKey(result1, 42));
+ EXPECT_TRUE(ContainsKey(result1, 239));
+
+ std::set<StringPattern::ID> result2;
+ matcher.Match("https://abfffffffffffffffffffffffffffffff.fi/cf", &result2);
+ EXPECT_EQ(2U, result2.size());
+ EXPECT_TRUE(ContainsKey(result2, 17));
+ EXPECT_TRUE(ContainsKey(result2, 42));
+
+ std::set<StringPattern::ID> result3;
+ matcher.Match("http://nothing.com/", &result3);
+ EXPECT_EQ(0U, result3.size());
+}
+
+TEST(RegexSetMatcherTest, CaseSensitivity) {
+ StringPattern pattern_1("AAA", 51);
+ StringPattern pattern_2("aaA", 57);
+ std::vector<const StringPattern*> regexes;
+ regexes.push_back(&pattern_1);
+ regexes.push_back(&pattern_2);
+ RegexSetMatcher matcher;
+ matcher.AddPatterns(regexes);
+
+ std::set<StringPattern::ID> result1;
+ matcher.Match("http://aaa.net/", &result1);
+ EXPECT_EQ(0U, result1.size());
+
+ std::set<StringPattern::ID> result2;
+ matcher.Match("http://aaa.net/quaaACK", &result2);
+ EXPECT_EQ(1U, result2.size());
+ EXPECT_TRUE(ContainsKey(result2, 57));
+}
diff --git a/chrome/common/extensions/matcher/string_pattern.cc b/chrome/common/extensions/matcher/string_pattern.cc
new file mode 100644
index 0000000..673ed56
--- /dev/null
+++ b/chrome/common/extensions/matcher/string_pattern.cc
@@ -0,0 +1,20 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/common/extensions/matcher/string_pattern.h"
+
+namespace extensions {
+
+StringPattern::StringPattern(const std::string& pattern,
+ StringPattern::ID id)
+ : pattern_(pattern), id_(id) {}
+
+StringPattern::~StringPattern() {}
+
+bool StringPattern::operator<(const StringPattern& rhs) const {
+ if (id_ != rhs.id_) return id_ < rhs.id_;
+ return pattern_ < rhs.pattern_;
+}
+
+} // namespace extensions
diff --git a/chrome/common/extensions/matcher/string_pattern.h b/chrome/common/extensions/matcher/string_pattern.h
new file mode 100644
index 0000000..15e9b43f
--- /dev/null
+++ b/chrome/common/extensions/matcher/string_pattern.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_COMMON_EXTENSIONS_MATCHER_STRING_PATTERN_H_
+#define CHROME_COMMON_EXTENSIONS_MATCHER_STRING_PATTERN_H_
+
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+
+namespace extensions {
+
+// An individual pattern of a substring or regex matcher. A pattern consists of
+// a string (interpreted as individual bytes, no character encoding) and an
+// identifier.
+// IDs are returned to the caller of SubstringSetMatcher::Match() or
+// RegexMatcher::MatchURL() to help the caller to figure out what
+// patterns matched a string. All patterns registered to a matcher
+// need to contain unique IDs.
+class StringPattern {
+ public:
+ typedef int ID;
+
+ StringPattern(const std::string& pattern, ID id);
+ ~StringPattern();
+ const std::string& pattern() const { return pattern_; }
+ ID id() const { return id_; }
+
+ bool operator<(const StringPattern& rhs) const;
+
+ private:
+ std::string pattern_;
+ ID id_;
+
+ DISALLOW_COPY_AND_ASSIGN(StringPattern);
+};
+
+} // namespace extensions
+
+#endif // CHROME_COMMON_EXTENSIONS_MATCHER_STRING_PATTERN_H_
diff --git a/chrome/common/extensions/matcher/string_pattern_unittest.cc b/chrome/common/extensions/matcher/string_pattern_unittest.cc
new file mode 100644
index 0000000..2580afa
--- /dev/null
+++ b/chrome/common/extensions/matcher/string_pattern_unittest.cc
@@ -0,0 +1,23 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/common/extensions/matcher/string_pattern.h"
+
+#include <string>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+using extensions::StringPattern;
+
+TEST(StringPatternTest, StringPattern) {
+ StringPattern r1("Test", 2);
+ EXPECT_EQ("Test", r1.pattern());
+ EXPECT_EQ(2, r1.id());
+
+ EXPECT_FALSE(r1 < r1);
+ StringPattern r2("Test", 3);
+ EXPECT_TRUE(r1 < r2);
+ StringPattern r3("ZZZZ", 2);
+ EXPECT_TRUE(r1 < r3);
+}
diff --git a/chrome/common/extensions/matcher/substring_set_matcher.cc b/chrome/common/extensions/matcher/substring_set_matcher.cc
index fef000df..fb654d6 100644
--- a/chrome/common/extensions/matcher/substring_set_matcher.cc
+++ b/chrome/common/extensions/matcher/substring_set_matcher.cc
@@ -12,22 +12,6 @@
namespace extensions {
//
-// SubstringPattern
-//
-
-SubstringPattern::SubstringPattern(const std::string& pattern,
- SubstringPattern::ID id)
- : pattern_(pattern), id_(id) {}
-
-SubstringPattern::~SubstringPattern() {}
-
-bool SubstringPattern::operator<(const SubstringPattern& rhs) const {
- if (id_ < rhs.id_) return true;
- if (id_ > rhs.id_) return false;
- return pattern_ < rhs.pattern_;
-}
-
-//
// SubstringSetMatcher
//
@@ -38,29 +22,29 @@
SubstringSetMatcher::~SubstringSetMatcher() {}
void SubstringSetMatcher::RegisterPatterns(
- const std::vector<const SubstringPattern*>& patterns) {
+ const std::vector<const StringPattern*>& patterns) {
RegisterAndUnregisterPatterns(patterns,
- std::vector<const SubstringPattern*>());
+ std::vector<const StringPattern*>());
}
void SubstringSetMatcher::UnregisterPatterns(
- const std::vector<const SubstringPattern*>& patterns) {
- RegisterAndUnregisterPatterns(std::vector<const SubstringPattern*>(),
+ const std::vector<const StringPattern*>& patterns) {
+ RegisterAndUnregisterPatterns(std::vector<const StringPattern*>(),
patterns);
}
void SubstringSetMatcher::RegisterAndUnregisterPatterns(
- const std::vector<const SubstringPattern*>& to_register,
- const std::vector<const SubstringPattern*>& to_unregister) {
+ const std::vector<const StringPattern*>& to_register,
+ const std::vector<const StringPattern*>& to_unregister) {
// Register patterns.
- for (std::vector<const SubstringPattern*>::const_iterator i =
+ for (std::vector<const StringPattern*>::const_iterator i =
to_register.begin(); i != to_register.end(); ++i) {
DCHECK(patterns_.find((*i)->id()) == patterns_.end());
patterns_[(*i)->id()] = *i;
}
// Unregister patterns
- for (std::vector<const SubstringPattern*>::const_iterator i =
+ for (std::vector<const StringPattern*>::const_iterator i =
to_unregister.begin(); i != to_unregister.end(); ++i) {
patterns_.erase((*i)->id());
}
@@ -69,7 +53,7 @@
}
bool SubstringSetMatcher::Match(const std::string& text,
- std::set<SubstringPattern::ID>* matches) const {
+ std::set<StringPattern::ID>* matches) const {
size_t old_number_of_matches = matches->size();
// Handle patterns matching the empty string.
@@ -115,7 +99,7 @@
}
void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree(
- const SubstringPattern* pattern) {
+ const StringPattern* pattern) {
const std::string& text = pattern->pattern();
size_t text_length = text.length();
@@ -212,7 +196,7 @@
edges_[c] = node;
}
-void SubstringSetMatcher::AhoCorasickNode::AddMatch(SubstringPattern::ID id) {
+void SubstringSetMatcher::AhoCorasickNode::AddMatch(StringPattern::ID id) {
matches_.insert(id);
}
diff --git a/chrome/common/extensions/matcher/substring_set_matcher.h b/chrome/common/extensions/matcher/substring_set_matcher.h
index 65126a68..0b4bbc35 100644
--- a/chrome/common/extensions/matcher/substring_set_matcher.h
+++ b/chrome/common/extensions/matcher/substring_set_matcher.h
@@ -11,34 +11,10 @@
#include <vector>
#include "base/basictypes.h"
+#include "chrome/common/extensions/matcher/string_pattern.h"
namespace extensions {
-// An individual pattern of the SubstringSetMatcher. A pattern consists of
-// a string (interpreted as individual bytes, no character encoding) and an
-// identifier.
-// Each pattern that matches a string emits its ID to the caller of
-// SubstringSetMatcher::Match(). This helps the caller to figure out what
-// patterns matched a string. All patterns registered to a SubstringSetMatcher
-// need to contain unique IDs.
-class SubstringPattern {
- public:
- typedef int ID;
-
- SubstringPattern(const std::string& pattern, ID id);
- ~SubstringPattern();
- const std::string& pattern() const { return pattern_; }
- ID id() const { return id_; }
-
- bool operator<(const SubstringPattern& rhs) const;
-
- private:
- std::string pattern_;
- ID id_;
-
- DISALLOW_COPY_AND_ASSIGN(SubstringPattern);
-};
-
// Class that store a set of string patterns and can find for a string S,
// which string patterns occur in S.
class SubstringSetMatcher {
@@ -50,22 +26,22 @@
// The same pattern cannot be registered twice and each pattern needs to have
// a unique ID.
// Ownership of the patterns remains with the caller.
- void RegisterPatterns(const std::vector<const SubstringPattern*>& patterns);
+ void RegisterPatterns(const std::vector<const StringPattern*>& patterns);
// Unregisters the passed |patterns|.
- void UnregisterPatterns(const std::vector<const SubstringPattern*>& patterns);
+ void UnregisterPatterns(const std::vector<const StringPattern*>& patterns);
// Analogous to RegisterPatterns and UnregisterPatterns but executes both
// operations in one step, which is cheaper in the execution.
void RegisterAndUnregisterPatterns(
- const std::vector<const SubstringPattern*>& to_register,
- const std::vector<const SubstringPattern*>& to_unregister);
+ const std::vector<const StringPattern*>& to_register,
+ const std::vector<const StringPattern*>& to_unregister);
- // Matches |text| against all registered SubstringPatterns. Stores the IDs
+ // Matches |text| against all registered StringPatterns. Stores the IDs
// of matching patterns in |matches|. |matches| is not cleared before adding
// to it.
bool Match(const std::string& text,
- std::set<SubstringPattern::ID>* matches) const;
+ std::set<StringPattern::ID>* matches) const;
// Returns true if this object retains no allocated data. Only for debugging.
bool IsEmpty() const;
@@ -105,7 +81,7 @@
public:
// Key: label of the edge, value: node index in |tree_| of parent class.
typedef std::map<char, int> Edges;
- typedef std::set<SubstringPattern::ID> Matches;
+ typedef std::set<StringPattern::ID> Matches;
AhoCorasickNode();
~AhoCorasickNode();
@@ -120,7 +96,7 @@
int failure() const { return failure_; }
void set_failure(int failure) { failure_ = failure; }
- void AddMatch(SubstringPattern::ID id);
+ void AddMatch(StringPattern::ID id);
void AddMatches(const Matches& matches);
const Matches& matches() const { return matches_; }
@@ -140,12 +116,12 @@
// Inserts a path for |pattern->pattern()| into the tree and adds
// |pattern->id()| to the set of matches. Ownership of |pattern| remains with
// the caller.
- void InsertPatternIntoAhoCorasickTree(const SubstringPattern* pattern);
+ void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern);
void CreateFailureEdges();
- // Set of all registered SubstringPatterns. Used to regenerate the
+ // Set of all registered StringPatterns. Used to regenerate the
// Aho-Corasick tree in case patterns are registered or unregistered.
- typedef std::map<SubstringPattern::ID, const SubstringPattern*>
+ typedef std::map<StringPattern::ID, const StringPattern*>
SubstringPatternSet;
SubstringPatternSet patterns_;
diff --git a/chrome/common/extensions/matcher/substring_set_matcher_unittest.cc b/chrome/common/extensions/matcher/substring_set_matcher_unittest.cc
index 8e5eb3b..6ea27e7e 100644
--- a/chrome/common/extensions/matcher/substring_set_matcher_unittest.cc
+++ b/chrome/common/extensions/matcher/substring_set_matcher_unittest.cc
@@ -10,21 +10,9 @@
#include "testing/gtest/include/gtest/gtest.h"
-using extensions::SubstringPattern;
+using extensions::StringPattern;
using extensions::SubstringSetMatcher;
-TEST(SubstringSetMatcherTest, SubstringPattern) {
- SubstringPattern r1("Test", 2);
- EXPECT_EQ("Test", r1.pattern());
- EXPECT_EQ(2, r1.id());
-
- EXPECT_FALSE(r1 < r1);
- SubstringPattern r2("Test", 3);
- EXPECT_TRUE(r1 < r2);
- SubstringPattern r3("ZZZZ", 2);
- EXPECT_TRUE(r1 < r3);
-}
-
namespace {
void TestOnePattern(const std::string& test_string,
const std::string& pattern,
@@ -32,8 +20,8 @@
std::string test =
"TestOnePattern(" + test_string + ", " + pattern + ", " +
(is_match ? "1" : "0") + ")";
- std::vector<const SubstringPattern*> patterns;
- SubstringPattern substring_pattern(pattern, 1);
+ std::vector<const StringPattern*> patterns;
+ StringPattern substring_pattern(pattern, 1);
patterns.push_back(&substring_pattern);
SubstringSetMatcher matcher;
matcher.RegisterPatterns(patterns);
@@ -53,12 +41,12 @@
std::string test =
"TestTwoPatterns(" + test_string + ", " + pattern_1 + ", " + pattern_2 +
", " + (is_match_1 ? "1" : "0") + ", " + (is_match_2 ? "1" : "0") + ")";
- SubstringPattern substring_pattern_1(pattern_1, 1);
- SubstringPattern substring_pattern_2(pattern_2, 2);
+ StringPattern substring_pattern_1(pattern_1, 1);
+ StringPattern substring_pattern_2(pattern_2, 2);
// In order to make sure that the order in which patterns are registered
// does not make any difference we try both permutations.
for (int permutation = 0; permutation < 2; ++permutation) {
- std::vector<const SubstringPattern*> patterns;
+ std::vector<const StringPattern*> patterns;
if (permutation == 0) {
patterns.push_back(&substring_pattern_1);
patterns.push_back(&substring_pattern_2);
@@ -135,11 +123,11 @@
TEST(SubstringSetMatcherTest, RegisterAndRemove) {
SubstringSetMatcher matcher;
- SubstringPattern pattern_1("a", 1);
- SubstringPattern pattern_2("b", 2);
- SubstringPattern pattern_3("c", 3);
+ StringPattern pattern_1("a", 1);
+ StringPattern pattern_2("b", 2);
+ StringPattern pattern_3("c", 3);
- std::vector<const SubstringPattern*> patterns;
+ std::vector<const StringPattern*> patterns;
patterns.push_back(&pattern_1);
matcher.RegisterPatterns(patterns);
diff --git a/chrome/common/extensions/matcher/url_matcher.cc b/chrome/common/extensions/matcher/url_matcher.cc
index 0eb5ab5..ac639d9 100644
--- a/chrome/common/extensions/matcher/url_matcher.cc
+++ b/chrome/common/extensions/matcher/url_matcher.cc
@@ -15,29 +15,37 @@
namespace extensions {
// This set of classes implement a mapping of URL Component Patterns, such as
-// host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns.
+// host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns
+// for use in substring comparisons.
//
// The idea of this mapping is to reduce the problem of comparing many
// URL Component Patterns against one URL to the problem of searching many
// substrings in one string:
//
-// ---------------------- --------------------
-// | URL Query operator | ----translate----> | SubstringPattern |
-// ---------------------- --------------------
-// ^
-// |
-// compare
-// |
-// v
-// ---------------------- --------------------
-// | URL to compare | | |
-// | to all URL Query | ----translate----> | String |
-// | operators | | |
-// ---------------------- --------------------
+// ---------------------- -----------------
+// | URL Query operator | ----translate----> | StringPattern |
+// ---------------------- -----------------
+// ^
+// |
+// compare
+// |
+// v
+// ---------------------- -----------------
+// | URL to compare | | |
+// | to all URL Query | ----translate----> | String |
+// | operators | | |
+// ---------------------- -----------------
//
// The reason for this problem reduction is that there are efficient algorithms
// for searching many substrings in one string (see Aho-Corasick algorithm).
//
+// Additionally, some of the same pieces are reused to implement regular
+// expression comparisons. The FilteredRE2 implementation for matching many
+// regular expressions against one string uses prefiltering, in which a set
+// of substrings (derived from the regexes) are first searched for, to reduce
+// the number of regular expressions to test; the prefiltering step also
+// uses Aho-Corasick.
+//
// Case 1: {host,path,query}_{prefix,suffix,equals} searches.
// ==========================================================
//
@@ -84,7 +92,7 @@
//
// Similarly for path query parameters ({path, query}_{prefix, suffix, equals}).
//
-// With this, we can search the SubstringPatterns in the normalized URL.
+// With this, we can search the StringPatterns in the normalized URL.
//
//
// Case 2: url_{prefix,suffix,equals,contains} searches.
@@ -124,7 +132,21 @@
// followed by gurl.host().find("example.co");
//
// [similarly for path_contains and query_contains].
+//
+//
+// Regular expression matching (url_matches searches)
+// ==================================================
+//
+// This class also supports matching regular expressions (RE2 syntax)
+// against full URLs, which are transformed as in case 2.
+namespace {
+
+bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) {
+ return criterion == URLMatcherCondition::URL_MATCHES;
+}
+
+} // namespace
//
// URLMatcherCondition
@@ -132,34 +154,34 @@
URLMatcherCondition::URLMatcherCondition()
: criterion_(HOST_PREFIX),
- substring_pattern_(NULL) {}
+ string_pattern_(NULL) {}
URLMatcherCondition::~URLMatcherCondition() {}
URLMatcherCondition::URLMatcherCondition(
Criterion criterion,
- const SubstringPattern* substring_pattern)
+ const StringPattern* string_pattern)
: criterion_(criterion),
- substring_pattern_(substring_pattern) {}
+ string_pattern_(string_pattern) {}
URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs)
: criterion_(rhs.criterion_),
- substring_pattern_(rhs.substring_pattern_) {}
+ string_pattern_(rhs.string_pattern_) {}
URLMatcherCondition& URLMatcherCondition::operator=(
const URLMatcherCondition& rhs) {
criterion_ = rhs.criterion_;
- substring_pattern_ = rhs.substring_pattern_;
+ string_pattern_ = rhs.string_pattern_;
return *this;
}
bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const {
if (criterion_ < rhs.criterion_) return true;
if (criterion_ > rhs.criterion_) return false;
- if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL)
- return *substring_pattern_ < *rhs.substring_pattern_;
- if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true;
- // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL,
+ if (string_pattern_ != NULL && rhs.string_pattern_ != NULL)
+ return *string_pattern_ < *rhs.string_pattern_;
+ if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true;
+ // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL,
// or both are NULL.
return false;
}
@@ -183,25 +205,28 @@
return false;
}
+bool URLMatcherCondition::IsRegexCondition() const {
+ return IsRegexCriterion(criterion_);
+}
+
bool URLMatcherCondition::IsMatch(
- const std::set<SubstringPattern::ID>& matching_substring_patterns,
+ const std::set<StringPattern::ID>& matching_patterns,
const GURL& url) const {
- DCHECK(substring_pattern_);
- if (matching_substring_patterns.find(substring_pattern_->id()) ==
- matching_substring_patterns.end())
+ DCHECK(string_pattern_);
+ if (!ContainsKey(matching_patterns, string_pattern_->id()))
return false;
// The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on
// a substring match on the raw URL. In case of a match, we need to verify
// that the match was found in the correct component of the URL.
switch (criterion_) {
case HOST_CONTAINS:
- return url.host().find(substring_pattern_->pattern()) !=
+ return url.host().find(string_pattern_->pattern()) !=
std::string::npos;
case PATH_CONTAINS:
- return url.path().find(substring_pattern_->pattern()) !=
+ return url.path().find(string_pattern_->pattern()) !=
std::string::npos;
case QUERY_CONTAINS:
- return url.query().find(substring_pattern_->pattern()) !=
+ return url.query().find(string_pattern_->pattern()) !=
std::string::npos;
default:
break;
@@ -224,7 +249,8 @@
URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {}
URLMatcherConditionFactory::~URLMatcherConditionFactory() {
- STLDeleteElements(&pattern_singletons_);
+ STLDeleteElements(&substring_pattern_singletons_);
+ STLDeleteElements(®ex_pattern_singletons_);
}
std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches(
@@ -336,6 +362,23 @@
kEndOfURL;
}
+std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches(
+ const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.ClearPassword();
+ replacements.ClearUsername();
+ replacements.ClearRef();
+ // Clear port if it is implicit from scheme.
+ if (url.has_port()) {
+ const std::string& port = url.scheme();
+ if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
+ url.EffectiveIntPort()) {
+ replacements.ClearPort();
+ }
+ }
+ return url.ReplaceComponents(replacements).spec();
+}
+
URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
const std::string& prefix) {
return CreateCondition(URLMatcherCondition::URL_PREFIX,
@@ -358,35 +401,55 @@
kBeginningOfURL + str + kEndOfURL);
}
+URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition(
+ const std::string& regex) {
+ return CreateCondition(URLMatcherCondition::URL_MATCHES, regex);
+}
+
void URLMatcherConditionFactory::ForgetUnusedPatterns(
- const std::set<SubstringPattern::ID>& used_patterns) {
- PatternSingletons::iterator i = pattern_singletons_.begin();
- while (i != pattern_singletons_.end()) {
+ const std::set<StringPattern::ID>& used_patterns) {
+ PatternSingletons::iterator i = substring_pattern_singletons_.begin();
+ while (i != substring_pattern_singletons_.end()) {
if (used_patterns.find((*i)->id()) != used_patterns.end()) {
++i;
} else {
delete *i;
- pattern_singletons_.erase(i++);
+ substring_pattern_singletons_.erase(i++);
+ }
+ }
+ i = regex_pattern_singletons_.begin();
+ while (i != regex_pattern_singletons_.end()) {
+ if (used_patterns.find((*i)->id()) != used_patterns.end()) {
+ ++i;
+ } else {
+ delete *i;
+ regex_pattern_singletons_.erase(i++);
}
}
}
bool URLMatcherConditionFactory::IsEmpty() const {
- return pattern_singletons_.empty();
+ return substring_pattern_singletons_.empty() &&
+ regex_pattern_singletons_.empty();
}
URLMatcherCondition URLMatcherConditionFactory::CreateCondition(
URLMatcherCondition::Criterion criterion,
const std::string& pattern) {
- SubstringPattern search_pattern(pattern, 0);
+ StringPattern search_pattern(pattern, 0);
+ PatternSingletons* pattern_singletons =
+ IsRegexCriterion(criterion) ? ®ex_pattern_singletons_
+ : &substring_pattern_singletons_;
+
PatternSingletons::const_iterator iter =
- pattern_singletons_.find(&search_pattern);
- if (iter != pattern_singletons_.end()) {
+ pattern_singletons->find(&search_pattern);
+
+ if (iter != pattern_singletons->end()) {
return URLMatcherCondition(criterion, *iter);
} else {
- SubstringPattern* new_pattern =
- new SubstringPattern(pattern, id_counter_++);
- pattern_singletons_.insert(new_pattern);
+ StringPattern* new_pattern =
+ new StringPattern(pattern, id_counter_++);
+ pattern_singletons->insert(new_pattern);
return URLMatcherCondition(criterion, new_pattern);
}
}
@@ -399,9 +462,9 @@
return "." + hostname;
}
-bool URLMatcherConditionFactory::SubstringPatternPointerCompare::operator()(
- SubstringPattern* lhs,
- SubstringPattern* rhs) const {
+bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()(
+ StringPattern* lhs,
+ StringPattern* rhs) const {
if (lhs == NULL && rhs != NULL) return true;
if (lhs != NULL && rhs != NULL)
return lhs->pattern() < rhs->pattern();
@@ -483,11 +546,11 @@
port_filter_(port_filter.Pass()) {}
bool URLMatcherConditionSet::IsMatch(
- const std::set<SubstringPattern::ID>& matching_substring_patterns,
+ const std::set<StringPattern::ID>& matching_patterns,
const GURL& url) const {
for (Conditions::const_iterator i = conditions_.begin();
i != conditions_.end(); ++i) {
- if (!i->IsMatch(matching_substring_patterns, url))
+ if (!i->IsMatch(matching_patterns, url))
return false;
}
if (scheme_filter_.get() && !scheme_filter_->IsMatch(url))
@@ -497,7 +560,6 @@
return true;
}
-
//
// URLMatcher
//
@@ -533,19 +595,21 @@
}
std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) {
- // Find all IDs of SubstringPatterns that match |url|.
+ // Find all IDs of StringPatterns that match |url|.
// See URLMatcherConditionFactory for the canonicalization of URLs and the
// distinction between full url searches and url component searches.
- std::set<SubstringPattern::ID> matches;
+ std::set<StringPattern::ID> matches;
full_url_matcher_.Match(
condition_factory_.CanonicalizeURLForFullSearches(url), &matches);
url_component_matcher_.Match(
condition_factory_.CanonicalizeURLForComponentSearches(url), &matches);
+ regex_set_matcher_.Match(
+ condition_factory_.CanonicalizeURLForRegexSearches(url), &matches);
// Calculate all URLMatcherConditionSets for which all URLMatcherConditions
// were fulfilled.
std::set<URLMatcherConditionSet::ID> result;
- for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin();
+ for (std::set<StringPattern::ID>::const_iterator i = matches.begin();
i != matches.end(); ++i) {
// For each URLMatcherConditionSet there is exactly one condition
// registered in substring_match_triggers_. This means that the following
@@ -580,7 +644,7 @@
// Determine which patterns need to be registered when this function
// terminates.
- std::set<const SubstringPattern*> new_patterns;
+ std::set<const StringPattern*> new_patterns;
for (URLMatcherConditionSets::const_iterator condition_set_iter =
url_matcher_condition_sets_.begin();
condition_set_iter != url_matcher_condition_sets_.end();
@@ -590,21 +654,22 @@
for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
conditions.begin(); condition_iter != conditions.end();
++condition_iter) {
- // If we are called to process Full URL searches, ignore all others,
- // and vice versa.
- if (full_url_conditions == condition_iter->IsFullURLCondition())
- new_patterns.insert(condition_iter->substring_pattern());
+ // If we are called to process Full URL searches, ignore others, and
+ // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.)
+ if (!condition_iter->IsRegexCondition() &&
+ full_url_conditions == condition_iter->IsFullURLCondition())
+ new_patterns.insert(condition_iter->string_pattern());
}
}
// This is the set of patterns that were registered before this function
// is called.
- std::set<const SubstringPattern*>& registered_patterns =
+ std::set<const StringPattern*>& registered_patterns =
full_url_conditions ? registered_full_url_patterns_
: registered_url_component_patterns_;
// Add all patterns that are in new_patterns but not in registered_patterns.
- std::vector<const SubstringPattern*> patterns_to_register;
+ std::vector<const StringPattern*> patterns_to_register;
std::set_difference(
new_patterns.begin(), new_patterns.end(),
registered_patterns.begin(), registered_patterns.end(),
@@ -612,7 +677,7 @@
// Remove all patterns that are in registered_patterns but not in
// new_patterns.
- std::vector<const SubstringPattern*> patterns_to_unregister;
+ std::vector<const StringPattern*> patterns_to_unregister;
std::set_difference(
registered_patterns.begin(), registered_patterns.end(),
new_patterns.begin(), new_patterns.end(),
@@ -629,9 +694,9 @@
registered_patterns.swap(new_patterns);
}
-void URLMatcher::UpdateTriggers() {
- // Count substring pattern frequencies.
- std::map<SubstringPattern::ID, size_t> substring_pattern_frequencies;
+void URLMatcher::UpdateRegexSetMatcher() {
+ std::vector<const StringPattern*> new_patterns;
+
for (URLMatcherConditionSets::const_iterator condition_set_iter =
url_matcher_condition_sets_.begin();
condition_set_iter != url_matcher_condition_sets_.end();
@@ -641,13 +706,36 @@
for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
conditions.begin(); condition_iter != conditions.end();
++condition_iter) {
- const SubstringPattern* pattern = condition_iter->substring_pattern();
+ if (condition_iter->IsRegexCondition())
+ new_patterns.push_back(condition_iter->string_pattern());
+ }
+ }
+
+ // Start over from scratch. We can't really do better than this, since the
+ // FilteredRE2 backend doesn't support incremental updates.
+ regex_set_matcher_.ClearPatterns();
+ regex_set_matcher_.AddPatterns(new_patterns);
+}
+
+void URLMatcher::UpdateTriggers() {
+ // Count substring pattern frequencies.
+ std::map<StringPattern::ID, size_t> substring_pattern_frequencies;
+ for (URLMatcherConditionSets::const_iterator condition_set_iter =
+ url_matcher_condition_sets_.begin();
+ condition_set_iter != url_matcher_condition_sets_.end();
+ ++condition_set_iter) {
+ const URLMatcherConditionSet::Conditions& conditions =
+ condition_set_iter->second->conditions();
+ for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
+ conditions.begin(); condition_iter != conditions.end();
+ ++condition_iter) {
+ const StringPattern* pattern = condition_iter->string_pattern();
substring_pattern_frequencies[pattern->id()]++;
}
}
// Update trigger conditions: Determine for each URLMatcherConditionSet which
- // URLMatcherCondition contains a SubstringPattern that occurs least
+ // URLMatcherCondition contains a StringPattern that occurs least
// frequently in this URLMatcher. We assume that this condition is very
// specific and occurs rarely in URLs. If a match occurs for this
// URLMatcherCondition, we want to test all other URLMatcherCondition in the
@@ -664,12 +752,12 @@
continue;
URLMatcherConditionSet::Conditions::const_iterator condition_iter =
conditions.begin();
- SubstringPattern::ID trigger = condition_iter->substring_pattern()->id();
+ StringPattern::ID trigger = condition_iter->string_pattern()->id();
// We skip the first element in the following loop.
++condition_iter;
for (; condition_iter != conditions.end(); ++condition_iter) {
- SubstringPattern::ID current_id =
- condition_iter->substring_pattern()->id();
+ StringPattern::ID current_id =
+ condition_iter->string_pattern()->id();
if (substring_pattern_frequencies[trigger] >
substring_pattern_frequencies[current_id]) {
trigger = current_id;
@@ -680,7 +768,7 @@
}
void URLMatcher::UpdateConditionFactory() {
- std::set<SubstringPattern::ID> used_patterns;
+ std::set<StringPattern::ID> used_patterns;
for (URLMatcherConditionSets::const_iterator condition_set_iter =
url_matcher_condition_sets_.begin();
condition_set_iter != url_matcher_condition_sets_.end();
@@ -690,7 +778,7 @@
for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
conditions.begin(); condition_iter != conditions.end();
++condition_iter) {
- used_patterns.insert(condition_iter->substring_pattern()->id());
+ used_patterns.insert(condition_iter->string_pattern()->id());
}
}
condition_factory_.ForgetUnusedPatterns(used_patterns);
@@ -699,6 +787,7 @@
void URLMatcher::UpdateInternalDatastructures() {
UpdateSubstringSetMatcher(false);
UpdateSubstringSetMatcher(true);
+ UpdateRegexSetMatcher();
UpdateTriggers();
UpdateConditionFactory();
}
diff --git a/chrome/common/extensions/matcher/url_matcher.h b/chrome/common/extensions/matcher/url_matcher.h
index c547ae62..e868e27 100644
--- a/chrome/common/extensions/matcher/url_matcher.h
+++ b/chrome/common/extensions/matcher/url_matcher.h
@@ -11,6 +11,7 @@
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/scoped_vector.h"
+#include "chrome/common/extensions/matcher/regex_set_matcher.h"
#include "chrome/common/extensions/matcher/substring_set_matcher.h"
class GURL;
@@ -24,10 +25,12 @@
// This class represents a single URL matching condition, e.g. a match on the
// host suffix or the containment of a string in the query component of a GURL.
//
-// The difference from a simple SubstringPattern is that this also supports
+// The difference from a simple StringPattern is that this also supports
// checking whether the {Host, Path, Query} of a URL contains a string. The
// reduction of URL matching conditions to StringPatterns conducted by
// URLMatcherConditionFactory is not capable of expressing that alone.
+//
+// Also supported is matching regular expressions against the URL (URL_MATCHES).
class URLMatcherCondition {
public:
enum Criterion {
@@ -49,19 +52,20 @@
URL_SUFFIX,
URL_CONTAINS,
URL_EQUALS,
+ URL_MATCHES,
};
URLMatcherCondition();
~URLMatcherCondition();
URLMatcherCondition(Criterion criterion,
- const SubstringPattern* substring_pattern);
+ const StringPattern* substring_pattern);
URLMatcherCondition(const URLMatcherCondition& rhs);
URLMatcherCondition& operator=(const URLMatcherCondition& rhs);
bool operator<(const URLMatcherCondition& rhs) const;
Criterion criterion() const { return criterion_; }
- const SubstringPattern* substring_pattern() const {
- return substring_pattern_;
+ const StringPattern* string_pattern() const {
+ return string_pattern_;
}
// Returns whether this URLMatcherCondition needs to be executed on a
@@ -69,19 +73,22 @@
// URLMatcherConditionFactory).
bool IsFullURLCondition() const;
+ // Returns whether this URLMatcherCondition is a regular expression to be
+ // handled by a regex matcher instead of a substring matcher.
+ bool IsRegexCondition() const;
+
// Returns whether this condition is fulfilled according to
- // |matching_substring_patterns| and |url|.
- bool IsMatch(
- const std::set<SubstringPattern::ID>& matching_substring_patterns,
- const GURL& url) const;
+ // |matching_patterns| and |url|.
+ bool IsMatch(const std::set<StringPattern::ID>& matching_patterns,
+ const GURL& url) const;
private:
- // |criterion_| and |substring_pattern_| describe together what property a URL
+ // |criterion_| and |string_pattern_| describe together what property a URL
// needs to fulfill to be considered a match.
Criterion criterion_;
- // This is the SubstringPattern that is used in a SubstringSetMatcher.
- const SubstringPattern* substring_pattern_;
+ // This is the StringPattern that is used in a SubstringSetMatcher.
+ const StringPattern* string_pattern_;
};
// Class to map the problem of finding {host, path, query} {prefixes, suffixes,
@@ -101,7 +108,7 @@
// of a dictionary in a text" problem, which can be solved very efficiently
// by the Aho-Corasick algorithm.
//
-// IMPORTANT: The URLMatcherConditionFactory owns the SubstringPattern
+// IMPORTANT: The URLMatcherConditionFactory owns the StringPattern
// referenced by created URLMatcherConditions. Therefore, it must outlive
// all created URLMatcherCondition and the SubstringSetMatcher.
class URLMatcherConditionFactory {
@@ -146,23 +153,28 @@
// Canonicalizes a URL for "CreateURL*Condition" searches.
std::string CanonicalizeURLForFullSearches(const GURL& url);
+ // Canonicalizes a URL for "CreateURLMatchesCondition" searches.
+ std::string CanonicalizeURLForRegexSearches(const GURL& url);
+
URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix);
URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix);
URLMatcherCondition CreateURLContainsCondition(const std::string& str);
URLMatcherCondition CreateURLEqualsCondition(const std::string& str);
+ URLMatcherCondition CreateURLMatchesCondition(const std::string& regex);
+
// Removes all patterns from |pattern_singletons_| that are not listed in
// |used_patterns|. These patterns are not referenced any more and get
// freed.
void ForgetUnusedPatterns(
- const std::set<SubstringPattern::ID>& used_patterns);
+ const std::set<StringPattern::ID>& used_patterns);
// Returns true if this object retains no allocated data. Only for debugging.
bool IsEmpty() const;
private:
// Creates a URLMatcherCondition according to the parameters passed.
- // The URLMatcherCondition will refer to a SubstringPattern that is
+ // The URLMatcherCondition will refer to a StringPattern that is
// owned by |pattern_singletons_|.
URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion,
const std::string& pattern);
@@ -170,19 +182,21 @@
// Prepends a "." to the hostname if it does not start with one.
std::string CanonicalizeHostname(const std::string& hostname) const;
- // Counter that ensures that all created SubstringPatterns have unique IDs.
+ // Counter that ensures that all created StringPatterns have unique IDs.
+ // Note that substring patterns and regex patterns will use different IDs.
int id_counter_;
// This comparison considers only the pattern() value of the
- // SubstringPatterns.
- struct SubstringPatternPointerCompare {
- bool operator()(SubstringPattern* lhs, SubstringPattern* rhs) const;
+ // StringPatterns.
+ struct StringPatternPointerCompare {
+ bool operator()(StringPattern* lhs, StringPattern* rhs) const;
};
- // Set to ensure that we generate only one SubstringPattern for each content
- // of SubstringPattern::pattern().
- typedef std::set<SubstringPattern*, SubstringPatternPointerCompare>
+ // Set to ensure that we generate only one StringPattern for each content
+ // of StringPattern::pattern().
+ typedef std::set<StringPattern*, StringPatternPointerCompare>
PatternSingletons;
- PatternSingletons pattern_singletons_;
+ PatternSingletons substring_pattern_singletons_;
+ PatternSingletons regex_pattern_singletons_;
DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory);
};
@@ -244,9 +258,8 @@
ID id() const { return id_; }
const Conditions& conditions() const { return conditions_; }
- bool IsMatch(
- const std::set<SubstringPattern::ID>& matching_substring_patterns,
- const GURL& url) const;
+ bool IsMatch(const std::set<StringPattern::ID>& matching_patterns,
+ const GURL& url) const;
private:
friend class base::RefCounted<URLMatcherConditionSet>;
@@ -296,6 +309,7 @@
private:
void UpdateSubstringSetMatcher(bool full_url_conditions);
+ void UpdateRegexSetMatcher();
void UpdateTriggers();
void UpdateConditionFactory();
void UpdateInternalDatastructures();
@@ -309,15 +323,16 @@
URLMatcherConditionSets;
URLMatcherConditionSets url_matcher_condition_sets_;
- // Maps a SubstringPattern ID to the URLMatcherConditions that need to
- // be triggered in case of a SubstringPattern match.
- std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> >
+ // Maps a StringPattern ID to the URLMatcherConditions that need to
+ // be triggered in case of a StringPattern match.
+ std::map<StringPattern::ID, std::set<URLMatcherConditionSet::ID> >
substring_match_triggers_;
SubstringSetMatcher full_url_matcher_;
SubstringSetMatcher url_component_matcher_;
- std::set<const SubstringPattern*> registered_full_url_patterns_;
- std::set<const SubstringPattern*> registered_url_component_patterns_;
+ RegexSetMatcher regex_set_matcher_;
+ std::set<const StringPattern*> registered_full_url_patterns_;
+ std::set<const StringPattern*> registered_url_component_patterns_;
DISALLOW_COPY_AND_ASSIGN(URLMatcher);
};
diff --git a/chrome/common/extensions/matcher/url_matcher_constants.cc b/chrome/common/extensions/matcher/url_matcher_constants.cc
index 4c83f6d..a07cb1d 100644
--- a/chrome/common/extensions/matcher/url_matcher_constants.cc
+++ b/chrome/common/extensions/matcher/url_matcher_constants.cc
@@ -25,6 +25,7 @@
const char kQuerySuffixKey[] = "querySuffix";
const char kURLContainsKey[] = "urlContains";
const char kURLEqualsKey[] = "urlEquals";
+const char kURLMatchesKey[] = "urlMatches";
const char kURLPrefixKey[] = "urlPrefix";
const char kURLSuffixKey[] = "urlSuffix";
diff --git a/chrome/common/extensions/matcher/url_matcher_constants.h b/chrome/common/extensions/matcher/url_matcher_constants.h
index 17aebe7..59d0498 100644
--- a/chrome/common/extensions/matcher/url_matcher_constants.h
+++ b/chrome/common/extensions/matcher/url_matcher_constants.h
@@ -28,6 +28,7 @@
extern const char kQuerySuffixKey[];
extern const char kURLContainsKey[];
extern const char kURLEqualsKey[];
+extern const char kURLMatchesKey[];
extern const char kURLPrefixKey[];
extern const char kURLSuffixKey[];
diff --git a/chrome/common/extensions/matcher/url_matcher_factory.cc b/chrome/common/extensions/matcher/url_matcher_factory.cc
index 3abd8373..c0932c9 100644
--- a/chrome/common/extensions/matcher/url_matcher_factory.cc
+++ b/chrome/common/extensions/matcher/url_matcher_factory.cc
@@ -48,6 +48,7 @@
factory_methods_[keys::kURLEqualsKey] = &F::CreateURLEqualsCondition;
factory_methods_[keys::kURLPrefixKey] = &F::CreateURLPrefixCondition;
factory_methods_[keys::kURLSuffixKey] = &F::CreateURLSuffixCondition;
+ factory_methods_[keys::kURLMatchesKey] = &F::CreateURLMatchesCondition;
}
// Returns whether a factory method for the specified |pattern_type| (e.g.
diff --git a/chrome/common/extensions/matcher/url_matcher_unittest.cc b/chrome/common/extensions/matcher/url_matcher_unittest.cc
index acf414fc..661dc50 100644
--- a/chrome/common/extensions/matcher/url_matcher_unittest.cc
+++ b/chrome/common/extensions/matcher/url_matcher_unittest.cc
@@ -15,19 +15,19 @@
//
TEST(URLMatcherConditionTest, Constructors) {
- SubstringPattern pattern("example.com", 1);
+ StringPattern pattern("example.com", 1);
URLMatcherCondition m1(URLMatcherCondition::HOST_SUFFIX, &pattern);
EXPECT_EQ(URLMatcherCondition::HOST_SUFFIX, m1.criterion());
- EXPECT_EQ(&pattern, m1.substring_pattern());
+ EXPECT_EQ(&pattern, m1.string_pattern());
URLMatcherCondition m2;
m2 = m1;
EXPECT_EQ(URLMatcherCondition::HOST_SUFFIX, m2.criterion());
- EXPECT_EQ(&pattern, m2.substring_pattern());
+ EXPECT_EQ(&pattern, m2.string_pattern());
URLMatcherCondition m3(m1);
EXPECT_EQ(URLMatcherCondition::HOST_SUFFIX, m3.criterion());
- EXPECT_EQ(&pattern, m3.substring_pattern());
+ EXPECT_EQ(&pattern, m3.string_pattern());
}
TEST(URLMatcherSchemeFilter, TestMatching) {
@@ -61,7 +61,7 @@
}
TEST(URLMatcherConditionTest, IsFullURLCondition) {
- SubstringPattern pattern("example.com", 1);
+ StringPattern pattern("example.com", 1);
EXPECT_FALSE(URLMatcherCondition(URLMatcherCondition::HOST_SUFFIX,
&pattern).IsFullURLCondition());
@@ -86,30 +86,30 @@
GURL url1("http://www.example.com/www.foobar.com/index.html");
GURL url2("http://www.foobar.com/example.com/index.html");
- SubstringPattern pattern("example.com", 1);
+ StringPattern pattern("example.com", 1);
URLMatcherCondition m1(URLMatcherCondition::HOST_SUFFIX, &pattern);
- std::set<SubstringPattern::ID> matching_substring_patterns;
+ std::set<StringPattern::ID> matching_patterns;
// matches = {0} --> matcher did not indicate that m1 was a match.
- matching_substring_patterns.insert(0);
- EXPECT_FALSE(m1.IsMatch(matching_substring_patterns, url1));
+ matching_patterns.insert(0);
+ EXPECT_FALSE(m1.IsMatch(matching_patterns, url1));
// matches = {0, 1} --> matcher did indicate that m1 was a match.
- matching_substring_patterns.insert(1);
- EXPECT_TRUE(m1.IsMatch(matching_substring_patterns, url1));
+ matching_patterns.insert(1);
+ EXPECT_TRUE(m1.IsMatch(matching_patterns, url1));
// For m2 we use a HOST_CONTAINS test, which requires a post-validation
// whether the match reported by the SubstringSetMatcher occurs really
// in the correct url component.
URLMatcherCondition m2(URLMatcherCondition::HOST_CONTAINS, &pattern);
- EXPECT_TRUE(m2.IsMatch(matching_substring_patterns, url1));
- EXPECT_FALSE(m2.IsMatch(matching_substring_patterns, url2));
+ EXPECT_TRUE(m2.IsMatch(matching_patterns, url1));
+ EXPECT_FALSE(m2.IsMatch(matching_patterns, url2));
}
TEST(URLMatcherConditionTest, Comparison) {
- SubstringPattern p1("foobar.com", 1);
- SubstringPattern p2("foobar.com", 2);
+ StringPattern p1("foobar.com", 1);
+ StringPattern p2("foobar.com", 2);
// The first component of each test is expected to be < than the second.
URLMatcherCondition test_smaller[][2] = {
{URLMatcherCondition(URLMatcherCondition::HOST_PREFIX, &p1),
@@ -148,7 +148,7 @@
namespace {
bool Matches(const URLMatcherCondition& condition, std::string text) {
- return text.find(condition.substring_pattern()->pattern()) !=
+ return text.find(condition.string_pattern()->pattern()) !=
std::string::npos;
}
@@ -205,6 +205,8 @@
factory.CreateURLContainsCondition("foo").criterion());
EXPECT_EQ(URLMatcherCondition::URL_EQUALS,
factory.CreateURLEqualsCondition("foo").criterion());
+ EXPECT_EQ(URLMatcherCondition::URL_MATCHES,
+ factory.CreateURLMatchesCondition("foo").criterion());
}
TEST(URLMatcherConditionFactoryTest, TestSingletonProperty) {
@@ -212,21 +214,30 @@
URLMatcherCondition c1 = factory.CreateHostEqualsCondition("www.google.com");
URLMatcherCondition c2 = factory.CreateHostEqualsCondition("www.google.com");
EXPECT_EQ(c1.criterion(), c2.criterion());
- EXPECT_EQ(c1.substring_pattern(), c2.substring_pattern());
+ EXPECT_EQ(c1.string_pattern(), c2.string_pattern());
URLMatcherCondition c3 = factory.CreateHostEqualsCondition("www.google.de");
EXPECT_EQ(c2.criterion(), c3.criterion());
- EXPECT_NE(c2.substring_pattern(), c3.substring_pattern());
- EXPECT_NE(c2.substring_pattern()->id(), c3.substring_pattern()->id());
- EXPECT_NE(c2.substring_pattern()->pattern(),
- c3.substring_pattern()->pattern());
+ EXPECT_NE(c2.string_pattern(), c3.string_pattern());
+ EXPECT_NE(c2.string_pattern()->id(), c3.string_pattern()->id());
+ EXPECT_NE(c2.string_pattern()->pattern(),
+ c3.string_pattern()->pattern());
+ URLMatcherCondition c4 = factory.CreateURLMatchesCondition("www.google.com");
+ URLMatcherCondition c5 = factory.CreateURLContainsCondition("www.google.com");
+ // Regex patterns and substring patterns do not share IDs.
+ EXPECT_EQ(c5.string_pattern()->pattern(), c4.string_pattern()->pattern());
+ EXPECT_NE(c5.string_pattern(), c4.string_pattern());
+ EXPECT_NE(c5.string_pattern()->id(), c4.string_pattern()->id());
- // Check that all SubstringPattern singletons are freed if we call
+ // Check that all StringPattern singletons are freed if we call
// ForgetUnusedPatterns.
- SubstringPattern::ID old_id_1 = c1.substring_pattern()->id();
- factory.ForgetUnusedPatterns(std::set<SubstringPattern::ID>());
+ StringPattern::ID old_id_1 = c1.string_pattern()->id();
+ StringPattern::ID old_id_4 = c4.string_pattern()->id();
+ factory.ForgetUnusedPatterns(std::set<StringPattern::ID>());
EXPECT_TRUE(factory.IsEmpty());
- URLMatcherCondition c4 = factory.CreateHostEqualsCondition("www.google.com");
- EXPECT_NE(old_id_1, c4.substring_pattern()->id());
+ URLMatcherCondition c6 = factory.CreateHostEqualsCondition("www.google.com");
+ EXPECT_NE(old_id_1, c6.string_pattern()->id());
+ URLMatcherCondition c7 = factory.CreateURLMatchesCondition("www.google.com");
+ EXPECT_NE(old_id_4, c7.string_pattern()->id());
}
TEST(URLMatcherConditionFactoryTest, TestComponentSearches) {
@@ -374,7 +385,6 @@
EXPECT_TRUE(Matches(factory.CreateURLContainsCondition(":1234"), url));
}
-
//
// URLMatcherConditionSet
//
@@ -413,13 +423,13 @@
EXPECT_EQ(1, condition_set->id());
EXPECT_EQ(2u, condition_set->conditions().size());
- std::set<SubstringPattern::ID> matching_substring_patterns;
- matching_substring_patterns.insert(m1.substring_pattern()->id());
- EXPECT_FALSE(condition_set->IsMatch(matching_substring_patterns, url1));
+ std::set<StringPattern::ID> matching_patterns;
+ matching_patterns.insert(m1.string_pattern()->id());
+ EXPECT_FALSE(condition_set->IsMatch(matching_patterns, url1));
- matching_substring_patterns.insert(m2.substring_pattern()->id());
- EXPECT_TRUE(condition_set->IsMatch(matching_substring_patterns, url1));
- EXPECT_FALSE(condition_set->IsMatch(matching_substring_patterns, url2));
+ matching_patterns.insert(m2.string_pattern()->id());
+ EXPECT_TRUE(condition_set->IsMatch(matching_patterns, url1));
+ EXPECT_FALSE(condition_set->IsMatch(matching_patterns, url2));
// Test scheme filters.
scoped_refptr<URLMatcherConditionSet> condition_set2(
@@ -427,13 +437,13 @@
scoped_ptr<URLMatcherSchemeFilter>(
new URLMatcherSchemeFilter("https")),
scoped_ptr<URLMatcherPortFilter>(NULL)));
- EXPECT_FALSE(condition_set2->IsMatch(matching_substring_patterns, url1));
+ EXPECT_FALSE(condition_set2->IsMatch(matching_patterns, url1));
scoped_refptr<URLMatcherConditionSet> condition_set3(
new URLMatcherConditionSet(1, conditions,
scoped_ptr<URLMatcherSchemeFilter>(
new URLMatcherSchemeFilter("http")),
scoped_ptr<URLMatcherPortFilter>(NULL)));
- EXPECT_TRUE(condition_set3->IsMatch(matching_substring_patterns, url1));
+ EXPECT_TRUE(condition_set3->IsMatch(matching_patterns, url1));
// Test port filters.
std::vector<URLMatcherPortFilter::Range> ranges;
@@ -442,9 +452,27 @@
scoped_refptr<URLMatcherConditionSet> condition_set4(
new URLMatcherConditionSet(1, conditions,
scoped_ptr<URLMatcherSchemeFilter>(NULL), filter.Pass()));
- EXPECT_TRUE(condition_set4->IsMatch(matching_substring_patterns, url1));
- EXPECT_TRUE(condition_set4->IsMatch(matching_substring_patterns, url3));
- EXPECT_FALSE(condition_set4->IsMatch(matching_substring_patterns, url4));
+ EXPECT_TRUE(condition_set4->IsMatch(matching_patterns, url1));
+ EXPECT_TRUE(condition_set4->IsMatch(matching_patterns, url3));
+ EXPECT_FALSE(condition_set4->IsMatch(matching_patterns, url4));
+
+ // Test regex patterns.
+ matching_patterns.clear();
+ URLMatcherCondition r1 = factory.CreateURLMatchesCondition("/fo?oo");
+ std::set<URLMatcherCondition> regex_conditions;
+ regex_conditions.insert(r1);
+ scoped_refptr<URLMatcherConditionSet> condition_set5(
+ new URLMatcherConditionSet(1, regex_conditions));
+ EXPECT_FALSE(condition_set5->IsMatch(matching_patterns, url1));
+ matching_patterns.insert(r1.string_pattern()->id());
+ EXPECT_TRUE(condition_set5->IsMatch(matching_patterns, url1));
+
+ regex_conditions.insert(m1);
+ scoped_refptr<URLMatcherConditionSet> condition_set6(
+ new URLMatcherConditionSet(1, regex_conditions));
+ EXPECT_FALSE(condition_set6->IsMatch(matching_patterns, url1));
+ matching_patterns.insert(m1.string_pattern()->id());
+ EXPECT_TRUE(condition_set6->IsMatch(matching_patterns, url1));
}
@@ -486,9 +514,29 @@
// This should be the cached singleton.
int patternId1 = factory->CreateHostSuffixCondition(
- "example.com").substring_pattern()->id();
+ "example.com").string_pattern()->id();
- // Removal of last insert.
+ // Third insert.
+ URLMatcherConditionSet::Conditions conditions3;
+ conditions3.insert(factory->CreateHostSuffixCondition("example.com"));
+ conditions3.insert(factory->CreateURLMatchesCondition("x.*[0-9]"));
+
+ const int kConditionSetId3 = 3;
+ URLMatcherConditionSet::Vector insert3;
+ insert3.push_back(make_scoped_refptr(
+ new URLMatcherConditionSet(kConditionSetId3, conditions3)));
+ matcher.AddConditionSets(insert3);
+ EXPECT_EQ(3u, matcher.MatchURL(url1).size());
+ EXPECT_EQ(1u, matcher.MatchURL(url2).size());
+
+ // Removal of third insert.
+ std::vector<URLMatcherConditionSet::ID> remove3;
+ remove3.push_back(kConditionSetId3);
+ matcher.RemoveConditionSets(remove3);
+ EXPECT_EQ(2u, matcher.MatchURL(url1).size());
+ EXPECT_EQ(1u, matcher.MatchURL(url2).size());
+
+ // Removal of second insert.
std::vector<URLMatcherConditionSet::ID> remove2;
remove2.push_back(kConditionSetId2);
matcher.RemoveConditionSets(remove2);
@@ -507,9 +555,10 @@
// The cached singleton in matcher.condition_factory_ should be destroyed to
// free memory.
int patternId2 = factory->CreateHostSuffixCondition(
- "example.com").substring_pattern()->id();
+ "example.com").string_pattern()->id();
// If patternId1 and patternId2 are different that indicates that
- // matcher.condition_factory_ does not leak memory.
+ // matcher.condition_factory_ does not leak memory by holding onto
+ // unused patterns.
EXPECT_NE(patternId1, patternId2);
}
diff --git a/chrome/test/data/extensions/api_test/webrequest/test_declarative.js b/chrome/test/data/extensions/api_test/webrequest/test_declarative.js
index adbc139d..91fb9aea 100644
--- a/chrome/test/data/extensions/api_test/webrequest/test_declarative.js
+++ b/chrome/test/data/extensions/api_test/webrequest/test_declarative.js
@@ -252,6 +252,34 @@
);
},
+ function testRegexFilter() {
+ ignoreUnexpected = true;
+ expect(
+ [
+ { label: "onErrorOccurred",
+ event: "onErrorOccurred",
+ details: {
+ url: getURLHttpSimple(),
+ fromCache: false,
+ error: "net::ERR_BLOCKED_BY_CLIENT"
+ }
+ },
+ ],
+ [ ["onErrorOccurred"] ]);
+ onRequest.addRules(
+ [ {'conditions': [
+ new RequestMatcher({
+ 'url': {
+ 'urlMatches': 'simple[A-Z].*a\.html$',
+ 'schemes': ["http"]
+ },
+ })],
+ 'actions': [new CancelRequest()]}
+ ],
+ function() {navigateAndWait(getURLHttpSimple());}
+ );
+ },
+
function testSetRequestHeader() {
ignoreUnexpected = true;
expect(); // Used for initialization.