Create a separate component for UrlPatternIndex.
Bug: 713774
Change-Id: I12a70bc0b5caa37470ecf53568d90304c05095eb
Reviewed-on: https://chromium-review.googlesource.com/527445
Commit-Queue: Pavel Kalinnikov <[email protected]>
Reviewed-by: Jochen Eisinger <[email protected]>
Reviewed-by: Charlie Harrison <[email protected]>
Cr-Commit-Position: refs/heads/master@{#481360}
diff --git a/components/url_pattern_index/fuzzy_pattern_matching.h b/components/url_pattern_index/fuzzy_pattern_matching.h
new file mode 100644
index 0000000..52dc75b
--- /dev/null
+++ b/components/url_pattern_index/fuzzy_pattern_matching.h
@@ -0,0 +1,68 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The separator placeholder '^' symbol is used in subpatterns to match any
+// separator character, which is any ASCII symbol except letters, digits, and
+// the following: '_', '-', '.', '%'. Note that the separator placeholder
+// character '^' is itself a separator, as well as '\0'.
+// TODO(pkalinnikov): In addition, a separator placeholder at the end of the
+// pattern can be matched by the end of |text|.
+//
+// We define a fuzzy occurrence as an occurrence of a |subpattern| in |text|
+// such that all its non-placeholder characters are equal to the corresponding
+// characters of the |text|, whereas each '^' placeholder can correspond to any
+// type of separator in |text|.
+
+#ifndef COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_
+#define COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_
+
+#include <stddef.h>
+
+#include "base/strings/string_piece.h"
+
+namespace url_pattern_index {
+
+constexpr char kSeparatorPlaceholder = '^';  // Stands for any separator.
+
+inline bool IsAscii(char c) {
+ return !(c & ~0x7F);  // True iff no bit above the low 7 bits is set.
+}
+
+inline bool IsAlphaNumericAscii(char c) {
+ if (c <= '9')  // At or below '9', only the digits '0'-'9' qualify.
+ return c >= '0';
+ c |= 0x20; // Puts all alphabetics (and only them) into the 'a'-'z' range.
+ return c >= 'a' && c <= 'z';  // Letter test on the case-folded value.
+}
+
+// Returns whether |c| is a separator, as defined in the file-level comment.
+inline bool IsSeparator(char c) {
+ switch (c) {
+ case '_':
+ case '-':
+ case '.':
+ case '%':
+ return false;  // Explicitly excluded from the separator set.
+ case kSeparatorPlaceholder:
+ return true;  // The '^' placeholder is itself a separator.
+ default:
+ return !IsAlphaNumericAscii(c) && IsAscii(c);  // Also true for '\0'.
+ }
+}
+
+// Returns true iff a fuzzy occurrence of |subpattern| is a prefix of |text|.
+bool StartsWithFuzzy(base::StringPiece text, base::StringPiece subpattern);
+
+// Returns true iff a fuzzy occurrence of |subpattern| is a suffix of |text|.
+bool EndsWithFuzzy(base::StringPiece text, base::StringPiece subpattern);
+
+// Returns the position of the leftmost fuzzy occurrence of |subpattern| in
+// |text| that starts at or after position |from|.
+size_t FindFuzzy(base::StringPiece text,
+ base::StringPiece subpattern,
+ size_t from = 0);  // NOTE(review): result when no match is undocumented.
+
+} // namespace url_pattern_index
+
+#endif // COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_