URLPatternIndex: Allow separator placeholder to match the end of url.

This CL modifies the URLPatternIndex matching algorithm to ensure that the
separator placeholder (^) also matches the end of the text, thus fixing an
existing TODO.

BUG=772260

Change-Id: If6917c1ea4c7a037765ca421075bf298f64c5ceb
Reviewed-on: https://chromium-review.googlesource.com/c/1476814
Commit-Queue: Karan Bhatia <[email protected]>
Reviewed-by: Istiaque Ahmed <[email protected]>
Reviewed-by: Charlie Harrison <[email protected]>
Cr-Commit-Position: refs/heads/master@{#633869}
diff --git a/components/url_pattern_index/url_pattern.cc b/components/url_pattern_index/url_pattern.cc
index a9762fa..81b80bfb 100644
--- a/components/url_pattern_index/url_pattern.cc
+++ b/components/url_pattern_index/url_pattern.cc
@@ -144,6 +144,80 @@
   return base::StringPiece::npos;
 }
 
+// Helper for DoesTextMatchLastSubpattern: tests |subpattern| against |text| for
+// one anchor pair, treating kSeparatorPlaceholder as not matching end of text.
+bool DoesTextMatchLastSubpatternInternal(proto::AnchorType anchor_left,
+                                         proto::AnchorType anchor_right,
+                                         base::StringPiece text,
+                                         url::Component url_host,
+                                         base::StringPiece subpattern) {
+  // One branch per supported (|anchor_left|, |anchor_right|) combination.
+  if (anchor_left == proto::ANCHOR_TYPE_NONE &&
+      anchor_right == proto::ANCHOR_TYPE_NONE) {  // Match anywhere in |text|.
+    return FindSubpattern(text, subpattern) != base::StringPiece::npos;
+  }
+
+  if (anchor_left == proto::ANCHOR_TYPE_NONE &&
+      anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {  // Suffix of |text|.
+    return EndsWithFuzzy(text, subpattern);
+  }
+
+  if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY &&
+      anchor_right == proto::ANCHOR_TYPE_NONE) {  // Prefix of |text|.
+    return StartsWithFuzzy(text, subpattern);
+  }
+
+  if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY &&
+      anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {  // Whole-text match.
+    return text.size() == subpattern.size() &&
+           StartsWithFuzzy(text, subpattern);
+  }
+
+  if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN &&
+      anchor_right == proto::ANCHOR_TYPE_NONE) {  // Requires a non-empty host.
+    return url_host.is_nonempty() &&
+           FindSubdomainAnchoredSubpattern(text, url_host, subpattern) !=
+               base::StringPiece::npos;
+  }
+
+  if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN &&
+      anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {  // Suffix; needs a host.
+    return url_host.is_nonempty() && text.size() >= subpattern.size() &&
+           IsSubdomainAnchored(text, url_host,
+                               text.size() - subpattern.size()) &&
+           EndsWithFuzzy(text, subpattern);
+  }
+
+  NOTREACHED();  // No other anchor combination is expected here.
+  return false;
+}
+
+// Returns whether the last |subpattern| of a pattern matches |text|. The last
+// subpattern needs special handling because a trailing kSeparatorPlaceholder
+// may also be satisfied by the end of |text|.
+bool DoesTextMatchLastSubpattern(proto::AnchorType anchor_left,
+                                 proto::AnchorType anchor_right,
+                                 base::StringPiece text,
+                                 url::Component url_host,
+                                 base::StringPiece subpattern) {
+  DCHECK(!subpattern.empty());  // back() below needs non-empty input.
+
+  if (DoesTextMatchLastSubpatternInternal(anchor_left, anchor_right, text,
+                                          url_host, subpattern)) {
+    return true;
+  }
+
+  // Otherwise retry with a trailing kSeparatorPlaceholder dropped and the right
+  // anchor forced to BOUNDARY, i.e. let the placeholder match the end of text.
+  if (subpattern.back() == kSeparatorPlaceholder) {
+    subpattern.remove_suffix(1);
+    return DoesTextMatchLastSubpatternInternal(
+        anchor_left, proto::ANCHOR_TYPE_BOUNDARY, text, url_host, subpattern);
+  }
+
+  return false;
+}
+
 // Returns whether the given |url_pattern| matches the given |url_spec|.
 // Compares the pattern the the url in a case-sensitive manner.
 bool IsCaseSensitiveMatch(base::StringPiece url_pattern,
@@ -157,6 +231,7 @@
   auto subpattern_it = subpatterns.begin();
   auto subpattern_end = subpatterns.end();
 
+  // No subpatterns.
   if (subpattern_it == subpattern_end) {
     return anchor_left == proto::ANCHOR_TYPE_NONE ||
            anchor_right == proto::ANCHOR_TYPE_NONE;
@@ -165,22 +240,10 @@
   base::StringPiece subpattern = *subpattern_it;
   ++subpattern_it;
 
-  // If there is only one |subpattern|, and it has a right anchor, then simply
-  // check that it is a suffix of the |url_spec|, and the left anchor is
-  // fulfilled.
-  if (subpattern_it == subpattern_end &&
-      anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
-    if (!EndsWithFuzzy(url_spec, subpattern))
-      return false;
-    if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY)
-      return url_spec.size() == subpattern.size();
-    if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN) {
-      DCHECK_LE(subpattern.size(), url_spec.size());
-      return url_host.is_nonempty() &&
-             IsSubdomainAnchored(url_spec, url_host,
-                                 url_spec.size() - subpattern.size());
-    }
-    return true;
+  // There is only one |subpattern|.
+  if (subpattern_it == subpattern_end) {
+    return DoesTextMatchLastSubpattern(anchor_left, anchor_right, url_spec,
+                                       url_host, subpattern);
   }
 
   // Otherwise, the first |subpattern| does not have to be a suffix. But it
@@ -189,10 +252,6 @@
   if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY) {
     if (!StartsWithFuzzy(url_spec, subpattern))
       return false;
-    if (subpattern_it == subpattern_end) {
-      DCHECK_EQ(anchor_right, proto::ANCHOR_TYPE_NONE);
-      return true;
-    }
     text.remove_prefix(subpattern.size());
   } else if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN) {
     if (!url_host.is_nonempty())
@@ -201,10 +260,6 @@
         FindSubdomainAnchoredSubpattern(url_spec, url_host, subpattern);
     if (match_begin == base::StringPiece::npos)
       return false;
-    if (subpattern_it == subpattern_end) {
-      DCHECK_EQ(anchor_right, proto::ANCHOR_TYPE_NONE);
-      return true;
-    }
     text.remove_prefix(match_begin + subpattern.size());
   } else {
     DCHECK_EQ(anchor_left, proto::ANCHOR_TYPE_NONE);
@@ -212,26 +267,24 @@
     subpattern_it = subpatterns.begin();
   }
 
-  // Consecutively find all the remaining subpatterns in the |text|. If the
-  // pattern has a right anchor, don't search for the last subpattern, but
-  // instead check that it is a suffix of the |text|.
-  while (subpattern_it != subpattern_end) {
-    subpattern = *subpattern_it;
-    DCHECK(!subpattern.empty());
+  DCHECK(subpattern_it != subpattern_end);
+  subpattern = *subpattern_it;
 
-    if (++subpattern_it == subpattern_end &&
-        anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
-      break;
-    }
+  // Consecutively find all the remaining subpatterns in the |text|. Handle the
+  // last subpattern outside the loop.
+  while (++subpattern_it != subpattern_end) {
+    DCHECK(!subpattern.empty());
 
     const size_t match_position = FindSubpattern(text, subpattern);
     if (match_position == base::StringPiece::npos)
       return false;
     text.remove_prefix(match_position + subpattern.size());
+
+    subpattern = *subpattern_it;
   }
 
-  return anchor_right != proto::ANCHOR_TYPE_BOUNDARY ||
-         EndsWithFuzzy(text, subpattern);
+  return DoesTextMatchLastSubpattern(proto::ANCHOR_TYPE_NONE, anchor_right,
+                                     text, url::Component(), subpattern);
 }
 
 }  // namespace