Added anonymization patterns for URLs and email addresses

BUG=567870

Review URL: https://codereview.chromium.org/1543633003

Cr-Commit-Position: refs/heads/master@{#368573}
diff --git a/components/feedback/anonymizer_tool_unittest.cc b/components/feedback/anonymizer_tool_unittest.cc
index 68f35a8..2e89137 100644
--- a/components/feedback/anonymizer_tool_unittest.cc
+++ b/components/feedback/anonymizer_tool_unittest.cc
@@ -6,6 +6,8 @@
 
 #include <gtest/gtest.h>
 
+#include "base/strings/string_util.h"
+
 namespace feedback {
 
 class AnonymizerToolTest : public testing::Test {
@@ -18,11 +20,19 @@
     return anonymizer_.AnonymizeCustomPatterns(input);
   }
 
-  static std::string AnonymizeCustomPattern(
+  std::string AnonymizeCustomPatternWithContext(
       const std::string& input,
       const std::string& pattern,
       std::map<std::string, std::string>* space) {
-    return AnonymizerTool::AnonymizeCustomPattern(input, pattern, space);
+    return anonymizer_.AnonymizeCustomPatternWithContext(input, pattern, space);
+  }
+
+  std::string AnonymizeCustomPatternWithoutContext(
+      const std::string& input,
+      const CustomPatternWithoutContext& pattern,
+      std::map<std::string, std::string>* space) {
+    return anonymizer_.AnonymizeCustomPatternWithoutContext(input, pattern,
+                                                            space);
   }
 
   AnonymizerTool anonymizer_;
@@ -84,26 +94,86 @@
   EXPECT_EQ(
       "a\nb [SSID=1] [SSID=2] [SSID=foo\nbar] b",
       AnonymizeCustomPatterns("a\nb [SSID=foo] [SSID=bar] [SSID=foo\nbar] b"));
+
+  EXPECT_EQ("<email: 1>",
+            AnonymizeCustomPatterns("[email protected]"));
+  EXPECT_EQ("Email: <email: 1>.",
+            AnonymizeCustomPatterns("Email: [email protected]."));
+  EXPECT_EQ("Email:\n<email: 2>\n",
+            AnonymizeCustomPatterns("Email:\[email protected]\n"));
+
+  EXPECT_EQ("[<IPv6: 1>]", AnonymizeCustomPatterns(
+                               "[2001:0db8:0000:0000:0000:ff00:0042:8329]"));
+  EXPECT_EQ("[<IPv6: 2>]",
+            AnonymizeCustomPatterns("[2001:db8:0:0:0:ff00:42:8329]"));
+  EXPECT_EQ("[<IPv6: 3>]", AnonymizeCustomPatterns("[2001:db8::ff00:42:8329]"));
+  EXPECT_EQ("[<IPv6: 4>]", AnonymizeCustomPatterns("[::1]"));
+  EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns("192.168.0.1"));
+
+  EXPECT_EQ("<URL: 1>",
+            AnonymizeCustomPatterns("http://example.com/foo?test=1"));
+  EXPECT_EQ("Foo <URL: 2> Bar",
+            AnonymizeCustomPatterns("Foo http://192.168.0.1/foo?test=1#123 Bar"));
+  const char* kURLs[] = {
+    "http://example.com/foo?test=1",
+    "http://userid:[email protected]:8080",
+    "http://userid:[email protected]:8080/",
+    "http://@example.com",
+    "http://192.168.0.1",
+    "http://192.168.0.1/",
+    "http://اختبار.com",
+    "http://test.com/foo(bar)baz.html",
+    "http://test.com/foo%20bar",
+    "ftp://test:[email protected]",
+    "chrome://extensions/",
+    "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/options.html",
+    "http://example.com/[email protected]",
+  };
+  for (size_t i = 0; i < arraysize(kURLs); ++i) {
+    SCOPED_TRACE(kURLs[i]);
+    std::string got = AnonymizeCustomPatterns(kURLs[i]);
+    EXPECT_TRUE(
+        base::StartsWith(got, "<URL: ", base::CompareCase::INSENSITIVE_ASCII));
+    EXPECT_TRUE(base::EndsWith(got, ">", base::CompareCase::INSENSITIVE_ASCII));
+  }
+  // Test that "Android:" is not considered a schema with empty hier part.
+  EXPECT_EQ("The following applies to Android:",
+            AnonymizeCustomPatterns("The following applies to Android:"));
 }
 
-TEST_F(AnonymizerToolTest, AnonymizeCustomPattern) {
+TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithContext) {
   const char kPattern[] = "(\\b(?i)id:? ')(\\d+)(')";
   std::map<std::string, std::string> space;
-  EXPECT_EQ("", AnonymizeCustomPattern("", kPattern, &space));
+  EXPECT_EQ("", AnonymizeCustomPatternWithContext("", kPattern, &space));
   EXPECT_EQ("foo\nbar\n",
-            AnonymizeCustomPattern("foo\nbar\n", kPattern, &space));
-  EXPECT_EQ("id '1'", AnonymizeCustomPattern("id '2345'", kPattern, &space));
-  EXPECT_EQ("id '2'", AnonymizeCustomPattern("id '1234'", kPattern, &space));
-  EXPECT_EQ("id: '2'", AnonymizeCustomPattern("id: '1234'", kPattern, &space));
-  EXPECT_EQ("ID: '1'", AnonymizeCustomPattern("ID: '2345'", kPattern, &space));
+            AnonymizeCustomPatternWithContext("foo\nbar\n", kPattern, &space));
+  EXPECT_EQ("id '1'",
+            AnonymizeCustomPatternWithContext("id '2345'", kPattern, &space));
+  EXPECT_EQ("id '2'",
+            AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
+  EXPECT_EQ("id: '2'",
+            AnonymizeCustomPatternWithContext("id: '1234'", kPattern, &space));
+  EXPECT_EQ("ID: '1'",
+            AnonymizeCustomPatternWithContext("ID: '2345'", kPattern, &space));
   EXPECT_EQ("x1 id '1' 1x id '2'\nid '1'\n",
-            AnonymizeCustomPattern("x1 id '2345' 1x id '1234'\nid '2345'\n",
-                                   kPattern, &space));
+            AnonymizeCustomPatternWithContext(
+                "x1 id '2345' 1x id '1234'\nid '2345'\n", kPattern, &space));
   space.clear();
-  EXPECT_EQ("id '1'", AnonymizeCustomPattern("id '1234'", kPattern, &space));
+  EXPECT_EQ("id '1'",
+            AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
 
   space.clear();
-  EXPECT_EQ("x1z", AnonymizeCustomPattern("xyz", "()(y+)()", &space));
+  EXPECT_EQ("x1z",
+            AnonymizeCustomPatternWithContext("xyz", "()(y+)()", &space));
+}
+
+TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithoutContext) {
+  CustomPatternWithoutContext kPattern = {"pattern", "(o+)"};
+  std::map<std::string, std::string> space;
+  EXPECT_EQ("", AnonymizeCustomPatternWithoutContext("", kPattern, &space));
+  EXPECT_EQ("f<pattern: 1>\nf<pattern: 2>z\nf<pattern: 1>l\n",
+            AnonymizeCustomPatternWithoutContext("fo\nfooz\nfol\n", kPattern,
+                                                 &space));
 }
 
 }  // namespace feedback