anonymizer_tool: Whitelist certain special purpose IPv4 subnets.
Many IP addresses reveal no personal or device identifying information.
Logging and debugging can be improved by whitelisting such addresses without loss of privacy.
BUG=b:111048642
TEST=components_unittests
BUG: 908117
Change-Id: Icdc669a8030bbda64446d81d595702bf4df40b79
Reviewed-on: https://chromium-review.googlesource.com/c/1297857
Commit-Queue: Dominic Battré <[email protected]>
Reviewed-by: Thiemo Nagel <[email protected]>
Reviewed-by: Nick Harper <[email protected]>
Reviewed-by: Dominic Battré <[email protected]>
Cr-Commit-Position: refs/heads/master@{#624088}
diff --git a/components/feedback/anonymizer_tool.cc b/components/feedback/anonymizer_tool.cc
index a891a95..39ea9cdd 100644
--- a/components/feedback/anonymizer_tool.cc
+++ b/components/feedback/anonymizer_tool.cc
@@ -12,6 +12,7 @@
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "content/public/browser/browser_thread.h"
+#include "net/base/ip_address.h"
#include "third_party/re2/src/re2/re2.h"
using re2::RE2;
@@ -39,7 +40,7 @@
//
// +? is a non-greedy (lazy) +.
// \b matches a word boundary.
-// (?i) turns on case insensitivy for the remainder of the regex.
+// (?i) turns on case insensitivity for the remainder of the regex.
// (?-s) turns off "dot matches newline" for the remainder of the regex.
// (?:regex) denotes non-capturing parentheses group.
constexpr const char* kCustomPatternsWithContext[] = {
@@ -58,6 +59,49 @@
"(?i-s)(serial\\s*(?:number)?\\s*[:=]\\s*)([0-9a-zA-Z\\-\"]+)()",
};
+// Maps a whitelisted IPv4 literal to its loggable form; returns "" otherwise.
+std::string MaybeScrubIPv4Address(const std::string& addr) {
+ struct {
+ net::IPAddress ip_addr;  // Base address of the whitelisted range.
+ int prefix_length;  // Prefix length handed to IPAddressMatchesPrefix().
+ bool scrub;  // true: return "<ip_addr>/<prefix_length>"; false: return |addr|.
+ } static const kWhitelistedIPv4Ranges[] = {
+ // Private.
+ {net::IPAddress(10, 0, 0, 0), 8, true},
+ {net::IPAddress(172, 16, 0, 0), 12, true},
+ {net::IPAddress(192, 168, 0, 0), 16, true},
+ // Chrome OS containers and VMs.
+ {net::IPAddress(100, 115, 92, 0), 24, false},
+ // Loopback.
+ {net::IPAddress(127, 0, 0, 0), 8, true},
+ // Any.
+ {net::IPAddress(0, 0, 0, 0), 8, true},
+ // DNS.
+ {net::IPAddress(8, 8, 8, 8), 32, false},
+ {net::IPAddress(8, 8, 4, 4), 32, false},
+ {net::IPAddress(1, 1, 1, 1), 32, false},
+ // Multicast.
+ {net::IPAddress(224, 0, 0, 0), 4, true},
+ // Link local.
+ {net::IPAddress(169, 254, 0, 0), 16, true},
+ // Broadcast.
+ {net::IPAddress(255, 255, 255, 255), 32, false},
+ };
+ net::IPAddress input_addr;
+ if (input_addr.AssignFromIPLiteral(addr) && input_addr.IsIPv4()) {
+ for (const auto& range : kWhitelistedIPv4Ranges) {  // First match wins.
+ if (IPAddressMatchesPrefix(input_addr, range.ip_addr,
+ range.prefix_length)) {
+ return range.scrub ? base::StringPrintf(
+ "%s/%d", range.ip_addr.ToString().c_str(),
+ range.prefix_length)
+ : addr;
+ }
+ }
+ }
+ return "";  // Not an IPv4 literal, or not inside any whitelisted range.
+}
+
// Helper macro: Non capturing group
#define NCG(x) "(?:" x ")"
// Helper macro: Optional non capturing group
@@ -377,12 +421,16 @@
std::string matched_id_as_string = matched_id.as_string();
std::string replacement_id = (*identifier_space)[matched_id_as_string];
if (replacement_id.empty()) {
- // The weird Uint64toString trick is because Windows does not like to deal
- // with %zu and a size_t in printf, nor does it support %llu.
- replacement_id = base::StringPrintf(
- "<%s: %s>", pattern.alias,
- base::NumberToString(identifier_space->size()).c_str());
- (*identifier_space)[matched_id_as_string] = replacement_id;
+ replacement_id = MaybeScrubIPv4Address(matched_id_as_string);
+ if (replacement_id != matched_id_as_string) {
+ // The count is converted with base::NumberToString() and printed via %s
+ // because Windows printf handles neither %zu with a size_t nor %llu.
+ replacement_id = base::StringPrintf(
+ "<%s: %s>",
+ replacement_id.empty() ? pattern.alias : replacement_id.c_str(),
+ base::NumberToString(identifier_space->size()).c_str());
+ (*identifier_space)[matched_id_as_string] = replacement_id;
+ }
}
skipped.AppendToString(&result);
diff --git a/components/feedback/anonymizer_tool_unittest.cc b/components/feedback/anonymizer_tool_unittest.cc
index a77b967..6464e02 100644
--- a/components/feedback/anonymizer_tool_unittest.cc
+++ b/components/feedback/anonymizer_tool_unittest.cc
@@ -133,7 +133,7 @@
AnonymizeCustomPatterns("[2001:db8:0:0:0:ff00:42:8329]"));
EXPECT_EQ("[<IPv6: 3>]", AnonymizeCustomPatterns("[2001:db8::ff00:42:8329]"));
EXPECT_EQ("[<IPv6: 4>]", AnonymizeCustomPatterns("[::1]"));
- EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns("192.168.0.1"));
+ EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns("192.160.0.1"));
EXPECT_EQ("<URL: 1>",
AnonymizeCustomPatterns("http://example.com/foo?test=1"));
@@ -211,6 +211,32 @@
"example@@1234\n" // No PII, it is not valid email address.
"255.255.155.2\n" // IP address.
"255.255.155.255\n" // IP address.
+ "127.0.0.1\n" // IPv4 loopback.
+ "127.255.0.1\n" // IPv4 loopback.
+ "0.0.0.0\n" // Any IPv4.
+ "0.255.255.255\n" // Any IPv4.
+ "10.10.10.100\n" // IPv4 private class A.
+ "10.10.10.100\n" // Intentional duplicate.
+ "10.10.10.101\n" // IPv4 private class A.
+ "10.255.255.255\n" // IPv4 private class A.
+ "172.16.0.0\n" // IPv4 private class B.
+ "172.31.255.255\n" // IPv4 private class B.
+ "172.11.5.5\n" // IP address.
+ "172.111.5.5\n" // IP address.
+ "192.168.0.0\n" // IPv4 private class C.
+ "192.168.255.255\n" // IPv4 private class C.
+ "192.169.2.120\n" // IP address.
+ "169.254.0.1\n" // Link local.
+ "169.200.0.1\n" // IP address.
+ "224.0.0.24\n" // Multicast.
+ "240.0.0.0\n" // IP address.
+ "255.255.255.255\n" // Broadcast.
+ "100.115.92.92\n" // ChromeOS.
+ "100.115.91.92\n" // IP address.
+ "1.1.1.1\n" // DNS
+ "8.8.8.8\n" // DNS
+ "8.8.4.4\n" // DNS
+ "8.8.8.4\n" // IP address.
"255.255.259.255\n" // Not an IP address.
"255.300.255.255\n" // Not an IP address.
"aaaa123.123.45.4aaa\n" // IP address.
@@ -225,9 +251,35 @@
"example@@1234\n"
"<IPv4: 1>\n"
"<IPv4: 2>\n"
+ "<127.0.0.0/8: 3>\n"
+ "<127.0.0.0/8: 4>\n"
+ "<0.0.0.0/8: 5>\n"
+ "<0.0.0.0/8: 6>\n"
+ "<10.0.0.0/8: 7>\n"
+ "<10.0.0.0/8: 7>\n"
+ "<10.0.0.0/8: 8>\n"
+ "<10.0.0.0/8: 9>\n"
+ "<172.16.0.0/12: 10>\n"
+ "<172.16.0.0/12: 11>\n"
+ "<IPv4: 12>\n"
+ "<IPv4: 13>\n"
+ "<192.168.0.0/16: 14>\n"
+ "<192.168.0.0/16: 15>\n"
+ "<IPv4: 16>\n"
+ "<169.254.0.0/16: 17>\n"
+ "<IPv4: 18>\n"
+ "<224.0.0.0/4: 19>\n"
+ "<IPv4: 20>\n"
+ "255.255.255.255\n"
+ "100.115.92.92\n"
+ "<IPv4: 23>\n"
+ "1.1.1.1\n"
+ "8.8.8.8\n"
+ "8.8.4.4\n"
+ "<IPv4: 27>\n"
"255.255.259.255\n"
"255.300.255.255\n"
- "aaaa<IPv4: 3>aaa\n"
+ "aaaa<IPv4: 28>aaa\n"
"11:11;<IPv6: 1>\n"
"<IPv6: 1>\n"
"11:11:abcdef:0:0:0:0:0\n"