anonymizer_tool: IPv6 sanitization improvements.
This patch whitelists certain special-purpose IPv6 subnets and prevents
spurious matches (e.g. C++ scope expressions such as "Instance::Set" or
"net::ERR_CONN_TIMEOUT") from being treated as scrubbable IPv6 addresses,
as per go/cros_ip_logsanitizer.
BUG=b:111048642
TEST=components_unittests
BUG: 908117
Change-Id: If8ee24aba1ca5ac9a077dcb1cfa88ac9a69807f5
Reviewed-on: https://chromium-review.googlesource.com/c/1424626
Commit-Queue: Garrick Evans <[email protected]>
Reviewed-by: Dominic Battré <[email protected]>
Reviewed-by: Thiemo Nagel <[email protected]>
Auto-Submit: Garrick Evans <[email protected]>
Cr-Commit-Position: refs/heads/master@{#626507}
diff --git a/components/feedback/anonymizer_tool.cc b/components/feedback/anonymizer_tool.cc
index 39ea9cdd..7fbf845 100644
--- a/components/feedback/anonymizer_tool.cc
+++ b/components/feedback/anonymizer_tool.cc
@@ -7,7 +7,7 @@
#include <memory>
#include <utility>
-#include "base/stl_util.h"
+#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
@@ -59,13 +59,46 @@
"(?i-s)(serial\\s*(?:number)?\\s*[:=]\\s*)([0-9a-zA-Z\\-\"]+)()",
};
-// Returns the number of leading bytes that may be kept unsanitized.
-std::string MaybeScrubIPv4Address(const std::string& addr) {
+bool MaybeUnmapAddress(net::IPAddress* addr) {
+ if (!addr->IsIPv4MappedIPv6())
+ return false;
+
+ *addr = net::ConvertIPv4MappedIPv6ToIPv4(*addr);
+ return true;
+}
+
+bool MaybeUntranslateAddress(net::IPAddress* addr) {
+ if (!addr->IsIPv6())
+ return false;
+
+ static const net::IPAddress kTranslated6To4(0, 0x64, 0xff, 0x9b, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0);
+ if (!IPAddressMatchesPrefix(*addr, kTranslated6To4, 96))
+ return false;
+
+ const auto bytes = addr->bytes();
+ *addr = net::IPAddress(bytes[12], bytes[13], bytes[14], bytes[15]);
+ return true;
+}
+
+// If |addr| points to a valid IPv6 address, this function truncates it at /32.
+bool MaybeTruncateIPv6(net::IPAddress* addr) {
+ if (!addr->IsIPv6())
+ return false;
+
+ const auto bytes = addr->bytes();
+ *addr = net::IPAddress(bytes[0], bytes[1], bytes[2], bytes[3], 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0);
+ return true;
+}
+
+// Returns an appropriately scrubbed version of |addr| if applicable.
+std::string MaybeScrubIPAddress(const std::string& addr) {
struct {
net::IPAddress ip_addr;
int prefix_length;
bool scrub;
- } static const kWhitelistedIPv4Ranges[] = {
+ } static const kWhitelistedIPRanges[] = {
// Private.
{net::IPAddress(10, 0, 0, 0), 8, true},
{net::IPAddress(172, 16, 0, 0), 12, true},
@@ -84,20 +117,57 @@
{net::IPAddress(224, 0, 0, 0), 4, true},
// Link local.
{net::IPAddress(169, 254, 0, 0), 16, true},
+ {net::IPAddress(0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 10,
+ true},
// Broadcast.
{net::IPAddress(255, 255, 255, 255), 32, false},
+ // IPv6 loopback, unspecified and non-address strings.
+ {net::IPAddress::IPv6AllZeros(), 112, false},
+ // IPv6 multicast all nodes and routers.
+ {net::IPAddress(0xff, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
+ 128, false},
+ {net::IPAddress(0xff, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2),
+ 128, false},
+ {net::IPAddress(0xff, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
+ 128, false},
+ {net::IPAddress(0xff, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2),
+ 128, false},
+ // IPv6 other multicast (link and interface local).
+ {net::IPAddress(0xff, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 16,
+ true},
+ {net::IPAddress(0xff, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 16,
+ true},
+
};
net::IPAddress input_addr;
- if (input_addr.AssignFromIPLiteral(addr) && input_addr.IsIPv4()) {
- for (const auto& range : kWhitelistedIPv4Ranges) {
+ if (input_addr.AssignFromIPLiteral(addr) && input_addr.IsValid()) {
+ bool mapped = MaybeUnmapAddress(&input_addr);
+ bool translated = !mapped ? MaybeUntranslateAddress(&input_addr) : false;
+ for (const auto& range : kWhitelistedIPRanges) {
if (IPAddressMatchesPrefix(input_addr, range.ip_addr,
range.prefix_length)) {
- return range.scrub ? base::StringPrintf(
- "%s/%d", range.ip_addr.ToString().c_str(),
- range.prefix_length)
- : addr;
+ std::string prefix;
+ std::string out_addr = addr;
+ if (mapped) {
+ prefix = "M ";
+ out_addr = input_addr.ToString();
+ } else if (translated) {
+ prefix = "T ";
+ out_addr = input_addr.ToString();
+ }
+ if (range.scrub) {
+ out_addr = base::StringPrintf(
+ "%s/%d", range.ip_addr.ToString().c_str(), range.prefix_length);
+ }
+ return base::StrCat({prefix, out_addr});
}
}
+ // |addr| may have been over-aggressively matched as an IPv6 address when
+ // it's really just an arbitrary part of a sentence. If the string is the
+ // same as the coarsely truncated address then keep it because even if
+ // it happens to be a real address, there is no loss of anonymity.
+ if (MaybeTruncateIPv6(&input_addr) && input_addr.ToString() == addr)
+ return addr;
}
return "";
}
@@ -421,7 +491,7 @@
std::string matched_id_as_string = matched_id.as_string();
std::string replacement_id = (*identifier_space)[matched_id_as_string];
if (replacement_id.empty()) {
- replacement_id = MaybeScrubIPv4Address(matched_id_as_string);
+ replacement_id = MaybeScrubIPAddress(matched_id_as_string);
if (replacement_id != matched_id_as_string) {
// The weird Uint64toString trick is because Windows does not like
// to deal with %zu and a size_t in printf, nor does it support %llu.
diff --git a/components/feedback/anonymizer_tool_unittest.cc b/components/feedback/anonymizer_tool_unittest.cc
index 6464e02..190dfad 100644
--- a/components/feedback/anonymizer_tool_unittest.cc
+++ b/components/feedback/anonymizer_tool_unittest.cc
@@ -6,7 +6,6 @@
#include <gtest/gtest.h>
-#include "base/stl_util.h"
#include "base/strings/string_util.h"
namespace feedback {
@@ -132,7 +131,7 @@
EXPECT_EQ("[<IPv6: 2>]",
AnonymizeCustomPatterns("[2001:db8:0:0:0:ff00:42:8329]"));
EXPECT_EQ("[<IPv6: 3>]", AnonymizeCustomPatterns("[2001:db8::ff00:42:8329]"));
- EXPECT_EQ("[<IPv6: 4>]", AnonymizeCustomPatterns("[::1]"));
+ EXPECT_EQ("[<IPv6: 4>]", AnonymizeCustomPatterns("[aa::bb]"));
EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns("192.160.0.1"));
EXPECT_EQ("<URL: 1>",
@@ -228,6 +227,10 @@
"192.169.2.120\n" // IP address.
"169.254.0.1\n" // Link local.
"169.200.0.1\n" // IP address.
+ "fe80::\n" // Link local.
+ "fe80::ffff\n" // Link local.
+ "febf:ffff::ffff\n" // Link local.
+ "fecc::1111\n" // IP address.
"224.0.0.24\n" // Multicast.
"240.0.0.0\n" // IP address.
"255.255.255.255\n" // Broadcast.
@@ -243,7 +246,30 @@
"11:11;11::11\n" // IP address.
"11::11\n" // IP address.
"11:11:abcdef:0:0:0:0:0\n" // No PII.
- "aa:aa:aa:aa:aa:aa"; // MAC address (BSSID).
+ "::\n" // Unspecified.
+ "::1\n" // Local host.
+ "Instance::Set\n" // Ignore match, no PII.
+ "Instant::ff\n" // Ignore match, no PII.
+ "net::ERR_CONN_TIMEOUT\n" // Ignore match, no PII.
+ "ff01::1\n" // All nodes address (interface local).
+ "ff01::2\n" // All routers (interface local).
+ "ff01::3\n" // Multicast (interface local).
+ "ff02::1\n" // All nodes address (link local).
+ "ff02::2\n" // All routers (link local).
+ "ff02::3\n" // Multicast (link local).
+ "ff02::fb\n" // mDNSv6 (link local).
+ "ff08::fb\n" // mDNSv6.
+ "ff0f::101\n" // All NTP servers.
+ "::ffff:cb0c:10ea\n" // IPv4-mapped IPV6 (IP address).
+ "::ffff:a0a:a0a\n" // IPv4-mapped IPV6 (private class A).
+ "::ffff:a0a:a0a\n" // Intentional duplicate.
+ "::ffff:ac1e:1e1e\n" // IPv4-mapped IPV6 (private class B).
+ "::ffff:c0a8:640a\n" // IPv4-mapped IPV6 (private class C).
+ "::ffff:6473:5c01\n" // IPv4-mapped IPV6 (Chrome).
+ "64:ff9b::a0a:a0a\n" // IPv4-translated 6to4 IPV6 (private class A).
+ "64:ff9b::6473:5c01\n" // IPv4-translated 6to4 IPV6 (Chrome).
+ "::0101:ffff:c0a8:640a\n" // IP address.
+ "aa:aa:aa:aa:aa:aa"; // MAC address (BSSID).
std::string result =
"aaaaaaaa [SSID=1]aaaaa\n"
"aaaaaaaa<URL: 1>\n"
@@ -268,6 +294,10 @@
"<IPv4: 16>\n"
"<169.254.0.0/16: 17>\n"
"<IPv4: 18>\n"
+ "<fe80::/10: 1>\n"
+ "<fe80::/10: 2>\n"
+ "<fe80::/10: 3>\n"
+ "<IPv6: 4>\n"
"<224.0.0.0/4: 19>\n"
"<IPv4: 20>\n"
"255.255.255.255\n"
@@ -280,9 +310,32 @@
"255.255.259.255\n"
"255.300.255.255\n"
"aaaa<IPv4: 28>aaa\n"
- "11:11;<IPv6: 1>\n"
- "<IPv6: 1>\n"
+ "11:11;<IPv6: 5>\n"
+ "<IPv6: 5>\n"
"11:11:abcdef:0:0:0:0:0\n"
+ "::\n"
+ "::1\n"
+ "Instance::Set\n"
+ "Instant::ff\n"
+ "net::ERR_CONN_TIMEOUT\n"
+ "ff01::1\n"
+ "ff01::2\n"
+ "<ff01::/16: 13>\n"
+ "ff02::1\n"
+ "ff02::2\n"
+ "<ff02::/16: 16>\n"
+ "<ff02::/16: 17>\n"
+ "<IPv6: 18>\n"
+ "<IPv6: 19>\n"
+ "<IPv6: 20>\n"
+ "<M 10.0.0.0/8: 21>\n"
+ "<M 10.0.0.0/8: 21>\n"
+ "<M 172.16.0.0/12: 22>\n"
+ "<M 192.168.0.0/16: 23>\n"
+ "<M 100.115.92.1: 24>\n"
+ "<T 10.0.0.0/8: 25>\n"
+ "<T 100.115.92.1: 26>\n"
+ "<IPv6: 27>\n"
"aa:aa:aa:00:00:01";
EXPECT_EQ(result, anonymizer_.Anonymize(data));
}