blob: 4f4904813cbe51f39cab4bd9447dc40112bab819 [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "components/feedback/anonymizer_tool.h"
6
7#include <gtest/gtest.h>
8
battre03910b42016-01-11 13:42:349#include "base/strings/string_util.h"
10
battre4cdaa7c2016-01-07 11:30:2711namespace feedback {
12
13class AnonymizerToolTest : public testing::Test {
14 protected:
15 std::string AnonymizeMACAddresses(const std::string& input) {
16 return anonymizer_.AnonymizeMACAddresses(input);
17 }
18
19 std::string AnonymizeCustomPatterns(const std::string& input) {
20 return anonymizer_.AnonymizeCustomPatterns(input);
21 }
22
battre03910b42016-01-11 13:42:3423 std::string AnonymizeCustomPatternWithContext(
battre4cdaa7c2016-01-07 11:30:2724 const std::string& input,
25 const std::string& pattern,
26 std::map<std::string, std::string>* space) {
battre03910b42016-01-11 13:42:3427 return anonymizer_.AnonymizeCustomPatternWithContext(input, pattern, space);
28 }
29
30 std::string AnonymizeCustomPatternWithoutContext(
31 const std::string& input,
32 const CustomPatternWithoutContext& pattern,
33 std::map<std::string, std::string>* space) {
34 return anonymizer_.AnonymizeCustomPatternWithoutContext(input, pattern,
35 space);
battre4cdaa7c2016-01-07 11:30:2736 }
37
38 AnonymizerTool anonymizer_;
39};
40
41TEST_F(AnonymizerToolTest, Anonymize) {
42 EXPECT_EQ("", anonymizer_.Anonymize(""));
43 EXPECT_EQ("foo\nbar\n", anonymizer_.Anonymize("foo\nbar\n"));
44
45 // Make sure MAC address anonymization is invoked.
46 EXPECT_EQ("02:46:8a:00:00:01", anonymizer_.Anonymize("02:46:8a:ce:13:57"));
47
48 // Make sure custom pattern anonymization is invoked.
49 EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));
afakhry5a59f952017-05-24 21:04:2650
51 // Make sure UUIDs are anonymized.
52 EXPECT_EQ(
53 "REQUEST localhost - - \"POST /printers/<UUID: 1> HTTP/1.1\" 200 291 "
54 "Create-Job successful-ok",
55 anonymizer_.Anonymize(
56 "REQUEST localhost - - \"POST /printers/"
57 "cb738a9f-6433-4d95-a81e-94e4ae0ed30b HTTP/1.1\" 200 291 Create-Job "
58 "successful-ok"));
59 EXPECT_EQ(
60 "REQUEST localhost - - \"POST /printers/<UUID: 2> HTTP/1.1\" 200 286 "
61 "Create-Job successful-ok",
62 anonymizer_.Anonymize(
63 "REQUEST localhost - - \"POST /printers/"
64 "d17188da-9cd3-44f4-b148-3e1d748a3b0f HTTP/1.1\" 200 286 Create-Job "
65 "successful-ok"));
battre4cdaa7c2016-01-07 11:30:2766}
67
68TEST_F(AnonymizerToolTest, AnonymizeMACAddresses) {
69 EXPECT_EQ("", AnonymizeMACAddresses(""));
70 EXPECT_EQ("foo\nbar\n", AnonymizeMACAddresses("foo\nbar\n"));
71 EXPECT_EQ("11:22:33:44:55", AnonymizeMACAddresses("11:22:33:44:55"));
72 EXPECT_EQ("aa:bb:cc:00:00:01", AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff"));
73 EXPECT_EQ(
74 "BSSID: aa:bb:cc:00:00:01 in the middle\n"
75 "bb:cc:dd:00:00:02 start of line\n"
76 "end of line aa:bb:cc:00:00:01\n"
77 "no match across lines aa:bb:cc:\n"
78 "dd:ee:ff two on the same line:\n"
79 "x bb:cc:dd:00:00:02 cc:dd:ee:00:00:03 x\n",
80 AnonymizeMACAddresses("BSSID: aa:bb:cc:dd:ee:ff in the middle\n"
81 "bb:cc:dd:ee:ff:00 start of line\n"
82 "end of line aa:bb:cc:dd:ee:ff\n"
83 "no match across lines aa:bb:cc:\n"
84 "dd:ee:ff two on the same line:\n"
85 "x bb:cc:dd:ee:ff:00 cc:dd:ee:ff:00:11 x\n"));
86 EXPECT_EQ("Remember bb:cc:dd:00:00:02?",
87 AnonymizeMACAddresses("Remember bB:Cc:DD:ee:ff:00?"));
88}
89
90TEST_F(AnonymizerToolTest, AnonymizeCustomPatterns) {
91 EXPECT_EQ("", AnonymizeCustomPatterns(""));
92
93 EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));
94 EXPECT_EQ("Cell ID: '2'", AnonymizeCustomPatterns("Cell ID: 'C1D2'"));
95 EXPECT_EQ("foo Cell ID: '1' bar",
96 AnonymizeCustomPatterns("foo Cell ID: 'A1B2' bar"));
97
98 EXPECT_EQ("foo Location area code: '1' bar",
99 AnonymizeCustomPatterns("foo Location area code: 'A1B2' bar"));
100
101 EXPECT_EQ("foo\na SSID='1' b\n'",
102 AnonymizeCustomPatterns("foo\na SSID='Joe's' b\n'"));
103 EXPECT_EQ("ssid '2'", AnonymizeCustomPatterns("ssid 'My AP'"));
104 EXPECT_EQ("bssid 'aa:bb'", AnonymizeCustomPatterns("bssid 'aa:bb'"));
105
106 EXPECT_EQ("Scan SSID - hexdump(len=6): 1\nfoo",
107 AnonymizeCustomPatterns(
108 "Scan SSID - hexdump(len=6): 47 6f 6f 67 6c 65\nfoo"));
109
110 EXPECT_EQ(
111 "a\nb [SSID=1] [SSID=2] [SSID=foo\nbar] b",
112 AnonymizeCustomPatterns("a\nb [SSID=foo] [SSID=bar] [SSID=foo\nbar] b"));
battre03910b42016-01-11 13:42:34113
afakhry85eea802017-05-01 17:04:10114 EXPECT_EQ("SerialNumber: 1",
115 AnonymizeCustomPatterns("SerialNumber: 1217D7EF"));
116 EXPECT_EQ("serial number: 2",
117 AnonymizeCustomPatterns("serial number: 50C971FEE7F3x010900"));
118 EXPECT_EQ("SerialNumber: 3",
119 AnonymizeCustomPatterns("SerialNumber: EVT23-17BA01-004"));
Thiemo Nagel21a5d552017-12-12 18:21:19120 EXPECT_EQ("serial=4", AnonymizeCustomPatterns("serial=\"1234AA5678\""));
afakhry85eea802017-05-01 17:04:10121
Jeffrey Kardatzke4a9a1bd32019-06-17 21:30:22122 EXPECT_EQ("\"gaia_id\":\"1\"",
123 AnonymizeCustomPatterns("\"gaia_id\":\"1234567890\""));
124 EXPECT_EQ("gaia_id='2'", AnonymizeCustomPatterns("gaia_id='987654321'"));
125 EXPECT_EQ("{id: 1, email:",
126 AnonymizeCustomPatterns("{id: 123454321, email:"));
127
battre03910b42016-01-11 13:42:34128 EXPECT_EQ("<email: 1>",
129 AnonymizeCustomPatterns("[email protected]"));
130 EXPECT_EQ("Email: <email: 1>.",
131 AnonymizeCustomPatterns("Email: [email protected]."));
132 EXPECT_EQ("Email:\n<email: 2>\n",
133 AnonymizeCustomPatterns("Email:\[email protected]\n"));
134
135 EXPECT_EQ("[<IPv6: 1>]", AnonymizeCustomPatterns(
136 "[2001:0db8:0000:0000:0000:ff00:0042:8329]"));
137 EXPECT_EQ("[<IPv6: 2>]",
138 AnonymizeCustomPatterns("[2001:db8:0:0:0:ff00:42:8329]"));
139 EXPECT_EQ("[<IPv6: 3>]", AnonymizeCustomPatterns("[2001:db8::ff00:42:8329]"));
Garrick Evans5a5c24e2019-01-28 12:55:44140 EXPECT_EQ("[<IPv6: 4>]", AnonymizeCustomPatterns("[aa::bb]"));
Garrick Evans5fbf45d2019-01-18 12:37:26141 EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns("192.160.0.1"));
battre03910b42016-01-11 13:42:34142
143 EXPECT_EQ("<URL: 1>",
144 AnonymizeCustomPatterns("http://example.com/foo?test=1"));
145 EXPECT_EQ("Foo <URL: 2> Bar",
146 AnonymizeCustomPatterns("Foo http://192.168.0.1/foo?test=1#123 Bar"));
147 const char* kURLs[] = {
148 "http://example.com/foo?test=1",
149 "http://userid:[email protected]:8080",
150 "http://userid:[email protected]:8080/",
151 "http://@example.com",
152 "http://192.168.0.1",
153 "http://192.168.0.1/",
154 "http://اختبار.com",
155 "http://test.com/foo(bar)baz.html",
156 "http://test.com/foo%20bar",
157 "ftp://test:[email protected]",
158 "chrome://extensions/",
159 "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/options.html",
160 "http://example.com/[email protected]",
pbond4104d692016-02-10 09:41:07161 "rtsp://[email protected]/",
162 "https://aaaaaaaaaaaaaaaa.com",
battre03910b42016-01-11 13:42:34163 };
Avi Drissman8171db7d2018-12-25 23:08:31164 for (size_t i = 0; i < base::size(kURLs); ++i) {
battre03910b42016-01-11 13:42:34165 SCOPED_TRACE(kURLs[i]);
166 std::string got = AnonymizeCustomPatterns(kURLs[i]);
167 EXPECT_TRUE(
168 base::StartsWith(got, "<URL: ", base::CompareCase::INSENSITIVE_ASCII));
169 EXPECT_TRUE(base::EndsWith(got, ">", base::CompareCase::INSENSITIVE_ASCII));
170 }
171 // Test that "Android:" is not considered a schema with empty hier part.
172 EXPECT_EQ("The following applies to Android:",
173 AnonymizeCustomPatterns("The following applies to Android:"));
battre4cdaa7c2016-01-07 11:30:27174}
175
battre03910b42016-01-11 13:42:34176TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithContext) {
battre4cdaa7c2016-01-07 11:30:27177 const char kPattern[] = "(\\b(?i)id:? ')(\\d+)(')";
178 std::map<std::string, std::string> space;
battre03910b42016-01-11 13:42:34179 EXPECT_EQ("", AnonymizeCustomPatternWithContext("", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27180 EXPECT_EQ("foo\nbar\n",
battre03910b42016-01-11 13:42:34181 AnonymizeCustomPatternWithContext("foo\nbar\n", kPattern, &space));
182 EXPECT_EQ("id '1'",
183 AnonymizeCustomPatternWithContext("id '2345'", kPattern, &space));
184 EXPECT_EQ("id '2'",
185 AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
186 EXPECT_EQ("id: '2'",
187 AnonymizeCustomPatternWithContext("id: '1234'", kPattern, &space));
188 EXPECT_EQ("ID: '1'",
189 AnonymizeCustomPatternWithContext("ID: '2345'", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27190 EXPECT_EQ("x1 id '1' 1x id '2'\nid '1'\n",
battre03910b42016-01-11 13:42:34191 AnonymizeCustomPatternWithContext(
192 "x1 id '2345' 1x id '1234'\nid '2345'\n", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27193 space.clear();
battre03910b42016-01-11 13:42:34194 EXPECT_EQ("id '1'",
195 AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27196
197 space.clear();
battre03910b42016-01-11 13:42:34198 EXPECT_EQ("x1z",
199 AnonymizeCustomPatternWithContext("xyz", "()(y+)()", &space));
200}
201
202TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithoutContext) {
203 CustomPatternWithoutContext kPattern = {"pattern", "(o+)"};
204 std::map<std::string, std::string> space;
205 EXPECT_EQ("", AnonymizeCustomPatternWithoutContext("", kPattern, &space));
206 EXPECT_EQ("f<pattern: 1>\nf<pattern: 2>z\nf<pattern: 1>l\n",
207 AnonymizeCustomPatternWithoutContext("fo\nfooz\nfol\n", kPattern,
208 &space));
battre4cdaa7c2016-01-07 11:30:27209}
210
Dominic Battref091addfc2017-12-07 15:45:34211TEST_F(AnonymizerToolTest, AnonymizeChunk) {
Jeffrey Kardatzke8719d852019-07-02 19:31:31212 // For better readability, put all the pre/post redaction strings in an array
213 // of pairs, and then convert that to two strings which become the input and
214 // output of the anonymizer.
215 std::pair<std::string, std::string> data[] = {
216 {"aaaaaaaa [SSID=123aaaaaa]aaaaa", // SSID.
217 "aaaaaaaa [SSID=1]aaaaa"},
218 {"aaaaaaaahttp://tets.comaaaaaaa", // URL.
219 "aaaaaaaa<URL: 1>"},
220 {"[email protected]", // Email address.
221 "<email: 1>"},
222 {"example@@1234", // No PII, it is not invalid email address.
223 "example@@1234"},
224 {"255.255.155.2", // IP address.
225 "<IPv4: 1>"},
226 {"255.255.155.255", // IP address.
227 "<IPv4: 2>"},
228 {"127.0.0.1", // IPv4 loopback.
229 "<127.0.0.0/8: 3>"},
230 {"127.255.0.1", // IPv4 loopback.
231 "<127.0.0.0/8: 4>"},
232 {"0.0.0.0", // Any IPv4.
233 "<0.0.0.0/8: 5>"},
234 {"0.255.255.255", // Any IPv4.
235 "<0.0.0.0/8: 6>"},
236 {"10.10.10.100", // IPv4 private class A.
237 "<10.0.0.0/8: 7>"},
238 {"10.10.10.100", // Intentional duplicate.
239 "<10.0.0.0/8: 7>"},
240 {"10.10.10.101", // IPv4 private class A.
241 "<10.0.0.0/8: 8>"},
242 {"10.255.255.255", // IPv4 private class A.
243 "<10.0.0.0/8: 9>"},
244 {"172.16.0.0", // IPv4 private class B.
245 "<172.16.0.0/12: 10>"},
246 {"172.31.255.255", // IPv4 private class B.
247 "<172.16.0.0/12: 11>"},
248 {"172.11.5.5", // IP address.
249 "<IPv4: 12>"},
250 {"172.111.5.5", // IP address.
251 "<IPv4: 13>"},
252 {"192.168.0.0", // IPv4 private class C.
253 "<192.168.0.0/16: 14>"},
254 {"192.168.255.255", // IPv4 private class C.
255 "<192.168.0.0/16: 15>"},
256 {"192.169.2.120", // IP address.
257 "<IPv4: 16>"},
258 {"169.254.0.1", // Link local.
259 "<169.254.0.0/16: 17>"},
260 {"169.200.0.1", // IP address.
261 "<IPv4: 18>"},
262 {"fe80::", // Link local.
263 "<fe80::/10: 1>"},
264 {"fe80::ffff", // Link local.
265 "<fe80::/10: 2>"},
266 {"febf:ffff::ffff", // Link local.
267 "<fe80::/10: 3>"},
268 {"fecc::1111", // IP address.
269 "<IPv6: 4>"},
270 {"224.0.0.24", // Multicast.
271 "<224.0.0.0/4: 19>"},
272 {"240.0.0.0", // IP address.
273 "<IPv4: 20>"},
274 {"255.255.255.255", // Broadcast.
275 "255.255.255.255"},
276 {"100.115.92.92", // ChromeOS.
277 "100.115.92.92"},
278 {"100.115.91.92", // IP address.
279 "<IPv4: 23>"},
280 {"1.1.1.1", // DNS
281 "1.1.1.1"},
282 {"8.8.8.8", // DNS
283 "8.8.8.8"},
284 {"8.8.4.4", // DNS
285 "8.8.4.4"},
286 {"8.8.8.4", // IP address.
287 "<IPv4: 27>"},
288 {"255.255.259.255", // Not an IP address.
289 "255.255.259.255"},
290 {"255.300.255.255", // Not an IP address.
291 "255.300.255.255"},
292 {"aaaa123.123.45.4aaa", // IP address.
293 "aaaa<IPv4: 28>aaa"},
294 {"11:11;11::11", // IP address.
295 "11:11;<IPv6: 5>"},
296 {"11::11", // IP address.
297 "<IPv6: 5>"},
298 {"11:11:abcdef:0:0:0:0:0", // No PII.
299 "11:11:abcdef:0:0:0:0:0"},
300 {"::", // Unspecified.
301 "::"},
302 {"::1", // Local host.
303 "::1"},
304 {"Instance::Set", // Ignore match, no PII.
305 "Instance::Set"},
306 {"Instant::ff", // Ignore match, no PII.
307 "Instant::ff"},
308 {"net::ERR_CONN_TIMEOUT", // Ignore match, no PII.
309 "net::ERR_CONN_TIMEOUT"},
310 {"ff01::1", // All nodes address (interface local).
311 "ff01::1"},
312 {"ff01::2", // All routers (interface local).
313 "ff01::2"},
314 {"ff01::3", // Multicast (interface local).
315 "<ff01::/16: 13>"},
316 {"ff02::1", // All nodes address (link local).
317 "ff02::1"},
318 {"ff02::2", // All routers (link local).
319 "ff02::2"},
320 {"ff02::3", // Multicast (link local).
321 "<ff02::/16: 16>"},
322 {"ff02::fb", // mDNSv6 (link local).
323 "<ff02::/16: 17>"},
324 {"ff08::fb", // mDNSv6.
325 "<IPv6: 18>"},
326 {"ff0f::101", // All NTP servers.
327 "<IPv6: 19>"},
328 {"::ffff:cb0c:10ea", // IPv4-mapped IPV6 (IP address).
329 "<IPv6: 20>"},
330 {"::ffff:a0a:a0a", // IPv4-mapped IPV6 (private class A).
331 "<M 10.0.0.0/8: 21>"},
332 {"::ffff:a0a:a0a", // Intentional duplicate.
333 "<M 10.0.0.0/8: 21>"},
334 {"::ffff:ac1e:1e1e", // IPv4-mapped IPV6 (private class B).
335 "<M 172.16.0.0/12: 22>"},
336 {"::ffff:c0a8:640a", // IPv4-mapped IPV6 (private class C).
337 "<M 192.168.0.0/16: 23>"},
338 {"::ffff:6473:5c01", // IPv4-mapped IPV6 (Chrome).
339 "<M 100.115.92.1: 24>"},
340 {"64:ff9b::a0a:a0a", // IPv4-translated 6to4 IPV6 (private class A).
341 "<T 10.0.0.0/8: 25>"},
342 {"64:ff9b::6473:5c01", // IPv4-translated 6to4 IPV6 (Chrome).
343 "<T 100.115.92.1: 26>"},
344 {"::0101:ffff:c0a8:640a", // IP address.
345 "<IPv6: 27>"},
346 {"aa:aa:aa:aa:aa:aa", // MAC address (BSSID).
347 "aa:aa:aa:00:00:01"},
348 {"chrome://resources/foo", // Secure chrome resource, whitelisted.
349 "chrome://resources/foo"},
350 {"chrome://resources/f?user=bar", // Potentially PII in parameter.
351 "<URL: 2>"}};
352 std::string anon_input;
353 std::string anon_output;
354 for (const auto& s : data) {
355 anon_input.append(s.first).append("\n");
356 anon_output.append(s.second).append("\n");
357 }
358 EXPECT_EQ(anon_output, anonymizer_.Anonymize(anon_input));
Dominic Battref091addfc2017-12-07 15:45:34359}
360
battre4cdaa7c2016-01-07 11:30:27361} // namespace feedback