blob: 190dfad2e6adc5a483c275a8d964aa7ec0ac28c1 [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "components/feedback/anonymizer_tool.h"
6
7#include <gtest/gtest.h>
8
battre03910b42016-01-11 13:42:349#include "base/strings/string_util.h"
10
battre4cdaa7c2016-01-07 11:30:2711namespace feedback {
12
13class AnonymizerToolTest : public testing::Test {
14 protected:
15 std::string AnonymizeMACAddresses(const std::string& input) {
16 return anonymizer_.AnonymizeMACAddresses(input);
17 }
18
19 std::string AnonymizeCustomPatterns(const std::string& input) {
20 return anonymizer_.AnonymizeCustomPatterns(input);
21 }
22
battre03910b42016-01-11 13:42:3423 std::string AnonymizeCustomPatternWithContext(
battre4cdaa7c2016-01-07 11:30:2724 const std::string& input,
25 const std::string& pattern,
26 std::map<std::string, std::string>* space) {
battre03910b42016-01-11 13:42:3427 return anonymizer_.AnonymizeCustomPatternWithContext(input, pattern, space);
28 }
29
30 std::string AnonymizeCustomPatternWithoutContext(
31 const std::string& input,
32 const CustomPatternWithoutContext& pattern,
33 std::map<std::string, std::string>* space) {
34 return anonymizer_.AnonymizeCustomPatternWithoutContext(input, pattern,
35 space);
battre4cdaa7c2016-01-07 11:30:2736 }
37
38 AnonymizerTool anonymizer_;
39};
40
41TEST_F(AnonymizerToolTest, Anonymize) {
42 EXPECT_EQ("", anonymizer_.Anonymize(""));
43 EXPECT_EQ("foo\nbar\n", anonymizer_.Anonymize("foo\nbar\n"));
44
45 // Make sure MAC address anonymization is invoked.
46 EXPECT_EQ("02:46:8a:00:00:01", anonymizer_.Anonymize("02:46:8a:ce:13:57"));
47
48 // Make sure custom pattern anonymization is invoked.
49 EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));
afakhry5a59f952017-05-24 21:04:2650
51 // Make sure UUIDs are anonymized.
52 EXPECT_EQ(
53 "REQUEST localhost - - \"POST /printers/<UUID: 1> HTTP/1.1\" 200 291 "
54 "Create-Job successful-ok",
55 anonymizer_.Anonymize(
56 "REQUEST localhost - - \"POST /printers/"
57 "cb738a9f-6433-4d95-a81e-94e4ae0ed30b HTTP/1.1\" 200 291 Create-Job "
58 "successful-ok"));
59 EXPECT_EQ(
60 "REQUEST localhost - - \"POST /printers/<UUID: 2> HTTP/1.1\" 200 286 "
61 "Create-Job successful-ok",
62 anonymizer_.Anonymize(
63 "REQUEST localhost - - \"POST /printers/"
64 "d17188da-9cd3-44f4-b148-3e1d748a3b0f HTTP/1.1\" 200 286 Create-Job "
65 "successful-ok"));
battre4cdaa7c2016-01-07 11:30:2766}
67
68TEST_F(AnonymizerToolTest, AnonymizeMACAddresses) {
69 EXPECT_EQ("", AnonymizeMACAddresses(""));
70 EXPECT_EQ("foo\nbar\n", AnonymizeMACAddresses("foo\nbar\n"));
71 EXPECT_EQ("11:22:33:44:55", AnonymizeMACAddresses("11:22:33:44:55"));
72 EXPECT_EQ("aa:bb:cc:00:00:01", AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff"));
73 EXPECT_EQ(
74 "BSSID: aa:bb:cc:00:00:01 in the middle\n"
75 "bb:cc:dd:00:00:02 start of line\n"
76 "end of line aa:bb:cc:00:00:01\n"
77 "no match across lines aa:bb:cc:\n"
78 "dd:ee:ff two on the same line:\n"
79 "x bb:cc:dd:00:00:02 cc:dd:ee:00:00:03 x\n",
80 AnonymizeMACAddresses("BSSID: aa:bb:cc:dd:ee:ff in the middle\n"
81 "bb:cc:dd:ee:ff:00 start of line\n"
82 "end of line aa:bb:cc:dd:ee:ff\n"
83 "no match across lines aa:bb:cc:\n"
84 "dd:ee:ff two on the same line:\n"
85 "x bb:cc:dd:ee:ff:00 cc:dd:ee:ff:00:11 x\n"));
86 EXPECT_EQ("Remember bb:cc:dd:00:00:02?",
87 AnonymizeMACAddresses("Remember bB:Cc:DD:ee:ff:00?"));
88}
89
90TEST_F(AnonymizerToolTest, AnonymizeCustomPatterns) {
91 EXPECT_EQ("", AnonymizeCustomPatterns(""));
92
93 EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));
94 EXPECT_EQ("Cell ID: '2'", AnonymizeCustomPatterns("Cell ID: 'C1D2'"));
95 EXPECT_EQ("foo Cell ID: '1' bar",
96 AnonymizeCustomPatterns("foo Cell ID: 'A1B2' bar"));
97
98 EXPECT_EQ("foo Location area code: '1' bar",
99 AnonymizeCustomPatterns("foo Location area code: 'A1B2' bar"));
100
101 EXPECT_EQ("foo\na SSID='1' b\n'",
102 AnonymizeCustomPatterns("foo\na SSID='Joe's' b\n'"));
103 EXPECT_EQ("ssid '2'", AnonymizeCustomPatterns("ssid 'My AP'"));
104 EXPECT_EQ("bssid 'aa:bb'", AnonymizeCustomPatterns("bssid 'aa:bb'"));
105
106 EXPECT_EQ("Scan SSID - hexdump(len=6): 1\nfoo",
107 AnonymizeCustomPatterns(
108 "Scan SSID - hexdump(len=6): 47 6f 6f 67 6c 65\nfoo"));
109
110 EXPECT_EQ(
111 "a\nb [SSID=1] [SSID=2] [SSID=foo\nbar] b",
112 AnonymizeCustomPatterns("a\nb [SSID=foo] [SSID=bar] [SSID=foo\nbar] b"));
battre03910b42016-01-11 13:42:34113
afakhry85eea802017-05-01 17:04:10114 EXPECT_EQ("SerialNumber: 1",
115 AnonymizeCustomPatterns("SerialNumber: 1217D7EF"));
116 EXPECT_EQ("serial number: 2",
117 AnonymizeCustomPatterns("serial number: 50C971FEE7F3x010900"));
118 EXPECT_EQ("SerialNumber: 3",
119 AnonymizeCustomPatterns("SerialNumber: EVT23-17BA01-004"));
Thiemo Nagel21a5d552017-12-12 18:21:19120 EXPECT_EQ("serial=4", AnonymizeCustomPatterns("serial=\"1234AA5678\""));
afakhry85eea802017-05-01 17:04:10121
battre03910b42016-01-11 13:42:34122 EXPECT_EQ("<email: 1>",
123 AnonymizeCustomPatterns("[email protected]"));
124 EXPECT_EQ("Email: <email: 1>.",
125 AnonymizeCustomPatterns("Email: [email protected]."));
126 EXPECT_EQ("Email:\n<email: 2>\n",
127 AnonymizeCustomPatterns("Email:\[email protected]\n"));
128
129 EXPECT_EQ("[<IPv6: 1>]", AnonymizeCustomPatterns(
130 "[2001:0db8:0000:0000:0000:ff00:0042:8329]"));
131 EXPECT_EQ("[<IPv6: 2>]",
132 AnonymizeCustomPatterns("[2001:db8:0:0:0:ff00:42:8329]"));
133 EXPECT_EQ("[<IPv6: 3>]", AnonymizeCustomPatterns("[2001:db8::ff00:42:8329]"));
Garrick Evans5a5c24e2019-01-28 12:55:44134 EXPECT_EQ("[<IPv6: 4>]", AnonymizeCustomPatterns("[aa::bb]"));
Garrick Evans5fbf45d2019-01-18 12:37:26135 EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns("192.160.0.1"));
battre03910b42016-01-11 13:42:34136
137 EXPECT_EQ("<URL: 1>",
138 AnonymizeCustomPatterns("http://example.com/foo?test=1"));
139 EXPECT_EQ("Foo <URL: 2> Bar",
140 AnonymizeCustomPatterns("Foo http://192.168.0.1/foo?test=1#123 Bar"));
141 const char* kURLs[] = {
142 "http://example.com/foo?test=1",
143 "http://userid:[email protected]:8080",
144 "http://userid:[email protected]:8080/",
145 "http://@example.com",
146 "http://192.168.0.1",
147 "http://192.168.0.1/",
148 "http://اختبار.com",
149 "http://test.com/foo(bar)baz.html",
150 "http://test.com/foo%20bar",
151 "ftp://test:[email protected]",
152 "chrome://extensions/",
153 "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/options.html",
154 "http://example.com/[email protected]",
pbond4104d692016-02-10 09:41:07155 "rtsp://[email protected]/",
156 "https://aaaaaaaaaaaaaaaa.com",
battre03910b42016-01-11 13:42:34157 };
Avi Drissman8171db7d2018-12-25 23:08:31158 for (size_t i = 0; i < base::size(kURLs); ++i) {
battre03910b42016-01-11 13:42:34159 SCOPED_TRACE(kURLs[i]);
160 std::string got = AnonymizeCustomPatterns(kURLs[i]);
161 EXPECT_TRUE(
162 base::StartsWith(got, "<URL: ", base::CompareCase::INSENSITIVE_ASCII));
163 EXPECT_TRUE(base::EndsWith(got, ">", base::CompareCase::INSENSITIVE_ASCII));
164 }
165 // Test that "Android:" is not considered a schema with empty hier part.
166 EXPECT_EQ("The following applies to Android:",
167 AnonymizeCustomPatterns("The following applies to Android:"));
battre4cdaa7c2016-01-07 11:30:27168}
169
battre03910b42016-01-11 13:42:34170TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithContext) {
battre4cdaa7c2016-01-07 11:30:27171 const char kPattern[] = "(\\b(?i)id:? ')(\\d+)(')";
172 std::map<std::string, std::string> space;
battre03910b42016-01-11 13:42:34173 EXPECT_EQ("", AnonymizeCustomPatternWithContext("", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27174 EXPECT_EQ("foo\nbar\n",
battre03910b42016-01-11 13:42:34175 AnonymizeCustomPatternWithContext("foo\nbar\n", kPattern, &space));
176 EXPECT_EQ("id '1'",
177 AnonymizeCustomPatternWithContext("id '2345'", kPattern, &space));
178 EXPECT_EQ("id '2'",
179 AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
180 EXPECT_EQ("id: '2'",
181 AnonymizeCustomPatternWithContext("id: '1234'", kPattern, &space));
182 EXPECT_EQ("ID: '1'",
183 AnonymizeCustomPatternWithContext("ID: '2345'", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27184 EXPECT_EQ("x1 id '1' 1x id '2'\nid '1'\n",
battre03910b42016-01-11 13:42:34185 AnonymizeCustomPatternWithContext(
186 "x1 id '2345' 1x id '1234'\nid '2345'\n", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27187 space.clear();
battre03910b42016-01-11 13:42:34188 EXPECT_EQ("id '1'",
189 AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27190
191 space.clear();
battre03910b42016-01-11 13:42:34192 EXPECT_EQ("x1z",
193 AnonymizeCustomPatternWithContext("xyz", "()(y+)()", &space));
194}
195
196TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithoutContext) {
197 CustomPatternWithoutContext kPattern = {"pattern", "(o+)"};
198 std::map<std::string, std::string> space;
199 EXPECT_EQ("", AnonymizeCustomPatternWithoutContext("", kPattern, &space));
200 EXPECT_EQ("f<pattern: 1>\nf<pattern: 2>z\nf<pattern: 1>l\n",
201 AnonymizeCustomPatternWithoutContext("fo\nfooz\nfol\n", kPattern,
202 &space));
battre4cdaa7c2016-01-07 11:30:27203}
204
Dominic Battref091addfc2017-12-07 15:45:34205TEST_F(AnonymizerToolTest, AnonymizeChunk) {
206 std::string data =
207 "aaaaaaaa [SSID=123aaaaaa]aaaaa\n" // SSID.
208 "aaaaaaaahttp://tets.comaaaaaaa\n" // URL.
209 "[email protected]\n" // Email address.
210 "example@@1234\n" // No PII, it is not valid email address.
Kevin Cernekeee3328202018-04-19 03:51:29211 "255.255.155.2\n" // IP address.
Dominic Battref091addfc2017-12-07 15:45:34212 "255.255.155.255\n" // IP address.
Garrick Evans5fbf45d2019-01-18 12:37:26213 "127.0.0.1\n" // IPv4 loopback.
214 "127.255.0.1\n" // IPv4 loopback.
215 "0.0.0.0\n" // Any IPv4.
216 "0.255.255.255\n" // Any IPv4.
217 "10.10.10.100\n" // IPv4 private class A.
218 "10.10.10.100\n" // Intentional duplicate.
219 "10.10.10.101\n" // IPv4 private class A.
220 "10.255.255.255\n" // IPv4 private class A.
221 "172.16.0.0\n" // IPv4 private class B.
222 "172.31.255.255\n" // IPv4 private class B.
223 "172.11.5.5\n" // IP address.
224 "172.111.5.5\n" // IP address.
225 "192.168.0.0\n" // IPv4 private class C.
226 "192.168.255.255\n" // IPv4 private class C.
227 "192.169.2.120\n" // IP address.
228 "169.254.0.1\n" // Link local.
229 "169.200.0.1\n" // IP address.
Garrick Evans5a5c24e2019-01-28 12:55:44230 "fe80::\n" // Link local.
231 "fe80::ffff\n" // Link local.
232 "febf:ffff::ffff\n" // Link local.
233 "fecc::1111\n" // IP address.
Garrick Evans5fbf45d2019-01-18 12:37:26234 "224.0.0.24\n" // Multicast.
235 "240.0.0.0\n" // IP address.
236 "255.255.255.255\n" // Broadcast.
237 "100.115.92.92\n" // ChromeOS.
238 "100.115.91.92\n" // IP address.
239 "1.1.1.1\n" // DNS
240 "8.8.8.8\n" // DNS
241 "8.8.4.4\n" // DNS
242 "8.8.8.4\n" // IP address.
Kevin Cernekee70b0dfb2018-04-19 05:40:29243 "255.255.259.255\n" // Not an IP address.
244 "255.300.255.255\n" // Not an IP address.
Dominic Battref091addfc2017-12-07 15:45:34245 "aaaa123.123.45.4aaa\n" // IP address.
246 "11:11;11::11\n" // IP address.
247 "11::11\n" // IP address.
248 "11:11:abcdef:0:0:0:0:0\n" // No PII.
Garrick Evans5a5c24e2019-01-28 12:55:44249 "::\n" // Unspecified.
250 "::1\n" // Local host.
251 "Instance::Set\n" // Ignore match, no PII.
252 "Instant::ff\n" // Ignore match, no PII.
253 "net::ERR_CONN_TIMEOUT\n" // Ignore match, no PII.
254 "ff01::1\n" // All nodes address (interface local).
255 "ff01::2\n" // All routers (interface local).
256 "ff01::3\n" // Multicast (interface local).
257 "ff02::1\n" // All nodes address (link local).
258 "ff02::2\n" // All routers (link local).
259 "ff02::3\n" // Multicast (link local).
260 "ff02::fb\n" // mDNSv6 (link local).
261 "ff08::fb\n" // mDNSv6.
262 "ff0f::101\n" // All NTP servers.
263 "::ffff:cb0c:10ea\n" // IPv4-mapped IPV6 (IP address).
264 "::ffff:a0a:a0a\n" // IPv4-mapped IPV6 (private class A).
265 "::ffff:a0a:a0a\n" // Intentional duplicate.
266 "::ffff:ac1e:1e1e\n" // IPv4-mapped IPV6 (private class B).
267 "::ffff:c0a8:640a\n" // IPv4-mapped IPV6 (private class C).
268 "::ffff:6473:5c01\n" // IPv4-mapped IPV6 (Chrome).
269 "64:ff9b::a0a:a0a\n" // IPv4-translated 6to4 IPV6 (private class A).
270 "64:ff9b::6473:5c01\n" // IPv4-translated 6to4 IPV6 (Chrome).
271 "::0101:ffff:c0a8:640a\n" // IP address.
272 "aa:aa:aa:aa:aa:aa"; // MAC address (BSSID).
Dominic Battref091addfc2017-12-07 15:45:34273 std::string result =
274 "aaaaaaaa [SSID=1]aaaaa\n"
275 "aaaaaaaa<URL: 1>\n"
276 "<email: 1>\n"
277 "example@@1234\n"
Kevin Cernekeee3328202018-04-19 03:51:29278 "<IPv4: 1>\n"
279 "<IPv4: 2>\n"
Garrick Evans5fbf45d2019-01-18 12:37:26280 "<127.0.0.0/8: 3>\n"
281 "<127.0.0.0/8: 4>\n"
282 "<0.0.0.0/8: 5>\n"
283 "<0.0.0.0/8: 6>\n"
284 "<10.0.0.0/8: 7>\n"
285 "<10.0.0.0/8: 7>\n"
286 "<10.0.0.0/8: 8>\n"
287 "<10.0.0.0/8: 9>\n"
288 "<172.16.0.0/12: 10>\n"
289 "<172.16.0.0/12: 11>\n"
290 "<IPv4: 12>\n"
291 "<IPv4: 13>\n"
292 "<192.168.0.0/16: 14>\n"
293 "<192.168.0.0/16: 15>\n"
294 "<IPv4: 16>\n"
295 "<169.254.0.0/16: 17>\n"
296 "<IPv4: 18>\n"
Garrick Evans5a5c24e2019-01-28 12:55:44297 "<fe80::/10: 1>\n"
298 "<fe80::/10: 2>\n"
299 "<fe80::/10: 3>\n"
300 "<IPv6: 4>\n"
Garrick Evans5fbf45d2019-01-18 12:37:26301 "<224.0.0.0/4: 19>\n"
302 "<IPv4: 20>\n"
303 "255.255.255.255\n"
304 "100.115.92.92\n"
305 "<IPv4: 23>\n"
306 "1.1.1.1\n"
307 "8.8.8.8\n"
308 "8.8.4.4\n"
309 "<IPv4: 27>\n"
Kevin Cernekee70b0dfb2018-04-19 05:40:29310 "255.255.259.255\n"
311 "255.300.255.255\n"
Garrick Evans5fbf45d2019-01-18 12:37:26312 "aaaa<IPv4: 28>aaa\n"
Garrick Evans5a5c24e2019-01-28 12:55:44313 "11:11;<IPv6: 5>\n"
314 "<IPv6: 5>\n"
Dominic Battref091addfc2017-12-07 15:45:34315 "11:11:abcdef:0:0:0:0:0\n"
Garrick Evans5a5c24e2019-01-28 12:55:44316 "::\n"
317 "::1\n"
318 "Instance::Set\n"
319 "Instant::ff\n"
320 "net::ERR_CONN_TIMEOUT\n"
321 "ff01::1\n"
322 "ff01::2\n"
323 "<ff01::/16: 13>\n"
324 "ff02::1\n"
325 "ff02::2\n"
326 "<ff02::/16: 16>\n"
327 "<ff02::/16: 17>\n"
328 "<IPv6: 18>\n"
329 "<IPv6: 19>\n"
330 "<IPv6: 20>\n"
331 "<M 10.0.0.0/8: 21>\n"
332 "<M 10.0.0.0/8: 21>\n"
333 "<M 172.16.0.0/12: 22>\n"
334 "<M 192.168.0.0/16: 23>\n"
335 "<M 100.115.92.1: 24>\n"
336 "<T 10.0.0.0/8: 25>\n"
337 "<T 100.115.92.1: 26>\n"
338 "<IPv6: 27>\n"
Dominic Battref091addfc2017-12-07 15:45:34339 "aa:aa:aa:00:00:01";
340 EXPECT_EQ(result, anonymizer_.Anonymize(data));
341}
342
battre4cdaa7c2016-01-07 11:30:27343} // namespace feedback