// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
#include "components/feedback/anonymizer_tool.h"

#include <map>
#include <string>

#include <gtest/gtest.h>

#include "base/stl_util.h"
#include "base/strings/string_util.h"
11
namespace feedback {
13
14class AnonymizerToolTest : public testing::Test {
15 protected:
16 std::string AnonymizeMACAddresses(const std::string& input) {
17 return anonymizer_.AnonymizeMACAddresses(input);
18 }
19
20 std::string AnonymizeCustomPatterns(const std::string& input) {
21 return anonymizer_.AnonymizeCustomPatterns(input);
22 }
23
battre03910b42016-01-11 13:42:3424 std::string AnonymizeCustomPatternWithContext(
battre4cdaa7c2016-01-07 11:30:2725 const std::string& input,
26 const std::string& pattern,
27 std::map<std::string, std::string>* space) {
battre03910b42016-01-11 13:42:3428 return anonymizer_.AnonymizeCustomPatternWithContext(input, pattern, space);
29 }
30
31 std::string AnonymizeCustomPatternWithoutContext(
32 const std::string& input,
33 const CustomPatternWithoutContext& pattern,
34 std::map<std::string, std::string>* space) {
35 return anonymizer_.AnonymizeCustomPatternWithoutContext(input, pattern,
36 space);
battre4cdaa7c2016-01-07 11:30:2737 }
38
39 AnonymizerTool anonymizer_;
40};
41
42TEST_F(AnonymizerToolTest, Anonymize) {
43 EXPECT_EQ("", anonymizer_.Anonymize(""));
44 EXPECT_EQ("foo\nbar\n", anonymizer_.Anonymize("foo\nbar\n"));
45
46 // Make sure MAC address anonymization is invoked.
47 EXPECT_EQ("02:46:8a:00:00:01", anonymizer_.Anonymize("02:46:8a:ce:13:57"));
48
49 // Make sure custom pattern anonymization is invoked.
50 EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));
afakhry5a59f952017-05-24 21:04:2651
52 // Make sure UUIDs are anonymized.
53 EXPECT_EQ(
54 "REQUEST localhost - - \"POST /printers/<UUID: 1> HTTP/1.1\" 200 291 "
55 "Create-Job successful-ok",
56 anonymizer_.Anonymize(
57 "REQUEST localhost - - \"POST /printers/"
58 "cb738a9f-6433-4d95-a81e-94e4ae0ed30b HTTP/1.1\" 200 291 Create-Job "
59 "successful-ok"));
60 EXPECT_EQ(
61 "REQUEST localhost - - \"POST /printers/<UUID: 2> HTTP/1.1\" 200 286 "
62 "Create-Job successful-ok",
63 anonymizer_.Anonymize(
64 "REQUEST localhost - - \"POST /printers/"
65 "d17188da-9cd3-44f4-b148-3e1d748a3b0f HTTP/1.1\" 200 286 Create-Job "
66 "successful-ok"));
battre4cdaa7c2016-01-07 11:30:2767}
68
69TEST_F(AnonymizerToolTest, AnonymizeMACAddresses) {
70 EXPECT_EQ("", AnonymizeMACAddresses(""));
71 EXPECT_EQ("foo\nbar\n", AnonymizeMACAddresses("foo\nbar\n"));
72 EXPECT_EQ("11:22:33:44:55", AnonymizeMACAddresses("11:22:33:44:55"));
73 EXPECT_EQ("aa:bb:cc:00:00:01", AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff"));
74 EXPECT_EQ(
75 "BSSID: aa:bb:cc:00:00:01 in the middle\n"
76 "bb:cc:dd:00:00:02 start of line\n"
77 "end of line aa:bb:cc:00:00:01\n"
78 "no match across lines aa:bb:cc:\n"
79 "dd:ee:ff two on the same line:\n"
80 "x bb:cc:dd:00:00:02 cc:dd:ee:00:00:03 x\n",
81 AnonymizeMACAddresses("BSSID: aa:bb:cc:dd:ee:ff in the middle\n"
82 "bb:cc:dd:ee:ff:00 start of line\n"
83 "end of line aa:bb:cc:dd:ee:ff\n"
84 "no match across lines aa:bb:cc:\n"
85 "dd:ee:ff two on the same line:\n"
86 "x bb:cc:dd:ee:ff:00 cc:dd:ee:ff:00:11 x\n"));
87 EXPECT_EQ("Remember bb:cc:dd:00:00:02?",
88 AnonymizeMACAddresses("Remember bB:Cc:DD:ee:ff:00?"));
89}
90
91TEST_F(AnonymizerToolTest, AnonymizeCustomPatterns) {
92 EXPECT_EQ("", AnonymizeCustomPatterns(""));
93
94 EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));
95 EXPECT_EQ("Cell ID: '2'", AnonymizeCustomPatterns("Cell ID: 'C1D2'"));
96 EXPECT_EQ("foo Cell ID: '1' bar",
97 AnonymizeCustomPatterns("foo Cell ID: 'A1B2' bar"));
98
99 EXPECT_EQ("foo Location area code: '1' bar",
100 AnonymizeCustomPatterns("foo Location area code: 'A1B2' bar"));
101
102 EXPECT_EQ("foo\na SSID='1' b\n'",
103 AnonymizeCustomPatterns("foo\na SSID='Joe's' b\n'"));
104 EXPECT_EQ("ssid '2'", AnonymizeCustomPatterns("ssid 'My AP'"));
105 EXPECT_EQ("bssid 'aa:bb'", AnonymizeCustomPatterns("bssid 'aa:bb'"));
106
107 EXPECT_EQ("Scan SSID - hexdump(len=6): 1\nfoo",
108 AnonymizeCustomPatterns(
109 "Scan SSID - hexdump(len=6): 47 6f 6f 67 6c 65\nfoo"));
110
111 EXPECT_EQ(
112 "a\nb [SSID=1] [SSID=2] [SSID=foo\nbar] b",
113 AnonymizeCustomPatterns("a\nb [SSID=foo] [SSID=bar] [SSID=foo\nbar] b"));
battre03910b42016-01-11 13:42:34114
afakhry85eea802017-05-01 17:04:10115 EXPECT_EQ("SerialNumber: 1",
116 AnonymizeCustomPatterns("SerialNumber: 1217D7EF"));
117 EXPECT_EQ("serial number: 2",
118 AnonymizeCustomPatterns("serial number: 50C971FEE7F3x010900"));
119 EXPECT_EQ("SerialNumber: 3",
120 AnonymizeCustomPatterns("SerialNumber: EVT23-17BA01-004"));
Thiemo Nagel21a5d552017-12-12 18:21:19121 EXPECT_EQ("serial=4", AnonymizeCustomPatterns("serial=\"1234AA5678\""));
afakhry85eea802017-05-01 17:04:10122
battre03910b42016-01-11 13:42:34123 EXPECT_EQ("<email: 1>",
124 AnonymizeCustomPatterns("[email protected]"));
125 EXPECT_EQ("Email: <email: 1>.",
126 AnonymizeCustomPatterns("Email: [email protected]."));
127 EXPECT_EQ("Email:\n<email: 2>\n",
128 AnonymizeCustomPatterns("Email:\[email protected]\n"));
129
130 EXPECT_EQ("[<IPv6: 1>]", AnonymizeCustomPatterns(
131 "[2001:0db8:0000:0000:0000:ff00:0042:8329]"));
132 EXPECT_EQ("[<IPv6: 2>]",
133 AnonymizeCustomPatterns("[2001:db8:0:0:0:ff00:42:8329]"));
134 EXPECT_EQ("[<IPv6: 3>]", AnonymizeCustomPatterns("[2001:db8::ff00:42:8329]"));
135 EXPECT_EQ("[<IPv6: 4>]", AnonymizeCustomPatterns("[::1]"));
Garrick Evans5fbf45d2019-01-18 12:37:26136 EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns("192.160.0.1"));
battre03910b42016-01-11 13:42:34137
138 EXPECT_EQ("<URL: 1>",
139 AnonymizeCustomPatterns("http://example.com/foo?test=1"));
140 EXPECT_EQ("Foo <URL: 2> Bar",
141 AnonymizeCustomPatterns("Foo http://192.168.0.1/foo?test=1#123 Bar"));
142 const char* kURLs[] = {
143 "http://example.com/foo?test=1",
144 "http://userid:[email protected]:8080",
145 "http://userid:[email protected]:8080/",
146 "http://@example.com",
147 "http://192.168.0.1",
148 "http://192.168.0.1/",
149 "http://اختبار.com",
150 "http://test.com/foo(bar)baz.html",
151 "http://test.com/foo%20bar",
152 "ftp://test:[email protected]",
153 "chrome://extensions/",
154 "chrome-extension://aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/options.html",
155 "http://example.com/[email protected]",
pbond4104d692016-02-10 09:41:07156 "rtsp://[email protected]/",
157 "https://aaaaaaaaaaaaaaaa.com",
battre03910b42016-01-11 13:42:34158 };
Avi Drissman8171db7d2018-12-25 23:08:31159 for (size_t i = 0; i < base::size(kURLs); ++i) {
battre03910b42016-01-11 13:42:34160 SCOPED_TRACE(kURLs[i]);
161 std::string got = AnonymizeCustomPatterns(kURLs[i]);
162 EXPECT_TRUE(
163 base::StartsWith(got, "<URL: ", base::CompareCase::INSENSITIVE_ASCII));
164 EXPECT_TRUE(base::EndsWith(got, ">", base::CompareCase::INSENSITIVE_ASCII));
165 }
166 // Test that "Android:" is not considered a schema with empty hier part.
167 EXPECT_EQ("The following applies to Android:",
168 AnonymizeCustomPatterns("The following applies to Android:"));
battre4cdaa7c2016-01-07 11:30:27169}
170
battre03910b42016-01-11 13:42:34171TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithContext) {
battre4cdaa7c2016-01-07 11:30:27172 const char kPattern[] = "(\\b(?i)id:? ')(\\d+)(')";
173 std::map<std::string, std::string> space;
battre03910b42016-01-11 13:42:34174 EXPECT_EQ("", AnonymizeCustomPatternWithContext("", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27175 EXPECT_EQ("foo\nbar\n",
battre03910b42016-01-11 13:42:34176 AnonymizeCustomPatternWithContext("foo\nbar\n", kPattern, &space));
177 EXPECT_EQ("id '1'",
178 AnonymizeCustomPatternWithContext("id '2345'", kPattern, &space));
179 EXPECT_EQ("id '2'",
180 AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
181 EXPECT_EQ("id: '2'",
182 AnonymizeCustomPatternWithContext("id: '1234'", kPattern, &space));
183 EXPECT_EQ("ID: '1'",
184 AnonymizeCustomPatternWithContext("ID: '2345'", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27185 EXPECT_EQ("x1 id '1' 1x id '2'\nid '1'\n",
battre03910b42016-01-11 13:42:34186 AnonymizeCustomPatternWithContext(
187 "x1 id '2345' 1x id '1234'\nid '2345'\n", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27188 space.clear();
battre03910b42016-01-11 13:42:34189 EXPECT_EQ("id '1'",
190 AnonymizeCustomPatternWithContext("id '1234'", kPattern, &space));
battre4cdaa7c2016-01-07 11:30:27191
192 space.clear();
battre03910b42016-01-11 13:42:34193 EXPECT_EQ("x1z",
194 AnonymizeCustomPatternWithContext("xyz", "()(y+)()", &space));
195}
196
197TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithoutContext) {
198 CustomPatternWithoutContext kPattern = {"pattern", "(o+)"};
199 std::map<std::string, std::string> space;
200 EXPECT_EQ("", AnonymizeCustomPatternWithoutContext("", kPattern, &space));
201 EXPECT_EQ("f<pattern: 1>\nf<pattern: 2>z\nf<pattern: 1>l\n",
202 AnonymizeCustomPatternWithoutContext("fo\nfooz\nfol\n", kPattern,
203 &space));
battre4cdaa7c2016-01-07 11:30:27204}
205
Dominic Battref091addfc2017-12-07 15:45:34206TEST_F(AnonymizerToolTest, AnonymizeChunk) {
207 std::string data =
208 "aaaaaaaa [SSID=123aaaaaa]aaaaa\n" // SSID.
209 "aaaaaaaahttp://tets.comaaaaaaa\n" // URL.
210 "[email protected]\n" // Email address.
211 "example@@1234\n" // No PII, it is not valid email address.
Kevin Cernekeee3328202018-04-19 03:51:29212 "255.255.155.2\n" // IP address.
Dominic Battref091addfc2017-12-07 15:45:34213 "255.255.155.255\n" // IP address.
Garrick Evans5fbf45d2019-01-18 12:37:26214 "127.0.0.1\n" // IPv4 loopback.
215 "127.255.0.1\n" // IPv4 loopback.
216 "0.0.0.0\n" // Any IPv4.
217 "0.255.255.255\n" // Any IPv4.
218 "10.10.10.100\n" // IPv4 private class A.
219 "10.10.10.100\n" // Intentional duplicate.
220 "10.10.10.101\n" // IPv4 private class A.
221 "10.255.255.255\n" // IPv4 private class A.
222 "172.16.0.0\n" // IPv4 private class B.
223 "172.31.255.255\n" // IPv4 private class B.
224 "172.11.5.5\n" // IP address.
225 "172.111.5.5\n" // IP address.
226 "192.168.0.0\n" // IPv4 private class C.
227 "192.168.255.255\n" // IPv4 private class C.
228 "192.169.2.120\n" // IP address.
229 "169.254.0.1\n" // Link local.
230 "169.200.0.1\n" // IP address.
231 "224.0.0.24\n" // Multicast.
232 "240.0.0.0\n" // IP address.
233 "255.255.255.255\n" // Broadcast.
234 "100.115.92.92\n" // ChromeOS.
235 "100.115.91.92\n" // IP address.
236 "1.1.1.1\n" // DNS
237 "8.8.8.8\n" // DNS
238 "8.8.4.4\n" // DNS
239 "8.8.8.4\n" // IP address.
Kevin Cernekee70b0dfb2018-04-19 05:40:29240 "255.255.259.255\n" // Not an IP address.
241 "255.300.255.255\n" // Not an IP address.
Dominic Battref091addfc2017-12-07 15:45:34242 "aaaa123.123.45.4aaa\n" // IP address.
243 "11:11;11::11\n" // IP address.
244 "11::11\n" // IP address.
245 "11:11:abcdef:0:0:0:0:0\n" // No PII.
246 "aa:aa:aa:aa:aa:aa"; // MAC address (BSSID).
247 std::string result =
248 "aaaaaaaa [SSID=1]aaaaa\n"
249 "aaaaaaaa<URL: 1>\n"
250 "<email: 1>\n"
251 "example@@1234\n"
Kevin Cernekeee3328202018-04-19 03:51:29252 "<IPv4: 1>\n"
253 "<IPv4: 2>\n"
Garrick Evans5fbf45d2019-01-18 12:37:26254 "<127.0.0.0/8: 3>\n"
255 "<127.0.0.0/8: 4>\n"
256 "<0.0.0.0/8: 5>\n"
257 "<0.0.0.0/8: 6>\n"
258 "<10.0.0.0/8: 7>\n"
259 "<10.0.0.0/8: 7>\n"
260 "<10.0.0.0/8: 8>\n"
261 "<10.0.0.0/8: 9>\n"
262 "<172.16.0.0/12: 10>\n"
263 "<172.16.0.0/12: 11>\n"
264 "<IPv4: 12>\n"
265 "<IPv4: 13>\n"
266 "<192.168.0.0/16: 14>\n"
267 "<192.168.0.0/16: 15>\n"
268 "<IPv4: 16>\n"
269 "<169.254.0.0/16: 17>\n"
270 "<IPv4: 18>\n"
271 "<224.0.0.0/4: 19>\n"
272 "<IPv4: 20>\n"
273 "255.255.255.255\n"
274 "100.115.92.92\n"
275 "<IPv4: 23>\n"
276 "1.1.1.1\n"
277 "8.8.8.8\n"
278 "8.8.4.4\n"
279 "<IPv4: 27>\n"
Kevin Cernekee70b0dfb2018-04-19 05:40:29280 "255.255.259.255\n"
281 "255.300.255.255\n"
Garrick Evans5fbf45d2019-01-18 12:37:26282 "aaaa<IPv4: 28>aaa\n"
Dominic Battref091addfc2017-12-07 15:45:34283 "11:11;<IPv6: 1>\n"
284 "<IPv6: 1>\n"
285 "11:11:abcdef:0:0:0:0:0\n"
286 "aa:aa:aa:00:00:01";
287 EXPECT_EQ(result, anonymizer_.Anonymize(data));
288}
289
}  // namespace feedback