blob: 6b292bca7a51d3713bdc6211b0b640bb9d6ce1c9 [file] [log] [blame]
[email protected]2a96e362012-04-04 00:36:201// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/bind.h"
6#include "base/file_util.h"
[email protected]ea1a3f62012-11-16 20:34:237#include "base/files/scoped_temp_dir.h"
[email protected]2a96e362012-04-04 00:36:208#include "chrome/browser/character_encoding.h"
9#include "chrome/browser/net/url_request_mock_util.h"
10#include "chrome/browser/prefs/pref_service.h"
11#include "chrome/browser/profiles/profile.h"
12#include "chrome/browser/ui/browser.h"
[email protected]a37d4b02012-06-25 21:56:1013#include "chrome/browser/ui/browser_commands.h"
[email protected]52877dbc62012-06-29 22:22:0314#include "chrome/browser/ui/browser_tabstrip.h"
[email protected]2a96e362012-04-04 00:36:2015#include "chrome/common/pref_names.h"
16#include "chrome/test/base/in_process_browser_test.h"
17#include "chrome/test/base/ui_test_utils.h"
18#include "content/public/browser/browser_thread.h"
19#include "content/public/browser/navigation_controller.h"
20#include "content/public/browser/notification_service.h"
21#include "content/public/browser/notification_source.h"
22#include "content/public/browser/notification_types.h"
23#include "content/public/browser/web_contents.h"
[email protected]5b8ff1c2012-06-02 20:42:2024#include "content/public/test/test_navigation_observer.h"
[email protected]2a96e362012-04-04 00:36:2025#include "content/test/net/url_request_mock_http_job.h"
[email protected]2a96e362012-04-04 00:36:2026
namespace {

// One parameter set for the alias-mapping test: the page to load and the
// encoding name the active tab is expected to report afterwards.
struct EncodingTestData {
  const char* file_name;      // Test page, relative to the alias_mapping dir.
  const char* encoding_name;  // Value compared against GetEncoding().
};

// Table driving TestEncodingAliasMapping. Entry order is kept stable because
// it determines the index of each instantiated test.
const EncodingTestData kEncodingTestDatas[] = {
  { "Big5.html",         "Big5"         },
  { "EUC-JP.html",       "EUC-JP"       },
  { "gb18030.html",      "gb18030"      },
  { "iso-8859-1.html",   "ISO-8859-1"   },
  { "ISO-8859-2.html",   "ISO-8859-2"   },
  { "ISO-8859-4.html",   "ISO-8859-4"   },
  { "ISO-8859-5.html",   "ISO-8859-5"   },
  { "ISO-8859-6.html",   "ISO-8859-6"   },
  { "ISO-8859-7.html",   "ISO-8859-7"   },
  { "ISO-8859-8.html",   "ISO-8859-8"   },
  { "ISO-8859-13.html",  "ISO-8859-13"  },
  { "ISO-8859-15.html",  "ISO-8859-15"  },
  { "KOI8-R.html",       "KOI8-R"       },
  { "KOI8-U.html",       "KOI8-U"       },
  { "macintosh.html",    "macintosh"    },
  { "Shift-JIS.html",    "Shift_JIS"    },
  // The US-ASCII page is expected to report ISO-8859-1; see
  // http://crbug.com/15801.
  { "US-ASCII.html",     "ISO-8859-1"   },
  { "UTF-8.html",        "UTF-8"        },
  { "UTF-16LE.html",     "UTF-16LE"     },
  { "windows-874.html",  "windows-874"  },
  // windows-949 is left out; see http://crbug.com/95963.
  // { "windows-949.html", "windows-949" },
  { "windows-1250.html", "windows-1250" },
  { "windows-1251.html", "windows-1251" },
  { "windows-1252.html", "windows-1252" },
  { "windows-1253.html", "windows-1253" },
  { "windows-1254.html", "windows-1254" },
  { "windows-1255.html", "windows-1255" },
  { "windows-1256.html", "windows-1256" },
  { "windows-1257.html", "windows-1257" },
  { "windows-1258.html", "windows-1258" }
};

}  // namespace
69
[email protected]2a96e362012-04-04 00:36:2070using content::BrowserThread;
71
72static const FilePath::CharType* kTestDir = FILE_PATH_LITERAL("encoding_tests");
73
[email protected]bc7fadf22012-11-20 01:05:3174class BrowserEncodingTest
75 : public InProcessBrowserTest,
76 public testing::WithParamInterface<EncodingTestData> {
[email protected]2a96e362012-04-04 00:36:2077 protected:
78 BrowserEncodingTest() {}
79
80 // Saves the current page and verifies that the output matches the expected
81 // result.
82 void SaveAndCompare(const char* filename_to_write, const FilePath& expected) {
83 // Dump the page, the content of dump page should be identical to the
84 // expected result file.
85 FilePath full_file_name = save_dir_.AppendASCII(filename_to_write);
86 // We save the page as way of complete HTML file, which requires a directory
87 // name to save sub resources in it. Although this test file does not have
88 // sub resources, but the directory name is still required.
[email protected]a7fe9112012-07-20 02:34:4589 content::WindowedNotificationObserver observer(
[email protected]2a96e362012-04-04 00:36:2090 content::NOTIFICATION_SAVE_PACKAGE_SUCCESSFULLY_FINISHED,
91 content::NotificationService::AllSources());
[email protected]52877dbc62012-06-29 22:22:0392 chrome::GetActiveWebContents(browser())->SavePage(
[email protected]2a96e362012-04-04 00:36:2093 full_file_name, temp_sub_resource_dir_,
94 content::SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
95 observer.Wait();
96
97 FilePath expected_file_name = ui_test_utils::GetTestFilePath(
98 FilePath(kTestDir), expected);
99
100 EXPECT_TRUE(file_util::ContentsEqual(full_file_name, expected_file_name));
101 }
102
[email protected]f13550f2012-04-11 23:41:35103 virtual void SetUpOnMainThread() OVERRIDE {
[email protected]2a96e362012-04-04 00:36:20104 ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
105 save_dir_ = temp_dir_.path();
106 temp_sub_resource_dir_ = save_dir_.AppendASCII("sub_resource_files");
107
108 BrowserThread::PostTask(
109 BrowserThread::IO, FROM_HERE,
110 base::Bind(&chrome_browser_net::SetUrlRequestMocksEnabled, true));
111 }
112
[email protected]ea1a3f62012-11-16 20:34:23113 base::ScopedTempDir temp_dir_;
[email protected]2a96e362012-04-04 00:36:20114 FilePath save_dir_;
115 FilePath temp_sub_resource_dir_;
116};
117
// TODO(jnd): 1. Some encodings are missing here. They will be added later. See
// http://crbug.com/13306.
// 2. Add more files with multiple encoding name variants for each canonical
// encoding name. WebKit layout tests cover some, but testing in the UI test is
// also necessary.
[email protected]bc7fadf22012-11-20 01:05:31123IN_PROC_BROWSER_TEST_P(BrowserEncodingTest, TestEncodingAliasMapping) {
[email protected]2a96e362012-04-04 00:36:20124 const char* const kAliasTestDir = "alias_mapping";
125
126 FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kAliasTestDir);
[email protected]bc7fadf22012-11-20 01:05:31127 FilePath test_file_path(test_dir_path);
128 test_file_path = test_file_path.AppendASCII(
129 GetParam().file_name);
[email protected]2a96e362012-04-04 00:36:20130
[email protected]bc7fadf22012-11-20 01:05:31131 GURL url = content::URLRequestMockHTTPJob::GetMockUrl(test_file_path);
132 ui_test_utils::NavigateToURL(browser(), url);
133 EXPECT_EQ(GetParam().encoding_name,
134 chrome::GetActiveWebContents(browser())->GetEncoding());
[email protected]2a96e362012-04-04 00:36:20135}
136
[email protected]bc7fadf22012-11-20 01:05:31137INSTANTIATE_TEST_CASE_P(EncodingAliases,
138 BrowserEncodingTest,
139 testing::ValuesIn(kEncodingTestDatas));
140
// NOTE: http://crbug.com/44668 tracks flakiness reported for this test; the
// test is not currently marked flaky or disabled.
142IN_PROC_BROWSER_TEST_F(BrowserEncodingTest, TestOverrideEncoding) {
143 const char* const kTestFileName = "gb18030_with_iso88591_meta.html";
144 const char* const kExpectedFileName =
145 "expected_gb18030_saved_from_iso88591_meta.html";
146 const char* const kOverrideTestDir = "user_override";
147
148 FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kOverrideTestDir);
149 test_dir_path = test_dir_path.AppendASCII(kTestFileName);
[email protected]2dbcad1c2012-10-30 00:20:09150 GURL url = content::URLRequestMockHTTPJob::GetMockUrl(test_dir_path);
[email protected]2a96e362012-04-04 00:36:20151 ui_test_utils::NavigateToURL(browser(), url);
[email protected]52877dbc62012-06-29 22:22:03152 content::WebContents* web_contents = chrome::GetActiveWebContents(browser());
[email protected]2a96e362012-04-04 00:36:20153 EXPECT_EQ("ISO-8859-1", web_contents->GetEncoding());
154
155 // Override the encoding to "gb18030".
156 const std::string selected_encoding =
157 CharacterEncoding::GetCanonicalEncodingNameByAliasName("gb18030");
[email protected]5b8ff1c2012-06-02 20:42:20158 content::TestNavigationObserver navigation_observer(
[email protected]2a96e362012-04-04 00:36:20159 content::Source<content::NavigationController>(
160 &web_contents->GetController()));
161 web_contents->SetOverrideEncoding(selected_encoding);
162 navigation_observer.Wait();
163 EXPECT_EQ("gb18030", web_contents->GetEncoding());
164
165 FilePath expected_filename =
166 FilePath().AppendASCII(kOverrideTestDir).AppendASCII(kExpectedFileName);
167 SaveAndCompare(kTestFileName, expected_filename);
168}
169
170// The following encodings are excluded from the auto-detection test because
171// it's a known issue that the current encoding detector does not detect them:
172// ISO-8859-4
173// ISO-8859-13
174// KOI8-U
175// macintosh
176// windows-874
177// windows-1252
178// windows-1253
179// windows-1257
180// windows-1258
181
// For Hebrew, the expected encoding value is ISO-8859-8-I. See
// http://crbug.com/2927 for more details.
//
// This test fails frequently on the win_rel trybot. See
// http://crbug.com/122053.
// MAYBE_TestEncodingAutoDetect expands to the plain test name everywhere
// except on Windows, where it expands to the DISABLED_-prefixed name.
#if !defined(OS_WIN)
#define MAYBE_TestEncodingAutoDetect TestEncodingAutoDetect
#else
#define MAYBE_TestEncodingAutoDetect DISABLED_TestEncodingAutoDetect
#endif
// TODO(phajdan.jr): See if the fix for http://crbug.com/122053 would help here.
[email protected]361cfe62012-04-04 23:28:39192IN_PROC_BROWSER_TEST_F(BrowserEncodingTest, MAYBE_TestEncodingAutoDetect) {
[email protected]2a96e362012-04-04 00:36:20193 struct EncodingAutoDetectTestData {
194 const char* test_file_name; // File name of test data.
195 const char* expected_result; // File name of expected results.
196 const char* expected_encoding; // expected encoding.
197 };
198 const EncodingAutoDetectTestData kTestDatas[] = {
199 { "Big5_with_no_encoding_specified.html",
200 "expected_Big5_saved_from_no_encoding_specified.html",
201 "Big5" },
202 { "gb18030_with_no_encoding_specified.html",
203 "expected_gb18030_saved_from_no_encoding_specified.html",
204 "gb18030" },
205 { "iso-8859-1_with_no_encoding_specified.html",
206 "expected_iso-8859-1_saved_from_no_encoding_specified.html",
207 "ISO-8859-1" },
208 { "ISO-8859-5_with_no_encoding_specified.html",
209 "expected_ISO-8859-5_saved_from_no_encoding_specified.html",
210 "ISO-8859-5" },
211 { "ISO-8859-6_with_no_encoding_specified.html",
212 "expected_ISO-8859-6_saved_from_no_encoding_specified.html",
213 "ISO-8859-6" },
214 { "ISO-8859-7_with_no_encoding_specified.html",
215 "expected_ISO-8859-7_saved_from_no_encoding_specified.html",
216 "ISO-8859-7" },
217 { "ISO-8859-8_with_no_encoding_specified.html",
218 "expected_ISO-8859-8_saved_from_no_encoding_specified.html",
219 "ISO-8859-8-I" },
220 { "KOI8-R_with_no_encoding_specified.html",
221 "expected_KOI8-R_saved_from_no_encoding_specified.html",
222 "KOI8-R" },
223 { "Shift-JIS_with_no_encoding_specified.html",
224 "expected_Shift-JIS_saved_from_no_encoding_specified.html",
225 "Shift_JIS" },
226 { "UTF-8_with_no_encoding_specified.html",
227 "expected_UTF-8_saved_from_no_encoding_specified.html",
228 "UTF-8" },
229 { "windows-949_with_no_encoding_specified.html",
230 "expected_windows-949_saved_from_no_encoding_specified.html",
231 "windows-949-2000" },
232 { "windows-1251_with_no_encoding_specified.html",
233 "expected_windows-1251_saved_from_no_encoding_specified.html",
234 "windows-1251" },
235 { "windows-1254_with_no_encoding_specified.html",
236 "expected_windows-1254_saved_from_no_encoding_specified.html",
237 "windows-1254" },
238 { "windows-1255_with_no_encoding_specified.html",
239 "expected_windows-1255_saved_from_no_encoding_specified.html",
240 "windows-1255" },
241 { "windows-1256_with_no_encoding_specified.html",
242 "expected_windows-1256_saved_from_no_encoding_specified.html",
243 "windows-1256" }
244 };
245 const char* const kAutoDetectDir = "auto_detect";
246 // Directory of the files of expected results.
247 const char* const kExpectedResultDir = "expected_results";
248
249 FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kAutoDetectDir);
250
251 // Set the default charset to one of encodings not supported by the current
252 // auto-detector (Please refer to the above comments) to make sure we
253 // incorrectly decode the page. Now we use ISO-8859-4.
[email protected]ddf72142012-05-22 04:52:40254 browser()->profile()->GetPrefs()->SetString(prefs::kDefaultCharset,
255 "ISO-8859-4");
[email protected]2a96e362012-04-04 00:36:20256
[email protected]52877dbc62012-06-29 22:22:03257 content::WebContents* web_contents = chrome::GetActiveWebContents(browser());
[email protected]2a96e362012-04-04 00:36:20258 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestDatas); ++i) {
259 // Disable auto detect if it is on.
260 browser()->profile()->GetPrefs()->SetBoolean(
261 prefs::kWebKitUsesUniversalDetector, false);
262
263 FilePath test_file_path(test_dir_path);
264 test_file_path = test_file_path.AppendASCII(kTestDatas[i].test_file_name);
[email protected]2dbcad1c2012-10-30 00:20:09265 GURL url = content::URLRequestMockHTTPJob::GetMockUrl(test_file_path);
[email protected]2a96e362012-04-04 00:36:20266 ui_test_utils::NavigateToURL(browser(), url);
267
268 // Get the encoding used for the page, it must be the default charset we
269 // just set.
270 EXPECT_EQ("ISO-8859-4", web_contents->GetEncoding());
271
272 // Enable the encoding auto detection.
273 browser()->profile()->GetPrefs()->SetBoolean(
274 prefs::kWebKitUsesUniversalDetector, true);
275
[email protected]5b8ff1c2012-06-02 20:42:20276 content::TestNavigationObserver observer(
[email protected]2a96e362012-04-04 00:36:20277 content::Source<content::NavigationController>(
278 &web_contents->GetController()));
[email protected]a37d4b02012-06-25 21:56:10279 chrome::Reload(browser(), CURRENT_TAB);
[email protected]2a96e362012-04-04 00:36:20280 observer.Wait();
281
282 // Re-get the encoding of page. It should return the real encoding now.
283 EXPECT_EQ(kTestDatas[i].expected_encoding, web_contents->GetEncoding());
284
285 // Dump the page, the content of dump page should be equal with our expect
286 // result file.
287 FilePath expected_result_file_name =
288 FilePath().AppendASCII(kAutoDetectDir).AppendASCII(kExpectedResultDir).
289 AppendASCII(kTestDatas[i].expected_result);
290 SaveAndCompare(kTestDatas[i].test_file_name, expected_result_file_name);
291 }
292}