blob: f0aa764e511c4f845ec590fc6db08b64ebf9eca2 [file] [log] [blame]
[email protected]c1978abe2013-04-23 03:08:121// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
avi1023d012015-12-25 02:39:145#include <stddef.h>
6#include <stdint.h>
7
[email protected]c1978abe2013-04-23 03:08:128#include "base/bind.h"
9#include "base/command_line.h"
10#include "base/compiler_specific.h"
[email protected]14c1c232013-06-11 17:52:4411#include "base/containers/hash_tables.h"
[email protected]c1978abe2013-04-23 03:08:1212#include "base/files/file_path.h"
thestigb7aad54f2014-09-05 18:25:3913#include "base/files/file_util.h"
Daniel Chengd5389712017-05-19 08:36:1314#include "base/message_loop/message_loop.h"
[email protected]21aa99682013-06-11 07:17:0115#include "base/strings/string_util.h"
[email protected]74ebfb12013-06-07 20:48:0016#include "base/strings/utf_string_conversions.h"
lukasza7947ccd2016-07-28 21:56:2517#include "base/threading/thread_restrictions.h"
avi1023d012015-12-25 02:39:1418#include "build/build_config.h"
dcheng8b5e3022015-09-02 23:58:5519#include "content/public/browser/render_view_host.h"
20#include "content/public/browser/web_contents.h"
[email protected]c1978abe2013-04-23 03:08:1221#include "content/public/common/content_switches.h"
Daniel Chengd5389712017-05-19 08:36:1322#include "content/public/renderer/render_frame.h"
[email protected]c1978abe2013-04-23 03:08:1223#include "content/public/renderer/render_view.h"
[email protected]6e9def12014-03-27 20:23:2824#include "content/public/test/content_browser_test.h"
25#include "content/public/test/content_browser_test_utils.h"
Daniel Chengd5389712017-05-19 08:36:1326#include "content/public/test/frame_load_waiter.h"
[email protected]c1978abe2013-04-23 03:08:1227#include "content/public/test/test_utils.h"
[email protected]12a936d2013-05-15 04:55:4928#include "content/renderer/savable_resources.h"
[email protected]de7d61ff2013-08-20 11:30:4129#include "content/shell/browser/shell.h"
[email protected]d96cf752014-04-09 04:05:2830#include "net/base/filename_util.h"
[email protected]c1978abe2013-04-23 03:08:1231#include "net/url_request/url_request_context.h"
[email protected]de7d61ff2013-08-20 11:30:4132#include "third_party/WebKit/public/platform/WebCString.h"
33#include "third_party/WebKit/public/platform/WebData.h"
34#include "third_party/WebKit/public/platform/WebString.h"
35#include "third_party/WebKit/public/platform/WebURL.h"
36#include "third_party/WebKit/public/platform/WebVector.h"
[email protected]2255a9332013-06-17 05:12:3137#include "third_party/WebKit/public/web/WebDocument.h"
38#include "third_party/WebKit/public/web/WebElement.h"
[email protected]c93932712014-02-07 18:49:0239#include "third_party/WebKit/public/web/WebElementCollection.h"
lukasza902fcc52015-12-31 04:45:2940#include "third_party/WebKit/public/web/WebFrameSerializer.h"
41#include "third_party/WebKit/public/web/WebFrameSerializerClient.h"
[email protected]d3576942014-04-10 18:45:3742#include "third_party/WebKit/public/web/WebLocalFrame.h"
esprehn467dcd92015-12-03 02:06:0943#include "third_party/WebKit/public/web/WebMetaElement.h"
[email protected]2255a9332013-06-17 05:12:3144#include "third_party/WebKit/public/web/WebNode.h"
[email protected]2255a9332013-06-17 05:12:3145#include "third_party/WebKit/public/web/WebView.h"
[email protected]c1978abe2013-04-23 03:08:1246
[email protected]180ef242013-11-07 06:50:4647using blink::WebCString;
48using blink::WebData;
49using blink::WebDocument;
50using blink::WebElement;
esprehn467dcd92015-12-03 02:06:0951using blink::WebMetaElement;
[email protected]c93932712014-02-07 18:49:0252using blink::WebElementCollection;
[email protected]180ef242013-11-07 06:50:4653using blink::WebFrame;
lukasza902fcc52015-12-31 04:45:2954using blink::WebFrameSerializer;
55using blink::WebFrameSerializerClient;
[email protected]d3576942014-04-10 18:45:3756using blink::WebLocalFrame;
[email protected]180ef242013-11-07 06:50:4657using blink::WebNode;
[email protected]180ef242013-11-07 06:50:4658using blink::WebString;
59using blink::WebURL;
60using blink::WebView;
61using blink::WebVector;
[email protected]c1978abe2013-04-23 03:08:1262
63namespace content {
64
[email protected]c1978abe2013-04-23 03:08:1265bool HasDocType(const WebDocument& doc) {
Blink Reformat1c4d759e2017-04-09 16:34:5466 return doc.FirstChild().IsDocumentTypeNode();
[email protected]c1978abe2013-04-23 03:08:1267}
68
[email protected]c1978abe2013-04-23 03:08:1269class DomSerializerTests : public ContentBrowserTest,
lukasza902fcc52015-12-31 04:45:2970 public WebFrameSerializerClient {
[email protected]c1978abe2013-04-23 03:08:1271 public:
lukasza4a96a1f02015-12-09 16:46:3472 DomSerializerTests() : serialization_reported_end_of_data_(false) {}
[email protected]c1978abe2013-04-23 03:08:1273
avi83883c82014-12-23 00:08:4974 void SetUpCommandLine(base::CommandLine* command_line) override {
[email protected]c1978abe2013-04-23 03:08:1275 command_line->AppendSwitch(switches::kSingleProcess);
[email protected]0c2c0d12014-02-04 17:24:4376#if defined(OS_WIN)
[email protected]5d97f762013-04-23 06:15:4977 // Don't want to try to create a GPU process.
[email protected]7af65ca2014-04-16 20:25:5678 command_line->AppendSwitch(switches::kDisableGpu);
[email protected]5d97f762013-04-23 06:15:4979#endif
[email protected]c1978abe2013-04-23 03:08:1280 }
81
dcheng03226bd2015-09-03 17:36:2382 void SetUpOnMainThread() override {
83 render_view_routing_id_ =
84 shell()->web_contents()->GetRenderViewHost()->GetRoutingID();
85 }
86
[email protected]c1978abe2013-04-23 03:08:1287 // DomSerializerDelegate.
Blink Reformat1c4d759e2017-04-09 16:34:5488 void DidSerializeDataForFrame(const WebCString& data,
lukasza902fcc52015-12-31 04:45:2989 FrameSerializationStatus status) override {
[email protected]c1978abe2013-04-23 03:08:1290 // Check finish status of current frame.
lukasza87f4e2ab2015-11-04 16:03:0391 ASSERT_FALSE(serialization_reported_end_of_data_);
[email protected]c1978abe2013-04-23 03:08:1292
93 // Add data to corresponding frame's content.
lukasza87f4e2ab2015-11-04 16:03:0394 serialized_contents_ += data;
[email protected]c1978abe2013-04-23 03:08:1295
96 // Current frame is completed saving, change the finish status.
Blink Reformat1c4d759e2017-04-09 16:34:5497 if (status == WebFrameSerializerClient::kCurrentFrameIsFinished)
lukasza87f4e2ab2015-11-04 16:03:0398 serialization_reported_end_of_data_ = true;
[email protected]c1978abe2013-04-23 03:08:1299 }
100
101 RenderView* GetRenderView() {
dcheng03226bd2015-09-03 17:36:23102 return RenderView::FromRoutingID(render_view_routing_id_);
[email protected]c1978abe2013-04-23 03:08:12103 }
104
105 WebView* GetWebView() {
106 return GetRenderView()->GetWebView();
107 }
108
Daniel Chengd5389712017-05-19 08:36:13109 WebLocalFrame* GetMainFrame() {
110 return GetRenderView()->GetMainRenderFrame()->GetWebFrame();
111 }
[email protected]c1978abe2013-04-23 03:08:12112
esprehn961779002015-11-16 04:35:13113 WebFrame* FindSubFrameByURL(const GURL& url) {
Blink Reformat1c4d759e2017-04-09 16:34:54114 for (WebFrame* frame = GetWebView()->MainFrame(); frame;
115 frame = frame->TraverseNext()) {
116 if (GURL(frame->GetDocument().Url()) == url)
esprehn961779002015-11-16 04:35:13117 return frame;
118 }
119 return nullptr;
120 }
121
[email protected]c1978abe2013-04-23 03:08:12122 // Load web page according to input content and relative URLs within
123 // the document.
124 void LoadContents(const std::string& contents,
125 const GURL& base_url,
126 const WebString encoding_info) {
Daniel Chengd5389712017-05-19 08:36:13127 FrameLoadWaiter waiter(GetRenderView()->GetMainRenderFrame());
[email protected]c1978abe2013-04-23 03:08:12128 // If input encoding is empty, use UTF-8 as default encoding.
Blink Reformat1c4d759e2017-04-09 16:34:54129 if (encoding_info.IsEmpty()) {
130 GetMainFrame()->LoadHTMLString(contents, base_url);
[email protected]c1978abe2013-04-23 03:08:12131 } else {
[email protected]c1978abe2013-04-23 03:08:12132 // Do not use WebFrame.LoadHTMLString because it assumes that input
133 // html contents use UTF-8 encoding.
Daniel Chengd5389712017-05-19 08:36:13134 WebData data(contents.data(), contents.length());
135 GetMainFrame()->LoadData(data, "text/html", encoding_info, base_url);
[email protected]c1978abe2013-04-23 03:08:12136 }
Daniel Chengd5389712017-05-19 08:36:13137 base::MessageLoop::ScopedNestableTaskAllower allow(
138 base::MessageLoop::current());
139 waiter.Wait();
[email protected]c1978abe2013-04-23 03:08:12140 }
141
lukasza777a7dd2016-01-25 23:55:47142 class SingleLinkRewritingDelegate
143 : public WebFrameSerializer::LinkRewritingDelegate {
144 public:
145 SingleLinkRewritingDelegate(const WebURL& url, const WebString& localPath)
146 : url_(url), local_path_(localPath) {}
147
Blink Reformat1c4d759e2017-04-09 16:34:54148 bool RewriteFrameSource(WebFrame* frame,
lukasza777a7dd2016-01-25 23:55:47149 WebString* rewritten_link) override {
150 return false;
151 }
152
Blink Reformat1c4d759e2017-04-09 16:34:54153 bool RewriteLink(const WebURL& url, WebString* rewritten_link) override {
lukasza777a7dd2016-01-25 23:55:47154 if (url != url_)
155 return false;
156
157 *rewritten_link = local_path_;
158 return true;
159 }
160
161 private:
162 const WebURL url_;
163 const WebString local_path_;
164 };
165
lukaszad083d292015-09-30 00:42:44166 // Serialize DOM belonging to a frame with the specified |frame_url|.
167 void SerializeDomForURL(const GURL& frame_url) {
168 // Find corresponding WebFrame according to frame_url.
esprehn961779002015-11-16 04:35:13169 WebFrame* web_frame = FindSubFrameByURL(frame_url);
[email protected]c1978abe2013-04-23 03:08:12170 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54171 WebString file_path = WebString::FromUTF8("c:\\dummy.htm");
lukasza777a7dd2016-01-25 23:55:47172 SingleLinkRewritingDelegate delegate(frame_url, file_path);
[email protected]c1978abe2013-04-23 03:08:12173 // Start serializing DOM.
Blink Reformat1c4d759e2017-04-09 16:34:54174 bool result = WebFrameSerializer::Serialize(web_frame->ToWebLocalFrame(),
lukasza777a7dd2016-01-25 23:55:47175 this, &delegate);
[email protected]c1978abe2013-04-23 03:08:12176 ASSERT_TRUE(result);
[email protected]c1978abe2013-04-23 03:08:12177 }
178
179 void SerializeHTMLDOMWithDocTypeOnRenderer(const GURL& file_url) {
180 // Make sure original contents have document type.
esprehn961779002015-11-16 04:35:13181 WebFrame* web_frame = FindSubFrameByURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12182 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54183 WebDocument doc = web_frame->GetDocument();
[email protected]c1978abe2013-04-23 03:08:12184 ASSERT_TRUE(HasDocType(doc));
185 // Do serialization.
lukaszad083d292015-09-30 00:42:44186 SerializeDomForURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12187 // Load the serialized contents.
lukasza87f4e2ab2015-11-04 16:03:03188 ASSERT_TRUE(serialization_reported_end_of_data_);
189 LoadContents(serialized_contents_, file_url,
Blink Reformat1c4d759e2017-04-09 16:34:54190 web_frame->GetDocument().Encoding());
[email protected]c1978abe2013-04-23 03:08:12191 // Make sure serialized contents still have document type.
192 web_frame = GetMainFrame();
Blink Reformat1c4d759e2017-04-09 16:34:54193 doc = web_frame->GetDocument();
[email protected]c1978abe2013-04-23 03:08:12194 ASSERT_TRUE(HasDocType(doc));
195 }
196
197 void SerializeHTMLDOMWithoutDocTypeOnRenderer(const GURL& file_url) {
198 // Make sure original contents do not have document type.
esprehn961779002015-11-16 04:35:13199 WebFrame* web_frame = FindSubFrameByURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12200 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54201 WebDocument doc = web_frame->GetDocument();
[email protected]c1978abe2013-04-23 03:08:12202 ASSERT_TRUE(!HasDocType(doc));
203 // Do serialization.
lukaszad083d292015-09-30 00:42:44204 SerializeDomForURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12205 // Load the serialized contents.
lukasza87f4e2ab2015-11-04 16:03:03206 ASSERT_TRUE(serialization_reported_end_of_data_);
207 LoadContents(serialized_contents_, file_url,
Blink Reformat1c4d759e2017-04-09 16:34:54208 web_frame->GetDocument().Encoding());
[email protected]c1978abe2013-04-23 03:08:12209 // Make sure serialized contents do not have document type.
210 web_frame = GetMainFrame();
Blink Reformat1c4d759e2017-04-09 16:34:54211 doc = web_frame->GetDocument();
[email protected]c1978abe2013-04-23 03:08:12212 ASSERT_TRUE(!HasDocType(doc));
213 }
214
215 void SerializeXMLDocWithBuiltInEntitiesOnRenderer(
216 const GURL& xml_file_url, const std::string& original_contents) {
217 // Do serialization.
lukaszad083d292015-09-30 00:42:44218 SerializeDomForURL(xml_file_url);
[email protected]c1978abe2013-04-23 03:08:12219 // Compare the serialized contents with original contents.
lukasza87f4e2ab2015-11-04 16:03:03220 ASSERT_TRUE(serialization_reported_end_of_data_);
221 ASSERT_EQ(original_contents, serialized_contents_);
[email protected]c1978abe2013-04-23 03:08:12222 }
223
224 void SerializeHTMLDOMWithAddingMOTWOnRenderer(
[email protected]53625ac2013-04-23 04:28:11225 const GURL& file_url, const std::string& original_contents) {
226 // Make sure original contents does not have MOTW;
227 std::string motw_declaration =
Blink Reformat1c4d759e2017-04-09 16:34:54228 WebFrameSerializer::GenerateMarkOfTheWebDeclaration(file_url).Utf8();
[email protected]53625ac2013-04-23 04:28:11229 ASSERT_FALSE(motw_declaration.empty());
230 // The encoding of original contents is ISO-8859-1, so we convert the MOTW
231 // declaration to ASCII and search whether original contents has it or not.
232 ASSERT_TRUE(std::string::npos == original_contents.find(motw_declaration));
233
[email protected]c1978abe2013-04-23 03:08:12234 // Do serialization.
lukaszad083d292015-09-30 00:42:44235 SerializeDomForURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12236 // Make sure the serialized contents have MOTW ;
lukasza87f4e2ab2015-11-04 16:03:03237 ASSERT_TRUE(serialization_reported_end_of_data_);
[email protected]c1978abe2013-04-23 03:08:12238 ASSERT_FALSE(std::string::npos ==
lukasza87f4e2ab2015-11-04 16:03:03239 serialized_contents_.find(motw_declaration));
[email protected]c1978abe2013-04-23 03:08:12240 }
241
242 void SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer(
243 const GURL& file_url) {
244 // Make sure there is no META charset declaration in original document.
esprehn961779002015-11-16 04:35:13245 WebFrame* web_frame = FindSubFrameByURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12246 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54247 WebDocument doc = web_frame->GetDocument();
248 ASSERT_TRUE(doc.IsHTMLDocument());
249 WebElement head_element = doc.Head();
250 ASSERT_TRUE(!head_element.IsNull());
[email protected]c1978abe2013-04-23 03:08:12251 // Go through all children of HEAD element.
Blink Reformat1c4d759e2017-04-09 16:34:54252 WebElementCollection meta_elements =
253 head_element.GetElementsByHTMLTagName("meta");
254 for (WebElement element = meta_elements.FirstItem(); !element.IsNull();
255 element = meta_elements.NextItem()) {
256 ASSERT_TRUE(element.To<WebMetaElement>().ComputeEncoding().IsEmpty());
[email protected]c1978abe2013-04-23 03:08:12257 }
258 // Do serialization.
lukaszad083d292015-09-30 00:42:44259 SerializeDomForURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12260
261 // Load the serialized contents.
lukasza87f4e2ab2015-11-04 16:03:03262 ASSERT_TRUE(serialization_reported_end_of_data_);
263 LoadContents(serialized_contents_, file_url,
Blink Reformat1c4d759e2017-04-09 16:34:54264 web_frame->GetDocument().Encoding());
[email protected]c1978abe2013-04-23 03:08:12265 // Make sure the first child of HEAD element is META which has charset
266 // declaration in serialized contents.
267 web_frame = GetMainFrame();
268 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54269 doc = web_frame->GetDocument();
270 ASSERT_TRUE(doc.IsHTMLDocument());
271 head_element = doc.Head();
272 ASSERT_TRUE(!head_element.IsNull());
273 ASSERT_TRUE(!head_element.FirstChild().IsNull());
274 ASSERT_TRUE(head_element.FirstChild().IsElementNode());
275 WebMetaElement meta_element =
276 head_element.FirstChild().To<WebMetaElement>();
277 ASSERT_EQ(meta_element.ComputeEncoding(),
278 web_frame->GetDocument().Encoding());
[email protected]c1978abe2013-04-23 03:08:12279
280 // Make sure no more additional META tags which have charset declaration.
Blink Reformat1c4d759e2017-04-09 16:34:54281 meta_elements = head_element.GetElementsByHTMLTagName("meta");
282 for (WebElement element = meta_elements.FirstItem(); !element.IsNull();
283 element = meta_elements.NextItem()) {
esprehn467dcd92015-12-03 02:06:09284 if (element == meta_element)
285 continue;
Blink Reformat1c4d759e2017-04-09 16:34:54286 ASSERT_TRUE(element.To<WebMetaElement>().ComputeEncoding().IsEmpty());
[email protected]c1978abe2013-04-23 03:08:12287 }
288 }
289
290 void SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer(
291 const GURL& file_url) {
292 // Make sure there are multiple META charset declarations in original
293 // document.
esprehn961779002015-11-16 04:35:13294 WebFrame* web_frame = FindSubFrameByURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12295 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54296 WebDocument doc = web_frame->GetDocument();
297 ASSERT_TRUE(doc.IsHTMLDocument());
298 WebElement head_element = doc.Head();
299 ASSERT_TRUE(!head_element.IsNull());
[email protected]c1978abe2013-04-23 03:08:12300 // Go through all children of HEAD element.
301 int charset_declaration_count = 0;
Blink Reformat1c4d759e2017-04-09 16:34:54302 WebElementCollection meta_elements =
303 head_element.GetElementsByHTMLTagName("meta");
304 for (WebElement element = meta_elements.FirstItem(); !element.IsNull();
305 element = meta_elements.NextItem()) {
306 if (!element.To<WebMetaElement>().ComputeEncoding().IsEmpty())
esprehn467dcd92015-12-03 02:06:09307 ++charset_declaration_count;
[email protected]c1978abe2013-04-23 03:08:12308 }
309 // The original doc has more than META tags which have charset declaration.
310 ASSERT_TRUE(charset_declaration_count > 1);
311
312 // Do serialization.
lukaszad083d292015-09-30 00:42:44313 SerializeDomForURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12314
315 // Load the serialized contents.
lukasza87f4e2ab2015-11-04 16:03:03316 ASSERT_TRUE(serialization_reported_end_of_data_);
317 LoadContents(serialized_contents_, file_url,
Blink Reformat1c4d759e2017-04-09 16:34:54318 web_frame->GetDocument().Encoding());
[email protected]c1978abe2013-04-23 03:08:12319 // Make sure only first child of HEAD element is META which has charset
320 // declaration in serialized contents.
321 web_frame = GetMainFrame();
322 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54323 doc = web_frame->GetDocument();
324 ASSERT_TRUE(doc.IsHTMLDocument());
325 head_element = doc.Head();
326 ASSERT_TRUE(!head_element.IsNull());
327 ASSERT_TRUE(!head_element.FirstChild().IsNull());
328 ASSERT_TRUE(head_element.FirstChild().IsElementNode());
329 WebMetaElement meta_element =
330 head_element.FirstChild().To<WebMetaElement>();
331 ASSERT_EQ(meta_element.ComputeEncoding(),
332 web_frame->GetDocument().Encoding());
[email protected]c1978abe2013-04-23 03:08:12333
334 // Make sure no more additional META tags which have charset declaration.
Blink Reformat1c4d759e2017-04-09 16:34:54335 meta_elements = head_element.GetElementsByHTMLTagName("meta");
336 for (WebElement element = meta_elements.FirstItem(); !element.IsNull();
337 element = meta_elements.NextItem()) {
esprehn467dcd92015-12-03 02:06:09338 if (element == meta_element)
339 continue;
Blink Reformat1c4d759e2017-04-09 16:34:54340 ASSERT_TRUE(element.To<WebMetaElement>().ComputeEncoding().IsEmpty());
[email protected]c1978abe2013-04-23 03:08:12341 }
342 }
343
344 void SerializeHTMLDOMWithEntitiesInTextOnRenderer() {
345 base::FilePath page_file_path = GetTestFilePath(
346 "dom_serializer", "dom_serializer/htmlentities_in_text.htm");
347 // Get file URL. The URL is dummy URL to identify the following loading
348 // actions. The test content is in constant:original_contents.
349 GURL file_url = net::FilePathToFileURL(page_file_path);
350 ASSERT_TRUE(file_url.SchemeIsFile());
351 // Test contents.
352 static const char* const original_contents =
353 "<html><body>&amp;&lt;&gt;\"\'</body></html>";
354 // Load the test contents.
355 LoadContents(original_contents, file_url, WebString());
356
357 // Get BODY's text content in DOM.
esprehn961779002015-11-16 04:35:13358 WebFrame* web_frame = FindSubFrameByURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12359 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54360 WebDocument doc = web_frame->GetDocument();
361 ASSERT_TRUE(doc.IsHTMLDocument());
362 WebElement body_ele = doc.Body();
363 ASSERT_TRUE(!body_ele.IsNull());
364 WebNode text_node = body_ele.FirstChild();
365 ASSERT_TRUE(text_node.IsTextNode());
366 ASSERT_TRUE(std::string(text_node.NodeValue().Utf8()) == "&<>\"\'");
[email protected]c1978abe2013-04-23 03:08:12367 // Do serialization.
lukaszad083d292015-09-30 00:42:44368 SerializeDomForURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12369 // Compare the serialized contents with original contents.
lukasza87f4e2ab2015-11-04 16:03:03370 ASSERT_TRUE(serialization_reported_end_of_data_);
[email protected]c1978abe2013-04-23 03:08:12371 // Compare the serialized contents with original contents to make sure
372 // they are same.
373 // Because we add MOTW when serializing DOM, so before comparison, we also
374 // need to add MOTW to original_contents.
375 std::string original_str =
Blink Reformat1c4d759e2017-04-09 16:34:54376 WebFrameSerializer::GenerateMarkOfTheWebDeclaration(file_url).Utf8();
[email protected]c1978abe2013-04-23 03:08:12377 original_str += original_contents;
378 // Since WebCore now inserts a new HEAD element if there is no HEAD element
379 // when creating BODY element. (Please see
380 // HTMLParser::bodyCreateErrorCheck.) We need to append the HEAD content and
381 // corresponding META content if we find WebCore-generated HEAD element.
Blink Reformat1c4d759e2017-04-09 16:34:54382 if (!doc.Head().IsNull()) {
383 WebString encoding = web_frame->GetDocument().Encoding();
[email protected]c1978abe2013-04-23 03:08:12384 std::string htmlTag("<html>");
385 std::string::size_type pos = original_str.find(htmlTag);
386 ASSERT_NE(std::string::npos, pos);
387 pos += htmlTag.length();
388 std::string head_part("<head>");
389 head_part +=
Blink Reformat1c4d759e2017-04-09 16:34:54390 WebFrameSerializer::GenerateMetaCharsetDeclaration(encoding).Utf8();
[email protected]c1978abe2013-04-23 03:08:12391 head_part += "</head>";
392 original_str.insert(pos, head_part);
393 }
lukasza87f4e2ab2015-11-04 16:03:03394 ASSERT_EQ(original_str, serialized_contents_);
[email protected]c1978abe2013-04-23 03:08:12395 }
396
397 void SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer() {
398 base::FilePath page_file_path = GetTestFilePath(
399 "dom_serializer", "dom_serializer/htmlentities_in_attribute_value.htm");
400 // Get file URL. The URL is dummy URL to identify the following loading
401 // actions. The test content is in constant:original_contents.
402 GURL file_url = net::FilePathToFileURL(page_file_path);
403 ASSERT_TRUE(file_url.SchemeIsFile());
404 // Test contents.
405 static const char* const original_contents =
tkent99b65112015-08-17 03:05:07406 "<html><body title=\"&amp;&lt;&gt;&quot;&#39;\"></body></html>";
[email protected]c1978abe2013-04-23 03:08:12407 // Load the test contents.
408 LoadContents(original_contents, file_url, WebString());
409 // Get value of BODY's title attribute in DOM.
esprehn961779002015-11-16 04:35:13410 WebFrame* web_frame = FindSubFrameByURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12411 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54412 WebDocument doc = web_frame->GetDocument();
413 ASSERT_TRUE(doc.IsHTMLDocument());
414 WebElement body_ele = doc.Body();
415 ASSERT_TRUE(!body_ele.IsNull());
416 WebString value = body_ele.GetAttribute("title");
417 ASSERT_TRUE(std::string(value.Utf8()) == "&<>\"\'");
[email protected]c1978abe2013-04-23 03:08:12418 // Do serialization.
lukaszad083d292015-09-30 00:42:44419 SerializeDomForURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12420 // Compare the serialized contents with original contents.
lukasza87f4e2ab2015-11-04 16:03:03421 ASSERT_TRUE(serialization_reported_end_of_data_);
[email protected]c1978abe2013-04-23 03:08:12422 // Compare the serialized contents with original contents to make sure
423 // they are same.
424 std::string original_str =
Blink Reformat1c4d759e2017-04-09 16:34:54425 WebFrameSerializer::GenerateMarkOfTheWebDeclaration(file_url).Utf8();
[email protected]c1978abe2013-04-23 03:08:12426 original_str += original_contents;
Blink Reformat1c4d759e2017-04-09 16:34:54427 if (!doc.IsNull()) {
428 WebString encoding = web_frame->GetDocument().Encoding();
[email protected]c1978abe2013-04-23 03:08:12429 std::string htmlTag("<html>");
430 std::string::size_type pos = original_str.find(htmlTag);
431 ASSERT_NE(std::string::npos, pos);
432 pos += htmlTag.length();
433 std::string head_part("<head>");
434 head_part +=
Blink Reformat1c4d759e2017-04-09 16:34:54435 WebFrameSerializer::GenerateMetaCharsetDeclaration(encoding).Utf8();
[email protected]c1978abe2013-04-23 03:08:12436 head_part += "</head>";
437 original_str.insert(pos, head_part);
438 }
lukasza87f4e2ab2015-11-04 16:03:03439 ASSERT_EQ(original_str, serialized_contents_);
[email protected]c1978abe2013-04-23 03:08:12440 }
441
442 void SerializeHTMLDOMWithNonStandardEntitiesOnRenderer(const GURL& file_url) {
443 // Get value of BODY's title attribute in DOM.
esprehn961779002015-11-16 04:35:13444 WebFrame* web_frame = FindSubFrameByURL(file_url);
Blink Reformat1c4d759e2017-04-09 16:34:54445 WebDocument doc = web_frame->GetDocument();
446 ASSERT_TRUE(doc.IsHTMLDocument());
447 WebElement body_element = doc.Body();
[email protected]c1978abe2013-04-23 03:08:12448 // Unescaped string for "&percnt;&nsup;&sup1;&apos;".
449 static const wchar_t parsed_value[] = {
450 '%', 0x2285, 0x00b9, '\'', 0
451 };
Blink Reformat1c4d759e2017-04-09 16:34:54452 WebString value = body_element.GetAttribute("title");
453 WebString content = doc.ContentAsTextForTesting();
454 ASSERT_TRUE(base::UTF16ToWide(value.Utf16()) == parsed_value);
455 ASSERT_TRUE(base::UTF16ToWide(content.Utf16()) == parsed_value);
[email protected]c1978abe2013-04-23 03:08:12456
457 // Do serialization.
lukaszad083d292015-09-30 00:42:44458 SerializeDomForURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12459 // Check the serialized string.
lukasza87f4e2ab2015-11-04 16:03:03460 ASSERT_TRUE(serialization_reported_end_of_data_);
[email protected]c1978abe2013-04-23 03:08:12461 // Confirm that the serialized string has no non-standard HTML entities.
lukasza87f4e2ab2015-11-04 16:03:03462 ASSERT_EQ(std::string::npos, serialized_contents_.find("&percnt;"));
463 ASSERT_EQ(std::string::npos, serialized_contents_.find("&nsup;"));
464 ASSERT_EQ(std::string::npos, serialized_contents_.find("&sup1;"));
465 ASSERT_EQ(std::string::npos, serialized_contents_.find("&apos;"));
[email protected]c1978abe2013-04-23 03:08:12466 }
467
468 void SerializeHTMLDOMWithBaseTagOnRenderer(const GURL& file_url,
469 const GURL& path_dir_url) {
470 // There are total 2 available base tags in this test file.
471 const int kTotalBaseTagCountInTestFile = 2;
472
473 // Since for this test, we assume there is no savable sub-resource links for
474 // this test file, also all links are relative URLs in this test file, so we
475 // need to check those relative URLs and make sure document has BASE tag.
esprehn961779002015-11-16 04:35:13476 WebFrame* web_frame = FindSubFrameByURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12477 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54478 WebDocument doc = web_frame->GetDocument();
479 ASSERT_TRUE(doc.IsHTMLDocument());
[email protected]c1978abe2013-04-23 03:08:12480 // Go through all descent nodes.
Blink Reformat1c4d759e2017-04-09 16:34:54481 WebElementCollection all = doc.All();
[email protected]c1978abe2013-04-23 03:08:12482 int original_base_tag_count = 0;
Blink Reformat1c4d759e2017-04-09 16:34:54483 for (WebElement element = all.FirstItem(); !element.IsNull();
484 element = all.NextItem()) {
485 if (element.HasHTMLTagName("base")) {
[email protected]c1978abe2013-04-23 03:08:12486 original_base_tag_count++;
487 } else {
488 // Get link.
[email protected]12a936d2013-05-15 04:55:49489 WebString value = GetSubResourceLinkFromElement(element);
Blink Reformat1c4d759e2017-04-09 16:34:54490 if (value.IsNull() && element.HasHTMLTagName("a")) {
491 value = element.GetAttribute("href");
492 if (value.IsEmpty())
[email protected]c1978abe2013-04-23 03:08:12493 value = WebString();
494 }
495 // Each link is relative link.
Blink Reformat1c4d759e2017-04-09 16:34:54496 if (!value.IsNull()) {
497 GURL link(value.Utf8());
[email protected]c1978abe2013-04-23 03:08:12498 ASSERT_TRUE(link.scheme().empty());
499 }
500 }
501 }
502 ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile);
503 // Make sure in original document, the base URL is not equal with the
504 // |path_dir_url|.
Blink Reformat1c4d759e2017-04-09 16:34:54505 GURL original_base_url(doc.BaseURL());
[email protected]c1978abe2013-04-23 03:08:12506 ASSERT_NE(original_base_url, path_dir_url);
507
508 // Do serialization.
lukaszad083d292015-09-30 00:42:44509 SerializeDomForURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12510
511 // Load the serialized contents.
lukasza87f4e2ab2015-11-04 16:03:03512 ASSERT_TRUE(serialization_reported_end_of_data_);
513 LoadContents(serialized_contents_, file_url,
Blink Reformat1c4d759e2017-04-09 16:34:54514 web_frame->GetDocument().Encoding());
[email protected]c1978abe2013-04-23 03:08:12515
516 // Make sure all links are absolute URLs and doc there are some number of
517 // BASE tags in serialized HTML data. Each of those BASE tags have same base
518 // URL which is as same as URL of current test file.
519 web_frame = GetMainFrame();
520 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54521 doc = web_frame->GetDocument();
522 ASSERT_TRUE(doc.IsHTMLDocument());
[email protected]c1978abe2013-04-23 03:08:12523 // Go through all descent nodes.
Blink Reformat1c4d759e2017-04-09 16:34:54524 all = doc.All();
[email protected]c1978abe2013-04-23 03:08:12525 int new_base_tag_count = 0;
Blink Reformat1c4d759e2017-04-09 16:34:54526 for (WebNode node = all.FirstItem(); !node.IsNull();
527 node = all.NextItem()) {
528 if (!node.IsElementNode())
[email protected]c1978abe2013-04-23 03:08:12529 continue;
Blink Reformat1c4d759e2017-04-09 16:34:54530 WebElement element = node.To<WebElement>();
531 if (element.HasHTMLTagName("base")) {
[email protected]c1978abe2013-04-23 03:08:12532 new_base_tag_count++;
533 } else {
534 // Get link.
[email protected]12a936d2013-05-15 04:55:49535 WebString value = GetSubResourceLinkFromElement(element);
Blink Reformat1c4d759e2017-04-09 16:34:54536 if (value.IsNull() && element.HasHTMLTagName("a")) {
537 value = element.GetAttribute("href");
538 if (value.IsEmpty())
[email protected]c1978abe2013-04-23 03:08:12539 value = WebString();
540 }
541 // Each link is absolute link.
Blink Reformat1c4d759e2017-04-09 16:34:54542 if (!value.IsNull()) {
543 GURL link(std::string(value.Utf8()));
[email protected]c1978abe2013-04-23 03:08:12544 ASSERT_FALSE(link.scheme().empty());
545 }
546 }
547 }
tkent99b65112015-08-17 03:05:07548 // We have one more added BASE tag which is generated by JavaScript.
549 ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1);
[email protected]c1978abe2013-04-23 03:08:12550 // Make sure in new document, the base URL is equal with the |path_dir_url|.
Blink Reformat1c4d759e2017-04-09 16:34:54551 GURL new_base_url(doc.BaseURL());
[email protected]c1978abe2013-04-23 03:08:12552 ASSERT_EQ(new_base_url, path_dir_url);
553 }
554
555 void SerializeHTMLDOMWithEmptyHeadOnRenderer() {
556 base::FilePath page_file_path = GetTestFilePath(
557 "dom_serializer", "empty_head.htm");
558 GURL file_url = net::FilePathToFileURL(page_file_path);
559 ASSERT_TRUE(file_url.SchemeIsFile());
560
561 // Load the test html content.
562 static const char* const empty_head_contents =
563 "<html><head></head><body>hello world</body></html>";
564 LoadContents(empty_head_contents, file_url, WebString());
565
566 // Make sure the head tag is empty.
567 WebFrame* web_frame = GetMainFrame();
568 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54569 WebDocument doc = web_frame->GetDocument();
570 ASSERT_TRUE(doc.IsHTMLDocument());
571 WebElement head_element = doc.Head();
572 ASSERT_TRUE(!head_element.IsNull());
573 ASSERT_TRUE(head_element.FirstChild().IsNull());
[email protected]c1978abe2013-04-23 03:08:12574
575 // Do serialization.
lukaszad083d292015-09-30 00:42:44576 SerializeDomForURL(file_url);
lukasza87f4e2ab2015-11-04 16:03:03577 ASSERT_TRUE(serialization_reported_end_of_data_);
[email protected]c1978abe2013-04-23 03:08:12578
579 // Reload serialized contents and make sure there is only one META tag.
lukasza87f4e2ab2015-11-04 16:03:03580 LoadContents(serialized_contents_, file_url,
Blink Reformat1c4d759e2017-04-09 16:34:54581 web_frame->GetDocument().Encoding());
[email protected]c1978abe2013-04-23 03:08:12582 web_frame = GetMainFrame();
583 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54584 doc = web_frame->GetDocument();
585 ASSERT_TRUE(doc.IsHTMLDocument());
586 head_element = doc.Head();
587 ASSERT_TRUE(!head_element.IsNull());
588 ASSERT_TRUE(!head_element.FirstChild().IsNull());
589 ASSERT_TRUE(head_element.FirstChild().IsElementNode());
590 ASSERT_TRUE(head_element.FirstChild().NextSibling().IsNull());
591 WebMetaElement meta_element =
592 head_element.FirstChild().To<WebMetaElement>();
593 ASSERT_EQ(meta_element.ComputeEncoding(),
594 web_frame->GetDocument().Encoding());
[email protected]c1978abe2013-04-23 03:08:12595
596 // Check the body's first node is text node and its contents are
597 // "hello world"
Blink Reformat1c4d759e2017-04-09 16:34:54598 WebElement body_element = doc.Body();
599 ASSERT_TRUE(!body_element.IsNull());
600 WebNode text_node = body_element.FirstChild();
601 ASSERT_TRUE(text_node.IsTextNode());
602 ASSERT_EQ("hello world", text_node.NodeValue());
[email protected]c1978abe2013-04-23 03:08:12603 }
604
[email protected]c1978abe2013-04-23 03:08:12605 void SubResourceForElementsInNonHTMLNamespaceOnRenderer(
606 const GURL& file_url) {
esprehn961779002015-11-16 04:35:13607 WebFrame* web_frame = FindSubFrameByURL(file_url);
[email protected]c1978abe2013-04-23 03:08:12608 ASSERT_TRUE(web_frame != NULL);
Blink Reformat1c4d759e2017-04-09 16:34:54609 WebDocument doc = web_frame->GetDocument();
610 WebNode lastNodeInBody = doc.Body().LastChild();
611 ASSERT_TRUE(lastNodeInBody.IsElementNode());
612 WebString uri =
613 GetSubResourceLinkFromElement(lastNodeInBody.To<WebElement>());
614 EXPECT_TRUE(uri.IsNull());
[email protected]c1978abe2013-04-23 03:08:12615 }
616
617 private:
avi1023d012015-12-25 02:39:14618 int32_t render_view_routing_id_;
lukasza87f4e2ab2015-11-04 16:03:03619 std::string serialized_contents_;
620 bool serialization_reported_end_of_data_;
[email protected]c1978abe2013-04-23 03:08:12621};
622
623// If original contents have document type, the serialized contents also have
624// document type.
// Disabled on OSX by ellyjones@ on 2015-05-18, see https://crbug.com/488495,
// on all platforms by tsergeant@ on 2016-03-10, see https://crbug.com/593575
ellyjones15a15d2d2015-05-18 21:26:37627
628IN_PROC_BROWSER_TEST_F(DomSerializerTests,
tsergeant97b442d2016-03-10 05:46:04629 DISABLED_SerializeHTMLDOMWithDocType) {
[email protected]c1978abe2013-04-23 03:08:12630 base::FilePath page_file_path =
631 GetTestFilePath("dom_serializer", "youtube_1.htm");
632 GURL file_url = net::FilePathToFileURL(page_file_path);
633 ASSERT_TRUE(file_url.SchemeIsFile());
634 // Load the test file.
635 NavigateToURL(shell(), file_url);
636
637 PostTaskToInProcessRendererAndWait(
638 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithDocTypeOnRenderer,
639 base::Unretained(this), file_url));
640}
641
642// If original contents do not have document type, the serialized contents
643// also do not have document type.
644IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) {
645 base::FilePath page_file_path =
646 GetTestFilePath("dom_serializer", "youtube_2.htm");
647 GURL file_url = net::FilePathToFileURL(page_file_path);
648 ASSERT_TRUE(file_url.SchemeIsFile());
649 // Load the test file.
650 NavigateToURL(shell(), file_url);
651
652 PostTaskToInProcessRendererAndWait(
653 base::Bind(
654 &DomSerializerTests::SerializeHTMLDOMWithoutDocTypeOnRenderer,
655 base::Unretained(this), file_url));
656}
657
658// Serialize XML document which has all 5 built-in entities. After
659// finishing serialization, the serialized contents should be same
660// with original XML document.
tkent99b65112015-08-17 03:05:07661IN_PROC_BROWSER_TEST_F(DomSerializerTests,
lukaszaa5bb0172016-01-12 19:53:15662 SerializeXMLDocWithBuiltInEntities) {
[email protected]c1978abe2013-04-23 03:08:12663 base::FilePath page_file_path =
664 GetTestFilePath("dom_serializer", "note.html");
665 base::FilePath xml_file_path = GetTestFilePath("dom_serializer", "note.xml");
lukasza7947ccd2016-07-28 21:56:25666
[email protected]c1978abe2013-04-23 03:08:12667 std::string original_contents;
lukasza7947ccd2016-07-28 21:56:25668 {
669 // Read original contents for later comparison.
670 base::ThreadRestrictions::ScopedAllowIO allow_io_for_test_verifications;
671 ASSERT_TRUE(base::ReadFileToString(xml_file_path, &original_contents));
672 }
673
[email protected]c1978abe2013-04-23 03:08:12674 // Get file URL.
675 GURL file_url = net::FilePathToFileURL(page_file_path);
676 GURL xml_file_url = net::FilePathToFileURL(xml_file_path);
677 ASSERT_TRUE(file_url.SchemeIsFile());
lukasza7947ccd2016-07-28 21:56:25678
[email protected]c1978abe2013-04-23 03:08:12679 // Load the test file.
680 NavigateToURL(shell(), file_url);
681
682 PostTaskToInProcessRendererAndWait(
683 base::Bind(
684 &DomSerializerTests::SerializeXMLDocWithBuiltInEntitiesOnRenderer,
685 base::Unretained(this), xml_file_url, original_contents));
686}
687
688// When serializing DOM, we add MOTW declaration before html tag.
689IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) {
690 base::FilePath page_file_path =
691 GetTestFilePath("dom_serializer", "youtube_2.htm");
lukasza7947ccd2016-07-28 21:56:25692
[email protected]c1978abe2013-04-23 03:08:12693 std::string original_contents;
lukasza7947ccd2016-07-28 21:56:25694 {
695 // Read original contents for later comparison .
696 base::ThreadRestrictions::ScopedAllowIO allow_io_for_test_verifications;
697 ASSERT_TRUE(base::ReadFileToString(page_file_path, &original_contents));
698 }
699
[email protected]c1978abe2013-04-23 03:08:12700 // Get file URL.
701 GURL file_url = net::FilePathToFileURL(page_file_path);
702 ASSERT_TRUE(file_url.SchemeIsFile());
[email protected]53625ac2013-04-23 04:28:11703
[email protected]c1978abe2013-04-23 03:08:12704 // Load the test file.
705 NavigateToURL(shell(), file_url);
706
707 PostTaskToInProcessRendererAndWait(
708 base::Bind(
709 &DomSerializerTests::SerializeHTMLDOMWithAddingMOTWOnRenderer,
[email protected]53625ac2013-04-23 04:28:11710 base::Unretained(this), file_url, original_contents));
[email protected]c1978abe2013-04-23 03:08:12711}
712
// When serializing DOM, we add a META element with the correct charset
// declaration as the first child of the HEAD element, even if the original
// document does not have a META charset declaration. This resolves WebKit bug
// http://bugs.webkit.org/show_bug.cgi?id=16621.
// Disabled on OSX by battre@ on 2015-05-21, see https://crbug.com/488495,
// on all platforms by tsergeant@ on 2016-03-10, see https://crbug.com/593575
719IN_PROC_BROWSER_TEST_F(
720 DomSerializerTests,
721 DISABLED_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) {
[email protected]c1978abe2013-04-23 03:08:12722 base::FilePath page_file_path =
723 GetTestFilePath("dom_serializer", "youtube_1.htm");
724 // Get file URL.
725 GURL file_url = net::FilePathToFileURL(page_file_path);
726 ASSERT_TRUE(file_url.SchemeIsFile());
727 // Load the test file.
728 NavigateToURL(shell(), file_url);
729
730 PostTaskToInProcessRendererAndWait(
731 base::Bind(
732 &DomSerializerTests::
733 SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer,
734 base::Unretained(this), file_url));
735}
736
737// When serializing DOM, if the original document has multiple META charset
738// declaration, we will add the META which have correct charset declaration
739// as first child of HEAD element and remove all original META charset
740// declarations.
741IN_PROC_BROWSER_TEST_F(DomSerializerTests,
742 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) {
743 base::FilePath page_file_path =
744 GetTestFilePath("dom_serializer", "youtube_2.htm");
745 // Get file URL.
746 GURL file_url = net::FilePathToFileURL(page_file_path);
747 ASSERT_TRUE(file_url.SchemeIsFile());
748 // Load the test file.
749 NavigateToURL(shell(), file_url);
750
751 PostTaskToInProcessRendererAndWait(
752 base::Bind(
753 &DomSerializerTests::
754 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer,
755 base::Unretained(this), file_url));
756}
757
758// Test situation of html entities in text when serializing HTML DOM.
759IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) {
760 // Need to spin up the renderer and also navigate to a file url so that the
761 // renderer code doesn't attempt a fork when it sees a load to file scheme
762 // from non-file scheme.
763 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
764
765 PostTaskToInProcessRendererAndWait(
766 base::Bind(
767 &DomSerializerTests::SerializeHTMLDOMWithEntitiesInTextOnRenderer,
768 base::Unretained(this)));
769}
770
771// Test situation of html entities in attribute value when serializing
772// HTML DOM.
// This test started to fail at WebKit r65388. See http://crbug.com/52279.
774IN_PROC_BROWSER_TEST_F(DomSerializerTests,
lukaszaa5bb0172016-01-12 19:53:15775 SerializeHTMLDOMWithEntitiesInAttributeValue) {
[email protected]c1978abe2013-04-23 03:08:12776 // Need to spin up the renderer and also navigate to a file url so that the
777 // renderer code doesn't attempt a fork when it sees a load to file scheme
778 // from non-file scheme.
779 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
780
781 PostTaskToInProcessRendererAndWait(
782 base::Bind(
783 &DomSerializerTests::
784 SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer,
785 base::Unretained(this)));
786}
787
788// Test situation of non-standard HTML entities when serializing HTML DOM.
// This test started to fail at WebKit r65351. See http://crbug.com/52279.
790IN_PROC_BROWSER_TEST_F(DomSerializerTests,
791 SerializeHTMLDOMWithNonStandardEntities) {
792 // Make a test file URL and load it.
793 base::FilePath page_file_path = GetTestFilePath(
794 "dom_serializer", "nonstandard_htmlentities.htm");
795 GURL file_url = net::FilePathToFileURL(page_file_path);
796 NavigateToURL(shell(), file_url);
797
798 PostTaskToInProcessRendererAndWait(
799 base::Bind(
800 &DomSerializerTests::
801 SerializeHTMLDOMWithNonStandardEntitiesOnRenderer,
802 base::Unretained(this), file_url));
803}
804
805// Test situation of BASE tag in original document when serializing HTML DOM.
806// When serializing, we should comment the BASE tag, append a new BASE tag.
807// rewrite all the savable URLs to relative local path, and change other URLs
808// to absolute URLs.
tkent99b65112015-08-17 03:05:07809IN_PROC_BROWSER_TEST_F(DomSerializerTests,
lukaszaa5bb0172016-01-12 19:53:15810 SerializeHTMLDOMWithBaseTag) {
[email protected]c1978abe2013-04-23 03:08:12811 base::FilePath page_file_path = GetTestFilePath(
812 "dom_serializer", "html_doc_has_base_tag.htm");
813
814 // Get page dir URL which is base URL of this file.
815 base::FilePath dir_name = page_file_path.DirName();
816 dir_name = dir_name.Append(
817 base::FilePath::StringType(base::FilePath::kSeparators[0], 1));
818 GURL path_dir_url = net::FilePathToFileURL(dir_name);
819
820 // Get file URL.
821 GURL file_url = net::FilePathToFileURL(page_file_path);
822 ASSERT_TRUE(file_url.SchemeIsFile());
823 // Load the test file.
824 NavigateToURL(shell(), file_url);
825
826 PostTaskToInProcessRendererAndWait(
827 base::Bind(
828 &DomSerializerTests::SerializeHTMLDOMWithBaseTagOnRenderer,
829 base::Unretained(this), file_url, path_dir_url));
830}
831
832// Serializing page which has an empty HEAD tag.
[email protected]185d5012014-06-10 22:01:34833IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) {
[email protected]c1978abe2013-04-23 03:08:12834 // Need to spin up the renderer and also navigate to a file url so that the
835 // renderer code doesn't attempt a fork when it sees a load to file scheme
836 // from non-file scheme.
837 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
838
839 PostTaskToInProcessRendererAndWait(
840 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithEmptyHeadOnRenderer,
841 base::Unretained(this)));
842}
843
[email protected]c1978abe2013-04-23 03:08:12844IN_PROC_BROWSER_TEST_F(DomSerializerTests,
845 SubResourceForElementsInNonHTMLNamespace) {
846 base::FilePath page_file_path = GetTestFilePath(
847 "dom_serializer", "non_html_namespace.htm");
848 GURL file_url = net::FilePathToFileURL(page_file_path);
849 NavigateToURL(shell(), file_url);
850
851 PostTaskToInProcessRendererAndWait(
852 base::Bind(
853 &DomSerializerTests::
854 SubResourceForElementsInNonHTMLNamespaceOnRenderer,
855 base::Unretained(this), file_url));
856}
857
[email protected]c1978abe2013-04-23 03:08:12858} // namespace content