[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 1 | // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
avi | 1023d01 | 2015-12-25 02:39:14 | [diff] [blame] | 5 | #include <stddef.h> |
| 6 | #include <stdint.h> |
| 7 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 8 | #include "base/bind.h" |
| 9 | #include "base/command_line.h" |
| 10 | #include "base/compiler_specific.h" |
[email protected] | 14c1c23 | 2013-06-11 17:52:44 | [diff] [blame] | 11 | #include "base/containers/hash_tables.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 12 | #include "base/files/file_path.h" |
thestig | b7aad54f | 2014-09-05 18:25:39 | [diff] [blame] | 13 | #include "base/files/file_util.h" |
Daniel Cheng | d538971 | 2017-05-19 08:36:13 | [diff] [blame] | 14 | #include "base/message_loop/message_loop.h" |
[email protected] | 21aa9968 | 2013-06-11 07:17:01 | [diff] [blame] | 15 | #include "base/strings/string_util.h" |
[email protected] | 74ebfb1 | 2013-06-07 20:48:00 | [diff] [blame] | 16 | #include "base/strings/utf_string_conversions.h" |
lukasza | 7947ccd | 2016-07-28 21:56:25 | [diff] [blame] | 17 | #include "base/threading/thread_restrictions.h" |
avi | 1023d01 | 2015-12-25 02:39:14 | [diff] [blame] | 18 | #include "build/build_config.h" |
dcheng | 8b5e302 | 2015-09-02 23:58:55 | [diff] [blame] | 19 | #include "content/public/browser/render_view_host.h" |
| 20 | #include "content/public/browser/web_contents.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 21 | #include "content/public/common/content_switches.h" |
Daniel Cheng | d538971 | 2017-05-19 08:36:13 | [diff] [blame] | 22 | #include "content/public/renderer/render_frame.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 23 | #include "content/public/renderer/render_view.h" |
[email protected] | 6e9def1 | 2014-03-27 20:23:28 | [diff] [blame] | 24 | #include "content/public/test/content_browser_test.h" |
| 25 | #include "content/public/test/content_browser_test_utils.h" |
Daniel Cheng | d538971 | 2017-05-19 08:36:13 | [diff] [blame] | 26 | #include "content/public/test/frame_load_waiter.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 27 | #include "content/public/test/test_utils.h" |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 28 | #include "content/renderer/savable_resources.h" |
[email protected] | de7d61ff | 2013-08-20 11:30:41 | [diff] [blame] | 29 | #include "content/shell/browser/shell.h" |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 30 | #include "net/base/filename_util.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 31 | #include "net/url_request/url_request_context.h" |
[email protected] | de7d61ff | 2013-08-20 11:30:41 | [diff] [blame] | 32 | #include "third_party/WebKit/public/platform/WebCString.h" |
| 33 | #include "third_party/WebKit/public/platform/WebData.h" |
| 34 | #include "third_party/WebKit/public/platform/WebString.h" |
| 35 | #include "third_party/WebKit/public/platform/WebURL.h" |
| 36 | #include "third_party/WebKit/public/platform/WebVector.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 37 | #include "third_party/WebKit/public/web/WebDocument.h" |
| 38 | #include "third_party/WebKit/public/web/WebElement.h" |
[email protected] | c9393271 | 2014-02-07 18:49:02 | [diff] [blame] | 39 | #include "third_party/WebKit/public/web/WebElementCollection.h" |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 40 | #include "third_party/WebKit/public/web/WebFrameSerializer.h" |
| 41 | #include "third_party/WebKit/public/web/WebFrameSerializerClient.h" |
[email protected] | d357694 | 2014-04-10 18:45:37 | [diff] [blame] | 42 | #include "third_party/WebKit/public/web/WebLocalFrame.h" |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 43 | #include "third_party/WebKit/public/web/WebMetaElement.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 44 | #include "third_party/WebKit/public/web/WebNode.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 45 | #include "third_party/WebKit/public/web/WebView.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 46 | |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 47 | using blink::WebCString; |
| 48 | using blink::WebData; |
| 49 | using blink::WebDocument; |
| 50 | using blink::WebElement; |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 51 | using blink::WebMetaElement; |
[email protected] | c9393271 | 2014-02-07 18:49:02 | [diff] [blame] | 52 | using blink::WebElementCollection; |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 53 | using blink::WebFrame; |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 54 | using blink::WebFrameSerializer; |
| 55 | using blink::WebFrameSerializerClient; |
[email protected] | d357694 | 2014-04-10 18:45:37 | [diff] [blame] | 56 | using blink::WebLocalFrame; |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 57 | using blink::WebNode; |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 58 | using blink::WebString; |
| 59 | using blink::WebURL; |
| 60 | using blink::WebView; |
| 61 | using blink::WebVector; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 62 | |
| 63 | namespace content { |
| 64 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 65 | bool HasDocType(const WebDocument& doc) { |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 66 | return doc.FirstChild().IsDocumentTypeNode(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 67 | } |
| 68 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 69 | class DomSerializerTests : public ContentBrowserTest, |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 70 | public WebFrameSerializerClient { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 71 | public: |
lukasza | 4a96a1f0 | 2015-12-09 16:46:34 | [diff] [blame] | 72 | DomSerializerTests() : serialization_reported_end_of_data_(false) {} |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 73 | |
avi | 83883c8 | 2014-12-23 00:08:49 | [diff] [blame] | 74 | void SetUpCommandLine(base::CommandLine* command_line) override { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 75 | command_line->AppendSwitch(switches::kSingleProcess); |
[email protected] | 0c2c0d1 | 2014-02-04 17:24:43 | [diff] [blame] | 76 | #if defined(OS_WIN) |
[email protected] | 5d97f76 | 2013-04-23 06:15:49 | [diff] [blame] | 77 | // Don't want to try to create a GPU process. |
[email protected] | 7af65ca | 2014-04-16 20:25:56 | [diff] [blame] | 78 | command_line->AppendSwitch(switches::kDisableGpu); |
[email protected] | 5d97f76 | 2013-04-23 06:15:49 | [diff] [blame] | 79 | #endif |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 80 | } |
| 81 | |
dcheng | 03226bd | 2015-09-03 17:36:23 | [diff] [blame] | 82 | void SetUpOnMainThread() override { |
| 83 | render_view_routing_id_ = |
| 84 | shell()->web_contents()->GetRenderViewHost()->GetRoutingID(); |
| 85 | } |
| 86 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 87 | // DomSerializerDelegate. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 88 | void DidSerializeDataForFrame(const WebCString& data, |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 89 | FrameSerializationStatus status) override { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 90 | // Check finish status of current frame. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 91 | ASSERT_FALSE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 92 | |
| 93 | // Add data to corresponding frame's content. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 94 | serialized_contents_ += data; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 95 | |
| 96 | // Current frame is completed saving, change the finish status. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 97 | if (status == WebFrameSerializerClient::kCurrentFrameIsFinished) |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 98 | serialization_reported_end_of_data_ = true; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 99 | } |
| 100 | |
| 101 | RenderView* GetRenderView() { |
dcheng | 03226bd | 2015-09-03 17:36:23 | [diff] [blame] | 102 | return RenderView::FromRoutingID(render_view_routing_id_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 103 | } |
| 104 | |
| 105 | WebView* GetWebView() { |
| 106 | return GetRenderView()->GetWebView(); |
| 107 | } |
| 108 | |
Daniel Cheng | d538971 | 2017-05-19 08:36:13 | [diff] [blame] | 109 | WebLocalFrame* GetMainFrame() { |
| 110 | return GetRenderView()->GetMainRenderFrame()->GetWebFrame(); |
| 111 | } |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 112 | |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 113 | WebFrame* FindSubFrameByURL(const GURL& url) { |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 114 | for (WebFrame* frame = GetWebView()->MainFrame(); frame; |
| 115 | frame = frame->TraverseNext()) { |
| 116 | if (GURL(frame->GetDocument().Url()) == url) |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 117 | return frame; |
| 118 | } |
| 119 | return nullptr; |
| 120 | } |
| 121 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 122 | // Load web page according to input content and relative URLs within |
| 123 | // the document. |
| 124 | void LoadContents(const std::string& contents, |
| 125 | const GURL& base_url, |
| 126 | const WebString encoding_info) { |
Daniel Cheng | d538971 | 2017-05-19 08:36:13 | [diff] [blame] | 127 | FrameLoadWaiter waiter(GetRenderView()->GetMainRenderFrame()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 128 | // If input encoding is empty, use UTF-8 as default encoding. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 129 | if (encoding_info.IsEmpty()) { |
| 130 | GetMainFrame()->LoadHTMLString(contents, base_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 131 | } else { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 132 | // Do not use WebFrame.LoadHTMLString because it assumes that input |
| 133 | // html contents use UTF-8 encoding. |
Daniel Cheng | d538971 | 2017-05-19 08:36:13 | [diff] [blame] | 134 | WebData data(contents.data(), contents.length()); |
| 135 | GetMainFrame()->LoadData(data, "text/html", encoding_info, base_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 136 | } |
Daniel Cheng | d538971 | 2017-05-19 08:36:13 | [diff] [blame] | 137 | base::MessageLoop::ScopedNestableTaskAllower allow( |
| 138 | base::MessageLoop::current()); |
| 139 | waiter.Wait(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 140 | } |
| 141 | |
lukasza | 777a7dd | 2016-01-25 23:55:47 | [diff] [blame] | 142 | class SingleLinkRewritingDelegate |
| 143 | : public WebFrameSerializer::LinkRewritingDelegate { |
| 144 | public: |
| 145 | SingleLinkRewritingDelegate(const WebURL& url, const WebString& localPath) |
| 146 | : url_(url), local_path_(localPath) {} |
| 147 | |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 148 | bool RewriteFrameSource(WebFrame* frame, |
lukasza | 777a7dd | 2016-01-25 23:55:47 | [diff] [blame] | 149 | WebString* rewritten_link) override { |
| 150 | return false; |
| 151 | } |
| 152 | |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 153 | bool RewriteLink(const WebURL& url, WebString* rewritten_link) override { |
lukasza | 777a7dd | 2016-01-25 23:55:47 | [diff] [blame] | 154 | if (url != url_) |
| 155 | return false; |
| 156 | |
| 157 | *rewritten_link = local_path_; |
| 158 | return true; |
| 159 | } |
| 160 | |
| 161 | private: |
| 162 | const WebURL url_; |
| 163 | const WebString local_path_; |
| 164 | }; |
| 165 | |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 166 | // Serialize DOM belonging to a frame with the specified |frame_url|. |
| 167 | void SerializeDomForURL(const GURL& frame_url) { |
| 168 | // Find corresponding WebFrame according to frame_url. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 169 | WebFrame* web_frame = FindSubFrameByURL(frame_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 170 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 171 | WebString file_path = WebString::FromUTF8("c:\\dummy.htm"); |
lukasza | 777a7dd | 2016-01-25 23:55:47 | [diff] [blame] | 172 | SingleLinkRewritingDelegate delegate(frame_url, file_path); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 173 | // Start serializing DOM. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 174 | bool result = WebFrameSerializer::Serialize(web_frame->ToWebLocalFrame(), |
lukasza | 777a7dd | 2016-01-25 23:55:47 | [diff] [blame] | 175 | this, &delegate); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 176 | ASSERT_TRUE(result); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 177 | } |
| 178 | |
| 179 | void SerializeHTMLDOMWithDocTypeOnRenderer(const GURL& file_url) { |
| 180 | // Make sure original contents have document type. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 181 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 182 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 183 | WebDocument doc = web_frame->GetDocument(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 184 | ASSERT_TRUE(HasDocType(doc)); |
| 185 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 186 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 187 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 188 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 189 | LoadContents(serialized_contents_, file_url, |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 190 | web_frame->GetDocument().Encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 191 | // Make sure serialized contents still have document type. |
| 192 | web_frame = GetMainFrame(); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 193 | doc = web_frame->GetDocument(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 194 | ASSERT_TRUE(HasDocType(doc)); |
| 195 | } |
| 196 | |
| 197 | void SerializeHTMLDOMWithoutDocTypeOnRenderer(const GURL& file_url) { |
| 198 | // Make sure original contents do not have document type. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 199 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 200 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 201 | WebDocument doc = web_frame->GetDocument(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 202 | ASSERT_TRUE(!HasDocType(doc)); |
| 203 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 204 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 205 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 206 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 207 | LoadContents(serialized_contents_, file_url, |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 208 | web_frame->GetDocument().Encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 209 | // Make sure serialized contents do not have document type. |
| 210 | web_frame = GetMainFrame(); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 211 | doc = web_frame->GetDocument(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 212 | ASSERT_TRUE(!HasDocType(doc)); |
| 213 | } |
| 214 | |
| 215 | void SerializeXMLDocWithBuiltInEntitiesOnRenderer( |
| 216 | const GURL& xml_file_url, const std::string& original_contents) { |
| 217 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 218 | SerializeDomForURL(xml_file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 219 | // Compare the serialized contents with original contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 220 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 221 | ASSERT_EQ(original_contents, serialized_contents_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 222 | } |
| 223 | |
| 224 | void SerializeHTMLDOMWithAddingMOTWOnRenderer( |
[email protected] | 53625ac | 2013-04-23 04:28:11 | [diff] [blame] | 225 | const GURL& file_url, const std::string& original_contents) { |
| 226 | // Make sure original contents does not have MOTW; |
| 227 | std::string motw_declaration = |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 228 | WebFrameSerializer::GenerateMarkOfTheWebDeclaration(file_url).Utf8(); |
[email protected] | 53625ac | 2013-04-23 04:28:11 | [diff] [blame] | 229 | ASSERT_FALSE(motw_declaration.empty()); |
| 230 | // The encoding of original contents is ISO-8859-1, so we convert the MOTW |
| 231 | // declaration to ASCII and search whether original contents has it or not. |
| 232 | ASSERT_TRUE(std::string::npos == original_contents.find(motw_declaration)); |
| 233 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 234 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 235 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 236 | // Make sure the serialized contents have MOTW ; |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 237 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 238 | ASSERT_FALSE(std::string::npos == |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 239 | serialized_contents_.find(motw_declaration)); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 240 | } |
| 241 | |
| 242 | void SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer( |
| 243 | const GURL& file_url) { |
| 244 | // Make sure there is no META charset declaration in original document. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 245 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 246 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 247 | WebDocument doc = web_frame->GetDocument(); |
| 248 | ASSERT_TRUE(doc.IsHTMLDocument()); |
| 249 | WebElement head_element = doc.Head(); |
| 250 | ASSERT_TRUE(!head_element.IsNull()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 251 | // Go through all children of HEAD element. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 252 | WebElementCollection meta_elements = |
| 253 | head_element.GetElementsByHTMLTagName("meta"); |
| 254 | for (WebElement element = meta_elements.FirstItem(); !element.IsNull(); |
| 255 | element = meta_elements.NextItem()) { |
| 256 | ASSERT_TRUE(element.To<WebMetaElement>().ComputeEncoding().IsEmpty()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 257 | } |
| 258 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 259 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 260 | |
| 261 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 262 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 263 | LoadContents(serialized_contents_, file_url, |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 264 | web_frame->GetDocument().Encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 265 | // Make sure the first child of HEAD element is META which has charset |
| 266 | // declaration in serialized contents. |
| 267 | web_frame = GetMainFrame(); |
| 268 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 269 | doc = web_frame->GetDocument(); |
| 270 | ASSERT_TRUE(doc.IsHTMLDocument()); |
| 271 | head_element = doc.Head(); |
| 272 | ASSERT_TRUE(!head_element.IsNull()); |
| 273 | ASSERT_TRUE(!head_element.FirstChild().IsNull()); |
| 274 | ASSERT_TRUE(head_element.FirstChild().IsElementNode()); |
| 275 | WebMetaElement meta_element = |
| 276 | head_element.FirstChild().To<WebMetaElement>(); |
| 277 | ASSERT_EQ(meta_element.ComputeEncoding(), |
| 278 | web_frame->GetDocument().Encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 279 | |
| 280 | // Make sure no more additional META tags which have charset declaration. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 281 | meta_elements = head_element.GetElementsByHTMLTagName("meta"); |
| 282 | for (WebElement element = meta_elements.FirstItem(); !element.IsNull(); |
| 283 | element = meta_elements.NextItem()) { |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 284 | if (element == meta_element) |
| 285 | continue; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 286 | ASSERT_TRUE(element.To<WebMetaElement>().ComputeEncoding().IsEmpty()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 287 | } |
| 288 | } |
| 289 | |
| 290 | void SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer( |
| 291 | const GURL& file_url) { |
| 292 | // Make sure there are multiple META charset declarations in original |
| 293 | // document. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 294 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 295 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 296 | WebDocument doc = web_frame->GetDocument(); |
| 297 | ASSERT_TRUE(doc.IsHTMLDocument()); |
| 298 | WebElement head_element = doc.Head(); |
| 299 | ASSERT_TRUE(!head_element.IsNull()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 300 | // Go through all children of HEAD element. |
| 301 | int charset_declaration_count = 0; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 302 | WebElementCollection meta_elements = |
| 303 | head_element.GetElementsByHTMLTagName("meta"); |
| 304 | for (WebElement element = meta_elements.FirstItem(); !element.IsNull(); |
| 305 | element = meta_elements.NextItem()) { |
| 306 | if (!element.To<WebMetaElement>().ComputeEncoding().IsEmpty()) |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 307 | ++charset_declaration_count; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 308 | } |
| 309 | // The original doc has more than META tags which have charset declaration. |
| 310 | ASSERT_TRUE(charset_declaration_count > 1); |
| 311 | |
| 312 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 313 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 314 | |
| 315 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 316 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 317 | LoadContents(serialized_contents_, file_url, |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 318 | web_frame->GetDocument().Encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 319 | // Make sure only first child of HEAD element is META which has charset |
| 320 | // declaration in serialized contents. |
| 321 | web_frame = GetMainFrame(); |
| 322 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 323 | doc = web_frame->GetDocument(); |
| 324 | ASSERT_TRUE(doc.IsHTMLDocument()); |
| 325 | head_element = doc.Head(); |
| 326 | ASSERT_TRUE(!head_element.IsNull()); |
| 327 | ASSERT_TRUE(!head_element.FirstChild().IsNull()); |
| 328 | ASSERT_TRUE(head_element.FirstChild().IsElementNode()); |
| 329 | WebMetaElement meta_element = |
| 330 | head_element.FirstChild().To<WebMetaElement>(); |
| 331 | ASSERT_EQ(meta_element.ComputeEncoding(), |
| 332 | web_frame->GetDocument().Encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 333 | |
| 334 | // Make sure no more additional META tags which have charset declaration. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 335 | meta_elements = head_element.GetElementsByHTMLTagName("meta"); |
| 336 | for (WebElement element = meta_elements.FirstItem(); !element.IsNull(); |
| 337 | element = meta_elements.NextItem()) { |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 338 | if (element == meta_element) |
| 339 | continue; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 340 | ASSERT_TRUE(element.To<WebMetaElement>().ComputeEncoding().IsEmpty()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 341 | } |
| 342 | } |
| 343 | |
| 344 | void SerializeHTMLDOMWithEntitiesInTextOnRenderer() { |
| 345 | base::FilePath page_file_path = GetTestFilePath( |
| 346 | "dom_serializer", "dom_serializer/htmlentities_in_text.htm"); |
| 347 | // Get file URL. The URL is dummy URL to identify the following loading |
| 348 | // actions. The test content is in constant:original_contents. |
| 349 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 350 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 351 | // Test contents. |
| 352 | static const char* const original_contents = |
| 353 | "<html><body>&<>\"\'</body></html>"; |
| 354 | // Load the test contents. |
| 355 | LoadContents(original_contents, file_url, WebString()); |
| 356 | |
| 357 | // Get BODY's text content in DOM. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 358 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 359 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 360 | WebDocument doc = web_frame->GetDocument(); |
| 361 | ASSERT_TRUE(doc.IsHTMLDocument()); |
| 362 | WebElement body_ele = doc.Body(); |
| 363 | ASSERT_TRUE(!body_ele.IsNull()); |
| 364 | WebNode text_node = body_ele.FirstChild(); |
| 365 | ASSERT_TRUE(text_node.IsTextNode()); |
| 366 | ASSERT_TRUE(std::string(text_node.NodeValue().Utf8()) == "&<>\"\'"); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 367 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 368 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 369 | // Compare the serialized contents with original contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 370 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 371 | // Compare the serialized contents with original contents to make sure |
| 372 | // they are same. |
| 373 | // Because we add MOTW when serializing DOM, so before comparison, we also |
| 374 | // need to add MOTW to original_contents. |
| 375 | std::string original_str = |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 376 | WebFrameSerializer::GenerateMarkOfTheWebDeclaration(file_url).Utf8(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 377 | original_str += original_contents; |
| 378 | // Since WebCore now inserts a new HEAD element if there is no HEAD element |
| 379 | // when creating BODY element. (Please see |
| 380 | // HTMLParser::bodyCreateErrorCheck.) We need to append the HEAD content and |
| 381 | // corresponding META content if we find WebCore-generated HEAD element. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 382 | if (!doc.Head().IsNull()) { |
| 383 | WebString encoding = web_frame->GetDocument().Encoding(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 384 | std::string htmlTag("<html>"); |
| 385 | std::string::size_type pos = original_str.find(htmlTag); |
| 386 | ASSERT_NE(std::string::npos, pos); |
| 387 | pos += htmlTag.length(); |
| 388 | std::string head_part("<head>"); |
| 389 | head_part += |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 390 | WebFrameSerializer::GenerateMetaCharsetDeclaration(encoding).Utf8(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 391 | head_part += "</head>"; |
| 392 | original_str.insert(pos, head_part); |
| 393 | } |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 394 | ASSERT_EQ(original_str, serialized_contents_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 395 | } |
| 396 | |
| 397 | void SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer() { |
| 398 | base::FilePath page_file_path = GetTestFilePath( |
| 399 | "dom_serializer", "dom_serializer/htmlentities_in_attribute_value.htm"); |
| 400 | // Get file URL. The URL is dummy URL to identify the following loading |
| 401 | // actions. The test content is in constant:original_contents. |
| 402 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 403 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 404 | // Test contents. |
| 405 | static const char* const original_contents = |
tkent | 99b6511 | 2015-08-17 03:05:07 | [diff] [blame] | 406 | "<html><body title=\"&<>"'\"></body></html>"; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 407 | // Load the test contents. |
| 408 | LoadContents(original_contents, file_url, WebString()); |
| 409 | // Get value of BODY's title attribute in DOM. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 410 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 411 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 412 | WebDocument doc = web_frame->GetDocument(); |
| 413 | ASSERT_TRUE(doc.IsHTMLDocument()); |
| 414 | WebElement body_ele = doc.Body(); |
| 415 | ASSERT_TRUE(!body_ele.IsNull()); |
| 416 | WebString value = body_ele.GetAttribute("title"); |
| 417 | ASSERT_TRUE(std::string(value.Utf8()) == "&<>\"\'"); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 418 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 419 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 420 | // Compare the serialized contents with original contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 421 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 422 | // Compare the serialized contents with original contents to make sure |
| 423 | // they are same. |
| 424 | std::string original_str = |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 425 | WebFrameSerializer::GenerateMarkOfTheWebDeclaration(file_url).Utf8(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 426 | original_str += original_contents; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 427 | if (!doc.IsNull()) { |
| 428 | WebString encoding = web_frame->GetDocument().Encoding(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 429 | std::string htmlTag("<html>"); |
| 430 | std::string::size_type pos = original_str.find(htmlTag); |
| 431 | ASSERT_NE(std::string::npos, pos); |
| 432 | pos += htmlTag.length(); |
| 433 | std::string head_part("<head>"); |
| 434 | head_part += |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 435 | WebFrameSerializer::GenerateMetaCharsetDeclaration(encoding).Utf8(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 436 | head_part += "</head>"; |
| 437 | original_str.insert(pos, head_part); |
| 438 | } |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 439 | ASSERT_EQ(original_str, serialized_contents_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 440 | } |
| 441 | |
| 442 | void SerializeHTMLDOMWithNonStandardEntitiesOnRenderer(const GURL& file_url) { |
| 443 | // Get value of BODY's title attribute in DOM. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 444 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 445 | WebDocument doc = web_frame->GetDocument(); |
| 446 | ASSERT_TRUE(doc.IsHTMLDocument()); |
| 447 | WebElement body_element = doc.Body(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 448 | // Unescaped string for "%⊅¹'". |
| 449 | static const wchar_t parsed_value[] = { |
| 450 | '%', 0x2285, 0x00b9, '\'', 0 |
| 451 | }; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 452 | WebString value = body_element.GetAttribute("title"); |
| 453 | WebString content = doc.ContentAsTextForTesting(); |
| 454 | ASSERT_TRUE(base::UTF16ToWide(value.Utf16()) == parsed_value); |
| 455 | ASSERT_TRUE(base::UTF16ToWide(content.Utf16()) == parsed_value); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 456 | |
| 457 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 458 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 459 | // Check the serialized string. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 460 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 461 | // Confirm that the serialized string has no non-standard HTML entities. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 462 | ASSERT_EQ(std::string::npos, serialized_contents_.find("%")); |
| 463 | ASSERT_EQ(std::string::npos, serialized_contents_.find("⊅")); |
| 464 | ASSERT_EQ(std::string::npos, serialized_contents_.find("¹")); |
| 465 | ASSERT_EQ(std::string::npos, serialized_contents_.find("'")); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 466 | } |
| 467 | |
| 468 | void SerializeHTMLDOMWithBaseTagOnRenderer(const GURL& file_url, |
| 469 | const GURL& path_dir_url) { |
| 470 | // There are total 2 available base tags in this test file. |
| 471 | const int kTotalBaseTagCountInTestFile = 2; |
| 472 | |
| 473 | // Since for this test, we assume there is no savable sub-resource links for |
| 474 | // this test file, also all links are relative URLs in this test file, so we |
| 475 | // need to check those relative URLs and make sure document has BASE tag. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 476 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 477 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 478 | WebDocument doc = web_frame->GetDocument(); |
| 479 | ASSERT_TRUE(doc.IsHTMLDocument()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 480 | // Go through all descent nodes. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 481 | WebElementCollection all = doc.All(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 482 | int original_base_tag_count = 0; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 483 | for (WebElement element = all.FirstItem(); !element.IsNull(); |
| 484 | element = all.NextItem()) { |
| 485 | if (element.HasHTMLTagName("base")) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 486 | original_base_tag_count++; |
| 487 | } else { |
| 488 | // Get link. |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 489 | WebString value = GetSubResourceLinkFromElement(element); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 490 | if (value.IsNull() && element.HasHTMLTagName("a")) { |
| 491 | value = element.GetAttribute("href"); |
| 492 | if (value.IsEmpty()) |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 493 | value = WebString(); |
| 494 | } |
| 495 | // Each link is relative link. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 496 | if (!value.IsNull()) { |
| 497 | GURL link(value.Utf8()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 498 | ASSERT_TRUE(link.scheme().empty()); |
| 499 | } |
| 500 | } |
| 501 | } |
| 502 | ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile); |
| 503 | // Make sure in original document, the base URL is not equal with the |
| 504 | // |path_dir_url|. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 505 | GURL original_base_url(doc.BaseURL()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 506 | ASSERT_NE(original_base_url, path_dir_url); |
| 507 | |
| 508 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 509 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 510 | |
| 511 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 512 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 513 | LoadContents(serialized_contents_, file_url, |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 514 | web_frame->GetDocument().Encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 515 | |
| 516 | // Make sure all links are absolute URLs and doc there are some number of |
| 517 | // BASE tags in serialized HTML data. Each of those BASE tags have same base |
| 518 | // URL which is as same as URL of current test file. |
| 519 | web_frame = GetMainFrame(); |
| 520 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 521 | doc = web_frame->GetDocument(); |
| 522 | ASSERT_TRUE(doc.IsHTMLDocument()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 523 | // Go through all descent nodes. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 524 | all = doc.All(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 525 | int new_base_tag_count = 0; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 526 | for (WebNode node = all.FirstItem(); !node.IsNull(); |
| 527 | node = all.NextItem()) { |
| 528 | if (!node.IsElementNode()) |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 529 | continue; |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 530 | WebElement element = node.To<WebElement>(); |
| 531 | if (element.HasHTMLTagName("base")) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 532 | new_base_tag_count++; |
| 533 | } else { |
| 534 | // Get link. |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 535 | WebString value = GetSubResourceLinkFromElement(element); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 536 | if (value.IsNull() && element.HasHTMLTagName("a")) { |
| 537 | value = element.GetAttribute("href"); |
| 538 | if (value.IsEmpty()) |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 539 | value = WebString(); |
| 540 | } |
| 541 | // Each link is absolute link. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 542 | if (!value.IsNull()) { |
| 543 | GURL link(std::string(value.Utf8())); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 544 | ASSERT_FALSE(link.scheme().empty()); |
| 545 | } |
| 546 | } |
| 547 | } |
tkent | 99b6511 | 2015-08-17 03:05:07 | [diff] [blame] | 548 | // We have one more added BASE tag which is generated by JavaScript. |
| 549 | ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 550 | // Make sure in new document, the base URL is equal with the |path_dir_url|. |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 551 | GURL new_base_url(doc.BaseURL()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 552 | ASSERT_EQ(new_base_url, path_dir_url); |
| 553 | } |
| 554 | |
| 555 | void SerializeHTMLDOMWithEmptyHeadOnRenderer() { |
| 556 | base::FilePath page_file_path = GetTestFilePath( |
| 557 | "dom_serializer", "empty_head.htm"); |
| 558 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 559 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 560 | |
| 561 | // Load the test html content. |
| 562 | static const char* const empty_head_contents = |
| 563 | "<html><head></head><body>hello world</body></html>"; |
| 564 | LoadContents(empty_head_contents, file_url, WebString()); |
| 565 | |
| 566 | // Make sure the head tag is empty. |
| 567 | WebFrame* web_frame = GetMainFrame(); |
| 568 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 569 | WebDocument doc = web_frame->GetDocument(); |
| 570 | ASSERT_TRUE(doc.IsHTMLDocument()); |
| 571 | WebElement head_element = doc.Head(); |
| 572 | ASSERT_TRUE(!head_element.IsNull()); |
| 573 | ASSERT_TRUE(head_element.FirstChild().IsNull()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 574 | |
| 575 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 576 | SerializeDomForURL(file_url); |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 577 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 578 | |
| 579 | // Reload serialized contents and make sure there is only one META tag. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 580 | LoadContents(serialized_contents_, file_url, |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 581 | web_frame->GetDocument().Encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 582 | web_frame = GetMainFrame(); |
| 583 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 584 | doc = web_frame->GetDocument(); |
| 585 | ASSERT_TRUE(doc.IsHTMLDocument()); |
| 586 | head_element = doc.Head(); |
| 587 | ASSERT_TRUE(!head_element.IsNull()); |
| 588 | ASSERT_TRUE(!head_element.FirstChild().IsNull()); |
| 589 | ASSERT_TRUE(head_element.FirstChild().IsElementNode()); |
| 590 | ASSERT_TRUE(head_element.FirstChild().NextSibling().IsNull()); |
| 591 | WebMetaElement meta_element = |
| 592 | head_element.FirstChild().To<WebMetaElement>(); |
| 593 | ASSERT_EQ(meta_element.ComputeEncoding(), |
| 594 | web_frame->GetDocument().Encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 595 | |
| 596 | // Check the body's first node is text node and its contents are |
| 597 | // "hello world" |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 598 | WebElement body_element = doc.Body(); |
| 599 | ASSERT_TRUE(!body_element.IsNull()); |
| 600 | WebNode text_node = body_element.FirstChild(); |
| 601 | ASSERT_TRUE(text_node.IsTextNode()); |
| 602 | ASSERT_EQ("hello world", text_node.NodeValue()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 603 | } |
| 604 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 605 | void SubResourceForElementsInNonHTMLNamespaceOnRenderer( |
| 606 | const GURL& file_url) { |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 607 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 608 | ASSERT_TRUE(web_frame != NULL); |
Blink Reformat | 1c4d759e | 2017-04-09 16:34:54 | [diff] [blame] | 609 | WebDocument doc = web_frame->GetDocument(); |
| 610 | WebNode lastNodeInBody = doc.Body().LastChild(); |
| 611 | ASSERT_TRUE(lastNodeInBody.IsElementNode()); |
| 612 | WebString uri = |
| 613 | GetSubResourceLinkFromElement(lastNodeInBody.To<WebElement>()); |
| 614 | EXPECT_TRUE(uri.IsNull()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 615 | } |
| 616 | |
| 617 | private: |
avi | 1023d01 | 2015-12-25 02:39:14 | [diff] [blame] | 618 | int32_t render_view_routing_id_; |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 619 | std::string serialized_contents_; |
| 620 | bool serialization_reported_end_of_data_; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 621 | }; |
| 622 | |
| 623 | // If original contents have document type, the serialized contents also have |
| 624 | // document type. |
tsergeant | 97b442d | 2016-03-10 05:46:04 | [diff] [blame] | 625 | // Disabled on OSX by ellyjones@ on 2015-05-18, see https://crbug.com/488495, |
| 626 | // on all platforms by tsergeant@ on 2016-03-10, see https://crbug.com/593575 |
ellyjones | 15a15d2d | 2015-05-18 21:26:37 | [diff] [blame] | 627 | |
| 628 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
tsergeant | 97b442d | 2016-03-10 05:46:04 | [diff] [blame] | 629 | DISABLED_SerializeHTMLDOMWithDocType) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 630 | base::FilePath page_file_path = |
| 631 | GetTestFilePath("dom_serializer", "youtube_1.htm"); |
| 632 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 633 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 634 | // Load the test file. |
| 635 | NavigateToURL(shell(), file_url); |
| 636 | |
| 637 | PostTaskToInProcessRendererAndWait( |
| 638 | base::Bind(&DomSerializerTests::SerializeHTMLDOMWithDocTypeOnRenderer, |
| 639 | base::Unretained(this), file_url)); |
| 640 | } |
| 641 | |
| 642 | // If original contents do not have document type, the serialized contents |
| 643 | // also do not have document type. |
| 644 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) { |
| 645 | base::FilePath page_file_path = |
| 646 | GetTestFilePath("dom_serializer", "youtube_2.htm"); |
| 647 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 648 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 649 | // Load the test file. |
| 650 | NavigateToURL(shell(), file_url); |
| 651 | |
| 652 | PostTaskToInProcessRendererAndWait( |
| 653 | base::Bind( |
| 654 | &DomSerializerTests::SerializeHTMLDOMWithoutDocTypeOnRenderer, |
| 655 | base::Unretained(this), file_url)); |
| 656 | } |
| 657 | |
| 658 | // Serialize XML document which has all 5 built-in entities. After |
| 659 | // finishing serialization, the serialized contents should be same |
| 660 | // with original XML document. |
tkent | 99b6511 | 2015-08-17 03:05:07 | [diff] [blame] | 661 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
lukasza | a5bb017 | 2016-01-12 19:53:15 | [diff] [blame] | 662 | SerializeXMLDocWithBuiltInEntities) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 663 | base::FilePath page_file_path = |
| 664 | GetTestFilePath("dom_serializer", "note.html"); |
| 665 | base::FilePath xml_file_path = GetTestFilePath("dom_serializer", "note.xml"); |
lukasza | 7947ccd | 2016-07-28 21:56:25 | [diff] [blame] | 666 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 667 | std::string original_contents; |
lukasza | 7947ccd | 2016-07-28 21:56:25 | [diff] [blame] | 668 | { |
| 669 | // Read original contents for later comparison. |
| 670 | base::ThreadRestrictions::ScopedAllowIO allow_io_for_test_verifications; |
| 671 | ASSERT_TRUE(base::ReadFileToString(xml_file_path, &original_contents)); |
| 672 | } |
| 673 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 674 | // Get file URL. |
| 675 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 676 | GURL xml_file_url = net::FilePathToFileURL(xml_file_path); |
| 677 | ASSERT_TRUE(file_url.SchemeIsFile()); |
lukasza | 7947ccd | 2016-07-28 21:56:25 | [diff] [blame] | 678 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 679 | // Load the test file. |
| 680 | NavigateToURL(shell(), file_url); |
| 681 | |
| 682 | PostTaskToInProcessRendererAndWait( |
| 683 | base::Bind( |
| 684 | &DomSerializerTests::SerializeXMLDocWithBuiltInEntitiesOnRenderer, |
| 685 | base::Unretained(this), xml_file_url, original_contents)); |
| 686 | } |
| 687 | |
| 688 | // When serializing DOM, we add MOTW declaration before html tag. |
| 689 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) { |
| 690 | base::FilePath page_file_path = |
| 691 | GetTestFilePath("dom_serializer", "youtube_2.htm"); |
lukasza | 7947ccd | 2016-07-28 21:56:25 | [diff] [blame] | 692 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 693 | std::string original_contents; |
lukasza | 7947ccd | 2016-07-28 21:56:25 | [diff] [blame] | 694 | { |
| 695 | // Read original contents for later comparison . |
| 696 | base::ThreadRestrictions::ScopedAllowIO allow_io_for_test_verifications; |
| 697 | ASSERT_TRUE(base::ReadFileToString(page_file_path, &original_contents)); |
| 698 | } |
| 699 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 700 | // Get file URL. |
| 701 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 702 | ASSERT_TRUE(file_url.SchemeIsFile()); |
[email protected] | 53625ac | 2013-04-23 04:28:11 | [diff] [blame] | 703 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 704 | // Load the test file. |
| 705 | NavigateToURL(shell(), file_url); |
| 706 | |
| 707 | PostTaskToInProcessRendererAndWait( |
| 708 | base::Bind( |
| 709 | &DomSerializerTests::SerializeHTMLDOMWithAddingMOTWOnRenderer, |
[email protected] | 53625ac | 2013-04-23 04:28:11 | [diff] [blame] | 710 | base::Unretained(this), file_url, original_contents)); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 711 | } |
| 712 | |
| 713 | // When serializing DOM, we will add the META which have correct charset |
| 714 | // declaration as first child of HEAD element for resolving WebKit bug: |
| 715 | // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document |
| 716 | // does not have META charset declaration. |
tsergeant | 97b442d | 2016-03-10 05:46:04 | [diff] [blame] | 717 | // Disabled on OSX by battre@ on 2015-05-21, see https://crbug.com/488495, |
| 718 | // on all platforms by tsergeant@ on 2016-03-10, see https://crbug.com/593575 |
| 719 | IN_PROC_BROWSER_TEST_F( |
| 720 | DomSerializerTests, |
| 721 | DISABLED_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 722 | base::FilePath page_file_path = |
| 723 | GetTestFilePath("dom_serializer", "youtube_1.htm"); |
| 724 | // Get file URL. |
| 725 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 726 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 727 | // Load the test file. |
| 728 | NavigateToURL(shell(), file_url); |
| 729 | |
| 730 | PostTaskToInProcessRendererAndWait( |
| 731 | base::Bind( |
| 732 | &DomSerializerTests:: |
| 733 | SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer, |
| 734 | base::Unretained(this), file_url)); |
| 735 | } |
| 736 | |
| 737 | // When serializing DOM, if the original document has multiple META charset |
| 738 | // declaration, we will add the META which have correct charset declaration |
| 739 | // as first child of HEAD element and remove all original META charset |
| 740 | // declarations. |
| 741 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
| 742 | SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) { |
| 743 | base::FilePath page_file_path = |
| 744 | GetTestFilePath("dom_serializer", "youtube_2.htm"); |
| 745 | // Get file URL. |
| 746 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 747 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 748 | // Load the test file. |
| 749 | NavigateToURL(shell(), file_url); |
| 750 | |
| 751 | PostTaskToInProcessRendererAndWait( |
| 752 | base::Bind( |
| 753 | &DomSerializerTests:: |
| 754 | SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer, |
| 755 | base::Unretained(this), file_url)); |
| 756 | } |
| 757 | |
| 758 | // Test situation of html entities in text when serializing HTML DOM. |
| 759 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) { |
| 760 | // Need to spin up the renderer and also navigate to a file url so that the |
| 761 | // renderer code doesn't attempt a fork when it sees a load to file scheme |
| 762 | // from non-file scheme. |
| 763 | NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")); |
| 764 | |
| 765 | PostTaskToInProcessRendererAndWait( |
| 766 | base::Bind( |
| 767 | &DomSerializerTests::SerializeHTMLDOMWithEntitiesInTextOnRenderer, |
| 768 | base::Unretained(this))); |
| 769 | } |
| 770 | |
| 771 | // Test situation of html entities in attribute value when serializing |
| 772 | // HTML DOM. |
| 773 | // This test started to fail at WebKit r65388. See http://crbug.com/52279. |
| 774 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
lukasza | a5bb017 | 2016-01-12 19:53:15 | [diff] [blame] | 775 | SerializeHTMLDOMWithEntitiesInAttributeValue) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 776 | // Need to spin up the renderer and also navigate to a file url so that the |
| 777 | // renderer code doesn't attempt a fork when it sees a load to file scheme |
| 778 | // from non-file scheme. |
| 779 | NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")); |
| 780 | |
| 781 | PostTaskToInProcessRendererAndWait( |
| 782 | base::Bind( |
| 783 | &DomSerializerTests:: |
| 784 | SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer, |
| 785 | base::Unretained(this))); |
| 786 | } |
| 787 | |
| 788 | // Test situation of non-standard HTML entities when serializing HTML DOM. |
| 789 | // This test started to fail at WebKit r65351. See http://crbug.com/52279. |
| 790 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
| 791 | SerializeHTMLDOMWithNonStandardEntities) { |
| 792 | // Make a test file URL and load it. |
| 793 | base::FilePath page_file_path = GetTestFilePath( |
| 794 | "dom_serializer", "nonstandard_htmlentities.htm"); |
| 795 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 796 | NavigateToURL(shell(), file_url); |
| 797 | |
| 798 | PostTaskToInProcessRendererAndWait( |
| 799 | base::Bind( |
| 800 | &DomSerializerTests:: |
| 801 | SerializeHTMLDOMWithNonStandardEntitiesOnRenderer, |
| 802 | base::Unretained(this), file_url)); |
| 803 | } |
| 804 | |
| 805 | // Test situation of BASE tag in original document when serializing HTML DOM. |
| 806 | // When serializing, we should comment the BASE tag, append a new BASE tag. |
| 807 | // rewrite all the savable URLs to relative local path, and change other URLs |
| 808 | // to absolute URLs. |
tkent | 99b6511 | 2015-08-17 03:05:07 | [diff] [blame] | 809 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
lukasza | a5bb017 | 2016-01-12 19:53:15 | [diff] [blame] | 810 | SerializeHTMLDOMWithBaseTag) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 811 | base::FilePath page_file_path = GetTestFilePath( |
| 812 | "dom_serializer", "html_doc_has_base_tag.htm"); |
| 813 | |
| 814 | // Get page dir URL which is base URL of this file. |
| 815 | base::FilePath dir_name = page_file_path.DirName(); |
| 816 | dir_name = dir_name.Append( |
| 817 | base::FilePath::StringType(base::FilePath::kSeparators[0], 1)); |
| 818 | GURL path_dir_url = net::FilePathToFileURL(dir_name); |
| 819 | |
| 820 | // Get file URL. |
| 821 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 822 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 823 | // Load the test file. |
| 824 | NavigateToURL(shell(), file_url); |
| 825 | |
| 826 | PostTaskToInProcessRendererAndWait( |
| 827 | base::Bind( |
| 828 | &DomSerializerTests::SerializeHTMLDOMWithBaseTagOnRenderer, |
| 829 | base::Unretained(this), file_url, path_dir_url)); |
| 830 | } |
| 831 | |
| 832 | // Serializing page which has an empty HEAD tag. |
[email protected] | 185d501 | 2014-06-10 22:01:34 | [diff] [blame] | 833 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 834 | // Need to spin up the renderer and also navigate to a file url so that the |
| 835 | // renderer code doesn't attempt a fork when it sees a load to file scheme |
| 836 | // from non-file scheme. |
| 837 | NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")); |
| 838 | |
| 839 | PostTaskToInProcessRendererAndWait( |
| 840 | base::Bind(&DomSerializerTests::SerializeHTMLDOMWithEmptyHeadOnRenderer, |
| 841 | base::Unretained(this))); |
| 842 | } |
| 843 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 844 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
| 845 | SubResourceForElementsInNonHTMLNamespace) { |
| 846 | base::FilePath page_file_path = GetTestFilePath( |
| 847 | "dom_serializer", "non_html_namespace.htm"); |
| 848 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 849 | NavigateToURL(shell(), file_url); |
| 850 | |
| 851 | PostTaskToInProcessRendererAndWait( |
| 852 | base::Bind( |
| 853 | &DomSerializerTests:: |
| 854 | SubResourceForElementsInNonHTMLNamespaceOnRenderer, |
| 855 | base::Unretained(this), file_url)); |
| 856 | } |
| 857 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 858 | } // namespace content |