[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 1 | // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
avi | 1023d01 | 2015-12-25 02:39:14 | [diff] [blame] | 5 | #include <stddef.h> |
| 6 | #include <stdint.h> |
| 7 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 8 | #include "base/bind.h" |
| 9 | #include "base/command_line.h" |
| 10 | #include "base/compiler_specific.h" |
[email protected] | 14c1c23 | 2013-06-11 17:52:44 | [diff] [blame] | 11 | #include "base/containers/hash_tables.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 12 | #include "base/files/file_path.h" |
thestig | b7aad54f | 2014-09-05 18:25:39 | [diff] [blame] | 13 | #include "base/files/file_util.h" |
[email protected] | 21aa9968 | 2013-06-11 07:17:01 | [diff] [blame] | 14 | #include "base/strings/string_util.h" |
[email protected] | 74ebfb1 | 2013-06-07 20:48:00 | [diff] [blame] | 15 | #include "base/strings/utf_string_conversions.h" |
avi | 1023d01 | 2015-12-25 02:39:14 | [diff] [blame] | 16 | #include "build/build_config.h" |
dcheng | 8b5e302 | 2015-09-02 23:58:55 | [diff] [blame] | 17 | #include "content/public/browser/render_view_host.h" |
| 18 | #include "content/public/browser/web_contents.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 19 | #include "content/public/common/content_switches.h" |
| 20 | #include "content/public/renderer/render_view.h" |
| 21 | #include "content/public/renderer/render_view_observer.h" |
[email protected] | 6e9def1 | 2014-03-27 20:23:28 | [diff] [blame] | 22 | #include "content/public/test/content_browser_test.h" |
| 23 | #include "content/public/test/content_browser_test_utils.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 24 | #include "content/public/test/test_utils.h" |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 25 | #include "content/renderer/savable_resources.h" |
[email protected] | de7d61ff | 2013-08-20 11:30:41 | [diff] [blame] | 26 | #include "content/shell/browser/shell.h" |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 27 | #include "net/base/filename_util.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 28 | #include "net/url_request/url_request_context.h" |
[email protected] | de7d61ff | 2013-08-20 11:30:41 | [diff] [blame] | 29 | #include "third_party/WebKit/public/platform/WebCString.h" |
| 30 | #include "third_party/WebKit/public/platform/WebData.h" |
| 31 | #include "third_party/WebKit/public/platform/WebString.h" |
| 32 | #include "third_party/WebKit/public/platform/WebURL.h" |
| 33 | #include "third_party/WebKit/public/platform/WebVector.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 34 | #include "third_party/WebKit/public/web/WebDocument.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 35 | #include "third_party/WebKit/public/web/WebElement.h" |
[email protected] | c9393271 | 2014-02-07 18:49:02 | [diff] [blame] | 36 | #include "third_party/WebKit/public/web/WebElementCollection.h" |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 37 | #include "third_party/WebKit/public/web/WebFrameSerializer.h" |
| 38 | #include "third_party/WebKit/public/web/WebFrameSerializerClient.h" |
[email protected] | d357694 | 2014-04-10 18:45:37 | [diff] [blame] | 39 | #include "third_party/WebKit/public/web/WebLocalFrame.h" |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 40 | #include "third_party/WebKit/public/web/WebMetaElement.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 41 | #include "third_party/WebKit/public/web/WebNode.h" |
[email protected] | 2255a933 | 2013-06-17 05:12:31 | [diff] [blame] | 42 | #include "third_party/WebKit/public/web/WebView.h" |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 43 | |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 44 | using blink::WebCString; |
| 45 | using blink::WebData; |
| 46 | using blink::WebDocument; |
| 47 | using blink::WebElement; |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 48 | using blink::WebMetaElement; |
[email protected] | c9393271 | 2014-02-07 18:49:02 | [diff] [blame] | 49 | using blink::WebElementCollection; |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 50 | using blink::WebFrame; |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 51 | using blink::WebFrameSerializer; |
| 52 | using blink::WebFrameSerializerClient; |
[email protected] | d357694 | 2014-04-10 18:45:37 | [diff] [blame] | 53 | using blink::WebLocalFrame; |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 54 | using blink::WebNode; |
[email protected] | 180ef24 | 2013-11-07 06:50:46 | [diff] [blame] | 55 | using blink::WebString; |
| 56 | using blink::WebURL; |
| 57 | using blink::WebView; |
| 58 | using blink::WebVector; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 59 | |
| 60 | namespace content { |
| 61 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 62 | bool HasDocType(const WebDocument& doc) { |
esprehn | 867ce32 | 2015-11-16 20:58:49 | [diff] [blame] | 63 | return doc.firstChild().isDocumentTypeNode(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 64 | } |
| 65 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 66 | class LoadObserver : public RenderViewObserver { |
| 67 | public: |
| 68 | LoadObserver(RenderView* render_view, const base::Closure& quit_closure) |
| 69 | : RenderViewObserver(render_view), |
| 70 | quit_closure_(quit_closure) {} |
| 71 | |
dcheng | 6d18e40 | 2014-10-21 12:32:52 | [diff] [blame] | 72 | void DidFinishLoad(blink::WebLocalFrame* frame) override { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 73 | if (frame == render_view()->GetWebView()->mainFrame()) |
| 74 | quit_closure_.Run(); |
| 75 | } |
| 76 | |
| 77 | private: |
| 78 | base::Closure quit_closure_; |
| 79 | }; |
| 80 | |
| 81 | class DomSerializerTests : public ContentBrowserTest, |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 82 | public WebFrameSerializerClient { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 83 | public: |
lukasza | 4a96a1f0 | 2015-12-09 16:46:34 | [diff] [blame] | 84 | DomSerializerTests() : serialization_reported_end_of_data_(false) {} |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 85 | |
avi | 83883c8 | 2014-12-23 00:08:49 | [diff] [blame] | 86 | void SetUpCommandLine(base::CommandLine* command_line) override { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 87 | command_line->AppendSwitch(switches::kSingleProcess); |
[email protected] | 0c2c0d1 | 2014-02-04 17:24:43 | [diff] [blame] | 88 | #if defined(OS_WIN) |
[email protected] | 5d97f76 | 2013-04-23 06:15:49 | [diff] [blame] | 89 | // Don't want to try to create a GPU process. |
[email protected] | 7af65ca | 2014-04-16 20:25:56 | [diff] [blame] | 90 | command_line->AppendSwitch(switches::kDisableGpu); |
[email protected] | 5d97f76 | 2013-04-23 06:15:49 | [diff] [blame] | 91 | #endif |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 92 | } |
| 93 | |
dcheng | 03226bd | 2015-09-03 17:36:23 | [diff] [blame] | 94 | void SetUpOnMainThread() override { |
| 95 | render_view_routing_id_ = |
| 96 | shell()->web_contents()->GetRenderViewHost()->GetRoutingID(); |
| 97 | } |
| 98 | |
// WebFrameSerializerClient implementation.
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 100 | void didSerializeDataForFrame(const WebCString& data, |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 101 | FrameSerializationStatus status) override { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 102 | // Check finish status of current frame. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 103 | ASSERT_FALSE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 104 | |
| 105 | // Add data to corresponding frame's content. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 106 | serialized_contents_ += data; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 107 | |
| 108 | // Current frame is completed saving, change the finish status. |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 109 | if (status == WebFrameSerializerClient::CurrentFrameIsFinished) |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 110 | serialization_reported_end_of_data_ = true; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 111 | } |
| 112 | |
| 113 | RenderView* GetRenderView() { |
dcheng | 03226bd | 2015-09-03 17:36:23 | [diff] [blame] | 114 | return RenderView::FromRoutingID(render_view_routing_id_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 115 | } |
| 116 | |
| 117 | WebView* GetWebView() { |
| 118 | return GetRenderView()->GetWebView(); |
| 119 | } |
| 120 | |
| 121 | WebFrame* GetMainFrame() { |
| 122 | return GetWebView()->mainFrame(); |
| 123 | } |
| 124 | |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 125 | WebFrame* FindSubFrameByURL(const GURL& url) { |
| 126 | for (WebFrame* frame = GetWebView()->mainFrame(); frame; |
| 127 | frame = frame->traverseNext(false)) { |
| 128 | if (GURL(frame->document().url()) == url) |
| 129 | return frame; |
| 130 | } |
| 131 | return nullptr; |
| 132 | } |
| 133 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 134 | // Load web page according to input content and relative URLs within |
| 135 | // the document. |
| 136 | void LoadContents(const std::string& contents, |
| 137 | const GURL& base_url, |
| 138 | const WebString encoding_info) { |
| 139 | scoped_refptr<MessageLoopRunner> runner = new MessageLoopRunner; |
| 140 | LoadObserver observer(GetRenderView(), runner->QuitClosure()); |
| 141 | |
| 142 | // If input encoding is empty, use UTF-8 as default encoding. |
| 143 | if (encoding_info.isEmpty()) { |
| 144 | GetMainFrame()->loadHTMLString(contents, base_url); |
| 145 | } else { |
| 146 | WebData data(contents.data(), contents.length()); |
| 147 | |
| 148 | // Do not use WebFrame.LoadHTMLString because it assumes that input |
| 149 | // html contents use UTF-8 encoding. |
| 150 | // TODO(darin): This should use WebFrame::loadData. |
| 151 | WebFrame* web_frame = GetMainFrame(); |
| 152 | |
| 153 | ASSERT_TRUE(web_frame != NULL); |
| 154 | |
clamy | 0f4ccc8 | 2016-02-24 12:43:53 | [diff] [blame] | 155 | web_frame->toWebLocalFrame()->loadData(data, "text/html", encoding_info, |
| 156 | base_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 157 | } |
| 158 | |
| 159 | runner->Run(); |
| 160 | } |
| 161 | |
lukasza | 777a7dd | 2016-01-25 23:55:47 | [diff] [blame] | 162 | class SingleLinkRewritingDelegate |
| 163 | : public WebFrameSerializer::LinkRewritingDelegate { |
| 164 | public: |
| 165 | SingleLinkRewritingDelegate(const WebURL& url, const WebString& localPath) |
| 166 | : url_(url), local_path_(localPath) {} |
| 167 | |
| 168 | bool rewriteFrameSource(WebFrame* frame, |
| 169 | WebString* rewritten_link) override { |
| 170 | return false; |
| 171 | } |
| 172 | |
| 173 | bool rewriteLink(const WebURL& url, WebString* rewritten_link) override { |
| 174 | if (url != url_) |
| 175 | return false; |
| 176 | |
| 177 | *rewritten_link = local_path_; |
| 178 | return true; |
| 179 | } |
| 180 | |
| 181 | private: |
| 182 | const WebURL url_; |
| 183 | const WebString local_path_; |
| 184 | }; |
| 185 | |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 186 | // Serialize DOM belonging to a frame with the specified |frame_url|. |
| 187 | void SerializeDomForURL(const GURL& frame_url) { |
| 188 | // Find corresponding WebFrame according to frame_url. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 189 | WebFrame* web_frame = FindSubFrameByURL(frame_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 190 | ASSERT_TRUE(web_frame != NULL); |
[email protected] | 728c2ee | 2013-06-25 04:01:07 | [diff] [blame] | 191 | WebString file_path = |
| 192 | base::FilePath(FILE_PATH_LITERAL("c:\\dummy.htm")).AsUTF16Unsafe(); |
lukasza | 777a7dd | 2016-01-25 23:55:47 | [diff] [blame] | 193 | SingleLinkRewritingDelegate delegate(frame_url, file_path); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 194 | // Start serializing DOM. |
lukasza | 777a7dd | 2016-01-25 23:55:47 | [diff] [blame] | 195 | bool result = WebFrameSerializer::serialize(web_frame->toWebLocalFrame(), |
| 196 | this, &delegate); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 197 | ASSERT_TRUE(result); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 198 | } |
| 199 | |
| 200 | void SerializeHTMLDOMWithDocTypeOnRenderer(const GURL& file_url) { |
| 201 | // Make sure original contents have document type. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 202 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 203 | ASSERT_TRUE(web_frame != NULL); |
| 204 | WebDocument doc = web_frame->document(); |
| 205 | ASSERT_TRUE(HasDocType(doc)); |
| 206 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 207 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 208 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 209 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 210 | LoadContents(serialized_contents_, file_url, |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 211 | web_frame->document().encoding()); |
| 212 | // Make sure serialized contents still have document type. |
| 213 | web_frame = GetMainFrame(); |
| 214 | doc = web_frame->document(); |
| 215 | ASSERT_TRUE(HasDocType(doc)); |
| 216 | } |
| 217 | |
| 218 | void SerializeHTMLDOMWithoutDocTypeOnRenderer(const GURL& file_url) { |
| 219 | // Make sure original contents do not have document type. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 220 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 221 | ASSERT_TRUE(web_frame != NULL); |
| 222 | WebDocument doc = web_frame->document(); |
| 223 | ASSERT_TRUE(!HasDocType(doc)); |
| 224 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 225 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 226 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 227 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 228 | LoadContents(serialized_contents_, file_url, |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 229 | web_frame->document().encoding()); |
| 230 | // Make sure serialized contents do not have document type. |
| 231 | web_frame = GetMainFrame(); |
| 232 | doc = web_frame->document(); |
| 233 | ASSERT_TRUE(!HasDocType(doc)); |
| 234 | } |
| 235 | |
| 236 | void SerializeXMLDocWithBuiltInEntitiesOnRenderer( |
| 237 | const GURL& xml_file_url, const std::string& original_contents) { |
| 238 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 239 | SerializeDomForURL(xml_file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 240 | // Compare the serialized contents with original contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 241 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 242 | ASSERT_EQ(original_contents, serialized_contents_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 243 | } |
| 244 | |
| 245 | void SerializeHTMLDOMWithAddingMOTWOnRenderer( |
[email protected] | 53625ac | 2013-04-23 04:28:11 | [diff] [blame] | 246 | const GURL& file_url, const std::string& original_contents) { |
| 247 | // Make sure original contents does not have MOTW; |
| 248 | std::string motw_declaration = |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 249 | WebFrameSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); |
[email protected] | 53625ac | 2013-04-23 04:28:11 | [diff] [blame] | 250 | ASSERT_FALSE(motw_declaration.empty()); |
| 251 | // The encoding of original contents is ISO-8859-1, so we convert the MOTW |
| 252 | // declaration to ASCII and search whether original contents has it or not. |
| 253 | ASSERT_TRUE(std::string::npos == original_contents.find(motw_declaration)); |
| 254 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 255 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 256 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 257 | // Make sure the serialized contents have MOTW ; |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 258 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 259 | ASSERT_FALSE(std::string::npos == |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 260 | serialized_contents_.find(motw_declaration)); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 261 | } |
| 262 | |
| 263 | void SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer( |
| 264 | const GURL& file_url) { |
| 265 | // Make sure there is no META charset declaration in original document. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 266 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 267 | ASSERT_TRUE(web_frame != NULL); |
| 268 | WebDocument doc = web_frame->document(); |
| 269 | ASSERT_TRUE(doc.isHTMLDocument()); |
| 270 | WebElement head_element = doc.head(); |
| 271 | ASSERT_TRUE(!head_element.isNull()); |
| 272 | // Go through all children of HEAD element. |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 273 | WebElementCollection meta_elements = head_element. |
| 274 | getElementsByHTMLTagName("meta"); |
| 275 | for (WebElement element = meta_elements.firstItem(); !element.isNull(); |
| 276 | element = meta_elements.nextItem()) { |
| 277 | ASSERT_TRUE(element.to<WebMetaElement>().computeEncoding().isEmpty()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 278 | } |
| 279 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 280 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 281 | |
| 282 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 283 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 284 | LoadContents(serialized_contents_, file_url, |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 285 | web_frame->document().encoding()); |
| 286 | // Make sure the first child of HEAD element is META which has charset |
| 287 | // declaration in serialized contents. |
| 288 | web_frame = GetMainFrame(); |
| 289 | ASSERT_TRUE(web_frame != NULL); |
| 290 | doc = web_frame->document(); |
| 291 | ASSERT_TRUE(doc.isHTMLDocument()); |
| 292 | head_element = doc.head(); |
| 293 | ASSERT_TRUE(!head_element.isNull()); |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 294 | ASSERT_TRUE(!head_element.firstChild().isNull()); |
| 295 | ASSERT_TRUE(head_element.firstChild().isElementNode()); |
| 296 | WebMetaElement meta_element = head_element.firstChild(). |
| 297 | to<WebMetaElement>(); |
| 298 | ASSERT_EQ(meta_element.computeEncoding(), web_frame->document().encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 299 | |
| 300 | // Make sure no more additional META tags which have charset declaration. |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 301 | meta_elements = head_element.getElementsByHTMLTagName("meta"); |
| 302 | for (WebElement element = meta_elements.firstItem(); !element.isNull(); |
| 303 | element = meta_elements.nextItem()) { |
| 304 | if (element == meta_element) |
| 305 | continue; |
| 306 | ASSERT_TRUE(element.to<WebMetaElement>().computeEncoding().isEmpty()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 307 | } |
| 308 | } |
| 309 | |
| 310 | void SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer( |
| 311 | const GURL& file_url) { |
| 312 | // Make sure there are multiple META charset declarations in original |
| 313 | // document. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 314 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 315 | ASSERT_TRUE(web_frame != NULL); |
| 316 | WebDocument doc = web_frame->document(); |
| 317 | ASSERT_TRUE(doc.isHTMLDocument()); |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 318 | WebElement head_element = doc.head(); |
| 319 | ASSERT_TRUE(!head_element.isNull()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 320 | // Go through all children of HEAD element. |
| 321 | int charset_declaration_count = 0; |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 322 | WebElementCollection meta_elements = head_element. |
| 323 | getElementsByHTMLTagName("meta"); |
| 324 | for (WebElement element = meta_elements.firstItem(); !element.isNull(); |
| 325 | element = meta_elements.nextItem()) { |
| 326 | if (!element.to<WebMetaElement>().computeEncoding().isEmpty()) |
| 327 | ++charset_declaration_count; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 328 | } |
| 329 | // The original doc has more than META tags which have charset declaration. |
| 330 | ASSERT_TRUE(charset_declaration_count > 1); |
| 331 | |
| 332 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 333 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 334 | |
| 335 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 336 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 337 | LoadContents(serialized_contents_, file_url, |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 338 | web_frame->document().encoding()); |
| 339 | // Make sure only first child of HEAD element is META which has charset |
| 340 | // declaration in serialized contents. |
| 341 | web_frame = GetMainFrame(); |
| 342 | ASSERT_TRUE(web_frame != NULL); |
| 343 | doc = web_frame->document(); |
| 344 | ASSERT_TRUE(doc.isHTMLDocument()); |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 345 | head_element = doc.head(); |
| 346 | ASSERT_TRUE(!head_element.isNull()); |
| 347 | ASSERT_TRUE(!head_element.firstChild().isNull()); |
| 348 | ASSERT_TRUE(head_element.firstChild().isElementNode()); |
| 349 | WebMetaElement meta_element = head_element.firstChild(). |
| 350 | to<WebMetaElement>(); |
| 351 | ASSERT_EQ(meta_element.computeEncoding(), web_frame->document().encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 352 | |
| 353 | // Make sure no more additional META tags which have charset declaration. |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 354 | meta_elements = head_element.getElementsByHTMLTagName("meta"); |
| 355 | for (WebElement element = meta_elements.firstItem(); !element.isNull(); |
| 356 | element = meta_elements.nextItem()) { |
| 357 | if (element == meta_element) |
| 358 | continue; |
| 359 | ASSERT_TRUE(element.to<WebMetaElement>().computeEncoding().isEmpty()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 360 | } |
| 361 | } |
| 362 | |
| 363 | void SerializeHTMLDOMWithEntitiesInTextOnRenderer() { |
| 364 | base::FilePath page_file_path = GetTestFilePath( |
| 365 | "dom_serializer", "dom_serializer/htmlentities_in_text.htm"); |
| 366 | // Get file URL. The URL is dummy URL to identify the following loading |
| 367 | // actions. The test content is in constant:original_contents. |
| 368 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 369 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 370 | // Test contents. |
| 371 | static const char* const original_contents = |
| 372 | "<html><body>&<>\"\'</body></html>"; |
| 373 | // Load the test contents. |
| 374 | LoadContents(original_contents, file_url, WebString()); |
| 375 | |
| 376 | // Get BODY's text content in DOM. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 377 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 378 | ASSERT_TRUE(web_frame != NULL); |
| 379 | WebDocument doc = web_frame->document(); |
| 380 | ASSERT_TRUE(doc.isHTMLDocument()); |
| 381 | WebElement body_ele = doc.body(); |
| 382 | ASSERT_TRUE(!body_ele.isNull()); |
| 383 | WebNode text_node = body_ele.firstChild(); |
| 384 | ASSERT_TRUE(text_node.isTextNode()); |
esprehn | 01ebca21a | 2015-09-17 09:32:56 | [diff] [blame] | 385 | ASSERT_TRUE(std::string(text_node.nodeValue().utf8()) == "&<>\"\'"); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 386 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 387 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 388 | // Compare the serialized contents with original contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 389 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 390 | // Compare the serialized contents with original contents to make sure |
| 391 | // they are same. |
| 392 | // Because we add MOTW when serializing DOM, so before comparison, we also |
| 393 | // need to add MOTW to original_contents. |
| 394 | std::string original_str = |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 395 | WebFrameSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 396 | original_str += original_contents; |
| 397 | // Since WebCore now inserts a new HEAD element if there is no HEAD element |
| 398 | // when creating BODY element. (Please see |
| 399 | // HTMLParser::bodyCreateErrorCheck.) We need to append the HEAD content and |
| 400 | // corresponding META content if we find WebCore-generated HEAD element. |
| 401 | if (!doc.head().isNull()) { |
| 402 | WebString encoding = web_frame->document().encoding(); |
| 403 | std::string htmlTag("<html>"); |
| 404 | std::string::size_type pos = original_str.find(htmlTag); |
| 405 | ASSERT_NE(std::string::npos, pos); |
| 406 | pos += htmlTag.length(); |
| 407 | std::string head_part("<head>"); |
| 408 | head_part += |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 409 | WebFrameSerializer::generateMetaCharsetDeclaration(encoding).utf8(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 410 | head_part += "</head>"; |
| 411 | original_str.insert(pos, head_part); |
| 412 | } |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 413 | ASSERT_EQ(original_str, serialized_contents_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 414 | } |
| 415 | |
| 416 | void SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer() { |
| 417 | base::FilePath page_file_path = GetTestFilePath( |
| 418 | "dom_serializer", "dom_serializer/htmlentities_in_attribute_value.htm"); |
| 419 | // Get file URL. The URL is dummy URL to identify the following loading |
| 420 | // actions. The test content is in constant:original_contents. |
| 421 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 422 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 423 | // Test contents. |
| 424 | static const char* const original_contents = |
tkent | 99b6511 | 2015-08-17 03:05:07 | [diff] [blame] | 425 | "<html><body title=\"&<>"'\"></body></html>"; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 426 | // Load the test contents. |
| 427 | LoadContents(original_contents, file_url, WebString()); |
| 428 | // Get value of BODY's title attribute in DOM. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 429 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 430 | ASSERT_TRUE(web_frame != NULL); |
| 431 | WebDocument doc = web_frame->document(); |
| 432 | ASSERT_TRUE(doc.isHTMLDocument()); |
| 433 | WebElement body_ele = doc.body(); |
| 434 | ASSERT_TRUE(!body_ele.isNull()); |
| 435 | WebString value = body_ele.getAttribute("title"); |
| 436 | ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'"); |
| 437 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 438 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 439 | // Compare the serialized contents with original contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 440 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 441 | // Compare the serialized contents with original contents to make sure |
| 442 | // they are same. |
| 443 | std::string original_str = |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 444 | WebFrameSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 445 | original_str += original_contents; |
| 446 | if (!doc.isNull()) { |
| 447 | WebString encoding = web_frame->document().encoding(); |
| 448 | std::string htmlTag("<html>"); |
| 449 | std::string::size_type pos = original_str.find(htmlTag); |
| 450 | ASSERT_NE(std::string::npos, pos); |
| 451 | pos += htmlTag.length(); |
| 452 | std::string head_part("<head>"); |
| 453 | head_part += |
lukasza | 902fcc5 | 2015-12-31 04:45:29 | [diff] [blame] | 454 | WebFrameSerializer::generateMetaCharsetDeclaration(encoding).utf8(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 455 | head_part += "</head>"; |
| 456 | original_str.insert(pos, head_part); |
| 457 | } |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 458 | ASSERT_EQ(original_str, serialized_contents_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 459 | } |
| 460 | |
| 461 | void SerializeHTMLDOMWithNonStandardEntitiesOnRenderer(const GURL& file_url) { |
| 462 | // Get value of BODY's title attribute in DOM. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 463 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 464 | WebDocument doc = web_frame->document(); |
| 465 | ASSERT_TRUE(doc.isHTMLDocument()); |
| 466 | WebElement body_element = doc.body(); |
| 467 | // Unescaped string for "%⊅¹'". |
| 468 | static const wchar_t parsed_value[] = { |
| 469 | '%', 0x2285, 0x00b9, '\'', 0 |
| 470 | }; |
| 471 | WebString value = body_element.getAttribute("title"); |
dglazkov | 3c1f64f | 2015-05-15 17:50:52 | [diff] [blame] | 472 | WebString content = doc.contentAsTextForTesting(); |
[email protected] | 3295612 | 2013-12-25 07:29:24 | [diff] [blame] | 473 | ASSERT_TRUE(base::UTF16ToWide(value) == parsed_value); |
dglazkov | 3c1f64f | 2015-05-15 17:50:52 | [diff] [blame] | 474 | ASSERT_TRUE(base::UTF16ToWide(content) == parsed_value); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 475 | |
| 476 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 477 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 478 | // Check the serialized string. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 479 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 480 | // Confirm that the serialized string has no non-standard HTML entities. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 481 | ASSERT_EQ(std::string::npos, serialized_contents_.find("%")); |
| 482 | ASSERT_EQ(std::string::npos, serialized_contents_.find("⊅")); |
| 483 | ASSERT_EQ(std::string::npos, serialized_contents_.find("¹")); |
| 484 | ASSERT_EQ(std::string::npos, serialized_contents_.find("'")); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 485 | } |
| 486 | |
| 487 | void SerializeHTMLDOMWithBaseTagOnRenderer(const GURL& file_url, |
| 488 | const GURL& path_dir_url) { |
| 489 | // There are total 2 available base tags in this test file. |
| 490 | const int kTotalBaseTagCountInTestFile = 2; |
| 491 | |
| 492 | // Since for this test, we assume there is no savable sub-resource links for |
| 493 | // this test file, also all links are relative URLs in this test file, so we |
| 494 | // need to check those relative URLs and make sure document has BASE tag. |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 495 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 496 | ASSERT_TRUE(web_frame != NULL); |
| 497 | WebDocument doc = web_frame->document(); |
| 498 | ASSERT_TRUE(doc.isHTMLDocument()); |
| 499 | // Go through all descent nodes. |
[email protected] | c9393271 | 2014-02-07 18:49:02 | [diff] [blame] | 500 | WebElementCollection all = doc.all(); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 501 | int original_base_tag_count = 0; |
[email protected] | c9393271 | 2014-02-07 18:49:02 | [diff] [blame] | 502 | for (WebElement element = all.firstItem(); !element.isNull(); |
| 503 | element = all.nextItem()) { |
[email protected] | 498269f | 2014-06-24 13:40:50 | [diff] [blame] | 504 | if (element.hasHTMLTagName("base")) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 505 | original_base_tag_count++; |
| 506 | } else { |
| 507 | // Get link. |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 508 | WebString value = GetSubResourceLinkFromElement(element); |
[email protected] | 498269f | 2014-06-24 13:40:50 | [diff] [blame] | 509 | if (value.isNull() && element.hasHTMLTagName("a")) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 510 | value = element.getAttribute("href"); |
| 511 | if (value.isEmpty()) |
| 512 | value = WebString(); |
| 513 | } |
| 514 | // Each link is relative link. |
| 515 | if (!value.isNull()) { |
| 516 | GURL link(value.utf8()); |
| 517 | ASSERT_TRUE(link.scheme().empty()); |
| 518 | } |
| 519 | } |
| 520 | } |
| 521 | ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile); |
| 522 | // Make sure in original document, the base URL is not equal with the |
| 523 | // |path_dir_url|. |
| 524 | GURL original_base_url(doc.baseURL()); |
| 525 | ASSERT_NE(original_base_url, path_dir_url); |
| 526 | |
| 527 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 528 | SerializeDomForURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 529 | |
| 530 | // Load the serialized contents. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 531 | ASSERT_TRUE(serialization_reported_end_of_data_); |
| 532 | LoadContents(serialized_contents_, file_url, |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 533 | web_frame->document().encoding()); |
| 534 | |
| 535 | // Make sure all links are absolute URLs and doc there are some number of |
| 536 | // BASE tags in serialized HTML data. Each of those BASE tags have same base |
| 537 | // URL which is as same as URL of current test file. |
| 538 | web_frame = GetMainFrame(); |
| 539 | ASSERT_TRUE(web_frame != NULL); |
| 540 | doc = web_frame->document(); |
| 541 | ASSERT_TRUE(doc.isHTMLDocument()); |
| 542 | // Go through all descent nodes. |
| 543 | all = doc.all(); |
| 544 | int new_base_tag_count = 0; |
| 545 | for (WebNode node = all.firstItem(); !node.isNull(); |
| 546 | node = all.nextItem()) { |
| 547 | if (!node.isElementNode()) |
| 548 | continue; |
| 549 | WebElement element = node.to<WebElement>(); |
[email protected] | 498269f | 2014-06-24 13:40:50 | [diff] [blame] | 550 | if (element.hasHTMLTagName("base")) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 551 | new_base_tag_count++; |
| 552 | } else { |
| 553 | // Get link. |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 554 | WebString value = GetSubResourceLinkFromElement(element); |
[email protected] | 498269f | 2014-06-24 13:40:50 | [diff] [blame] | 555 | if (value.isNull() && element.hasHTMLTagName("a")) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 556 | value = element.getAttribute("href"); |
| 557 | if (value.isEmpty()) |
| 558 | value = WebString(); |
| 559 | } |
| 560 | // Each link is absolute link. |
| 561 | if (!value.isNull()) { |
| 562 | GURL link(std::string(value.utf8())); |
| 563 | ASSERT_FALSE(link.scheme().empty()); |
| 564 | } |
| 565 | } |
| 566 | } |
tkent | 99b6511 | 2015-08-17 03:05:07 | [diff] [blame] | 567 | // We have one more added BASE tag which is generated by JavaScript. |
| 568 | ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 569 | // Make sure in new document, the base URL is equal with the |path_dir_url|. |
| 570 | GURL new_base_url(doc.baseURL()); |
| 571 | ASSERT_EQ(new_base_url, path_dir_url); |
| 572 | } |
| 573 | |
| 574 | void SerializeHTMLDOMWithEmptyHeadOnRenderer() { |
| 575 | base::FilePath page_file_path = GetTestFilePath( |
| 576 | "dom_serializer", "empty_head.htm"); |
| 577 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 578 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 579 | |
| 580 | // Load the test html content. |
| 581 | static const char* const empty_head_contents = |
| 582 | "<html><head></head><body>hello world</body></html>"; |
| 583 | LoadContents(empty_head_contents, file_url, WebString()); |
| 584 | |
| 585 | // Make sure the head tag is empty. |
| 586 | WebFrame* web_frame = GetMainFrame(); |
| 587 | ASSERT_TRUE(web_frame != NULL); |
| 588 | WebDocument doc = web_frame->document(); |
| 589 | ASSERT_TRUE(doc.isHTMLDocument()); |
| 590 | WebElement head_element = doc.head(); |
| 591 | ASSERT_TRUE(!head_element.isNull()); |
| 592 | ASSERT_TRUE(!head_element.hasChildNodes()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 593 | |
| 594 | // Do serialization. |
lukasza | d083d29 | 2015-09-30 00:42:44 | [diff] [blame] | 595 | SerializeDomForURL(file_url); |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 596 | ASSERT_TRUE(serialization_reported_end_of_data_); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 597 | |
| 598 | // Reload serialized contents and make sure there is only one META tag. |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 599 | LoadContents(serialized_contents_, file_url, |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 600 | web_frame->document().encoding()); |
| 601 | web_frame = GetMainFrame(); |
| 602 | ASSERT_TRUE(web_frame != NULL); |
| 603 | doc = web_frame->document(); |
| 604 | ASSERT_TRUE(doc.isHTMLDocument()); |
| 605 | head_element = doc.head(); |
| 606 | ASSERT_TRUE(!head_element.isNull()); |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 607 | ASSERT_TRUE(!head_element.firstChild().isNull()); |
| 608 | ASSERT_TRUE(head_element.firstChild().isElementNode()); |
| 609 | ASSERT_TRUE(head_element.firstChild().nextSibling().isNull()); |
| 610 | WebMetaElement meta_element = head_element.firstChild(). |
| 611 | to<WebMetaElement>(); |
| 612 | ASSERT_EQ(meta_element.computeEncoding(), web_frame->document().encoding()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 613 | |
| 614 | // Check the body's first node is text node and its contents are |
| 615 | // "hello world" |
| 616 | WebElement body_element = doc.body(); |
| 617 | ASSERT_TRUE(!body_element.isNull()); |
| 618 | WebNode text_node = body_element.firstChild(); |
| 619 | ASSERT_TRUE(text_node.isTextNode()); |
esprehn | 467dcd9 | 2015-12-03 02:06:09 | [diff] [blame] | 620 | ASSERT_EQ("hello world", text_node.nodeValue()); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 621 | } |
| 622 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 623 | void SubResourceForElementsInNonHTMLNamespaceOnRenderer( |
| 624 | const GURL& file_url) { |
esprehn | 96177900 | 2015-11-16 04:35:13 | [diff] [blame] | 625 | WebFrame* web_frame = FindSubFrameByURL(file_url); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 626 | ASSERT_TRUE(web_frame != NULL); |
| 627 | WebDocument doc = web_frame->document(); |
| 628 | WebNode lastNodeInBody = doc.body().lastChild(); |
esprehn | 4121229 | 2015-09-17 16:48:03 | [diff] [blame] | 629 | ASSERT_TRUE(lastNodeInBody.isElementNode()); |
[email protected] | 12a936d | 2013-05-15 04:55:49 | [diff] [blame] | 630 | WebString uri = GetSubResourceLinkFromElement( |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 631 | lastNodeInBody.to<WebElement>()); |
| 632 | EXPECT_TRUE(uri.isNull()); |
| 633 | } |
| 634 | |
| 635 | private: |
avi | 1023d01 | 2015-12-25 02:39:14 | [diff] [blame] | 636 | int32_t render_view_routing_id_; |
lukasza | 87f4e2ab | 2015-11-04 16:03:03 | [diff] [blame] | 637 | std::string serialized_contents_; |
| 638 | bool serialization_reported_end_of_data_; |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 639 | }; |
| 640 | |
| 641 | // If original contents have document type, the serialized contents also have |
| 642 | // document type. |
tsergeant | 97b442d | 2016-03-10 05:46:04 | [diff] [blame] | 643 | // Disabled on OSX by ellyjones@ on 2015-05-18, see https://crbug.com/488495, |
| 644 | // on all platforms by tsergeant@ on 2016-03-10, see https://crbug.com/593575 |
ellyjones | 15a15d2d | 2015-05-18 21:26:37 | [diff] [blame] | 645 | |
| 646 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
tsergeant | 97b442d | 2016-03-10 05:46:04 | [diff] [blame] | 647 | DISABLED_SerializeHTMLDOMWithDocType) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 648 | base::FilePath page_file_path = |
| 649 | GetTestFilePath("dom_serializer", "youtube_1.htm"); |
| 650 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 651 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 652 | // Load the test file. |
| 653 | NavigateToURL(shell(), file_url); |
| 654 | |
| 655 | PostTaskToInProcessRendererAndWait( |
| 656 | base::Bind(&DomSerializerTests::SerializeHTMLDOMWithDocTypeOnRenderer, |
| 657 | base::Unretained(this), file_url)); |
| 658 | } |
| 659 | |
| 660 | // If original contents do not have document type, the serialized contents |
| 661 | // also do not have document type. |
| 662 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) { |
| 663 | base::FilePath page_file_path = |
| 664 | GetTestFilePath("dom_serializer", "youtube_2.htm"); |
| 665 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 666 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 667 | // Load the test file. |
| 668 | NavigateToURL(shell(), file_url); |
| 669 | |
| 670 | PostTaskToInProcessRendererAndWait( |
| 671 | base::Bind( |
| 672 | &DomSerializerTests::SerializeHTMLDOMWithoutDocTypeOnRenderer, |
| 673 | base::Unretained(this), file_url)); |
| 674 | } |
| 675 | |
| 676 | // Serialize XML document which has all 5 built-in entities. After |
| 677 | // finishing serialization, the serialized contents should be same |
| 678 | // with original XML document. |
tkent | 99b6511 | 2015-08-17 03:05:07 | [diff] [blame] | 679 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
lukasza | a5bb017 | 2016-01-12 19:53:15 | [diff] [blame] | 680 | SerializeXMLDocWithBuiltInEntities) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 681 | base::FilePath page_file_path = |
| 682 | GetTestFilePath("dom_serializer", "note.html"); |
| 683 | base::FilePath xml_file_path = GetTestFilePath("dom_serializer", "note.xml"); |
| 684 | // Read original contents for later comparison. |
| 685 | std::string original_contents; |
[email protected] | 82f84b9 | 2013-08-30 18:23:50 | [diff] [blame] | 686 | ASSERT_TRUE(base::ReadFileToString(xml_file_path, &original_contents)); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 687 | // Get file URL. |
| 688 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 689 | GURL xml_file_url = net::FilePathToFileURL(xml_file_path); |
| 690 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 691 | // Load the test file. |
| 692 | NavigateToURL(shell(), file_url); |
| 693 | |
| 694 | PostTaskToInProcessRendererAndWait( |
| 695 | base::Bind( |
| 696 | &DomSerializerTests::SerializeXMLDocWithBuiltInEntitiesOnRenderer, |
| 697 | base::Unretained(this), xml_file_url, original_contents)); |
| 698 | } |
| 699 | |
| 700 | // When serializing DOM, we add MOTW declaration before html tag. |
| 701 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) { |
| 702 | base::FilePath page_file_path = |
| 703 | GetTestFilePath("dom_serializer", "youtube_2.htm"); |
| 704 | // Read original contents for later comparison . |
| 705 | std::string original_contents; |
[email protected] | 82f84b9 | 2013-08-30 18:23:50 | [diff] [blame] | 706 | ASSERT_TRUE(base::ReadFileToString(page_file_path, &original_contents)); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 707 | // Get file URL. |
| 708 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 709 | ASSERT_TRUE(file_url.SchemeIsFile()); |
[email protected] | 53625ac | 2013-04-23 04:28:11 | [diff] [blame] | 710 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 711 | // Load the test file. |
| 712 | NavigateToURL(shell(), file_url); |
| 713 | |
| 714 | PostTaskToInProcessRendererAndWait( |
| 715 | base::Bind( |
| 716 | &DomSerializerTests::SerializeHTMLDOMWithAddingMOTWOnRenderer, |
[email protected] | 53625ac | 2013-04-23 04:28:11 | [diff] [blame] | 717 | base::Unretained(this), file_url, original_contents)); |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 718 | } |
| 719 | |
| 720 | // When serializing DOM, we will add the META which have correct charset |
| 721 | // declaration as first child of HEAD element for resolving WebKit bug: |
| 722 | // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document |
| 723 | // does not have META charset declaration. |
tsergeant | 97b442d | 2016-03-10 05:46:04 | [diff] [blame] | 724 | // Disabled on OSX by battre@ on 2015-05-21, see https://crbug.com/488495, |
| 725 | // on all platforms by tsergeant@ on 2016-03-10, see https://crbug.com/593575 |
| 726 | IN_PROC_BROWSER_TEST_F( |
| 727 | DomSerializerTests, |
| 728 | DISABLED_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 729 | base::FilePath page_file_path = |
| 730 | GetTestFilePath("dom_serializer", "youtube_1.htm"); |
| 731 | // Get file URL. |
| 732 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 733 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 734 | // Load the test file. |
| 735 | NavigateToURL(shell(), file_url); |
| 736 | |
| 737 | PostTaskToInProcessRendererAndWait( |
| 738 | base::Bind( |
| 739 | &DomSerializerTests:: |
| 740 | SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer, |
| 741 | base::Unretained(this), file_url)); |
| 742 | } |
| 743 | |
| 744 | // When serializing DOM, if the original document has multiple META charset |
| 745 | // declaration, we will add the META which have correct charset declaration |
| 746 | // as first child of HEAD element and remove all original META charset |
| 747 | // declarations. |
| 748 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
| 749 | SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) { |
| 750 | base::FilePath page_file_path = |
| 751 | GetTestFilePath("dom_serializer", "youtube_2.htm"); |
| 752 | // Get file URL. |
| 753 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 754 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 755 | // Load the test file. |
| 756 | NavigateToURL(shell(), file_url); |
| 757 | |
| 758 | PostTaskToInProcessRendererAndWait( |
| 759 | base::Bind( |
| 760 | &DomSerializerTests:: |
| 761 | SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer, |
| 762 | base::Unretained(this), file_url)); |
| 763 | } |
| 764 | |
| 765 | // Test situation of html entities in text when serializing HTML DOM. |
| 766 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) { |
| 767 | // Need to spin up the renderer and also navigate to a file url so that the |
| 768 | // renderer code doesn't attempt a fork when it sees a load to file scheme |
| 769 | // from non-file scheme. |
| 770 | NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")); |
| 771 | |
| 772 | PostTaskToInProcessRendererAndWait( |
| 773 | base::Bind( |
| 774 | &DomSerializerTests::SerializeHTMLDOMWithEntitiesInTextOnRenderer, |
| 775 | base::Unretained(this))); |
| 776 | } |
| 777 | |
| 778 | // Test situation of html entities in attribute value when serializing |
| 779 | // HTML DOM. |
| 780 | // This test started to fail at WebKit r65388. See http://crbug.com/52279. |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 781 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
lukasza | a5bb017 | 2016-01-12 19:53:15 | [diff] [blame] | 782 | SerializeHTMLDOMWithEntitiesInAttributeValue) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 783 | // Need to spin up the renderer and also navigate to a file url so that the |
| 784 | // renderer code doesn't attempt a fork when it sees a load to file scheme |
| 785 | // from non-file scheme. |
| 786 | NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")); |
| 787 | |
| 788 | PostTaskToInProcessRendererAndWait( |
| 789 | base::Bind( |
| 790 | &DomSerializerTests:: |
| 791 | SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer, |
| 792 | base::Unretained(this))); |
| 793 | } |
| 794 | |
| 795 | // Test situation of non-standard HTML entities when serializing HTML DOM. |
| 796 | // This test started to fail at WebKit r65351. See http://crbug.com/52279. |
| 797 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
| 798 | SerializeHTMLDOMWithNonStandardEntities) { |
| 799 | // Make a test file URL and load it. |
| 800 | base::FilePath page_file_path = GetTestFilePath( |
| 801 | "dom_serializer", "nonstandard_htmlentities.htm"); |
| 802 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 803 | NavigateToURL(shell(), file_url); |
| 804 | |
| 805 | PostTaskToInProcessRendererAndWait( |
| 806 | base::Bind( |
| 807 | &DomSerializerTests:: |
| 808 | SerializeHTMLDOMWithNonStandardEntitiesOnRenderer, |
| 809 | base::Unretained(this), file_url)); |
| 810 | } |
| 811 | |
| 812 | // Test situation of BASE tag in original document when serializing HTML DOM. |
| 813 | // When serializing, we should comment the BASE tag, append a new BASE tag. |
| 814 | // rewrite all the savable URLs to relative local path, and change other URLs |
| 815 | // to absolute URLs. |
tkent | 99b6511 | 2015-08-17 03:05:07 | [diff] [blame] | 816 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
lukasza | a5bb017 | 2016-01-12 19:53:15 | [diff] [blame] | 817 | SerializeHTMLDOMWithBaseTag) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 818 | base::FilePath page_file_path = GetTestFilePath( |
| 819 | "dom_serializer", "html_doc_has_base_tag.htm"); |
| 820 | |
| 821 | // Get page dir URL which is base URL of this file. |
| 822 | base::FilePath dir_name = page_file_path.DirName(); |
| 823 | dir_name = dir_name.Append( |
| 824 | base::FilePath::StringType(base::FilePath::kSeparators[0], 1)); |
| 825 | GURL path_dir_url = net::FilePathToFileURL(dir_name); |
| 826 | |
| 827 | // Get file URL. |
| 828 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 829 | ASSERT_TRUE(file_url.SchemeIsFile()); |
| 830 | // Load the test file. |
| 831 | NavigateToURL(shell(), file_url); |
| 832 | |
| 833 | PostTaskToInProcessRendererAndWait( |
| 834 | base::Bind( |
| 835 | &DomSerializerTests::SerializeHTMLDOMWithBaseTagOnRenderer, |
| 836 | base::Unretained(this), file_url, path_dir_url)); |
| 837 | } |
| 838 | |
| 839 | // Serializing page which has an empty HEAD tag. |
[email protected] | 185d501 | 2014-06-10 22:01:34 | [diff] [blame] | 840 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) { |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 841 | // Need to spin up the renderer and also navigate to a file url so that the |
| 842 | // renderer code doesn't attempt a fork when it sees a load to file scheme |
| 843 | // from non-file scheme. |
| 844 | NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")); |
| 845 | |
| 846 | PostTaskToInProcessRendererAndWait( |
| 847 | base::Bind(&DomSerializerTests::SerializeHTMLDOMWithEmptyHeadOnRenderer, |
| 848 | base::Unretained(this))); |
| 849 | } |
| 850 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 851 | IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
| 852 | SubResourceForElementsInNonHTMLNamespace) { |
| 853 | base::FilePath page_file_path = GetTestFilePath( |
| 854 | "dom_serializer", "non_html_namespace.htm"); |
| 855 | GURL file_url = net::FilePathToFileURL(page_file_path); |
| 856 | NavigateToURL(shell(), file_url); |
| 857 | |
| 858 | PostTaskToInProcessRendererAndWait( |
| 859 | base::Bind( |
| 860 | &DomSerializerTests:: |
| 861 | SubResourceForElementsInNonHTMLNamespaceOnRenderer, |
| 862 | base::Unretained(this), file_url)); |
| 863 | } |
| 864 | |
[email protected] | c1978abe | 2013-04-23 03:08:12 | [diff] [blame] | 865 | } // namespace content |