blob: 0c1d9962e0b59f6285e9274fb7b16d533529570d [file] [log] [blame]
[email protected]ebc9b662014-01-30 03:37:331// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/ui/elide_url.h"
6
[email protected]0ca7e8a2014-04-16 01:54:217#include "base/logging.h"
[email protected]ebc9b662014-01-30 03:37:338#include "base/strings/string_split.h"
9#include "base/strings/utf_string_conversions.h"
10#include "net/base/escape.h"
11#include "net/base/net_util.h"
12#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
13#include "ui/gfx/text_elider.h"
14#include "ui/gfx/text_utils.h"
15#include "url/gurl.h"
16
17using base::UTF8ToUTF16;
18using gfx::ElideText;
19using gfx::GetStringWidthF;
20using gfx::kEllipsisUTF16;
21using gfx::kForwardSlash;
22
23namespace {
24
[email protected]9d3e32b32014-02-04 16:01:4725const base::char16 kDot = '.';
26
[email protected]ebc9b662014-01-30 03:37:3327// Build a path from the first |num_components| elements in |path_elements|.
28// Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
29base::string16 BuildPathFromComponents(
30 const base::string16& path_prefix,
31 const std::vector<base::string16>& path_elements,
32 const base::string16& filename,
33 size_t num_components) {
34 // Add the initial elements of the path.
35 base::string16 path = path_prefix;
36
37 // Build path from first |num_components| elements.
38 for (size_t j = 0; j < num_components; ++j)
39 path += path_elements[j] + kForwardSlash;
40
41 // Add |filename|, ellipsis if necessary.
42 if (num_components != (path_elements.size() - 1))
43 path += base::string16(kEllipsisUTF16) + kForwardSlash;
44 path += filename;
45
46 return path;
47}
48
49// Takes a prefix (Domain, or Domain+subdomain) and a collection of path
50// components and elides if possible. Returns a string containing the longest
51// possible elided path, or an empty string if elision is not possible.
52base::string16 ElideComponentizedPath(
53 const base::string16& url_path_prefix,
54 const std::vector<base::string16>& url_path_elements,
55 const base::string16& url_filename,
56 const base::string16& url_query,
57 const gfx::FontList& font_list,
58 float available_pixel_width) {
59 const size_t url_path_number_of_elements = url_path_elements.size();
60
61 CHECK(url_path_number_of_elements);
62 for (size_t i = url_path_number_of_elements - 1; i > 0; --i) {
63 base::string16 elided_path = BuildPathFromComponents(url_path_prefix,
64 url_path_elements, url_filename, i);
65 if (available_pixel_width >= GetStringWidthF(elided_path, font_list))
66 return ElideText(elided_path + url_query, font_list,
[email protected]f3ce6212014-06-05 22:42:0867 available_pixel_width, gfx::ELIDE_TAIL);
[email protected]ebc9b662014-01-30 03:37:3368 }
69
70 return base::string16();
71}
72
[email protected]9d3e32b32014-02-04 16:01:4773// Splits the hostname in the |url| into sub-strings for the full hostname,
74// the domain (TLD+1), and the subdomain (everything leading the domain).
75void SplitHost(const GURL& url,
76 base::string16* url_host,
77 base::string16* url_domain,
78 base::string16* url_subdomain) {
79 // Get Host.
80 *url_host = UTF8ToUTF16(url.host());
81
82 // Get domain and registry information from the URL.
83 *url_domain = UTF8ToUTF16(
84 net::registry_controlled_domains::GetDomainAndRegistry(
85 url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES));
86 if (url_domain->empty())
87 *url_domain = *url_host;
88
89 // Add port if required.
90 if (!url.port().empty()) {
91 *url_host += UTF8ToUTF16(":" + url.port());
92 *url_domain += UTF8ToUTF16(":" + url.port());
93 }
94
95 // Get sub domain.
96 const size_t domain_start_index = url_host->find(*url_domain);
97 base::string16 kWwwPrefix = UTF8ToUTF16("www.");
98 if (domain_start_index != base::string16::npos)
99 *url_subdomain = url_host->substr(0, domain_start_index);
100 if ((*url_subdomain == kWwwPrefix || url_subdomain->empty() ||
101 url.SchemeIsFile())) {
102 url_subdomain->clear();
103 }
104}
105
[email protected]ebc9b662014-01-30 03:37:33106} // namespace
107
108// TODO(pkasting): http://crbug.com/77883 This whole function gets
109// kerning/ligatures/etc. issues potentially wrong by assuming that the width of
110// a rendered string is always the sum of the widths of its substrings. Also I
111// suspect it could be made simpler.
112base::string16 ElideUrl(const GURL& url,
113 const gfx::FontList& font_list,
114 float available_pixel_width,
115 const std::string& languages) {
116 // Get a formatted string and corresponding parsing of the url.
[email protected]b45334502014-04-30 19:44:05117 url::Parsed parsed;
[email protected]ebc9b662014-01-30 03:37:33118 const base::string16 url_string =
119 net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
120 net::UnescapeRule::SPACES, &parsed, NULL, NULL);
121 if (available_pixel_width <= 0)
122 return url_string;
123
124 // If non-standard, return plain eliding.
125 if (!url.IsStandard())
126 return ElideText(url_string, font_list, available_pixel_width,
[email protected]f3ce6212014-06-05 22:42:08127 gfx::ELIDE_TAIL);
[email protected]ebc9b662014-01-30 03:37:33128
129 // Now start eliding url_string to fit within available pixel width.
130 // Fist pass - check to see whether entire url_string fits.
131 const float pixel_width_url_string = GetStringWidthF(url_string, font_list);
132 if (available_pixel_width >= pixel_width_url_string)
133 return url_string;
134
135 // Get the path substring, including query and reference.
136 const size_t path_start_index = parsed.path.begin;
137 const size_t path_len = parsed.path.len;
138 base::string16 url_path_query_etc = url_string.substr(path_start_index);
139 base::string16 url_path = url_string.substr(path_start_index, path_len);
140
141 // Return general elided text if url minus the query fits.
142 const base::string16 url_minus_query =
143 url_string.substr(0, path_start_index + path_len);
144 if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list))
145 return ElideText(url_string, font_list, available_pixel_width,
[email protected]f3ce6212014-06-05 22:42:08146 gfx::ELIDE_TAIL);
[email protected]ebc9b662014-01-30 03:37:33147
[email protected]9d3e32b32014-02-04 16:01:47148 base::string16 url_host;
149 base::string16 url_domain;
[email protected]ebc9b662014-01-30 03:37:33150 base::string16 url_subdomain;
[email protected]9d3e32b32014-02-04 16:01:47151 SplitHost(url, &url_host, &url_domain, &url_subdomain);
[email protected]ebc9b662014-01-30 03:37:33152
153 // If this is a file type, the path is now defined as everything after ":".
154 // For example, "C:/aa/aa/bb", the path is "/aa/bb/cc". Interesting, the
155 // domain is now C: - this is a nice hack for eliding to work pleasantly.
156 if (url.SchemeIsFile()) {
157 // Split the path string using ":"
158 std::vector<base::string16> file_path_split;
159 base::SplitString(url_path, ':', &file_path_split);
160 if (file_path_split.size() > 1) { // File is of type "file:///C:/.."
161 url_host.clear();
162 url_domain.clear();
163 url_subdomain.clear();
164
165 const base::string16 kColon = UTF8ToUTF16(":");
166 url_host = url_domain = file_path_split.at(0).substr(1) + kColon;
167 url_path_query_etc = url_path = file_path_split.at(1);
168 }
169 }
170
171 // Second Pass - remove scheme - the rest fits.
172 const float pixel_width_url_host = GetStringWidthF(url_host, font_list);
173 const float pixel_width_url_path = GetStringWidthF(url_path_query_etc,
174 font_list);
175 if (available_pixel_width >=
176 pixel_width_url_host + pixel_width_url_path)
177 return url_host + url_path_query_etc;
178
179 // Third Pass: Subdomain, domain and entire path fits.
180 const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list);
181 const float pixel_width_url_subdomain =
182 GetStringWidthF(url_subdomain, font_list);
183 if (available_pixel_width >=
184 pixel_width_url_subdomain + pixel_width_url_domain +
185 pixel_width_url_path)
186 return url_subdomain + url_domain + url_path_query_etc;
187
188 // Query element.
189 base::string16 url_query;
190 const float kPixelWidthDotsTrailer = GetStringWidthF(
191 base::string16(kEllipsisUTF16), font_list);
192 if (parsed.query.is_nonempty()) {
193 url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin);
194 if (available_pixel_width >=
195 (pixel_width_url_subdomain + pixel_width_url_domain +
196 pixel_width_url_path - GetStringWidthF(url_query, font_list))) {
197 return ElideText(url_subdomain + url_domain + url_path_query_etc,
[email protected]f3ce6212014-06-05 22:42:08198 font_list, available_pixel_width, gfx::ELIDE_TAIL);
[email protected]ebc9b662014-01-30 03:37:33199 }
200 }
201
202 // Parse url_path using '/'.
203 std::vector<base::string16> url_path_elements;
204 base::SplitString(url_path, kForwardSlash, &url_path_elements);
205
206 // Get filename - note that for a path ending with /
207 // such as www.google.com/intl/ads/, the file name is ads/.
208 size_t url_path_number_of_elements = url_path_elements.size();
209 DCHECK(url_path_number_of_elements != 0);
210 base::string16 url_filename;
211 if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) {
212 url_filename = *(url_path_elements.end() - 1);
213 } else if (url_path_number_of_elements > 1) { // Path ends with a '/'.
214 url_filename = url_path_elements.at(url_path_number_of_elements - 2) +
215 kForwardSlash;
216 url_path_number_of_elements--;
217 }
218 DCHECK(url_path_number_of_elements != 0);
219
220 const size_t kMaxNumberOfUrlPathElementsAllowed = 1024;
221 if (url_path_number_of_elements <= 1 ||
222 url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) {
223 // No path to elide, or too long of a path (could overflow in loop below)
224 // Just elide this as a text string.
225 return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list,
[email protected]f3ce6212014-06-05 22:42:08226 available_pixel_width, gfx::ELIDE_TAIL);
[email protected]ebc9b662014-01-30 03:37:33227 }
228
229 // Start eliding the path and replacing elements by ".../".
230 const base::string16 kEllipsisAndSlash =
231 base::string16(kEllipsisUTF16) + kForwardSlash;
232 const float pixel_width_ellipsis_slash =
233 GetStringWidthF(kEllipsisAndSlash, font_list);
234
235 // Check with both subdomain and domain.
236 base::string16 elided_path =
237 ElideComponentizedPath(url_subdomain + url_domain, url_path_elements,
238 url_filename, url_query, font_list,
239 available_pixel_width);
240 if (!elided_path.empty())
241 return elided_path;
242
243 // Check with only domain.
244 // If a subdomain is present, add an ellipsis before domain.
245 // This is added only if the subdomain pixel width is larger than
246 // the pixel width of kEllipsis. Otherwise, subdomain remains,
247 // which means that this case has been resolved earlier.
248 base::string16 url_elided_domain = url_subdomain + url_domain;
249 if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
250 if (!url_subdomain.empty())
251 url_elided_domain = kEllipsisAndSlash[0] + url_domain;
252 else
253 url_elided_domain = url_domain;
254
255 elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements,
256 url_filename, url_query, font_list,
257 available_pixel_width);
258
259 if (!elided_path.empty())
260 return elided_path;
261 }
262
263 // Return elided domain/.../filename anyway.
264 base::string16 final_elided_url_string(url_elided_domain);
265 const float url_elided_domain_width = GetStringWidthF(url_elided_domain,
266 font_list);
267
268 // A hack to prevent trailing ".../...".
269 if ((available_pixel_width - url_elided_domain_width) >
270 pixel_width_ellipsis_slash + kPixelWidthDotsTrailer +
271 GetStringWidthF(base::ASCIIToUTF16("UV"), font_list)) {
272 final_elided_url_string += BuildPathFromComponents(base::string16(),
273 url_path_elements, url_filename, 1);
274 } else {
275 final_elided_url_string += url_path;
276 }
277
278 return ElideText(final_elided_url_string, font_list, available_pixel_width,
[email protected]f3ce6212014-06-05 22:42:08279 gfx::ELIDE_TAIL);
[email protected]ebc9b662014-01-30 03:37:33280}
281
[email protected]9d3e32b32014-02-04 16:01:47282base::string16 ElideHost(const GURL& url,
283 const gfx::FontList& font_list,
284 float available_pixel_width) {
285 base::string16 url_host;
286 base::string16 url_domain;
287 base::string16 url_subdomain;
288 SplitHost(url, &url_host, &url_domain, &url_subdomain);
289
290 const float pixel_width_url_host = GetStringWidthF(url_host, font_list);
291 if (available_pixel_width >= pixel_width_url_host)
292 return url_host;
293
294 if (url_subdomain.empty())
295 return url_domain;
296
297 const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list);
298 float subdomain_width = available_pixel_width - pixel_width_url_domain;
299 if (subdomain_width <= 0)
300 return base::string16(kEllipsisUTF16) + kDot + url_domain;
301
[email protected]f3ce6212014-06-05 22:42:08302 const base::string16 elided_subdomain = ElideText(
303 url_subdomain, font_list, subdomain_width, gfx::ELIDE_HEAD);
[email protected]9d3e32b32014-02-04 16:01:47304 return elided_subdomain + url_domain;
305}