// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
| 4 | |
| 5 | #include "chrome/browser/ui/elide_url.h" |
| 6 | |
[email protected] | 0ca7e8a | 2014-04-16 01:54:21 | [diff] [blame] | 7 | #include "base/logging.h" |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 8 | #include "base/strings/string_split.h" |
| 9 | #include "base/strings/utf_string_conversions.h" |
| 10 | #include "net/base/escape.h" |
| 11 | #include "net/base/net_util.h" |
| 12 | #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| 13 | #include "ui/gfx/text_elider.h" |
| 14 | #include "ui/gfx/text_utils.h" |
| 15 | #include "url/gurl.h" |
| 16 | |
| 17 | using base::UTF8ToUTF16; |
| 18 | using gfx::ElideText; |
| 19 | using gfx::GetStringWidthF; |
| 20 | using gfx::kEllipsisUTF16; |
| 21 | using gfx::kForwardSlash; |
| 22 | |
| 23 | namespace { |
| 24 | |
[email protected] | 9d3e32b3 | 2014-02-04 16:01:47 | [diff] [blame] | 25 | const base::char16 kDot = '.'; |
| 26 | |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 27 | // Build a path from the first |num_components| elements in |path_elements|. |
| 28 | // Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate. |
| 29 | base::string16 BuildPathFromComponents( |
| 30 | const base::string16& path_prefix, |
| 31 | const std::vector<base::string16>& path_elements, |
| 32 | const base::string16& filename, |
| 33 | size_t num_components) { |
| 34 | // Add the initial elements of the path. |
| 35 | base::string16 path = path_prefix; |
| 36 | |
| 37 | // Build path from first |num_components| elements. |
| 38 | for (size_t j = 0; j < num_components; ++j) |
| 39 | path += path_elements[j] + kForwardSlash; |
| 40 | |
| 41 | // Add |filename|, ellipsis if necessary. |
| 42 | if (num_components != (path_elements.size() - 1)) |
| 43 | path += base::string16(kEllipsisUTF16) + kForwardSlash; |
| 44 | path += filename; |
| 45 | |
| 46 | return path; |
| 47 | } |
| 48 | |
| 49 | // Takes a prefix (Domain, or Domain+subdomain) and a collection of path |
| 50 | // components and elides if possible. Returns a string containing the longest |
| 51 | // possible elided path, or an empty string if elision is not possible. |
| 52 | base::string16 ElideComponentizedPath( |
| 53 | const base::string16& url_path_prefix, |
| 54 | const std::vector<base::string16>& url_path_elements, |
| 55 | const base::string16& url_filename, |
| 56 | const base::string16& url_query, |
| 57 | const gfx::FontList& font_list, |
| 58 | float available_pixel_width) { |
| 59 | const size_t url_path_number_of_elements = url_path_elements.size(); |
| 60 | |
| 61 | CHECK(url_path_number_of_elements); |
| 62 | for (size_t i = url_path_number_of_elements - 1; i > 0; --i) { |
| 63 | base::string16 elided_path = BuildPathFromComponents(url_path_prefix, |
| 64 | url_path_elements, url_filename, i); |
| 65 | if (available_pixel_width >= GetStringWidthF(elided_path, font_list)) |
| 66 | return ElideText(elided_path + url_query, font_list, |
[email protected] | f3ce621 | 2014-06-05 22:42:08 | [diff] [blame^] | 67 | available_pixel_width, gfx::ELIDE_TAIL); |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 68 | } |
| 69 | |
| 70 | return base::string16(); |
| 71 | } |
| 72 | |
[email protected] | 9d3e32b3 | 2014-02-04 16:01:47 | [diff] [blame] | 73 | // Splits the hostname in the |url| into sub-strings for the full hostname, |
| 74 | // the domain (TLD+1), and the subdomain (everything leading the domain). |
| 75 | void SplitHost(const GURL& url, |
| 76 | base::string16* url_host, |
| 77 | base::string16* url_domain, |
| 78 | base::string16* url_subdomain) { |
| 79 | // Get Host. |
| 80 | *url_host = UTF8ToUTF16(url.host()); |
| 81 | |
| 82 | // Get domain and registry information from the URL. |
| 83 | *url_domain = UTF8ToUTF16( |
| 84 | net::registry_controlled_domains::GetDomainAndRegistry( |
| 85 | url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES)); |
| 86 | if (url_domain->empty()) |
| 87 | *url_domain = *url_host; |
| 88 | |
| 89 | // Add port if required. |
| 90 | if (!url.port().empty()) { |
| 91 | *url_host += UTF8ToUTF16(":" + url.port()); |
| 92 | *url_domain += UTF8ToUTF16(":" + url.port()); |
| 93 | } |
| 94 | |
| 95 | // Get sub domain. |
| 96 | const size_t domain_start_index = url_host->find(*url_domain); |
| 97 | base::string16 kWwwPrefix = UTF8ToUTF16("www."); |
| 98 | if (domain_start_index != base::string16::npos) |
| 99 | *url_subdomain = url_host->substr(0, domain_start_index); |
| 100 | if ((*url_subdomain == kWwwPrefix || url_subdomain->empty() || |
| 101 | url.SchemeIsFile())) { |
| 102 | url_subdomain->clear(); |
| 103 | } |
| 104 | } |
| 105 | |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 106 | } // namespace |
| 107 | |
| 108 | // TODO(pkasting): http://crbug.com/77883 This whole function gets |
| 109 | // kerning/ligatures/etc. issues potentially wrong by assuming that the width of |
| 110 | // a rendered string is always the sum of the widths of its substrings. Also I |
| 111 | // suspect it could be made simpler. |
| 112 | base::string16 ElideUrl(const GURL& url, |
| 113 | const gfx::FontList& font_list, |
| 114 | float available_pixel_width, |
| 115 | const std::string& languages) { |
| 116 | // Get a formatted string and corresponding parsing of the url. |
[email protected] | b4533450 | 2014-04-30 19:44:05 | [diff] [blame] | 117 | url::Parsed parsed; |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 118 | const base::string16 url_string = |
| 119 | net::FormatUrl(url, languages, net::kFormatUrlOmitAll, |
| 120 | net::UnescapeRule::SPACES, &parsed, NULL, NULL); |
| 121 | if (available_pixel_width <= 0) |
| 122 | return url_string; |
| 123 | |
| 124 | // If non-standard, return plain eliding. |
| 125 | if (!url.IsStandard()) |
| 126 | return ElideText(url_string, font_list, available_pixel_width, |
[email protected] | f3ce621 | 2014-06-05 22:42:08 | [diff] [blame^] | 127 | gfx::ELIDE_TAIL); |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 128 | |
| 129 | // Now start eliding url_string to fit within available pixel width. |
| 130 | // Fist pass - check to see whether entire url_string fits. |
| 131 | const float pixel_width_url_string = GetStringWidthF(url_string, font_list); |
| 132 | if (available_pixel_width >= pixel_width_url_string) |
| 133 | return url_string; |
| 134 | |
| 135 | // Get the path substring, including query and reference. |
| 136 | const size_t path_start_index = parsed.path.begin; |
| 137 | const size_t path_len = parsed.path.len; |
| 138 | base::string16 url_path_query_etc = url_string.substr(path_start_index); |
| 139 | base::string16 url_path = url_string.substr(path_start_index, path_len); |
| 140 | |
| 141 | // Return general elided text if url minus the query fits. |
| 142 | const base::string16 url_minus_query = |
| 143 | url_string.substr(0, path_start_index + path_len); |
| 144 | if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list)) |
| 145 | return ElideText(url_string, font_list, available_pixel_width, |
[email protected] | f3ce621 | 2014-06-05 22:42:08 | [diff] [blame^] | 146 | gfx::ELIDE_TAIL); |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 147 | |
[email protected] | 9d3e32b3 | 2014-02-04 16:01:47 | [diff] [blame] | 148 | base::string16 url_host; |
| 149 | base::string16 url_domain; |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 150 | base::string16 url_subdomain; |
[email protected] | 9d3e32b3 | 2014-02-04 16:01:47 | [diff] [blame] | 151 | SplitHost(url, &url_host, &url_domain, &url_subdomain); |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 152 | |
| 153 | // If this is a file type, the path is now defined as everything after ":". |
| 154 | // For example, "C:/aa/aa/bb", the path is "/aa/bb/cc". Interesting, the |
| 155 | // domain is now C: - this is a nice hack for eliding to work pleasantly. |
| 156 | if (url.SchemeIsFile()) { |
| 157 | // Split the path string using ":" |
| 158 | std::vector<base::string16> file_path_split; |
| 159 | base::SplitString(url_path, ':', &file_path_split); |
| 160 | if (file_path_split.size() > 1) { // File is of type "file:///C:/.." |
| 161 | url_host.clear(); |
| 162 | url_domain.clear(); |
| 163 | url_subdomain.clear(); |
| 164 | |
| 165 | const base::string16 kColon = UTF8ToUTF16(":"); |
| 166 | url_host = url_domain = file_path_split.at(0).substr(1) + kColon; |
| 167 | url_path_query_etc = url_path = file_path_split.at(1); |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | // Second Pass - remove scheme - the rest fits. |
| 172 | const float pixel_width_url_host = GetStringWidthF(url_host, font_list); |
| 173 | const float pixel_width_url_path = GetStringWidthF(url_path_query_etc, |
| 174 | font_list); |
| 175 | if (available_pixel_width >= |
| 176 | pixel_width_url_host + pixel_width_url_path) |
| 177 | return url_host + url_path_query_etc; |
| 178 | |
| 179 | // Third Pass: Subdomain, domain and entire path fits. |
| 180 | const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list); |
| 181 | const float pixel_width_url_subdomain = |
| 182 | GetStringWidthF(url_subdomain, font_list); |
| 183 | if (available_pixel_width >= |
| 184 | pixel_width_url_subdomain + pixel_width_url_domain + |
| 185 | pixel_width_url_path) |
| 186 | return url_subdomain + url_domain + url_path_query_etc; |
| 187 | |
| 188 | // Query element. |
| 189 | base::string16 url_query; |
| 190 | const float kPixelWidthDotsTrailer = GetStringWidthF( |
| 191 | base::string16(kEllipsisUTF16), font_list); |
| 192 | if (parsed.query.is_nonempty()) { |
| 193 | url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin); |
| 194 | if (available_pixel_width >= |
| 195 | (pixel_width_url_subdomain + pixel_width_url_domain + |
| 196 | pixel_width_url_path - GetStringWidthF(url_query, font_list))) { |
| 197 | return ElideText(url_subdomain + url_domain + url_path_query_etc, |
[email protected] | f3ce621 | 2014-06-05 22:42:08 | [diff] [blame^] | 198 | font_list, available_pixel_width, gfx::ELIDE_TAIL); |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 199 | } |
| 200 | } |
| 201 | |
| 202 | // Parse url_path using '/'. |
| 203 | std::vector<base::string16> url_path_elements; |
| 204 | base::SplitString(url_path, kForwardSlash, &url_path_elements); |
| 205 | |
| 206 | // Get filename - note that for a path ending with / |
| 207 | // such as www.google.com/intl/ads/, the file name is ads/. |
| 208 | size_t url_path_number_of_elements = url_path_elements.size(); |
| 209 | DCHECK(url_path_number_of_elements != 0); |
| 210 | base::string16 url_filename; |
| 211 | if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) { |
| 212 | url_filename = *(url_path_elements.end() - 1); |
| 213 | } else if (url_path_number_of_elements > 1) { // Path ends with a '/'. |
| 214 | url_filename = url_path_elements.at(url_path_number_of_elements - 2) + |
| 215 | kForwardSlash; |
| 216 | url_path_number_of_elements--; |
| 217 | } |
| 218 | DCHECK(url_path_number_of_elements != 0); |
| 219 | |
| 220 | const size_t kMaxNumberOfUrlPathElementsAllowed = 1024; |
| 221 | if (url_path_number_of_elements <= 1 || |
| 222 | url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) { |
| 223 | // No path to elide, or too long of a path (could overflow in loop below) |
| 224 | // Just elide this as a text string. |
| 225 | return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list, |
[email protected] | f3ce621 | 2014-06-05 22:42:08 | [diff] [blame^] | 226 | available_pixel_width, gfx::ELIDE_TAIL); |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 227 | } |
| 228 | |
| 229 | // Start eliding the path and replacing elements by ".../". |
| 230 | const base::string16 kEllipsisAndSlash = |
| 231 | base::string16(kEllipsisUTF16) + kForwardSlash; |
| 232 | const float pixel_width_ellipsis_slash = |
| 233 | GetStringWidthF(kEllipsisAndSlash, font_list); |
| 234 | |
| 235 | // Check with both subdomain and domain. |
| 236 | base::string16 elided_path = |
| 237 | ElideComponentizedPath(url_subdomain + url_domain, url_path_elements, |
| 238 | url_filename, url_query, font_list, |
| 239 | available_pixel_width); |
| 240 | if (!elided_path.empty()) |
| 241 | return elided_path; |
| 242 | |
| 243 | // Check with only domain. |
| 244 | // If a subdomain is present, add an ellipsis before domain. |
| 245 | // This is added only if the subdomain pixel width is larger than |
| 246 | // the pixel width of kEllipsis. Otherwise, subdomain remains, |
| 247 | // which means that this case has been resolved earlier. |
| 248 | base::string16 url_elided_domain = url_subdomain + url_domain; |
| 249 | if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) { |
| 250 | if (!url_subdomain.empty()) |
| 251 | url_elided_domain = kEllipsisAndSlash[0] + url_domain; |
| 252 | else |
| 253 | url_elided_domain = url_domain; |
| 254 | |
| 255 | elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements, |
| 256 | url_filename, url_query, font_list, |
| 257 | available_pixel_width); |
| 258 | |
| 259 | if (!elided_path.empty()) |
| 260 | return elided_path; |
| 261 | } |
| 262 | |
| 263 | // Return elided domain/.../filename anyway. |
| 264 | base::string16 final_elided_url_string(url_elided_domain); |
| 265 | const float url_elided_domain_width = GetStringWidthF(url_elided_domain, |
| 266 | font_list); |
| 267 | |
| 268 | // A hack to prevent trailing ".../...". |
| 269 | if ((available_pixel_width - url_elided_domain_width) > |
| 270 | pixel_width_ellipsis_slash + kPixelWidthDotsTrailer + |
| 271 | GetStringWidthF(base::ASCIIToUTF16("UV"), font_list)) { |
| 272 | final_elided_url_string += BuildPathFromComponents(base::string16(), |
| 273 | url_path_elements, url_filename, 1); |
| 274 | } else { |
| 275 | final_elided_url_string += url_path; |
| 276 | } |
| 277 | |
| 278 | return ElideText(final_elided_url_string, font_list, available_pixel_width, |
[email protected] | f3ce621 | 2014-06-05 22:42:08 | [diff] [blame^] | 279 | gfx::ELIDE_TAIL); |
[email protected] | ebc9b66 | 2014-01-30 03:37:33 | [diff] [blame] | 280 | } |
| 281 | |
[email protected] | 9d3e32b3 | 2014-02-04 16:01:47 | [diff] [blame] | 282 | base::string16 ElideHost(const GURL& url, |
| 283 | const gfx::FontList& font_list, |
| 284 | float available_pixel_width) { |
| 285 | base::string16 url_host; |
| 286 | base::string16 url_domain; |
| 287 | base::string16 url_subdomain; |
| 288 | SplitHost(url, &url_host, &url_domain, &url_subdomain); |
| 289 | |
| 290 | const float pixel_width_url_host = GetStringWidthF(url_host, font_list); |
| 291 | if (available_pixel_width >= pixel_width_url_host) |
| 292 | return url_host; |
| 293 | |
| 294 | if (url_subdomain.empty()) |
| 295 | return url_domain; |
| 296 | |
| 297 | const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list); |
| 298 | float subdomain_width = available_pixel_width - pixel_width_url_domain; |
| 299 | if (subdomain_width <= 0) |
| 300 | return base::string16(kEllipsisUTF16) + kDot + url_domain; |
| 301 | |
[email protected] | f3ce621 | 2014-06-05 22:42:08 | [diff] [blame^] | 302 | const base::string16 elided_subdomain = ElideText( |
| 303 | url_subdomain, font_list, subdomain_width, gfx::ELIDE_HEAD); |
[email protected] | 9d3e32b3 | 2014-02-04 16:01:47 | [diff] [blame] | 304 | return elided_subdomain + url_domain; |
| 305 | } |